1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* NOTE(review): this file is a mangled extract — original line numbers are
   fused into the text and several source lines are missing.  Verify each
   construct below against upstream gcc/config/i386/i386.c.  */
55 #ifndef CHECK_STACK_LIMIT
/* Sentinel: no stack limit checking by default.  NOTE(review): the matching
   #endif is missing from this extract.  */
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables. */
/* Maps QI/HI/SI/DI modes to slots 0-3 of the per-mode cost arrays.
   NOTE(review): the final arm of the conditional (the "other modes -> 4"
   default) is missing from this extract — confirm upstream.  */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* When tuning for size, "cost" is measured in bytes of encoding rather than
   cycles; N instructions are charged as N additions of 2 bytes each.  */
69 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy for the unused 32-bit/64-bit half of a
   stringop_algs pair: always fall back to a library call.  */
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size (-Os): entries are byte counts
   via COSTS_N_BYTES, not cycle counts.  NOTE(review): this positional
   initializer appears truncated in this extract (e.g. the "large insn",
   MOVE_RATIO and memory-latency fields, and the closing brace/semicolon are
   missing) — verify against upstream i386.c before editing.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: rep movsb/stosb is smallest, so use it for all
   sizes in both the 32-bit and 64-bit variants.  */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* Cost table for the original Intel 386.  NOTE(review): initializer appears
   truncated in this extract (missing fields and closing brace/semicolon) —
   verify against upstream i386.c.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* Stringops: rep movsb/stosb for every size; the 64-bit half is a dummy
   since the 386 has no 64-bit mode.  */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Cost table for the Intel 486.  NOTE(review): initializer is cut short in
   this extract (the second stringop entry and closing brace/semicolon are
   missing) — verify against upstream i386.c.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* Stringops: word-sized rep movsl/stosl for every size.  */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Cost table for the Intel Pentium.  NOTE(review): initializer is cut short
   in this extract (trailing stringop entries and closing brace/semicolon are
   missing) — verify against upstream i386.c.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then library call.  */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Cost table for the Intel PentiumPro / P6 family.  NOTE(review):
   initializer is cut short in this extract (trailing entries and closing
   brace/semicolon are missing) — verify against upstream i386.c.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD Geode.  NOTE(review): initializer is cut short in
   this extract (trailing entries and closing brace/semicolon are missing) —
   verify against upstream i386.c.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* Stringops: rep movsl up to 256 bytes, library call beyond.  */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD K6.  NOTE(review): initializer is cut short in this
   extract (trailing entries and closing brace/semicolon are missing) —
   verify against upstream i386.c.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Stringops: rep movsl up to 256 bytes, library call beyond.  */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD Athlon.  NOTE(review): initializer is cut short in
   this extract (trailing entries and closing brace/semicolon are missing) —
   verify against upstream i386.c.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD K8 (Opteron/Athlon 64).  NOTE(review): the
   "large insn" field, some latency fields and the closing brace/semicolon
   appear to be missing from this extract — verify against upstream
   i386.c.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table for AMD Family 10h (Barcelona).  NOTE(review): this extract is
   garbled — the XMM->GPR latency table near "MOVD reg64, xmmreg" has lost
   its comment delimiters, and some fields plus the closing brace/semicolon
   are missing.  Verify against upstream i386.c.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table for the Intel Pentium 4 (NetBurst).  NOTE(review): initializer
   is cut short in this extract (trailing stringop entries and closing
   brace/semicolon are missing) — verify against upstream i386.c.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy: byte loop for tiny blocks, rep movsl otherwise.  */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Cost table for the Intel Nocona (64-bit Pentium 4 / Xeon).  NOTE(review):
   some lines and the closing brace/semicolon appear to be missing from this
   extract — verify against upstream i386.c.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy strategies for the 32-bit and 64-bit code models.  */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table for the Intel Core 2.  NOTE(review): some fields and the
   closing brace/semicolon appear to be missing from this extract — verify
   against upstream i386.c.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode
(comment corrected: this positional slot is the fp-store row,
as in every other table in this file) */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy/memset strategies for the 32-bit and 64-bit code models.  */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table for the processor currently being compiled for.  Starts out
   pointing at pentium_cost; presumably re-pointed at the selected entry of
   processor_target_table once the target CPU is known -- confirm in
   override_options.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro below is a
   one-bit mask identifying a single PROCESSOR_* enumerator; the compound
   masks OR together related processor families so tuning tables can name
   a whole family at once.  */

/* Intel processors.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

/* AMD and related processors.  */
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

/* -mtune=generic, 32-bit and 64-bit flavors.  */
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling for Generic64 seems like good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro base chips. */
1013 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1017 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1022 /* X86_TUNE_USE_BIT_TEST */
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_GENERIC
,
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1032 on simulation result. But after P4 was made, no performance benefit
1033 was observed with branch hints. It also increases the code size.
1034 As a result, icc never generates branch hints. */
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1042 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1047 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on Generic32 compilation setting as well. However
1051 in current implementation the partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro based chips and is in conflict with partial reg
1055 dependencies used by Athlon/P4 based chips, it is better to leave it off
1056 for generic32 for now. */
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2
| m_GENERIC
,
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386
| m_486
| m_K6_GEODE
,
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1068 /* X86_TUNE_USE_MOV0 */
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1083 /* X86_TUNE_READ_MODIFY */
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1088 | m_GENERIC
/* | m_PENT4 ? */,
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT
| m_486
| m_386
),
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386
| m_PENT4
| m_NOCONA
,
1096 /* X86_TUNE_QIMODE_MATH */
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls was more effective. */
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1116 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1123 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1128 | m_GENERIC
| m_GEODE
),
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here between PPro/Pentium4 based chips that treat 128bit
1135 SSE registers as single units versus K8 based chips that divide SSE
1136 registers to two 64bit halves. This knob promotes all store destinations
1137 to be 128bit to allow register renaming on 128bit SSE units, but usually
1138 results in one extra microop on 64bit SSE units. Experimental results
1139 shows that disabling this option on P4 brings over 20% SPECfp regression,
1140 while enabling it on K8 brings roughly 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just lower part of scalar values in proper format leaving the
1150 upper part undefined. */
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10
,
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO
| m_PENT4
| m_NOCONA
,
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1168 /* X86_TUNE_SHIFT1 */
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10
,
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10
,
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4
| m_NOCONA
| m_GENERIC
),
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1203 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1207 vector path on AMD machines. */
1208 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD machines.  */
1212 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1230 /* X86_ARCH_CMOVE */
1231 m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
,
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Mask of processors for which outgoing-argument accumulation (reserving
   outgoing argument space once in the prologue instead of pushing per
   call) is the preferred default.  */
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Mask of processors whose 80387 unit always provides the "fancy" math
   instructions (the exact semantics are defined at the point where this
   mask is consumed -- confirm there).  */
static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm override; no_stringop means no override is in
   effect.  NOTE(review): presumably set from a -mstringop-strategy style
   command-line option -- confirm against option handling.  */
static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The actual name lists come from the *_REGISTER_NAMES macros supplied
   by the target headers.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1270 /* ax, dx, cx, bx */
1271 AREG
, DREG
, CREG
, BREG
,
1272 /* si, di, bp, sp */
1273 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1275 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1276 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1281 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1283 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1285 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1286 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1287 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1291 /* The "default" register map used in 32bit mode. */
1293 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1304 static int const x86_64_int_parameter_registers
[6] =
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1310 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1316 static int const x86_64_int_return_registers
[4] =
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1387 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
rtx ix86_compare_op0 = NULL_RTX;	/* First operand of the pending compare.  */
rtx ix86_compare_op1 = NULL_RTX;	/* Second operand of the pending compare.  */
/* NOTE(review): presumably non-NULL when the comparison has already been
   emitted and its result should be reused -- confirm against uses.  */
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area: one word per integer argument register
   plus 16 bytes per SSE argument register.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1408 /* Define the structure for the machine field in struct function. */
1410 struct stack_local_entry
GTY(())
1412 unsigned short mode
;
1415 struct stack_local_entry
*next
;
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1431 [va_arg registers] (
1432 > to_allocate <- FRAME_POINTER
1442 HOST_WIDE_INT frame
;
1444 int outgoing_arguments_size
;
1447 HOST_WIDE_INT to_allocate
;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset
;
1450 HOST_WIDE_INT hard_frame_pointer_offset
;
1451 HOST_WIDE_INT stack_pointer_offset
;
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov
;
/* Code model option.  */
enum cmodel ix86_cmodel;

/* Assembler dialect (AT&T vs. Intel); AT&T syntax is the default.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* Thread-local storage dialect; GNU is the default.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* -mstackrealign option.  */
extern int ix86_force_align_arg_pointer;
/* Name of the attribute used to request forced argument-pointer
   alignment.  */
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Branch cost; values 1-5: see jump.c.  */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1499 /* Register class used for passing given 64bit part of the argument.
1500 These represent classes as documented by the PS ABI, with the exception
1501 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1502 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1504 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1505 whenever possible (upper half does contain padding). */
1506 enum x86_64_reg_class
1509 X86_64_INTEGER_CLASS
,
1510 X86_64_INTEGERSI_CLASS
,
1517 X86_64_COMPLEX_X87_CLASS
,
1520 static const char * const x86_64_reg_class_name
[] =
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
/* NOTE(review): presumably the maximum number of x86_64_reg_class slots a
   single argument can occupy -- confirm against the argument classifier.  */
#define MAX_CLASSES 4
1528 /* Table of constants used by fldpi, fldln2, etc.... */
1529 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1530 static bool ext_80387_constants_init
= 0;
1533 static struct machine_function
* ix86_init_machine_status (void);
1534 static rtx
ix86_function_value (tree
, tree
, bool);
1535 static int ix86_function_regparm (tree
, tree
);
1536 static void ix86_compute_frame_layout (struct ix86_frame
*);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1541 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1547 /* Implement TARGET_HANDLE_OPTION. */
1550 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1557 target_flags
&= ~MASK_3DNOW_A
;
1558 target_flags_explicit
|= MASK_3DNOW_A
;
1565 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1566 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1573 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSSE3
1575 target_flags_explicit
|= (MASK_SSE2
| MASK_SSE3
| MASK_SSSE3
1583 target_flags
&= ~(MASK_SSE3
| MASK_SSSE3
| MASK_SSE4A
);
1584 target_flags_explicit
|= MASK_SSE3
| MASK_SSSE3
| MASK_SSE4A
;
1591 target_flags
&= ~(MASK_SSSE3
| MASK_SSE4A
);
1592 target_flags_explicit
|= MASK_SSSE3
| MASK_SSE4A
;
1599 target_flags
&= ~MASK_SSE4A
;
1600 target_flags_explicit
|= MASK_SSE4A
;
1609 /* Sometimes certain combinations of command options do not make
1610 sense on a particular target machine. You can define a macro
1611 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1612 defined, is executed once just after all the command options have
1615 Don't use this macro to turn on various extra optimizations for
1616 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1619 override_options (void)
1622 int ix86_tune_defaulted
= 0;
1623 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1625 /* Comes from final.c -- no real reason to change it. */
1626 #define MAX_CODE_ALIGN 16
1630 const struct processor_costs
*cost
; /* Processor costs */
1631 const int target_enable
; /* Target flags to enable. */
1632 const int target_disable
; /* Target flags to disable. */
1633 const int align_loop
; /* Default alignments. */
1634 const int align_loop_max_skip
;
1635 const int align_jump
;
1636 const int align_jump_max_skip
;
1637 const int align_func
;
1639 const processor_target_table
[PROCESSOR_max
] =
1641 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1642 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1643 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1644 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1645 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1646 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1647 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1648 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1649 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1650 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1651 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1652 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1653 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1654 {&amdfam10_cost
, 0, 0, 32, 24, 32, 7, 32}
1657 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1660 const char *const name
; /* processor name or nickname. */
1661 const enum processor_type processor
;
1662 const enum pta_flags
1668 PTA_PREFETCH_SSE
= 1 << 4,
1670 PTA_3DNOW_A
= 1 << 6,
1674 PTA_POPCNT
= 1 << 10,
1676 PTA_SSE4A
= 1 << 12,
1677 PTA_NO_SAHF
= 1 << 13
1680 const processor_alias_table
[] =
1682 {"i386", PROCESSOR_I386
, 0},
1683 {"i486", PROCESSOR_I486
, 0},
1684 {"i586", PROCESSOR_PENTIUM
, 0},
1685 {"pentium", PROCESSOR_PENTIUM
, 0},
1686 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1687 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1688 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1689 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1690 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1691 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1692 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1693 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1694 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1695 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1696 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1697 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1698 | PTA_MMX
| PTA_PREFETCH_SSE
},
1699 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1700 | PTA_MMX
| PTA_PREFETCH_SSE
},
1701 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1702 | PTA_MMX
| PTA_PREFETCH_SSE
},
1703 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1704 | PTA_MMX
| PTA_PREFETCH_SSE
1705 | PTA_CX16
| PTA_NO_SAHF
},
1706 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1707 | PTA_64BIT
| PTA_MMX
1708 | PTA_PREFETCH_SSE
| PTA_CX16
},
1709 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1711 {"k6", PROCESSOR_K6
, PTA_MMX
},
1712 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1713 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1714 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1716 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1717 | PTA_3DNOW
| PTA_3DNOW_A
},
1718 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1719 | PTA_3DNOW_A
| PTA_SSE
},
1720 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1721 | PTA_3DNOW_A
| PTA_SSE
},
1722 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1723 | PTA_3DNOW_A
| PTA_SSE
},
1724 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1725 | PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
1726 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1727 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
1729 {"k8-sse3", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1730 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
1731 | PTA_SSE3
| PTA_NO_SAHF
},
1732 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1733 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1734 | PTA_SSE2
| PTA_NO_SAHF
},
1735 {"opteron-sse3", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1736 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1737 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
1738 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1739 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1740 | PTA_SSE2
| PTA_NO_SAHF
},
1741 {"athlon64-sse3", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1742 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1743 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
1744 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1745 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1746 | PTA_SSE2
| PTA_NO_SAHF
},
1747 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1748 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1749 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1750 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1751 {"barcelona", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1752 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1753 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1754 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1755 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1756 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1759 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1761 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1762 SUBTARGET_OVERRIDE_OPTIONS
;
1765 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1766 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1769 /* -fPIC is the default for x86_64. */
1770 if (TARGET_MACHO
&& TARGET_64BIT
)
1773 /* Set the default values for switches whose default depends on TARGET_64BIT
1774 in case they weren't overwritten by command line options. */
1777 /* Mach-O doesn't support omitting the frame pointer for now. */
1778 if (flag_omit_frame_pointer
== 2)
1779 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1780 if (flag_asynchronous_unwind_tables
== 2)
1781 flag_asynchronous_unwind_tables
= 1;
1782 if (flag_pcc_struct_return
== 2)
1783 flag_pcc_struct_return
= 0;
1787 if (flag_omit_frame_pointer
== 2)
1788 flag_omit_frame_pointer
= 0;
1789 if (flag_asynchronous_unwind_tables
== 2)
1790 flag_asynchronous_unwind_tables
= 0;
1791 if (flag_pcc_struct_return
== 2)
1792 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1795 /* Need to check -mtune=generic first. */
1796 if (ix86_tune_string
)
1798 if (!strcmp (ix86_tune_string
, "generic")
1799 || !strcmp (ix86_tune_string
, "i686")
1800 /* As special support for cross compilers we read -mtune=native
1801 as -mtune=generic. With native compilers we won't see the
1802 -mtune=native, as it was changed by the driver. */
1803 || !strcmp (ix86_tune_string
, "native"))
1806 ix86_tune_string
= "generic64";
1808 ix86_tune_string
= "generic32";
1810 else if (!strncmp (ix86_tune_string
, "generic", 7))
1811 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1815 if (ix86_arch_string
)
1816 ix86_tune_string
= ix86_arch_string
;
1817 if (!ix86_tune_string
)
1819 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1820 ix86_tune_defaulted
= 1;
1823 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1824 need to use a sensible tune option. */
1825 if (!strcmp (ix86_tune_string
, "generic")
1826 || !strcmp (ix86_tune_string
, "x86-64")
1827 || !strcmp (ix86_tune_string
, "i686"))
1830 ix86_tune_string
= "generic64";
1832 ix86_tune_string
= "generic32";
1835 if (ix86_stringop_string
)
1837 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1838 stringop_alg
= rep_prefix_1_byte
;
1839 else if (!strcmp (ix86_stringop_string
, "libcall"))
1840 stringop_alg
= libcall
;
1841 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
1842 stringop_alg
= rep_prefix_4_byte
;
1843 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
1844 stringop_alg
= rep_prefix_8_byte
;
1845 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
1846 stringop_alg
= loop_1_byte
;
1847 else if (!strcmp (ix86_stringop_string
, "loop"))
1848 stringop_alg
= loop
;
1849 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
1850 stringop_alg
= unrolled_loop
;
1852 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
1854 if (!strcmp (ix86_tune_string
, "x86-64"))
1855 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1856 "-mtune=generic instead as appropriate.");
1858 if (!ix86_arch_string
)
1859 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1860 if (!strcmp (ix86_arch_string
, "generic"))
1861 error ("generic CPU can be used only for -mtune= switch");
1862 if (!strncmp (ix86_arch_string
, "generic", 7))
1863 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1865 if (ix86_cmodel_string
!= 0)
1867 if (!strcmp (ix86_cmodel_string
, "small"))
1868 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1869 else if (!strcmp (ix86_cmodel_string
, "medium"))
1870 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1871 else if (!strcmp (ix86_cmodel_string
, "large"))
1872 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
1874 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
1875 else if (!strcmp (ix86_cmodel_string
, "32"))
1876 ix86_cmodel
= CM_32
;
1877 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1878 ix86_cmodel
= CM_KERNEL
;
1880 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1884 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1885 use of rip-relative addressing. This eliminates fixups that
1886 would otherwise be needed if this object is to be placed in a
1887 DLL, and is essentially just as efficient as direct addressing. */
1888 if (TARGET_64BIT_MS_ABI
)
1889 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
1890 else if (TARGET_64BIT
)
1891 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1893 ix86_cmodel
= CM_32
;
1895 if (ix86_asm_string
!= 0)
1898 && !strcmp (ix86_asm_string
, "intel"))
1899 ix86_asm_dialect
= ASM_INTEL
;
1900 else if (!strcmp (ix86_asm_string
, "att"))
1901 ix86_asm_dialect
= ASM_ATT
;
1903 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1905 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1906 error ("code model %qs not supported in the %s bit mode",
1907 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1908 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1909 sorry ("%i-bit mode not compiled in",
1910 (target_flags
& MASK_64BIT
) ? 64 : 32);
1912 for (i
= 0; i
< pta_size
; i
++)
1913 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1915 ix86_arch
= processor_alias_table
[i
].processor
;
1916 /* Default cpu tuning to the architecture. */
1917 ix86_tune
= ix86_arch
;
1918 if (processor_alias_table
[i
].flags
& PTA_MMX
1919 && !(target_flags_explicit
& MASK_MMX
))
1920 target_flags
|= MASK_MMX
;
1921 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1922 && !(target_flags_explicit
& MASK_3DNOW
))
1923 target_flags
|= MASK_3DNOW
;
1924 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1925 && !(target_flags_explicit
& MASK_3DNOW_A
))
1926 target_flags
|= MASK_3DNOW_A
;
1927 if (processor_alias_table
[i
].flags
& PTA_SSE
1928 && !(target_flags_explicit
& MASK_SSE
))
1929 target_flags
|= MASK_SSE
;
1930 if (processor_alias_table
[i
].flags
& PTA_SSE2
1931 && !(target_flags_explicit
& MASK_SSE2
))
1932 target_flags
|= MASK_SSE2
;
1933 if (processor_alias_table
[i
].flags
& PTA_SSE3
1934 && !(target_flags_explicit
& MASK_SSE3
))
1935 target_flags
|= MASK_SSE3
;
1936 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1937 && !(target_flags_explicit
& MASK_SSSE3
))
1938 target_flags
|= MASK_SSSE3
;
1939 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1940 x86_prefetch_sse
= true;
1941 if (processor_alias_table
[i
].flags
& PTA_CX16
)
1942 x86_cmpxchg16b
= true;
1943 if (processor_alias_table
[i
].flags
& PTA_POPCNT
1944 && !(target_flags_explicit
& MASK_POPCNT
))
1945 target_flags
|= MASK_POPCNT
;
1946 if (processor_alias_table
[i
].flags
& PTA_ABM
1947 && !(target_flags_explicit
& MASK_ABM
))
1948 target_flags
|= MASK_ABM
;
1949 if (processor_alias_table
[i
].flags
& PTA_SSE4A
1950 && !(target_flags_explicit
& MASK_SSE4A
))
1951 target_flags
|= MASK_SSE4A
;
1952 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
1954 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1955 error ("CPU you selected does not support x86-64 "
1961 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1963 ix86_arch_mask
= 1u << ix86_arch
;
1964 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
1965 ix86_arch_features
[i
] &= ix86_arch_mask
;
1967 for (i
= 0; i
< pta_size
; i
++)
1968 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1970 ix86_tune
= processor_alias_table
[i
].processor
;
1971 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1973 if (ix86_tune_defaulted
)
1975 ix86_tune_string
= "x86-64";
1976 for (i
= 0; i
< pta_size
; i
++)
1977 if (! strcmp (ix86_tune_string
,
1978 processor_alias_table
[i
].name
))
1980 ix86_tune
= processor_alias_table
[i
].processor
;
1983 error ("CPU you selected does not support x86-64 "
1986 /* Intel CPUs have always interpreted SSE prefetch instructions as
1987 NOPs; so, we can enable SSE prefetch instructions even when
1988 -mtune (rather than -march) points us to a processor that has them.
1989 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1990 higher processors. */
1991 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1992 x86_prefetch_sse
= true;
1996 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1998 ix86_tune_mask
= 1u << ix86_tune
;
1999 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2000 ix86_tune_features
[i
] &= ix86_tune_mask
;
2003 ix86_cost
= &size_cost
;
2005 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2006 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2007 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2009 /* Arrange to set up i386_stack_locals for all functions. */
2010 init_machine_status
= ix86_init_machine_status
;
2012 /* Validate -mregparm= value. */
2013 if (ix86_regparm_string
)
2016 warning (0, "-mregparm is ignored in 64-bit mode");
2017 i
= atoi (ix86_regparm_string
);
2018 if (i
< 0 || i
> REGPARM_MAX
)
2019 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2024 ix86_regparm
= REGPARM_MAX
;
2026 /* If the user has provided any of the -malign-* options,
2027 warn and use that value only if -falign-* is not set.
2028 Remove this code in GCC 3.2 or later. */
2029 if (ix86_align_loops_string
)
2031 warning (0, "-malign-loops is obsolete, use -falign-loops");
2032 if (align_loops
== 0)
2034 i
= atoi (ix86_align_loops_string
);
2035 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2036 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2038 align_loops
= 1 << i
;
2042 if (ix86_align_jumps_string
)
2044 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2045 if (align_jumps
== 0)
2047 i
= atoi (ix86_align_jumps_string
);
2048 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2049 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2051 align_jumps
= 1 << i
;
2055 if (ix86_align_funcs_string
)
2057 warning (0, "-malign-functions is obsolete, use -falign-functions");
2058 if (align_functions
== 0)
2060 i
= atoi (ix86_align_funcs_string
);
2061 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2062 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2064 align_functions
= 1 << i
;
2068 /* Default align_* from the processor table. */
2069 if (align_loops
== 0)
2071 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2072 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2074 if (align_jumps
== 0)
2076 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2077 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2079 if (align_functions
== 0)
2081 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2084 /* Validate -mbranch-cost= value, or provide default. */
2085 ix86_branch_cost
= ix86_cost
->branch_cost
;
2086 if (ix86_branch_cost_string
)
2088 i
= atoi (ix86_branch_cost_string
);
2090 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2092 ix86_branch_cost
= i
;
2094 if (ix86_section_threshold_string
)
2096 i
= atoi (ix86_section_threshold_string
);
2098 error ("-mlarge-data-threshold=%d is negative", i
);
2100 ix86_section_threshold
= i
;
2103 if (ix86_tls_dialect_string
)
2105 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2106 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2107 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2108 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2109 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2110 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2112 error ("bad value (%s) for -mtls-dialect= switch",
2113 ix86_tls_dialect_string
);
2116 if (ix87_precision_string
)
2118 i
= atoi (ix87_precision_string
);
2119 if (i
!= 32 && i
!= 64 && i
!= 80)
2120 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2123 /* Keep nonleaf frame pointers. */
2124 if (flag_omit_frame_pointer
)
2125 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2126 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2127 flag_omit_frame_pointer
= 1;
2129 /* If we're doing fast math, we don't care about comparison order
2130 wrt NaNs. This lets us use a shorter comparison sequence. */
2131 if (flag_finite_math_only
)
2132 target_flags
&= ~MASK_IEEE_FP
;
2134 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2135 since the insns won't need emulation. */
2136 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2137 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2139 /* Likewise, if the target doesn't have a 387, or we've specified
2140 software floating point, don't use 387 inline intrinsics. */
2142 target_flags
|= MASK_NO_FANCY_MATH_387
;
2144 /* Turn on SSE3 builtins for -mssse3. */
2146 target_flags
|= MASK_SSE3
;
2148 /* Turn on SSE3 builtins for -msse4a. */
2150 target_flags
|= MASK_SSE3
;
2152 /* Turn on SSE2 builtins for -msse3. */
2154 target_flags
|= MASK_SSE2
;
2156 /* Turn on SSE builtins for -msse2. */
2158 target_flags
|= MASK_SSE
;
2160 /* Turn on MMX builtins for -msse. */
2163 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2164 x86_prefetch_sse
= true;
2167 /* Turn on MMX builtins for 3Dnow. */
2169 target_flags
|= MASK_MMX
;
2171 /* Turn on POPCNT builtins for -mabm. */
2173 target_flags
|= MASK_POPCNT
;
2178 warning (0, "-mrtd is ignored in 64bit mode");
2180 /* Enable by default the SSE and MMX builtins. Do allow the user to
2181 explicitly disable any of these. In particular, disabling SSE and
2182 MMX for kernel code is extremely useful. */
2184 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| TARGET_SUBTARGET64_DEFAULT
)
2185 & ~target_flags_explicit
);
2189 /* i386 ABI does not specify red zone. It still makes sense to use it
2190 when programmer takes care to stack from being destroyed. */
2191 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2192 target_flags
|= MASK_NO_RED_ZONE
;
2195 /* Validate -mpreferred-stack-boundary= value, or provide default.
2196 The default of 128 bits is for Pentium III's SSE __m128. We can't
2197 change it because of optimize_size. Otherwise, we can't mix object
2198 files compiled with -Os and -On. */
2199 ix86_preferred_stack_boundary
= 128;
2200 if (ix86_preferred_stack_boundary_string
)
2202 i
= atoi (ix86_preferred_stack_boundary_string
);
2203 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2204 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2205 TARGET_64BIT
? 4 : 2);
2207 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2210 /* Accept -msseregparm only if at least SSE support is enabled. */
2211 if (TARGET_SSEREGPARM
2213 error ("-msseregparm used without SSE enabled");
2215 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2216 if (ix86_fpmath_string
!= 0)
2218 if (! strcmp (ix86_fpmath_string
, "387"))
2219 ix86_fpmath
= FPMATH_387
;
2220 else if (! strcmp (ix86_fpmath_string
, "sse"))
2224 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2225 ix86_fpmath
= FPMATH_387
;
2228 ix86_fpmath
= FPMATH_SSE
;
2230 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2231 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2235 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2236 ix86_fpmath
= FPMATH_387
;
2238 else if (!TARGET_80387
)
2240 warning (0, "387 instruction set disabled, using SSE arithmetics");
2241 ix86_fpmath
= FPMATH_SSE
;
2244 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2247 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2250 /* If the i387 is disabled, then do not return values in it. */
2252 target_flags
&= ~MASK_FLOAT_RETURNS
;
2254 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2255 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2257 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2259 /* ??? Unwind info is not correct around the CFG unless either a frame
2260 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2261 unwind info generation to be aware of the CFG and propagating states
2263 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2264 || flag_exceptions
|| flag_non_call_exceptions
)
2265 && flag_omit_frame_pointer
2266 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2268 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2269 warning (0, "unwind tables currently require either a frame pointer "
2270 "or -maccumulate-outgoing-args for correctness");
2271 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2274 /* For sane SSE instruction set generation we need fcomi instruction.
2275 It is safe to enable all CMOVE instructions. */
2279 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2282 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2283 p
= strchr (internal_label_prefix
, 'X');
2284 internal_label_prefix_len
= p
- internal_label_prefix
;
2288 /* When scheduling description is not available, disable scheduler pass
2289 so it won't slow down the compilation and make x87 code slower. */
2290 if (!TARGET_SCHEDULE
)
2291 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2293 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2294 set_param_value ("simultaneous-prefetches",
2295 ix86_cost
->simultaneous_prefetches
);
2296 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2297 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2300 /* Return true if this goes in large data/bss. */
2303 ix86_in_large_data_p (tree exp
)
2305 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2308 /* Functions are never large data. */
2309 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2312 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2314 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2315 if (strcmp (section
, ".ldata") == 0
2316 || strcmp (section
, ".lbss") == 0)
2322 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2324 /* If this is an incomplete type with size 0, then we can't put it
2325 in data because it might be too big when completed. */
2326 if (!size
|| size
> ix86_section_threshold
)
2333 /* Switch to the appropriate section for output of DECL.
2334 DECL is either a `VAR_DECL' node or a constant of some sort.
2335 RELOC indicates whether forming the initial value of DECL requires
2336 link-time relocations. */
2338 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2342 x86_64_elf_select_section (tree decl
, int reloc
,
2343 unsigned HOST_WIDE_INT align
)
2345 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2346 && ix86_in_large_data_p (decl
))
2348 const char *sname
= NULL
;
2349 unsigned int flags
= SECTION_WRITE
;
2350 switch (categorize_decl_for_section (decl
, reloc
))
2355 case SECCAT_DATA_REL
:
2356 sname
= ".ldata.rel";
2358 case SECCAT_DATA_REL_LOCAL
:
2359 sname
= ".ldata.rel.local";
2361 case SECCAT_DATA_REL_RO
:
2362 sname
= ".ldata.rel.ro";
2364 case SECCAT_DATA_REL_RO_LOCAL
:
2365 sname
= ".ldata.rel.ro.local";
2369 flags
|= SECTION_BSS
;
2372 case SECCAT_RODATA_MERGE_STR
:
2373 case SECCAT_RODATA_MERGE_STR_INIT
:
2374 case SECCAT_RODATA_MERGE_CONST
:
2378 case SECCAT_SRODATA
:
2385 /* We don't split these for medium model. Place them into
2386 default sections and hope for best. */
2391 /* We might get called with string constants, but get_named_section
2392 doesn't like them as they are not DECLs. Also, we need to set
2393 flags in that case. */
2395 return get_section (sname
, flags
, NULL
);
2396 return get_named_section (decl
, sname
, reloc
);
2399 return default_elf_select_section (decl
, reloc
, align
);
2402 /* Build up a unique section name, expressed as a
2403 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2404 RELOC indicates whether the initial value of EXP requires
2405 link-time relocations. */
2407 static void ATTRIBUTE_UNUSED
2408 x86_64_elf_unique_section (tree decl
, int reloc
)
2410 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2411 && ix86_in_large_data_p (decl
))
2413 const char *prefix
= NULL
;
2414 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2415 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2417 switch (categorize_decl_for_section (decl
, reloc
))
2420 case SECCAT_DATA_REL
:
2421 case SECCAT_DATA_REL_LOCAL
:
2422 case SECCAT_DATA_REL_RO
:
2423 case SECCAT_DATA_REL_RO_LOCAL
:
2424 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2427 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2430 case SECCAT_RODATA_MERGE_STR
:
2431 case SECCAT_RODATA_MERGE_STR_INIT
:
2432 case SECCAT_RODATA_MERGE_CONST
:
2433 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2435 case SECCAT_SRODATA
:
2442 /* We don't split these for medium model. Place them into
2443 default sections and hope for best. */
2451 plen
= strlen (prefix
);
2453 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2454 name
= targetm
.strip_name_encoding (name
);
2455 nlen
= strlen (name
);
2457 string
= alloca (nlen
+ plen
+ 1);
2458 memcpy (string
, prefix
, plen
);
2459 memcpy (string
+ plen
, name
, nlen
+ 1);
2461 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2465 default_unique_section (decl
, reloc
);
2468 #ifdef COMMON_ASM_OP
2469 /* This says how to output assembler code to declare an
2470 uninitialized external linkage data object.
2472 For medium model x86-64 we need to use .largecomm opcode for
2475 x86_elf_aligned_common (FILE *file
,
2476 const char *name
, unsigned HOST_WIDE_INT size
,
2479 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2480 && size
> (unsigned int)ix86_section_threshold
)
2481 fprintf (file
, ".largecomm\t");
2483 fprintf (file
, "%s", COMMON_ASM_OP
);
2484 assemble_name (file
, name
);
2485 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2486 size
, align
/ BITS_PER_UNIT
);
2490 /* Utility function for targets to use in implementing
2491 ASM_OUTPUT_ALIGNED_BSS. */
2494 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2495 const char *name
, unsigned HOST_WIDE_INT size
,
2498 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2499 && size
> (unsigned int)ix86_section_threshold
)
2500 switch_to_section (get_named_section (decl
, ".lbss", 0));
2502 switch_to_section (bss_section
);
2503 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2504 #ifdef ASM_DECLARE_OBJECT_NAME
2505 last_assemble_variable_decl
= decl
;
2506 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2508 /* Standard thing is just output label for the object. */
2509 ASM_OUTPUT_LABEL (file
, name
);
2510 #endif /* ASM_DECLARE_OBJECT_NAME */
2511 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2515 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2517 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2518 make the problem with not enough registers even worse. */
2519 #ifdef INSN_SCHEDULING
2521 flag_schedule_insns
= 0;
2525 /* The Darwin libraries never set errno, so we might as well
2526 avoid calling them when that's the only reason we would. */
2527 flag_errno_math
= 0;
2529 /* The default values of these switches depend on the TARGET_64BIT
2530 that is not known at this moment. Mark these values with 2 and
2531 let user the to override these. In case there is no command line option
2532 specifying them, we will set the defaults in override_options. */
2534 flag_omit_frame_pointer
= 2;
2535 flag_pcc_struct_return
= 2;
2536 flag_asynchronous_unwind_tables
= 2;
2537 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2538 SUBTARGET_OPTIMIZATION_OPTIONS
;
2542 /* Decide whether we can make a sibling call to a function. DECL is the
2543 declaration of the function being targeted by the call and EXP is the
2544 CALL_EXPR representing the call. */
2547 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2552 /* If we are generating position-independent code, we cannot sibcall
2553 optimize any indirect call, or a direct call to a global function,
2554 as the PLT requires %ebx be live. */
2555 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2562 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2563 if (POINTER_TYPE_P (func
))
2564 func
= TREE_TYPE (func
);
2567 /* Check that the return value locations are the same. Like
2568 if we are returning floats on the 80387 register stack, we cannot
2569 make a sibcall from a function that doesn't return a float to a
2570 function that does or, conversely, from a function that does return
2571 a float to a function that doesn't; the necessary stack adjustment
2572 would not be executed. This is also the place we notice
2573 differences in the return value ABI. Note that it is ok for one
2574 of the functions to have void return type as long as the return
2575 value of the other is passed in a register. */
2576 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2577 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2579 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2581 if (!rtx_equal_p (a
, b
))
2584 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2586 else if (!rtx_equal_p (a
, b
))
2589 /* If this call is indirect, we'll need to be able to use a call-clobbered
2590 register for the address of the target function. Make sure that all
2591 such registers are not used for passing parameters. */
2592 if (!decl
&& !TARGET_64BIT
)
2596 /* We're looking at the CALL_EXPR, we need the type of the function. */
2597 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2598 type
= TREE_TYPE (type
); /* pointer type */
2599 type
= TREE_TYPE (type
); /* function type */
2601 if (ix86_function_regparm (type
, NULL
) >= 3)
2603 /* ??? Need to count the actual number of registers to be used,
2604 not the possible number of registers. Fix later. */
2609 /* Dllimport'd functions are also called indirectly. */
2610 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2611 && decl
&& DECL_DLLIMPORT_P (decl
)
2612 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2615 /* If we forced aligned the stack, then sibcalling would unalign the
2616 stack, which may break the called function. */
2617 if (cfun
->machine
->force_align_arg_pointer
)
2620 /* Otherwise okay. That also includes certain types of indirect calls. */
2624 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2625 calling convention attributes;
2626 arguments as in struct attribute_spec.handler. */
2629 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2631 int flags ATTRIBUTE_UNUSED
,
2634 if (TREE_CODE (*node
) != FUNCTION_TYPE
2635 && TREE_CODE (*node
) != METHOD_TYPE
2636 && TREE_CODE (*node
) != FIELD_DECL
2637 && TREE_CODE (*node
) != TYPE_DECL
)
2639 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2640 IDENTIFIER_POINTER (name
));
2641 *no_add_attrs
= true;
2645 /* Can combine regparm with all attributes but fastcall. */
2646 if (is_attribute_p ("regparm", name
))
2650 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2652 error ("fastcall and regparm attributes are not compatible");
2655 cst
= TREE_VALUE (args
);
2656 if (TREE_CODE (cst
) != INTEGER_CST
)
2658 warning (OPT_Wattributes
,
2659 "%qs attribute requires an integer constant argument",
2660 IDENTIFIER_POINTER (name
));
2661 *no_add_attrs
= true;
2663 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2665 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2666 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2667 *no_add_attrs
= true;
2671 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2672 TYPE_ATTRIBUTES (*node
))
2673 && compare_tree_int (cst
, REGPARM_MAX
-1))
2675 error ("%s functions limited to %d register parameters",
2676 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2684 /* Do not warn when emulating the MS ABI. */
2685 if (!TARGET_64BIT_MS_ABI
)
2686 warning (OPT_Wattributes
, "%qs attribute ignored",
2687 IDENTIFIER_POINTER (name
));
2688 *no_add_attrs
= true;
2692 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2693 if (is_attribute_p ("fastcall", name
))
2695 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2697 error ("fastcall and cdecl attributes are not compatible");
2699 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2701 error ("fastcall and stdcall attributes are not compatible");
2703 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2705 error ("fastcall and regparm attributes are not compatible");
2709 /* Can combine stdcall with fastcall (redundant), regparm and
2711 else if (is_attribute_p ("stdcall", name
))
2713 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2715 error ("stdcall and cdecl attributes are not compatible");
2717 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2719 error ("stdcall and fastcall attributes are not compatible");
2723 /* Can combine cdecl with regparm and sseregparm. */
2724 else if (is_attribute_p ("cdecl", name
))
2726 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2728 error ("stdcall and cdecl attributes are not compatible");
2730 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2732 error ("fastcall and cdecl attributes are not compatible");
2736 /* Can combine sseregparm with all attributes. */
2741 /* Return 0 if the attributes for two types are incompatible, 1 if they
2742 are compatible, and 2 if they are nearly compatible (which causes a
2743 warning to be generated). */
2746 ix86_comp_type_attributes (tree type1
, tree type2
)
2748 /* Check for mismatch of non-default calling convention. */
2749 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2751 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2754 /* Check for mismatched fastcall/regparm types. */
2755 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2756 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2757 || (ix86_function_regparm (type1
, NULL
)
2758 != ix86_function_regparm (type2
, NULL
)))
2761 /* Check for mismatched sseregparm types. */
2762 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2763 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2766 /* Check for mismatched return types (cdecl vs stdcall). */
2767 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2768 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2774 /* Return the regparm value for a function with the indicated TYPE and DECL.
2775 DECL may be NULL when calling function indirectly
2776 or considering a libcall. */
2779 ix86_function_regparm (tree type
, tree decl
)
2782 int regparm
= ix86_regparm
;
2787 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2789 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2791 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2794 /* Use register calling convention for local functions when possible. */
2795 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
2796 && flag_unit_at_a_time
&& !profile_flag
)
2798 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2801 int local_regparm
, globals
= 0, regno
;
2804 /* Make sure no regparm register is taken by a
2805 global register variable. */
2806 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2807 if (global_regs
[local_regparm
])
2810 /* We can't use regparm(3) for nested functions as these use
2811 static chain pointer in third argument. */
2812 if (local_regparm
== 3
2813 && decl_function_context (decl
)
2814 && !DECL_NO_STATIC_CHAIN (decl
))
2817 /* If the function realigns its stackpointer, the prologue will
2818 clobber %ecx. If we've already generated code for the callee,
2819 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2820 scanning the attributes for the self-realigning property. */
2821 f
= DECL_STRUCT_FUNCTION (decl
);
2822 if (local_regparm
== 3
2823 && (f
? !!f
->machine
->force_align_arg_pointer
2824 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
2825 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2828 /* Each global register variable increases register preassure,
2829 so the more global reg vars there are, the smaller regparm
2830 optimization use, unless requested by the user explicitly. */
2831 for (regno
= 0; regno
< 6; regno
++)
2832 if (global_regs
[regno
])
2835 = globals
< local_regparm
? local_regparm
- globals
: 0;
2837 if (local_regparm
> regparm
)
2838 regparm
= local_regparm
;
2845 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2846 DFmode (2) arguments in SSE registers for a function with the
2847 indicated TYPE and DECL. DECL may be NULL when calling function
2848 indirectly or considering a libcall. Otherwise return 0. */
2851 ix86_function_sseregparm (tree type
, tree decl
)
2853 gcc_assert (!TARGET_64BIT
);
2855 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2856 by the sseregparm attribute. */
2857 if (TARGET_SSEREGPARM
2858 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2863 error ("Calling %qD with attribute sseregparm without "
2864 "SSE/SSE2 enabled", decl
);
2866 error ("Calling %qT with attribute sseregparm without "
2867 "SSE/SSE2 enabled", type
);
2874 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2875 (and DFmode for SSE2) arguments in SSE registers. */
2876 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2878 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2880 return TARGET_SSE2
? 2 : 1;
2886 /* Return true if EAX is live at the start of the function. Used by
2887 ix86_expand_prologue to determine if we need special help before
2888 calling allocate_stack_worker. */
2891 ix86_eax_live_at_start_p (void)
2893 /* Cheat. Don't bother working forward from ix86_function_regparm
2894 to the function type to whether an actual argument is located in
2895 eax. Instead just look at cfg info, which is still close enough
2896 to correct at this point. This gives false positives for broken
2897 functions that might use uninitialized data that happens to be
2898 allocated in eax, but who cares? */
2899 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2902 /* Return true if TYPE has a variable argument list. */
2905 type_has_variadic_args_p (tree type
)
2907 tree n
, t
= TYPE_ARG_TYPES (type
);
2912 while ((n
= TREE_CHAIN (t
)) != NULL
)
2915 return TREE_VALUE (t
) != void_type_node
;
2918 /* Value is the number of bytes of arguments automatically
2919 popped when returning from a subroutine call.
2920 FUNDECL is the declaration node of the function (as a tree),
2921 FUNTYPE is the data type of the function (as a tree),
2922 or for a library call it is an identifier node for the subroutine name.
2923 SIZE is the number of bytes of arguments passed on the stack.
2925 On the 80386, the RTD insn may be used to pop them if the number
2926 of args is fixed, but if the number is variable then the caller
2927 must pop them all. RTD can't be used for library calls now
2928 because the library is compiled with the Unix compiler.
2929 Use of RTD is a selectable option, since it is incompatible with
2930 standard Unix calling sequences. If the option is not selected,
2931 the caller must always pop the args.
2933 The attribute stdcall is equivalent to RTD on a per module basis. */
2936 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2940 /* None of the 64-bit ABIs pop arguments. */
2944 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2946 /* Cdecl functions override -mrtd, and never pop the stack. */
2947 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
2949 /* Stdcall and fastcall functions will pop the stack if not
2951 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2952 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
2955 if (rtd
&& ! type_has_variadic_args_p (funtype
))
2959 /* Lose any fake structure return argument if it is passed on the stack. */
2960 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2961 && !KEEP_AGGREGATE_RETURN_POINTER
)
2963 int nregs
= ix86_function_regparm (funtype
, fundecl
);
2965 return GET_MODE_SIZE (Pmode
);
2971 /* Argument support functions. */
2973 /* Return true when register may be used to pass function parameters. */
2975 ix86_function_arg_regno_p (int regno
)
2978 const int *parm_regs
;
2983 return (regno
< REGPARM_MAX
2984 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
2986 return (regno
< REGPARM_MAX
2987 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2988 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2989 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2990 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2995 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3000 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3001 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3005 /* RAX is used as hidden argument to va_arg functions. */
3006 if (!TARGET_64BIT_MS_ABI
&& regno
== 0)
3009 if (TARGET_64BIT_MS_ABI
)
3010 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3012 parm_regs
= x86_64_int_parameter_registers
;
3013 for (i
= 0; i
< REGPARM_MAX
; i
++)
3014 if (regno
== parm_regs
[i
])
3019 /* Return if we do not know how to pass TYPE solely in registers. */
3022 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3024 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3027 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3028 The layout_type routine is crafty and tries to trick us into passing
3029 currently unsupported vector types on the stack by using TImode. */
3030 return (!TARGET_64BIT
&& mode
== TImode
3031 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3034 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3035 for a call to a function whose data type is FNTYPE.
3036 For a library call, FNTYPE is 0. */
3039 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3040 tree fntype
, /* tree ptr for function decl */
3041 rtx libname
, /* SYMBOL_REF of library name or 0 */
3044 memset (cum
, 0, sizeof (*cum
));
3046 /* Set up the number of registers to use for passing arguments. */
3047 cum
->nregs
= ix86_regparm
;
3049 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3051 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3052 cum
->warn_sse
= true;
3053 cum
->warn_mmx
= true;
3054 cum
->maybe_vaarg
= (fntype
3055 ? (!TYPE_ARG_TYPES (fntype
)
3056 || type_has_variadic_args_p (fntype
))
3061 /* If there are variable arguments, then we won't pass anything
3062 in registers in 32-bit mode. */
3063 if (cum
->maybe_vaarg
)
3073 /* Use ecx and edx registers if function has fastcall attribute,
3074 else look for regparm information. */
3077 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3083 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3086 /* Set up the number of SSE registers used for passing SFmode
3087 and DFmode arguments. Warn for mismatching ABI. */
3088 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3092 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3093 But in the case of vector types, it is some vector mode.
3095 When we have only some of our vector isa extensions enabled, then there
3096 are some modes for which vector_mode_supported_p is false. For these
3097 modes, the generic vector support in gcc will choose some non-vector mode
3098 in order to implement the type. By computing the natural mode, we'll
3099 select the proper ABI location for the operand and not depend on whatever
3100 the middle-end decides to do with these vector types. */
3102 static enum machine_mode
3103 type_natural_mode (tree type
)
3105 enum machine_mode mode
= TYPE_MODE (type
);
3107 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3109 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3110 if ((size
== 8 || size
== 16)
3111 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3112 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3114 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3116 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3117 mode
= MIN_MODE_VECTOR_FLOAT
;
3119 mode
= MIN_MODE_VECTOR_INT
;
3121 /* Get the mode which has this inner mode and number of units. */
3122 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3123 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3124 && GET_MODE_INNER (mode
) == innermode
)
3134 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3135 this may not agree with the mode that the type system has chosen for the
3136 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3137 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3140 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3145 if (orig_mode
!= BLKmode
)
3146 tmp
= gen_rtx_REG (orig_mode
, regno
);
3149 tmp
= gen_rtx_REG (mode
, regno
);
3150 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3151 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3157 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3158 of this code is to classify each 8bytes of incoming argument by the register
3159 class and assign registers accordingly. */
3161 /* Return the union class of CLASS1 and CLASS2.
3162 See the x86-64 PS ABI for details. */
3164 static enum x86_64_reg_class
3165 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3167 /* Rule #1: If both classes are equal, this is the resulting class. */
3168 if (class1
== class2
)
3171 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3173 if (class1
== X86_64_NO_CLASS
)
3175 if (class2
== X86_64_NO_CLASS
)
3178 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3179 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3180 return X86_64_MEMORY_CLASS
;
3182 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3183 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3184 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3185 return X86_64_INTEGERSI_CLASS
;
3186 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3187 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3188 return X86_64_INTEGER_CLASS
;
3190 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3192 if (class1
== X86_64_X87_CLASS
3193 || class1
== X86_64_X87UP_CLASS
3194 || class1
== X86_64_COMPLEX_X87_CLASS
3195 || class2
== X86_64_X87_CLASS
3196 || class2
== X86_64_X87UP_CLASS
3197 || class2
== X86_64_COMPLEX_X87_CLASS
)
3198 return X86_64_MEMORY_CLASS
;
3200 /* Rule #6: Otherwise class SSE is used. */
3201 return X86_64_SSE_CLASS
;
3204 /* Classify the argument of type TYPE and mode MODE.
3205 CLASSES will be filled by the register class used to pass each word
3206 of the operand. The number of words is returned. In case the parameter
3207 should be passed in memory, 0 is returned. As a special case for zero
3208 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3210 BIT_OFFSET is used internally for handling records and specifies offset
3211 of the offset in bits modulo 256 to avoid overflow cases.
3213 See the x86-64 PS ABI for details.
3217 classify_argument (enum machine_mode mode
, tree type
,
3218 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3220 HOST_WIDE_INT bytes
=
3221 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3222 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3224 /* Variable sized entities are always passed/returned in memory. */
3228 if (mode
!= VOIDmode
3229 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3232 if (type
&& AGGREGATE_TYPE_P (type
))
3236 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3238 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3242 for (i
= 0; i
< words
; i
++)
3243 classes
[i
] = X86_64_NO_CLASS
;
3245 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3246 signalize memory class, so handle it as special case. */
3249 classes
[0] = X86_64_NO_CLASS
;
3253 /* Classify each field of record and merge classes. */
3254 switch (TREE_CODE (type
))
3257 /* And now merge the fields of structure. */
3258 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3260 if (TREE_CODE (field
) == FIELD_DECL
)
3264 if (TREE_TYPE (field
) == error_mark_node
)
3267 /* Bitfields are always classified as integer. Handle them
3268 early, since later code would consider them to be
3269 misaligned integers. */
3270 if (DECL_BIT_FIELD (field
))
3272 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3273 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3274 + tree_low_cst (DECL_SIZE (field
), 0)
3277 merge_classes (X86_64_INTEGER_CLASS
,
3282 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3283 TREE_TYPE (field
), subclasses
,
3284 (int_bit_position (field
)
3285 + bit_offset
) % 256);
3288 for (i
= 0; i
< num
; i
++)
3291 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3293 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3301 /* Arrays are handled as small records. */
3304 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3305 TREE_TYPE (type
), subclasses
, bit_offset
);
3309 /* The partial classes are now full classes. */
3310 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3311 subclasses
[0] = X86_64_SSE_CLASS
;
3312 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3313 subclasses
[0] = X86_64_INTEGER_CLASS
;
3315 for (i
= 0; i
< words
; i
++)
3316 classes
[i
] = subclasses
[i
% num
];
3321 case QUAL_UNION_TYPE
:
3322 /* Unions are similar to RECORD_TYPE but offset is always 0.
3324 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3326 if (TREE_CODE (field
) == FIELD_DECL
)
3330 if (TREE_TYPE (field
) == error_mark_node
)
3333 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3334 TREE_TYPE (field
), subclasses
,
3338 for (i
= 0; i
< num
; i
++)
3339 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3348 /* Final merger cleanup. */
3349 for (i
= 0; i
< words
; i
++)
3351 /* If one class is MEMORY, everything should be passed in
3353 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3356 /* The X86_64_SSEUP_CLASS should be always preceded by
3357 X86_64_SSE_CLASS. */
3358 if (classes
[i
] == X86_64_SSEUP_CLASS
3359 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3360 classes
[i
] = X86_64_SSE_CLASS
;
3362 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3363 if (classes
[i
] == X86_64_X87UP_CLASS
3364 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3365 classes
[i
] = X86_64_SSE_CLASS
;
3370 /* Compute alignment needed. We align all types to natural boundaries with
3371 exception of XFmode that is aligned to 64bits. */
3372 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3374 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3377 mode_alignment
= 128;
3378 else if (mode
== XCmode
)
3379 mode_alignment
= 256;
3380 if (COMPLEX_MODE_P (mode
))
3381 mode_alignment
/= 2;
3382 /* Misaligned fields are always returned in memory. */
3383 if (bit_offset
% mode_alignment
)
3387 /* for V1xx modes, just use the base mode */
3388 if (VECTOR_MODE_P (mode
)
3389 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3390 mode
= GET_MODE_INNER (mode
);
3392 /* Classification of atomic types. */
3397 classes
[0] = X86_64_SSE_CLASS
;
3400 classes
[0] = X86_64_SSE_CLASS
;
3401 classes
[1] = X86_64_SSEUP_CLASS
;
3410 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3411 classes
[0] = X86_64_INTEGERSI_CLASS
;
3413 classes
[0] = X86_64_INTEGER_CLASS
;
3417 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3422 if (!(bit_offset
% 64))
3423 classes
[0] = X86_64_SSESF_CLASS
;
3425 classes
[0] = X86_64_SSE_CLASS
;
3428 classes
[0] = X86_64_SSEDF_CLASS
;
3431 classes
[0] = X86_64_X87_CLASS
;
3432 classes
[1] = X86_64_X87UP_CLASS
;
3435 classes
[0] = X86_64_SSE_CLASS
;
3436 classes
[1] = X86_64_SSEUP_CLASS
;
3439 classes
[0] = X86_64_SSE_CLASS
;
3442 classes
[0] = X86_64_SSEDF_CLASS
;
3443 classes
[1] = X86_64_SSEDF_CLASS
;
3446 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3449 /* This modes is larger than 16 bytes. */
3457 classes
[0] = X86_64_SSE_CLASS
;
3458 classes
[1] = X86_64_SSEUP_CLASS
;
3464 classes
[0] = X86_64_SSE_CLASS
;
3470 gcc_assert (VECTOR_MODE_P (mode
));
3475 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3477 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3478 classes
[0] = X86_64_INTEGERSI_CLASS
;
3480 classes
[0] = X86_64_INTEGER_CLASS
;
3481 classes
[1] = X86_64_INTEGER_CLASS
;
3482 return 1 + (bytes
> 8);
3486 /* Examine the argument and return set number of register required in each
3487 class. Return 0 iff parameter should be passed in memory. */
3489 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3490 int *int_nregs
, int *sse_nregs
)
3492 enum x86_64_reg_class
class[MAX_CLASSES
];
3493 int n
= classify_argument (mode
, type
, class, 0);
3499 for (n
--; n
>= 0; n
--)
3502 case X86_64_INTEGER_CLASS
:
3503 case X86_64_INTEGERSI_CLASS
:
3506 case X86_64_SSE_CLASS
:
3507 case X86_64_SSESF_CLASS
:
3508 case X86_64_SSEDF_CLASS
:
3511 case X86_64_NO_CLASS
:
3512 case X86_64_SSEUP_CLASS
:
3514 case X86_64_X87_CLASS
:
3515 case X86_64_X87UP_CLASS
:
3519 case X86_64_COMPLEX_X87_CLASS
:
3520 return in_return
? 2 : 0;
3521 case X86_64_MEMORY_CLASS
:
3527 /* Construct container for the argument used by GCC interface. See
3528 FUNCTION_ARG for the detailed description. */
3531 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3532 tree type
, int in_return
, int nintregs
, int nsseregs
,
3533 const int *intreg
, int sse_regno
)
3535 /* The following variables hold the static issued_error state. */
3536 static bool issued_sse_arg_error
;
3537 static bool issued_sse_ret_error
;
3538 static bool issued_x87_ret_error
;
3540 enum machine_mode tmpmode
;
3542 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3543 enum x86_64_reg_class
class[MAX_CLASSES
];
3547 int needed_sseregs
, needed_intregs
;
3548 rtx exp
[MAX_CLASSES
];
3551 n
= classify_argument (mode
, type
, class, 0);
3554 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3557 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3560 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3561 some less clueful developer tries to use floating-point anyway. */
3562 if (needed_sseregs
&& !TARGET_SSE
)
3566 if (!issued_sse_ret_error
)
3568 error ("SSE register return with SSE disabled");
3569 issued_sse_ret_error
= true;
3572 else if (!issued_sse_arg_error
)
3574 error ("SSE register argument with SSE disabled");
3575 issued_sse_arg_error
= true;
3580 /* Likewise, error if the ABI requires us to return values in the
3581 x87 registers and the user specified -mno-80387. */
3582 if (!TARGET_80387
&& in_return
)
3583 for (i
= 0; i
< n
; i
++)
3584 if (class[i
] == X86_64_X87_CLASS
3585 || class[i
] == X86_64_X87UP_CLASS
3586 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3588 if (!issued_x87_ret_error
)
3590 error ("x87 register return with x87 disabled");
3591 issued_x87_ret_error
= true;
3596 /* First construct simple cases. Avoid SCmode, since we want to use
3597 single register to pass this type. */
3598 if (n
== 1 && mode
!= SCmode
)
3601 case X86_64_INTEGER_CLASS
:
3602 case X86_64_INTEGERSI_CLASS
:
3603 return gen_rtx_REG (mode
, intreg
[0]);
3604 case X86_64_SSE_CLASS
:
3605 case X86_64_SSESF_CLASS
:
3606 case X86_64_SSEDF_CLASS
:
3607 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3608 case X86_64_X87_CLASS
:
3609 case X86_64_COMPLEX_X87_CLASS
:
3610 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3611 case X86_64_NO_CLASS
:
3612 /* Zero sized array, struct or class. */
3617 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3619 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3622 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3623 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3624 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3625 && class[1] == X86_64_INTEGER_CLASS
3626 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3627 && intreg
[0] + 1 == intreg
[1])
3628 return gen_rtx_REG (mode
, intreg
[0]);
3630 /* Otherwise figure out the entries of the PARALLEL. */
3631 for (i
= 0; i
< n
; i
++)
3635 case X86_64_NO_CLASS
:
3637 case X86_64_INTEGER_CLASS
:
3638 case X86_64_INTEGERSI_CLASS
:
3639 /* Merge TImodes on aligned occasions here too. */
3640 if (i
* 8 + 8 > bytes
)
3641 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3642 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3646 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3647 if (tmpmode
== BLKmode
)
3649 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3650 gen_rtx_REG (tmpmode
, *intreg
),
3654 case X86_64_SSESF_CLASS
:
3655 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3656 gen_rtx_REG (SFmode
,
3657 SSE_REGNO (sse_regno
)),
3661 case X86_64_SSEDF_CLASS
:
3662 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3663 gen_rtx_REG (DFmode
,
3664 SSE_REGNO (sse_regno
)),
3668 case X86_64_SSE_CLASS
:
3669 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3673 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3674 gen_rtx_REG (tmpmode
,
3675 SSE_REGNO (sse_regno
)),
3677 if (tmpmode
== TImode
)
3686 /* Empty aligned struct, union or class. */
3690 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3691 for (i
= 0; i
< nexps
; i
++)
3692 XVECEXP (ret
, 0, i
) = exp
[i
];
3696 /* Update the data in CUM to advance over an argument of mode MODE
3697 and data type TYPE. (TYPE is null for libcalls where that information
3698 may not be available.) */
3701 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3702 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3718 cum
->words
+= words
;
3719 cum
->nregs
-= words
;
3720 cum
->regno
+= words
;
3722 if (cum
->nregs
<= 0)
3730 if (cum
->float_in_sse
< 2)
3733 if (cum
->float_in_sse
< 1)
3744 if (!type
|| !AGGREGATE_TYPE_P (type
))
3746 cum
->sse_words
+= words
;
3747 cum
->sse_nregs
-= 1;
3748 cum
->sse_regno
+= 1;
3749 if (cum
->sse_nregs
<= 0)
3761 if (!type
|| !AGGREGATE_TYPE_P (type
))
3763 cum
->mmx_words
+= words
;
3764 cum
->mmx_nregs
-= 1;
3765 cum
->mmx_regno
+= 1;
3766 if (cum
->mmx_nregs
<= 0)
3777 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3778 tree type
, HOST_WIDE_INT words
)
3780 int int_nregs
, sse_nregs
;
3782 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3783 cum
->words
+= words
;
3784 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3786 cum
->nregs
-= int_nregs
;
3787 cum
->sse_nregs
-= sse_nregs
;
3788 cum
->regno
+= int_nregs
;
3789 cum
->sse_regno
+= sse_nregs
;
3792 cum
->words
+= words
;
3796 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
3797 HOST_WIDE_INT words
)
3799 /* Otherwise, this should be passed indirect. */
3800 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
3802 cum
->words
+= words
;
3811 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3812 tree type
, int named ATTRIBUTE_UNUSED
)
3814 HOST_WIDE_INT bytes
, words
;
3816 if (mode
== BLKmode
)
3817 bytes
= int_size_in_bytes (type
);
3819 bytes
= GET_MODE_SIZE (mode
);
3820 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3823 mode
= type_natural_mode (type
);
3825 if (TARGET_64BIT_MS_ABI
)
3826 function_arg_advance_ms_64 (cum
, bytes
, words
);
3827 else if (TARGET_64BIT
)
3828 function_arg_advance_64 (cum
, mode
, type
, words
);
3830 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
3833 /* Define where to put the arguments to a function.
3834 Value is zero to push the argument on the stack,
3835 or a hard register in which to store the argument.
3837 MODE is the argument's machine mode.
3838 TYPE is the data type of the argument (as a tree).
3839 This is null for libcalls where that information may
3841 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3842 the preceding args and about the function being called.
3843 NAMED is nonzero if this argument is a named parameter
3844 (otherwise it is an extra parameter matching an ellipsis). */
3847 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3848 enum machine_mode orig_mode
, tree type
,
3849 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3851 static bool warnedsse
, warnedmmx
;
3853 /* Avoid the AL settings for the Unix64 ABI. */
3854 if (mode
== VOIDmode
)
3870 if (words
<= cum
->nregs
)
3872 int regno
= cum
->regno
;
3874 /* Fastcall allocates the first two DWORD (SImode) or
3875 smaller arguments to ECX and EDX. */
3878 if (mode
== BLKmode
|| mode
== DImode
)
3881 /* ECX not EAX is the first allocated register. */
3885 return gen_rtx_REG (mode
, regno
);
3890 if (cum
->float_in_sse
< 2)
3893 if (cum
->float_in_sse
< 1)
3903 if (!type
|| !AGGREGATE_TYPE_P (type
))
3905 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3908 warning (0, "SSE vector argument without SSE enabled "
3912 return gen_reg_or_parallel (mode
, orig_mode
,
3913 cum
->sse_regno
+ FIRST_SSE_REG
);
3921 if (!type
|| !AGGREGATE_TYPE_P (type
))
3923 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3926 warning (0, "MMX vector argument without MMX enabled "
3930 return gen_reg_or_parallel (mode
, orig_mode
,
3931 cum
->mmx_regno
+ FIRST_MMX_REG
);
3940 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3941 enum machine_mode orig_mode
, tree type
)
3943 /* Handle a hidden AL argument containing number of registers
3944 for varargs x86-64 functions. */
3945 if (mode
== VOIDmode
)
3946 return GEN_INT (cum
->maybe_vaarg
3947 ? (cum
->sse_nregs
< 0
3952 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3954 &x86_64_int_parameter_registers
[cum
->regno
],
3959 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3960 enum machine_mode orig_mode
, int named
)
3964 /* Avoid the AL settings for the Unix64 ABI. */
3965 if (mode
== VOIDmode
)
3968 /* If we've run out of registers, it goes on the stack. */
3969 if (cum
->nregs
== 0)
3972 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
3974 /* Only floating point modes are passed in anything but integer regs. */
3975 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
3978 regno
= cum
->regno
+ FIRST_SSE_REG
;
3983 /* Unnamed floating parameters are passed in both the
3984 SSE and integer registers. */
3985 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
3986 t2
= gen_rtx_REG (mode
, regno
);
3987 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
3988 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
3989 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
3993 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
3997 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
3998 tree type
, int named
)
4000 enum machine_mode mode
= omode
;
4001 HOST_WIDE_INT bytes
, words
;
4003 if (mode
== BLKmode
)
4004 bytes
= int_size_in_bytes (type
);
4006 bytes
= GET_MODE_SIZE (mode
);
4007 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4009 /* To simplify the code below, represent vector types with a vector mode
4010 even if MMX/SSE are not active. */
4011 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4012 mode
= type_natural_mode (type
);
4014 if (TARGET_64BIT_MS_ABI
)
4015 return function_arg_ms_64 (cum
, mode
, omode
, named
);
4016 else if (TARGET_64BIT
)
4017 return function_arg_64 (cum
, mode
, omode
, type
);
4019 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
4022 /* A C expression that indicates when an argument must be passed by
4023 reference. If nonzero for an argument, a copy of that argument is
4024 made in memory and a pointer to the argument is passed instead of
4025 the argument itself. The pointer is passed in whatever way is
4026 appropriate for passing a pointer to that type. */
4029 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4030 enum machine_mode mode ATTRIBUTE_UNUSED
,
4031 tree type
, bool named ATTRIBUTE_UNUSED
)
4033 if (TARGET_64BIT_MS_ABI
)
4037 /* Arrays are passed by reference. */
4038 if (TREE_CODE (type
) == ARRAY_TYPE
)
4041 if (AGGREGATE_TYPE_P (type
))
4043 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4044 are passed by reference. */
4045 int el2
= exact_log2 (int_size_in_bytes (type
));
4046 return !(el2
>= 0 && el2
<= 3);
4050 /* __m128 is passed by reference. */
4051 /* ??? How to handle complex? For now treat them as structs,
4052 and pass them by reference if they're too large. */
4053 if (GET_MODE_SIZE (mode
) > 8)
4056 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
4062 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4063 ABI. Only called if TARGET_SSE. */
4065 contains_128bit_aligned_vector_p (tree type
)
4067 enum machine_mode mode
= TYPE_MODE (type
);
4068 if (SSE_REG_MODE_P (mode
)
4069 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4071 if (TYPE_ALIGN (type
) < 128)
4074 if (AGGREGATE_TYPE_P (type
))
4076 /* Walk the aggregates recursively. */
4077 switch (TREE_CODE (type
))
4081 case QUAL_UNION_TYPE
:
4085 /* Walk all the structure fields. */
4086 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4088 if (TREE_CODE (field
) == FIELD_DECL
4089 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4096 /* Just for use if some languages passes arrays by value. */
4097 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4108 /* Gives the alignment boundary, in bits, of an argument with the
4109 specified mode and type. */
4112 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4116 align
= TYPE_ALIGN (type
);
4118 align
= GET_MODE_ALIGNMENT (mode
);
4119 if (align
< PARM_BOUNDARY
)
4120 align
= PARM_BOUNDARY
;
4123 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4124 make an exception for SSE modes since these require 128bit
4127 The handling here differs from field_alignment. ICC aligns MMX
4128 arguments to 4 byte boundaries, while structure fields are aligned
4129 to 8 byte boundaries. */
4131 align
= PARM_BOUNDARY
;
4134 if (!SSE_REG_MODE_P (mode
))
4135 align
= PARM_BOUNDARY
;
4139 if (!contains_128bit_aligned_vector_p (type
))
4140 align
= PARM_BOUNDARY
;
4148 /* Return true if N is a possible register number of function value. */
4151 ix86_function_value_regno_p (int regno
)
4158 case FIRST_FLOAT_REG
:
4159 if (TARGET_64BIT_MS_ABI
)
4161 return TARGET_FLOAT_RETURNS_IN_80387
;
4167 if (TARGET_MACHO
|| TARGET_64BIT
)
4175 /* Define how to find the value returned by a function.
4176 VALTYPE is the data type of the value (as a tree).
4177 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4178 otherwise, FUNC is 0. */
4181 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4182 tree fntype
, tree fn
)
4186 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4187 we normally prevent this case when mmx is not available. However
4188 some ABIs may require the result to be returned like DImode. */
4189 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4190 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4192 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4193 we prevent this case when sse is not available. However some ABIs
4194 may require the result to be returned like integer TImode. */
4195 else if (mode
== TImode
4196 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4197 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4199 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4200 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4201 regno
= FIRST_FLOAT_REG
;
4203 /* Most things go in %eax. */
4206 /* Override FP return register with %xmm0 for local functions when
4207 SSE math is enabled or for functions with sseregparm attribute. */
4208 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4210 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4211 if ((sse_level
>= 1 && mode
== SFmode
)
4212 || (sse_level
== 2 && mode
== DFmode
))
4213 regno
= FIRST_SSE_REG
;
4216 return gen_rtx_REG (orig_mode
, regno
);
4220 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4225 /* Handle libcalls, which don't provide a type node. */
4226 if (valtype
== NULL
)
4238 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4241 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4245 return gen_rtx_REG (mode
, 0);
4249 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4250 REGPARM_MAX
, SSE_REGPARM_MAX
,
4251 x86_64_int_return_registers
, 0);
4253 /* For zero sized structures, construct_container returns NULL, but we
4254 need to keep rest of compiler happy by returning meaningful value. */
4256 ret
= gen_rtx_REG (orig_mode
, 0);
4262 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
4264 unsigned int regno
= 0;
4268 if (mode
== SFmode
|| mode
== DFmode
)
4269 regno
= FIRST_SSE_REG
;
4270 else if (VECTOR_MODE_P (mode
) || GET_MODE_SIZE (mode
) == 16)
4271 regno
= FIRST_SSE_REG
;
4274 return gen_rtx_REG (orig_mode
, regno
);
4278 ix86_function_value_1 (tree valtype
, tree fntype_or_decl
,
4279 enum machine_mode orig_mode
, enum machine_mode mode
)
4284 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4285 fn
= fntype_or_decl
;
4286 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4288 if (TARGET_64BIT_MS_ABI
)
4289 return function_value_ms_64 (orig_mode
, mode
);
4290 else if (TARGET_64BIT
)
4291 return function_value_64 (orig_mode
, mode
, valtype
);
4293 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
4297 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4298 bool outgoing ATTRIBUTE_UNUSED
)
4300 enum machine_mode mode
, orig_mode
;
4302 orig_mode
= TYPE_MODE (valtype
);
4303 mode
= type_natural_mode (valtype
);
4304 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
4308 ix86_libcall_value (enum machine_mode mode
)
4310 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
4313 /* Return true iff type is returned in memory. */
4316 return_in_memory_32 (tree type
, enum machine_mode mode
)
4320 if (mode
== BLKmode
)
4323 size
= int_size_in_bytes (type
);
4325 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4328 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4330 /* User-created vectors small enough to fit in EAX. */
4334 /* MMX/3dNow values are returned in MM0,
4335 except when it doesn't exist.  */
4337 return (TARGET_MMX
? 0 : 1);
4339 /* SSE values are returned in XMM0, except when it doesn't exist. */
4341 return (TARGET_SSE
? 0 : 1);
4356 return_in_memory_64 (tree type
, enum machine_mode mode
)
4358 int needed_intregs
, needed_sseregs
;
4359 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4363 return_in_memory_ms_64 (tree type
, enum machine_mode mode
)
4365 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4367 /* __m128 and friends are returned in xmm0. */
4368 if (size
== 16 && VECTOR_MODE_P (mode
))
4371 /* Otherwise, the size must be exactly in [1248]. */
4372 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8);
4376 ix86_return_in_memory (tree type
)
4378 enum machine_mode mode
= type_natural_mode (type
);
4380 if (TARGET_64BIT_MS_ABI
)
4381 return return_in_memory_ms_64 (type
, mode
);
4382 else if (TARGET_64BIT
)
4383 return return_in_memory_64 (type
, mode
);
4385 return return_in_memory_32 (type
, mode
);
4388 /* Return false iff TYPE is returned in memory. This version is used
4389 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4390 but differs notably in that when MMX is available, 8-byte vectors
4391 are returned in memory, rather than in MMX registers. */
4394 ix86_sol10_return_in_memory (tree type
)
4397 enum machine_mode mode
= type_natural_mode (type
);
4400 return return_in_memory_64 (type
, mode
);
4402 if (mode
== BLKmode
)
4405 size
= int_size_in_bytes (type
);
4407 if (VECTOR_MODE_P (mode
))
4409 /* Return in memory only if MMX registers *are* available. This
4410 seems backwards, but it is consistent with the existing
4417 else if (mode
== TImode
)
4419 else if (mode
== XFmode
)
4425 /* When returning SSE vector types, we have a choice of either
4426 (1) being abi incompatible with a -march switch, or
4427 (2) generating an error.
4428 Given no good solution, I think the safest thing is one warning.
4429 The user won't be able to use -Werror, but....
4431 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4432 called in response to actually generating a caller or callee that
4433 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4434 via aggregate_value_p for general type probing from tree-ssa. */
4437 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4439 static bool warnedsse
, warnedmmx
;
4441 if (!TARGET_64BIT
&& type
)
4443 /* Look at the return type of the function, not the function type. */
4444 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4446 if (!TARGET_SSE
&& !warnedsse
)
4449 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4452 warning (0, "SSE vector return without SSE enabled "
4457 if (!TARGET_MMX
&& !warnedmmx
)
4459 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4462 warning (0, "MMX vector return without MMX enabled "
4472 /* Create the va_list data type. */
4475 ix86_build_builtin_va_list (void)
4477 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4479 /* For i386 we use plain pointer to argument area. */
4480 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4481 return build_pointer_type (char_type_node
);
4483 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4484 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4486 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4487 unsigned_type_node
);
4488 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4489 unsigned_type_node
);
4490 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4492 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4495 va_list_gpr_counter_field
= f_gpr
;
4496 va_list_fpr_counter_field
= f_fpr
;
4498 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4499 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4500 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4501 DECL_FIELD_CONTEXT (f_sav
) = record
;
4503 TREE_CHAIN (record
) = type_decl
;
4504 TYPE_NAME (record
) = type_decl
;
4505 TYPE_FIELDS (record
) = f_gpr
;
4506 TREE_CHAIN (f_gpr
) = f_fpr
;
4507 TREE_CHAIN (f_fpr
) = f_ovf
;
4508 TREE_CHAIN (f_ovf
) = f_sav
;
4510 layout_type (record
);
4512 /* The correct type is an array type of one element. */
4513 return build_array_type (record
, build_index_type (size_zero_node
));
4516 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4519 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4529 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4532 /* Indicate to allocate space on the stack for varargs save area. */
4533 ix86_save_varrargs_registers
= 1;
4534 cfun
->stack_alignment_needed
= 128;
4536 save_area
= frame_pointer_rtx
;
4537 set
= get_varargs_alias_set ();
4539 for (i
= cum
->regno
;
4541 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4544 mem
= gen_rtx_MEM (Pmode
,
4545 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4546 MEM_NOTRAP_P (mem
) = 1;
4547 set_mem_alias_set (mem
, set
);
4548 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4549 x86_64_int_parameter_registers
[i
]));
4552 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4554 /* Now emit code to save SSE registers. The AX parameter contains number
4555 of SSE parameter registers used to call this function. We use
4556 sse_prologue_save insn template that produces computed jump across
4557 SSE saves. We need some preparation work to get this working. */
4559 label
= gen_label_rtx ();
4560 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4562 /* Compute address to jump to :
4563 label - 5*eax + nnamed_sse_arguments*5 */
4564 tmp_reg
= gen_reg_rtx (Pmode
);
4565 nsse_reg
= gen_reg_rtx (Pmode
);
4566 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4567 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4568 gen_rtx_MULT (Pmode
, nsse_reg
,
4573 gen_rtx_CONST (DImode
,
4574 gen_rtx_PLUS (DImode
,
4576 GEN_INT (cum
->sse_regno
* 4))));
4578 emit_move_insn (nsse_reg
, label_ref
);
4579 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4581 /* Compute address of memory block we save into. We always use pointer
4582 pointing 127 bytes after first byte to store - this is needed to keep
4583 instruction size limited by 4 bytes. */
4584 tmp_reg
= gen_reg_rtx (Pmode
);
4585 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4586 plus_constant (save_area
,
4587 8 * REGPARM_MAX
+ 127)));
4588 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4589 MEM_NOTRAP_P (mem
) = 1;
4590 set_mem_alias_set (mem
, set
);
4591 set_mem_align (mem
, BITS_PER_WORD
);
4593 /* And finally do the dirty job! */
4594 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4595 GEN_INT (cum
->sse_regno
), label
));
4600 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4602 int set
= get_varargs_alias_set ();
4605 for (i
= cum
->regno
; i
< REGPARM_MAX
; i
++)
4609 mem
= gen_rtx_MEM (Pmode
,
4610 plus_constant (virtual_incoming_args_rtx
,
4611 i
* UNITS_PER_WORD
));
4612 MEM_NOTRAP_P (mem
) = 1;
4613 set_mem_alias_set (mem
, set
);
4615 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4616 emit_move_insn (mem
, reg
);
4621 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4622 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4625 CUMULATIVE_ARGS next_cum
;
4629 /* This argument doesn't appear to be used anymore. Which is good,
4630 because the old code here didn't suppress rtl generation. */
4631 gcc_assert (!no_rtl
);
4636 fntype
= TREE_TYPE (current_function_decl
);
4637 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4638 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4639 != void_type_node
));
4641 /* For varargs, we do not want to skip the dummy va_dcl argument.
4642 For stdargs, we do want to skip the last named argument. */
4645 function_arg_advance (&next_cum
, mode
, type
, 1);
4647 if (TARGET_64BIT_MS_ABI
)
4648 setup_incoming_varargs_ms_64 (&next_cum
);
4650 setup_incoming_varargs_64 (&next_cum
);
4653 /* Implement va_start. */
4656 ix86_va_start (tree valist
, rtx nextarg
)
4658 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4659 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4660 tree gpr
, fpr
, ovf
, sav
, t
;
4663 /* Only 64bit target needs something special. */
4664 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4666 std_expand_builtin_va_start (valist
, nextarg
);
4670 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4671 f_fpr
= TREE_CHAIN (f_gpr
);
4672 f_ovf
= TREE_CHAIN (f_fpr
);
4673 f_sav
= TREE_CHAIN (f_ovf
);
4675 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4676 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4677 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4678 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4679 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4681 /* Count number of gp and fp argument registers used. */
4682 words
= current_function_args_info
.words
;
4683 n_gpr
= current_function_args_info
.regno
;
4684 n_fpr
= current_function_args_info
.sse_regno
;
4686 if (cfun
->va_list_gpr_size
)
4688 type
= TREE_TYPE (gpr
);
4689 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4690 build_int_cst (type
, n_gpr
* 8));
4691 TREE_SIDE_EFFECTS (t
) = 1;
4692 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4695 if (cfun
->va_list_fpr_size
)
4697 type
= TREE_TYPE (fpr
);
4698 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4699 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4700 TREE_SIDE_EFFECTS (t
) = 1;
4701 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4704 /* Find the overflow area. */
4705 type
= TREE_TYPE (ovf
);
4706 t
= make_tree (type
, virtual_incoming_args_rtx
);
4708 t
= build2 (PLUS_EXPR
, type
, t
,
4709 build_int_cst (type
, words
* UNITS_PER_WORD
));
4710 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4711 TREE_SIDE_EFFECTS (t
) = 1;
4712 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4714 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4716 /* Find the register save area.
4717 Prologue of the function save it right above stack frame. */
4718 type
= TREE_TYPE (sav
);
4719 t
= make_tree (type
, frame_pointer_rtx
);
4720 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4721 TREE_SIDE_EFFECTS (t
) = 1;
4722 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4726 /* Implement va_arg. */
4729 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4731 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4732 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4733 tree gpr
, fpr
, ovf
, sav
, t
;
4735 tree lab_false
, lab_over
= NULL_TREE
;
4740 enum machine_mode nat_mode
;
4742 /* Only 64bit target needs something special. */
4743 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4744 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4746 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4747 f_fpr
= TREE_CHAIN (f_gpr
);
4748 f_ovf
= TREE_CHAIN (f_fpr
);
4749 f_sav
= TREE_CHAIN (f_ovf
);
4751 valist
= build_va_arg_indirect_ref (valist
);
4752 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4753 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4754 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4755 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4757 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4759 type
= build_pointer_type (type
);
4760 size
= int_size_in_bytes (type
);
4761 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4763 nat_mode
= type_natural_mode (type
);
4764 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4765 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4767 /* Pull the value out of the saved registers. */
4769 addr
= create_tmp_var (ptr_type_node
, "addr");
4770 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4774 int needed_intregs
, needed_sseregs
;
4776 tree int_addr
, sse_addr
;
4778 lab_false
= create_artificial_label ();
4779 lab_over
= create_artificial_label ();
4781 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4783 need_temp
= (!REG_P (container
)
4784 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4785 || TYPE_ALIGN (type
) > 128));
4787 /* In case we are passing structure, verify that it is consecutive block
4788 on the register save area. If not we need to do moves. */
4789 if (!need_temp
&& !REG_P (container
))
4791 /* Verify that all registers are strictly consecutive */
4792 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4796 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4798 rtx slot
= XVECEXP (container
, 0, i
);
4799 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4800 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4808 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4810 rtx slot
= XVECEXP (container
, 0, i
);
4811 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4812 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4824 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4825 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4826 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4827 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4830 /* First ensure that we fit completely in registers. */
4833 t
= build_int_cst (TREE_TYPE (gpr
),
4834 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4835 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4836 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4837 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4838 gimplify_and_add (t
, pre_p
);
4842 t
= build_int_cst (TREE_TYPE (fpr
),
4843 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4845 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4846 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4847 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4848 gimplify_and_add (t
, pre_p
);
4851 /* Compute index to start of area used for integer regs. */
4854 /* int_addr = gpr + sav; */
4855 t
= fold_convert (ptr_type_node
, gpr
);
4856 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4857 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4858 gimplify_and_add (t
, pre_p
);
4862 /* sse_addr = fpr + sav; */
4863 t
= fold_convert (ptr_type_node
, fpr
);
4864 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4865 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4866 gimplify_and_add (t
, pre_p
);
4871 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4874 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4875 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4876 gimplify_and_add (t
, pre_p
);
4878 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4880 rtx slot
= XVECEXP (container
, 0, i
);
4881 rtx reg
= XEXP (slot
, 0);
4882 enum machine_mode mode
= GET_MODE (reg
);
4883 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4884 tree addr_type
= build_pointer_type (piece_type
);
4887 tree dest_addr
, dest
;
4889 if (SSE_REGNO_P (REGNO (reg
)))
4891 src_addr
= sse_addr
;
4892 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4896 src_addr
= int_addr
;
4897 src_offset
= REGNO (reg
) * 8;
4899 src_addr
= fold_convert (addr_type
, src_addr
);
4900 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4901 size_int (src_offset
));
4902 src
= build_va_arg_indirect_ref (src_addr
);
4904 dest_addr
= fold_convert (addr_type
, addr
);
4905 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4906 size_int (INTVAL (XEXP (slot
, 1))));
4907 dest
= build_va_arg_indirect_ref (dest_addr
);
4909 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4910 gimplify_and_add (t
, pre_p
);
4916 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4917 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4918 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4919 gimplify_and_add (t
, pre_p
);
4923 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4924 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4925 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4926 gimplify_and_add (t
, pre_p
);
4929 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4930 gimplify_and_add (t
, pre_p
);
4932 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4933 append_to_statement_list (t
, pre_p
);
4936 /* ... otherwise out of the overflow area. */
4938 /* Care for on-stack alignment if needed. */
4939 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4940 || integer_zerop (TYPE_SIZE (type
)))
4944 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4945 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4946 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4947 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4948 build_int_cst (TREE_TYPE (t
), -align
));
4950 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4952 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4953 gimplify_and_add (t2
, pre_p
);
4955 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4956 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4957 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4958 gimplify_and_add (t
, pre_p
);
4962 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4963 append_to_statement_list (t
, pre_p
);
4966 ptrtype
= build_pointer_type (type
);
4967 addr
= fold_convert (ptrtype
, addr
);
4970 addr
= build_va_arg_indirect_ref (addr
);
4971 return build_va_arg_indirect_ref (addr
);
4974 /* Return nonzero if OPNUM's MEM should be matched
4975 in movabs* patterns. */
4978 ix86_check_movabs (rtx insn
, int opnum
)
4982 set
= PATTERN (insn
);
4983 if (GET_CODE (set
) == PARALLEL
)
4984 set
= XVECEXP (set
, 0, 0);
4985 gcc_assert (GET_CODE (set
) == SET
);
4986 mem
= XEXP (set
, opnum
);
4987 while (GET_CODE (mem
) == SUBREG
)
4988 mem
= SUBREG_REG (mem
);
4989 gcc_assert (MEM_P (mem
));
4990 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4993 /* Initialize the table of extra 80387 mathematical constants. */
4996 init_ext_80387_constants (void)
4998 static const char * cst
[5] =
5000 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5001 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5002 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5003 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5004 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5008 for (i
= 0; i
< 5; i
++)
5010 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5011 /* Ensure each constant is rounded to XFmode precision. */
5012 real_convert (&ext_80387_constants_table
[i
],
5013 XFmode
, &ext_80387_constants_table
[i
]);
5016 ext_80387_constants_init
= 1;
5019 /* Return true if the constant is something that can be loaded with
5020 a special instruction. */
5023 standard_80387_constant_p (rtx x
)
5025 enum machine_mode mode
= GET_MODE (x
);
5029 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
5032 if (x
== CONST0_RTX (mode
))
5034 if (x
== CONST1_RTX (mode
))
5037 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5039 /* For XFmode constants, try to find a special 80387 instruction when
5040 optimizing for size or on those CPUs that benefit from them. */
5042 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5046 if (! ext_80387_constants_init
)
5047 init_ext_80387_constants ();
5049 for (i
= 0; i
< 5; i
++)
5050 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5054 /* Load of the constant -0.0 or -1.0 will be split as
5055 fldz;fchs or fld1;fchs sequence. */
5056 if (real_isnegzero (&r
))
5058 if (real_identical (&r
, &dconstm1
))
5064 /* Return the opcode of the special instruction to be used to load
5068 standard_80387_constant_opcode (rtx x
)
5070 switch (standard_80387_constant_p (x
))
5094 /* Return the CONST_DOUBLE representing the 80387 constant that is
5095 loaded by the specified special instruction. The argument IDX
5096 matches the return value from standard_80387_constant_p. */
5099 standard_80387_constant_rtx (int idx
)
5103 if (! ext_80387_constants_init
)
5104 init_ext_80387_constants ();
5120 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5124 /* Return 1 if mode is a valid mode for sse. */
5126 standard_sse_mode_p (enum machine_mode mode
)
5143 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5146 standard_sse_constant_p (rtx x
)
5148 enum machine_mode mode
= GET_MODE (x
);
5150 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5152 if (vector_all_ones_operand (x
, mode
)
5153 && standard_sse_mode_p (mode
))
5154 return TARGET_SSE2
? 2 : -1;
5159 /* Return the opcode of the special instruction to be used to load
5163 standard_sse_constant_opcode (rtx insn
, rtx x
)
5165 switch (standard_sse_constant_p (x
))
5168 if (get_attr_mode (insn
) == MODE_V4SF
)
5169 return "xorps\t%0, %0";
5170 else if (get_attr_mode (insn
) == MODE_V2DF
)
5171 return "xorpd\t%0, %0";
5173 return "pxor\t%0, %0";
5175 return "pcmpeqd\t%0, %0";
5180 /* Returns 1 if OP contains a symbol reference */
5183 symbolic_reference_mentioned_p (rtx op
)
5188 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5191 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5192 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5198 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5199 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5203 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5210 /* Return 1 if it is appropriate to emit `ret' instructions in the
5211 body of a function. Do this only if the epilogue is simple, needing a
5212 couple of insns. Prior to reloading, we can't tell how many registers
5213 must be saved, so return 0 then. Return 0 if there is no frame
5214 marker to de-allocate. */
5217 ix86_can_use_return_insn_p (void)
5219 struct ix86_frame frame
;
5221 if (! reload_completed
|| frame_pointer_needed
)
5224 /* Don't allow more than 32 pop, since that's all we can do
5225 with one instruction. */
5226 if (current_function_pops_args
5227 && current_function_args_size
>= 32768)
5230 ix86_compute_frame_layout (&frame
);
5231 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5234 /* Value should be nonzero if functions must have frame pointers.
5235 Zero means the frame pointer need not be set up (and parms may
5236 be accessed via the stack pointer) in functions that seem suitable. */
5239 ix86_frame_pointer_required (void)
5241 /* If we accessed previous frames, then the generated code expects
5242 to be able to access the saved ebp value in our frame. */
5243 if (cfun
->machine
->accesses_prev_frame
)
5246 /* Several x86 os'es need a frame pointer for other reasons,
5247 usually pertaining to setjmp. */
5248 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5251 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5252 the frame pointer by default. Turn it back on now if we've not
5253 got a leaf function. */
5254 if (TARGET_OMIT_LEAF_FRAME_POINTER
5255 && (!current_function_is_leaf
5256 || ix86_current_function_calls_tls_descriptor
))
5259 if (current_function_profile
)
5265 /* Record that the current function accesses previous call frames. */
5268 ix86_setup_frame_addresses (void)
5270 cfun
->machine
->accesses_prev_frame
= 1;
5273 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5274 # define USE_HIDDEN_LINKONCE 1
5276 # define USE_HIDDEN_LINKONCE 0
5279 static int pic_labels_used
;
5281 /* Fills in the label name that should be used for a pc thunk for
5282 the given register. */
5285 get_pc_thunk_name (char name
[32], unsigned int regno
)
5287 gcc_assert (!TARGET_64BIT
);
5289 if (USE_HIDDEN_LINKONCE
)
5290 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5292 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5296 /* This function generates code for -fpic that loads %ebx with
5297 the return address of the caller and then returns. */
5300 ix86_file_end (void)
5305 for (regno
= 0; regno
< 8; ++regno
)
5309 if (! ((pic_labels_used
>> regno
) & 1))
5312 get_pc_thunk_name (name
, regno
);
5317 switch_to_section (darwin_sections
[text_coal_section
]);
5318 fputs ("\t.weak_definition\t", asm_out_file
);
5319 assemble_name (asm_out_file
, name
);
5320 fputs ("\n\t.private_extern\t", asm_out_file
);
5321 assemble_name (asm_out_file
, name
);
5322 fputs ("\n", asm_out_file
);
5323 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5327 if (USE_HIDDEN_LINKONCE
)
5331 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5333 TREE_PUBLIC (decl
) = 1;
5334 TREE_STATIC (decl
) = 1;
5335 DECL_ONE_ONLY (decl
) = 1;
5337 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5338 switch_to_section (get_named_section (decl
, NULL
, 0));
5340 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5341 fputs ("\t.hidden\t", asm_out_file
);
5342 assemble_name (asm_out_file
, name
);
5343 fputc ('\n', asm_out_file
);
5344 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5348 switch_to_section (text_section
);
5349 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5352 xops
[0] = gen_rtx_REG (SImode
, regno
);
5353 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5354 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5355 output_asm_insn ("ret", xops
);
5358 if (NEED_INDICATE_EXEC_STACK
)
5359 file_end_indicate_exec_stack ();
5362 /* Emit code for the SET_GOT patterns. */
5365 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5371 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5373 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5374 xops
[2] = gen_rtx_MEM (Pmode
,
5375 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5376 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5378 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5379 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5380 an unadorned address. */
5381 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5382 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5383 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5387 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5389 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5391 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5394 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5396 output_asm_insn ("call\t%a2", xops
);
5399 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5400 is what will be referenced by the Mach-O PIC subsystem. */
5402 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5405 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5406 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5409 output_asm_insn ("pop{l}\t%0", xops
);
5414 get_pc_thunk_name (name
, REGNO (dest
));
5415 pic_labels_used
|= 1 << REGNO (dest
);
5417 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5418 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5419 output_asm_insn ("call\t%X2", xops
);
5420 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5421 is what will be referenced by the Mach-O PIC subsystem. */
5424 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5426 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5427 CODE_LABEL_NUMBER (label
));
5434 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5435 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5437 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5442 /* Generate an "push" pattern for input ARG. */
5447 return gen_rtx_SET (VOIDmode
,
5449 gen_rtx_PRE_DEC (Pmode
,
5450 stack_pointer_rtx
)),
5454 /* Return >= 0 if there is an unused call-clobbered register available
5455 for the entire function. */
5458 ix86_select_alt_pic_regnum (void)
5460 if (current_function_is_leaf
&& !current_function_profile
5461 && !ix86_current_function_calls_tls_descriptor
)
5464 for (i
= 2; i
>= 0; --i
)
5465 if (!regs_ever_live
[i
])
5469 return INVALID_REGNUM
;
5472 /* Return 1 if we need to save REGNO. */
5474 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5476 if (pic_offset_table_rtx
5477 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5478 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5479 || current_function_profile
5480 || current_function_calls_eh_return
5481 || current_function_uses_const_pool
))
5483 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5488 if (current_function_calls_eh_return
&& maybe_eh_return
)
5493 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5494 if (test
== INVALID_REGNUM
)
5501 if (cfun
->machine
->force_align_arg_pointer
5502 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5505 return (regs_ever_live
[regno
]
5506 && !call_used_regs
[regno
]
5507 && !fixed_regs
[regno
]
5508 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5511 /* Return number of registers to be saved on the stack. */
5514 ix86_nsaved_regs (void)
5519 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5520 if (ix86_save_reg (regno
, true))
5525 /* Return the offset between two registers, one to be eliminated, and the other
5526 its replacement, at the start of a routine. */
5529 ix86_initial_elimination_offset (int from
, int to
)
5531 struct ix86_frame frame
;
5532 ix86_compute_frame_layout (&frame
);
5534 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5535 return frame
.hard_frame_pointer_offset
;
5536 else if (from
== FRAME_POINTER_REGNUM
5537 && to
== HARD_FRAME_POINTER_REGNUM
)
5538 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5541 gcc_assert (to
== STACK_POINTER_REGNUM
);
5543 if (from
== ARG_POINTER_REGNUM
)
5544 return frame
.stack_pointer_offset
;
5546 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5547 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5551 /* Fill structure ix86_frame about frame of currently computed function. */
5554 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5556 HOST_WIDE_INT total_size
;
5557 unsigned int stack_alignment_needed
;
5558 HOST_WIDE_INT offset
;
5559 unsigned int preferred_alignment
;
5560 HOST_WIDE_INT size
= get_frame_size ();
5562 frame
->nregs
= ix86_nsaved_regs ();
5565 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5566 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5568 /* During reload iteration the amount of registers saved can change.
5569 Recompute the value as needed. Do not recompute when amount of registers
5570 didn't change as reload does multiple calls to the function and does not
5571 expect the decision to change within single iteration. */
5573 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5575 int count
= frame
->nregs
;
5577 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5578 /* The fast prologue uses move instead of push to save registers. This
5579 is significantly longer, but also executes faster as modern hardware
5580 can execute the moves in parallel, but can't do that for push/pop.
5582 Be careful about choosing what prologue to emit: When function takes
5583 many instructions to execute we may use slow version as well as in
5584 case function is known to be outside hot spot (this is known with
5585 feedback only). Weight the size of function by number of registers
5586 to save as it is cheap to use one or two push instructions but very
5587 slow to use many of them. */
5589 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5590 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5591 || (flag_branch_probabilities
5592 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5593 cfun
->machine
->use_fast_prologue_epilogue
= false;
5595 cfun
->machine
->use_fast_prologue_epilogue
5596 = !expensive_function_p (count
);
5598 if (TARGET_PROLOGUE_USING_MOVE
5599 && cfun
->machine
->use_fast_prologue_epilogue
)
5600 frame
->save_regs_using_mov
= true;
5602 frame
->save_regs_using_mov
= false;
5605 /* Skip return address and saved base pointer. */
5606 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5608 frame
->hard_frame_pointer_offset
= offset
;
5610 /* Do some sanity checking of stack_alignment_needed and
5611 preferred_alignment, since i386 port is the only using those features
5612 that may break easily. */
5614 gcc_assert (!size
|| stack_alignment_needed
);
5615 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5616 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5617 gcc_assert (stack_alignment_needed
5618 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5620 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5621 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5623 /* Register save area */
5624 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5627 if (ix86_save_varrargs_registers
)
5629 offset
+= X86_64_VARARGS_SIZE
;
5630 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5633 frame
->va_arg_size
= 0;
5635 /* Align start of frame for local function. */
5636 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5637 & -stack_alignment_needed
) - offset
;
5639 offset
+= frame
->padding1
;
5641 /* Frame pointer points here. */
5642 frame
->frame_pointer_offset
= offset
;
5646 /* Add outgoing arguments area. Can be skipped if we eliminated
5647 all the function calls as dead code.
5648 Skipping is however impossible when function calls alloca. Alloca
5649 expander assumes that last current_function_outgoing_args_size
5650 of stack frame are unused. */
5651 if (ACCUMULATE_OUTGOING_ARGS
5652 && (!current_function_is_leaf
|| current_function_calls_alloca
5653 || ix86_current_function_calls_tls_descriptor
))
5655 offset
+= current_function_outgoing_args_size
;
5656 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5659 frame
->outgoing_arguments_size
= 0;
5661 /* Align stack boundary. Only needed if we're calling another function
5663 if (!current_function_is_leaf
|| current_function_calls_alloca
5664 || ix86_current_function_calls_tls_descriptor
)
5665 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5666 & -preferred_alignment
) - offset
;
5668 frame
->padding2
= 0;
5670 offset
+= frame
->padding2
;
5672 /* We've reached end of stack frame. */
5673 frame
->stack_pointer_offset
= offset
;
5675 /* Size prologue needs to allocate. */
5676 frame
->to_allocate
=
5677 (size
+ frame
->padding1
+ frame
->padding2
5678 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5680 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5681 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5682 frame
->save_regs_using_mov
= false;
5684 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5685 && current_function_is_leaf
5686 && !ix86_current_function_calls_tls_descriptor
)
5688 frame
->red_zone_size
= frame
->to_allocate
;
5689 if (frame
->save_regs_using_mov
)
5690 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5691 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5692 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5695 frame
->red_zone_size
= 0;
5696 frame
->to_allocate
-= frame
->red_zone_size
;
5697 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5699 fprintf (stderr
, "\n");
5700 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5701 fprintf (stderr
, "size: %ld\n", (long)size
);
5702 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5703 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5704 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5705 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5706 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5707 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5708 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5709 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5710 (long)frame
->hard_frame_pointer_offset
);
5711 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5712 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5713 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5714 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5718 /* Emit code to save registers in the prologue. */
5721 ix86_emit_save_regs (void)
5726 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5727 if (ix86_save_reg (regno
, true))
5729 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5730 RTX_FRAME_RELATED_P (insn
) = 1;
5734 /* Emit code to save registers using MOV insns. First register
5735 is restored from POINTER + OFFSET. */
5737 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5742 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5743 if (ix86_save_reg (regno
, true))
5745 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5747 gen_rtx_REG (Pmode
, regno
));
5748 RTX_FRAME_RELATED_P (insn
) = 1;
5749 offset
+= UNITS_PER_WORD
;
5753 /* Expand prologue or epilogue stack adjustment.
5754 The pattern exist to put a dependency on all ebp-based memory accesses.
5755 STYLE should be negative if instructions should be marked as frame related,
5756 zero if %r11 register is live and cannot be freely used and positive
5760 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5765 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5766 else if (x86_64_immediate_operand (offset
, DImode
))
5767 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5771 /* r11 is used by indirect sibcall return as well, set before the
5772 epilogue and used after the epilogue. ATM indirect sibcall
5773 shouldn't be used together with huge frame sizes in one
5774 function because of the frame_size check in sibcall.c. */
5776 r11
= gen_rtx_REG (DImode
, R11_REG
);
5777 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5779 RTX_FRAME_RELATED_P (insn
) = 1;
5780 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5784 RTX_FRAME_RELATED_P (insn
) = 1;
5787 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5790 ix86_internal_arg_pointer (void)
5792 bool has_force_align_arg_pointer
=
5793 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5794 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5795 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5796 && DECL_NAME (current_function_decl
)
5797 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5798 && DECL_FILE_SCOPE_P (current_function_decl
))
5799 || ix86_force_align_arg_pointer
5800 || has_force_align_arg_pointer
)
5802 /* Nested functions can't realign the stack due to a register
5804 if (DECL_CONTEXT (current_function_decl
)
5805 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5807 if (ix86_force_align_arg_pointer
)
5808 warning (0, "-mstackrealign ignored for nested functions");
5809 if (has_force_align_arg_pointer
)
5810 error ("%s not supported for nested functions",
5811 ix86_force_align_arg_pointer_string
);
5812 return virtual_incoming_args_rtx
;
5814 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5815 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5818 return virtual_incoming_args_rtx
;
5821 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5822 This is called from dwarf2out.c to emit call frame instructions
5823 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5825 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5827 rtx unspec
= SET_SRC (pattern
);
5828 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5832 case UNSPEC_REG_SAVE
:
5833 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5834 SET_DEST (pattern
));
5836 case UNSPEC_DEF_CFA
:
5837 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5838 INTVAL (XVECEXP (unspec
, 0, 0)));
5845 /* Expand the prologue into a bunch of separate insns. */
5848 ix86_expand_prologue (void)
5852 struct ix86_frame frame
;
5853 HOST_WIDE_INT allocate
;
5855 ix86_compute_frame_layout (&frame
);
5857 if (cfun
->machine
->force_align_arg_pointer
)
5861 /* Grab the argument pointer. */
5862 x
= plus_constant (stack_pointer_rtx
, 4);
5863 y
= cfun
->machine
->force_align_arg_pointer
;
5864 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5865 RTX_FRAME_RELATED_P (insn
) = 1;
5867 /* The unwind info consists of two parts: install the fafp as the cfa,
5868 and record the fafp as the "save register" of the stack pointer.
5869 The later is there in order that the unwinder can see where it
5870 should restore the stack pointer across the and insn. */
5871 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5872 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5873 RTX_FRAME_RELATED_P (x
) = 1;
5874 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5876 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5877 RTX_FRAME_RELATED_P (y
) = 1;
5878 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5879 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5880 REG_NOTES (insn
) = x
;
5882 /* Align the stack. */
5883 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5886 /* And here we cheat like madmen with the unwind info. We force the
5887 cfa register back to sp+4, which is exactly what it was at the
5888 start of the function. Re-pushing the return address results in
5889 the return at the same spot relative to the cfa, and thus is
5890 correct wrt the unwind info. */
5891 x
= cfun
->machine
->force_align_arg_pointer
;
5892 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5893 insn
= emit_insn (gen_push (x
));
5894 RTX_FRAME_RELATED_P (insn
) = 1;
5897 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5898 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5899 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5900 REG_NOTES (insn
) = x
;
5903 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5904 slower on all targets. Also sdb doesn't like it. */
5906 if (frame_pointer_needed
)
5908 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5909 RTX_FRAME_RELATED_P (insn
) = 1;
5911 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5912 RTX_FRAME_RELATED_P (insn
) = 1;
5915 allocate
= frame
.to_allocate
;
5917 if (!frame
.save_regs_using_mov
)
5918 ix86_emit_save_regs ();
5920 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5922 /* When using red zone we may start register saving before allocating
5923 the stack frame saving one cycle of the prologue. */
5924 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5925 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5926 : stack_pointer_rtx
,
5927 -frame
.nregs
* UNITS_PER_WORD
);
5931 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5932 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5933 GEN_INT (-allocate
), -1);
5936 /* Only valid for Win32. */
5937 rtx eax
= gen_rtx_REG (Pmode
, 0);
5941 gcc_assert (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
);
5943 if (TARGET_64BIT_MS_ABI
)
5946 eax_live
= ix86_eax_live_at_start_p ();
5950 emit_insn (gen_push (eax
));
5951 allocate
-= UNITS_PER_WORD
;
5954 emit_move_insn (eax
, GEN_INT (allocate
));
5957 insn
= gen_allocate_stack_worker_64 (eax
);
5959 insn
= gen_allocate_stack_worker_32 (eax
);
5960 insn
= emit_insn (insn
);
5961 RTX_FRAME_RELATED_P (insn
) = 1;
5962 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5963 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5964 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5965 t
, REG_NOTES (insn
));
5969 if (frame_pointer_needed
)
5970 t
= plus_constant (hard_frame_pointer_rtx
,
5973 - frame
.nregs
* UNITS_PER_WORD
);
5975 t
= plus_constant (stack_pointer_rtx
, allocate
);
5976 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
5980 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5982 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5983 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5985 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5986 -frame
.nregs
* UNITS_PER_WORD
);
5989 pic_reg_used
= false;
5990 if (pic_offset_table_rtx
5991 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5992 || current_function_profile
))
5994 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5996 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5997 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5999 pic_reg_used
= true;
6006 if (ix86_cmodel
== CM_LARGE_PIC
)
6008 rtx tmp_reg
= gen_rtx_REG (DImode
,
6009 FIRST_REX_INT_REG
+ 3 /* R11 */);
6010 rtx label
= gen_label_rtx ();
6012 LABEL_PRESERVE_P (label
) = 1;
6013 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6014 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6015 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6016 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6017 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6018 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6019 pic_offset_table_rtx
, tmp_reg
));
6022 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6025 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6027 /* Even with accurate pre-reload life analysis, we can wind up
6028 deleting all references to the pic register after reload.
6029 Consider if cross-jumping unifies two sides of a branch
6030 controlled by a comparison vs the only read from a global.
6031 In which case, allow the set_got to be deleted, though we're
6032 too late to do anything about the ebx save in the prologue. */
6033 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6036 /* Prevent function calls from be scheduled before the call to mcount.
6037 In the pic_reg_used case, make sure that the got load isn't deleted. */
6038 if (current_function_profile
)
6039 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
6042 /* Emit code to restore saved registers using MOV insns. First register
6043 is restored from POINTER + OFFSET. */
6045 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6046 int maybe_eh_return
)
6049 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6051 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6052 if (ix86_save_reg (regno
, maybe_eh_return
))
6054 /* Ensure that adjust_address won't be forced to produce pointer
6055 out of range allowed by x86-64 instruction set. */
6056 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6060 r11
= gen_rtx_REG (DImode
, R11_REG
);
6061 emit_move_insn (r11
, GEN_INT (offset
));
6062 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6063 base_address
= gen_rtx_MEM (Pmode
, r11
);
6066 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6067 adjust_address (base_address
, Pmode
, offset
));
6068 offset
+= UNITS_PER_WORD
;
6072 /* Restore function stack, frame, and registers. */
6075 ix86_expand_epilogue (int style
)
6078 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6079 struct ix86_frame frame
;
6080 HOST_WIDE_INT offset
;
6082 ix86_compute_frame_layout (&frame
);
6084 /* Calculate start of saved registers relative to ebp. Special care
6085 must be taken for the normal return case of a function using
6086 eh_return: the eax and edx registers are marked as saved, but not
6087 restored along this path. */
6088 offset
= frame
.nregs
;
6089 if (current_function_calls_eh_return
&& style
!= 2)
6091 offset
*= -UNITS_PER_WORD
;
6093 /* If we're only restoring one register and sp is not valid then
6094 using a move instruction to restore the register since it's
6095 less work than reloading sp and popping the register.
6097 The default code result in stack adjustment using add/lea instruction,
6098 while this code results in LEAVE instruction (or discrete equivalent),
6099 so it is profitable in some other cases as well. Especially when there
6100 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6101 and there is exactly one register to pop. This heuristic may need some
6102 tuning in future. */
6103 if ((!sp_valid
&& frame
.nregs
<= 1)
6104 || (TARGET_EPILOGUE_USING_MOVE
6105 && cfun
->machine
->use_fast_prologue_epilogue
6106 && (frame
.nregs
> 1 || frame
.to_allocate
))
6107 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6108 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6109 && cfun
->machine
->use_fast_prologue_epilogue
6110 && frame
.nregs
== 1)
6111 || current_function_calls_eh_return
)
6113 /* Restore registers. We can use ebp or esp to address the memory
6114 locations. If both are available, default to ebp, since offsets
6115 are known to be small. Only exception is esp pointing directly to the
6116 end of block of saved registers, where we may simplify addressing
6119 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6120 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6121 frame
.to_allocate
, style
== 2);
6123 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6124 offset
, style
== 2);
6126 /* eh_return epilogues need %ecx added to the stack pointer. */
6129 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6131 if (frame_pointer_needed
)
6133 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6134 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6135 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6137 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6138 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6140 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6145 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6146 tmp
= plus_constant (tmp
, (frame
.to_allocate
6147 + frame
.nregs
* UNITS_PER_WORD
));
6148 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6151 else if (!frame_pointer_needed
)
6152 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6153 GEN_INT (frame
.to_allocate
6154 + frame
.nregs
* UNITS_PER_WORD
),
6156 /* If not an i386, mov & pop is faster than "leave". */
6157 else if (TARGET_USE_LEAVE
|| optimize_size
6158 || !cfun
->machine
->use_fast_prologue_epilogue
)
6159 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6162 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6163 hard_frame_pointer_rtx
,
6166 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6168 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6173 /* First step is to deallocate the stack frame so that we can
6174 pop the registers. */
6177 gcc_assert (frame_pointer_needed
);
6178 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6179 hard_frame_pointer_rtx
,
6180 GEN_INT (offset
), style
);
6182 else if (frame
.to_allocate
)
6183 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6184 GEN_INT (frame
.to_allocate
), style
);
6186 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6187 if (ix86_save_reg (regno
, false))
6190 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6192 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6194 if (frame_pointer_needed
)
6196 /* Leave results in shorter dependency chains on CPUs that are
6197 able to grok it fast. */
6198 if (TARGET_USE_LEAVE
)
6199 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6200 else if (TARGET_64BIT
)
6201 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6203 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6207 if (cfun
->machine
->force_align_arg_pointer
)
6209 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6210 cfun
->machine
->force_align_arg_pointer
,
6214 /* Sibcall epilogues don't want a return instruction. */
6218 if (current_function_pops_args
&& current_function_args_size
)
6220 rtx popc
= GEN_INT (current_function_pops_args
);
6222 /* i386 can only pop 64K bytes. If asked to pop more, pop
6223 return address, do explicit add, and jump indirectly to the
6226 if (current_function_pops_args
>= 65536)
6228 rtx ecx
= gen_rtx_REG (SImode
, 2);
6230 /* There is no "pascal" calling convention in any 64bit ABI. */
6231 gcc_assert (!TARGET_64BIT
);
6233 emit_insn (gen_popsi1 (ecx
));
6234 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6235 emit_jump_insn (gen_return_indirect_internal (ecx
));
6238 emit_jump_insn (gen_return_pop_internal (popc
));
6241 emit_jump_insn (gen_return_internal ());
6244 /* Reset from the function's potential modifications. */
6247 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6248 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6250 if (pic_offset_table_rtx
)
6251 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6253 /* Mach-O doesn't support labels at the end of objects, so if
6254 it looks like we might want one, insert a NOP. */
6256 rtx insn
= get_last_insn ();
6259 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6260 insn
= PREV_INSN (insn
);
6264 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6265 fputs ("\tnop\n", file
);
6271 /* Extract the parts of an RTL expression that is a valid memory address
6272 for an instruction. Return 0 if the structure of the address is
6273 grossly off. Return -1 if the address contains ASHIFT, so it is not
6274 strictly valid, but still used for computing length of lea instruction. */
6277 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6279 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6280 rtx base_reg
, index_reg
;
6281 HOST_WIDE_INT scale
= 1;
6282 rtx scale_rtx
= NULL_RTX
;
6284 enum ix86_address_seg seg
= SEG_DEFAULT
;
6286 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6288 else if (GET_CODE (addr
) == PLUS
)
6298 addends
[n
++] = XEXP (op
, 1);
6301 while (GET_CODE (op
) == PLUS
);
6306 for (i
= n
; i
>= 0; --i
)
6309 switch (GET_CODE (op
))
6314 index
= XEXP (op
, 0);
6315 scale_rtx
= XEXP (op
, 1);
6319 if (XINT (op
, 1) == UNSPEC_TP
6320 && TARGET_TLS_DIRECT_SEG_REFS
6321 && seg
== SEG_DEFAULT
)
6322 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6351 else if (GET_CODE (addr
) == MULT
)
6353 index
= XEXP (addr
, 0); /* index*scale */
6354 scale_rtx
= XEXP (addr
, 1);
6356 else if (GET_CODE (addr
) == ASHIFT
)
6360 /* We're called for lea too, which implements ashift on occasion. */
6361 index
= XEXP (addr
, 0);
6362 tmp
= XEXP (addr
, 1);
6363 if (!CONST_INT_P (tmp
))
6365 scale
= INTVAL (tmp
);
6366 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6372 disp
= addr
; /* displacement */
6374 /* Extract the integral value of scale. */
6377 if (!CONST_INT_P (scale_rtx
))
6379 scale
= INTVAL (scale_rtx
);
6382 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6383 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6385 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6386 if (base_reg
&& index_reg
&& scale
== 1
6387 && (index_reg
== arg_pointer_rtx
6388 || index_reg
== frame_pointer_rtx
6389 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6392 tmp
= base
, base
= index
, index
= tmp
;
6393 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6396 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6397 if ((base_reg
== hard_frame_pointer_rtx
6398 || base_reg
== frame_pointer_rtx
6399 || base_reg
== arg_pointer_rtx
) && !disp
)
6402 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6403 Avoid this by transforming to [%esi+0]. */
6404 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6405 && base_reg
&& !index_reg
&& !disp
6407 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6410 /* Special case: encode reg+reg instead of reg*2. */
6411 if (!base
&& index
&& scale
&& scale
== 2)
6412 base
= index
, base_reg
= index_reg
, scale
= 1;
6414 /* Special case: scaling cannot be encoded without base or displacement. */
6415 if (!base
&& !disp
&& index
&& scale
!= 1)
6427 /* Return cost of the memory address x.
6428 For i386, it is better to use a complex address than let gcc copy
6429 the address into a reg and make a new pseudo. But not if the address
6430 requires to two regs - that would mean more pseudos with longer
6433 ix86_address_cost (rtx x
)
6435 struct ix86_address parts
;
6437 int ok
= ix86_decompose_address (x
, &parts
);
6441 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6442 parts
.base
= SUBREG_REG (parts
.base
);
6443 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6444 parts
.index
= SUBREG_REG (parts
.index
);
6446 /* More complex memory references are better. */
6447 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6449 if (parts
.seg
!= SEG_DEFAULT
)
6452 /* Attempt to minimize number of registers in the address. */
6454 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6456 && (!REG_P (parts
.index
)
6457 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6461 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6463 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6464 && parts
.base
!= parts
.index
)
6467 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6468 since it's predecode logic can't detect the length of instructions
6469 and it degenerates to vector decoded. Increase cost of such
6470 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6471 to split such addresses or even refuse such addresses at all.
6473 Following addressing modes are affected:
6478 The first and last case may be avoidable by explicitly coding the zero in
6479 memory address, but I don't have AMD-K6 machine handy to check this
6483 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6484 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6485 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6491 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6492 this is used for to form addresses to local data when -fPIC is in
6496 darwin_local_data_pic (rtx disp
)
6498 if (GET_CODE (disp
) == MINUS
)
6500 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6501 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6502 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6504 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6505 if (! strcmp (sym_name
, "<pic base>"))
6513 /* Determine if a given RTX is a valid constant. We already know this
6514 satisfies CONSTANT_P. */
6517 legitimate_constant_p (rtx x
)
6519 switch (GET_CODE (x
))
6524 if (GET_CODE (x
) == PLUS
)
6526 if (!CONST_INT_P (XEXP (x
, 1)))
6531 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6534 /* Only some unspecs are valid as "constants". */
6535 if (GET_CODE (x
) == UNSPEC
)
6536 switch (XINT (x
, 1))
6541 return TARGET_64BIT
;
6544 x
= XVECEXP (x
, 0, 0);
6545 return (GET_CODE (x
) == SYMBOL_REF
6546 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6548 x
= XVECEXP (x
, 0, 0);
6549 return (GET_CODE (x
) == SYMBOL_REF
6550 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6555 /* We must have drilled down to a symbol. */
6556 if (GET_CODE (x
) == LABEL_REF
)
6558 if (GET_CODE (x
) != SYMBOL_REF
)
6563 /* TLS symbols are never valid. */
6564 if (SYMBOL_REF_TLS_MODEL (x
))
6567 /* DLLIMPORT symbols are never valid. */
6568 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6569 && SYMBOL_REF_DLLIMPORT_P (x
))
6574 if (GET_MODE (x
) == TImode
6575 && x
!= CONST0_RTX (TImode
)
6581 if (x
== CONST0_RTX (GET_MODE (x
)))
6589 /* Otherwise we handle everything else in the move patterns. */
6593 /* Determine if it's legal to put X into the constant pool. This
6594 is not possible for the address of thread-local symbols, which
6595 is checked above. */
6598 ix86_cannot_force_const_mem (rtx x
)
6600 /* We can always put integral constants and vectors in memory. */
6601 switch (GET_CODE (x
))
6611 return !legitimate_constant_p (x
);
6614 /* Determine if a given RTX is a valid constant address. */
6617 constant_address_p (rtx x
)
6619 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6622 /* Nonzero if the constant value X is a legitimate general operand
6623 when generating PIC code. It is given that flag_pic is on and
6624 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6627 legitimate_pic_operand_p (rtx x
)
/* NOTE(review): extraction dropped the case labels of this switch;
   presumably the CONST case is what unwraps into INNER below.  */
6631 switch (GET_CODE (x
))
6634 inner
= XEXP (x
, 0);
/* Strip a trailing CONST_INT addend: (plus inner const_int).  */
6635 if (GET_CODE (inner
) == PLUS
6636 && CONST_INT_P (XEXP (inner
, 1)))
6637 inner
= XEXP (inner
, 0);
6639 /* Only some unspecs are valid as "constants". */
6640 if (GET_CODE (inner
) == UNSPEC
)
6641 switch (XINT (inner
, 1))
6646 return TARGET_64BIT
;
/* Local-exec TLS references wrapped in an UNSPEC are legitimate.  */
6648 x
= XVECEXP (inner
, 0, 0);
6649 return (GET_CODE (x
) == SYMBOL_REF
6650 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
/* Default: defer to the displacement legitimacy check.  */
6658 return legitimate_pic_address_disp_p (x
);
6665 /* Determine if a given CONST RTX is a valid memory displacement
6669 legitimate_pic_address_disp_p (rtx disp
)
6673 /* In 64bit mode we can allow direct addresses of symbols and labels
6674 when they are not dynamic symbols. */
6677 rtx op0
= disp
, op1
;
6679 switch (GET_CODE (disp
))
6685 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6687 op0
= XEXP (XEXP (disp
, 0), 0);
6688 op1
= XEXP (XEXP (disp
, 0), 1);
6689 if (!CONST_INT_P (op1
)
6690 || INTVAL (op1
) >= 16*1024*1024
6691 || INTVAL (op1
) < -16*1024*1024)
6693 if (GET_CODE (op0
) == LABEL_REF
)
6695 if (GET_CODE (op0
) != SYMBOL_REF
)
6700 /* TLS references should always be enclosed in UNSPEC. */
6701 if (SYMBOL_REF_TLS_MODEL (op0
))
6703 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6704 && ix86_cmodel
!= CM_LARGE_PIC
)
6712 if (GET_CODE (disp
) != CONST
)
6714 disp
= XEXP (disp
, 0);
6718 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6719 of GOT tables. We should not need these anyway. */
6720 if (GET_CODE (disp
) != UNSPEC
6721 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6722 && XINT (disp
, 1) != UNSPEC_GOTOFF
6723 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6726 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6727 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6733 if (GET_CODE (disp
) == PLUS
)
6735 if (!CONST_INT_P (XEXP (disp
, 1)))
6737 disp
= XEXP (disp
, 0);
6741 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6744 if (GET_CODE (disp
) != UNSPEC
)
6747 switch (XINT (disp
, 1))
6752 /* We need to check for both symbols and labels because VxWorks loads
6753 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6755 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6756 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6758 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6759 While ABI specify also 32bit relocation but we don't produce it in
6760 small PIC model at all. */
6761 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6762 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6764 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6766 case UNSPEC_GOTTPOFF
:
6767 case UNSPEC_GOTNTPOFF
:
6768 case UNSPEC_INDNTPOFF
:
6771 disp
= XVECEXP (disp
, 0, 0);
6772 return (GET_CODE (disp
) == SYMBOL_REF
6773 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6775 disp
= XVECEXP (disp
, 0, 0);
6776 return (GET_CODE (disp
) == SYMBOL_REF
6777 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6779 disp
= XVECEXP (disp
, 0, 0);
6780 return (GET_CODE (disp
) == SYMBOL_REF
6781 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6787 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6788 memory address for an instruction. The MODE argument is the machine mode
6789 for the MEM expression that wants to use this address.
6791 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6792 convert common non-canonical forms to canonical form so that they will
6796 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
6797 rtx addr
, int strict
)
6799 struct ix86_address parts
;
6800 rtx base
, index
, disp
;
6801 HOST_WIDE_INT scale
;
6802 const char *reason
= NULL
;
6803 rtx reason_rtx
= NULL_RTX
;
6805 if (ix86_decompose_address (addr
, &parts
) <= 0)
6807 reason
= "decomposition failed";
6812 index
= parts
.index
;
6814 scale
= parts
.scale
;
6816 /* Validate base register.
6818 Don't allow SUBREG's that span more than a word here. It can lead to spill
6819 failures when the base is one word out of a two word structure, which is
6820 represented internally as a DImode int. */
6829 else if (GET_CODE (base
) == SUBREG
6830 && REG_P (SUBREG_REG (base
))
6831 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6833 reg
= SUBREG_REG (base
);
6836 reason
= "base is not a register";
6840 if (GET_MODE (base
) != Pmode
)
6842 reason
= "base is not in Pmode";
6846 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6847 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6849 reason
= "base is not valid";
6854 /* Validate index register.
6856 Don't allow SUBREG's that span more than a word here -- same as above. */
6865 else if (GET_CODE (index
) == SUBREG
6866 && REG_P (SUBREG_REG (index
))
6867 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6869 reg
= SUBREG_REG (index
);
6872 reason
= "index is not a register";
6876 if (GET_MODE (index
) != Pmode
)
6878 reason
= "index is not in Pmode";
6882 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6883 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6885 reason
= "index is not valid";
6890 /* Validate scale factor. */
6893 reason_rtx
= GEN_INT (scale
);
6896 reason
= "scale without index";
6900 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6902 reason
= "scale is not a valid multiplier";
6907 /* Validate displacement. */
6912 if (GET_CODE (disp
) == CONST
6913 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6914 switch (XINT (XEXP (disp
, 0), 1))
6916 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6917 used. While ABI specify also 32bit relocations, we don't produce
6918 them at all and use IP relative instead. */
6921 gcc_assert (flag_pic
);
6923 goto is_legitimate_pic
;
6924 reason
= "64bit address unspec";
6927 case UNSPEC_GOTPCREL
:
6928 gcc_assert (flag_pic
);
6929 goto is_legitimate_pic
;
6931 case UNSPEC_GOTTPOFF
:
6932 case UNSPEC_GOTNTPOFF
:
6933 case UNSPEC_INDNTPOFF
:
6939 reason
= "invalid address unspec";
6943 else if (SYMBOLIC_CONST (disp
)
6947 && MACHOPIC_INDIRECT
6948 && !machopic_operand_p (disp
)
6954 if (TARGET_64BIT
&& (index
|| base
))
6956 /* foo@dtpoff(%rX) is ok. */
6957 if (GET_CODE (disp
) != CONST
6958 || GET_CODE (XEXP (disp
, 0)) != PLUS
6959 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6960 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6961 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6962 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6964 reason
= "non-constant pic memory reference";
6968 else if (! legitimate_pic_address_disp_p (disp
))
6970 reason
= "displacement is an invalid pic construct";
6974 /* This code used to verify that a symbolic pic displacement
6975 includes the pic_offset_table_rtx register.
6977 While this is good idea, unfortunately these constructs may
6978 be created by "adds using lea" optimization for incorrect
6987 This code is nonsensical, but results in addressing
6988 GOT table with pic_offset_table_rtx base. We can't
6989 just refuse it easily, since it gets matched by
6990 "addsi3" pattern, that later gets split to lea in the
6991 case output register differs from input. While this
6992 can be handled by separate addsi pattern for this case
6993 that never results in lea, this seems to be easier and
6994 correct fix for crash to disable this test. */
6996 else if (GET_CODE (disp
) != LABEL_REF
6997 && !CONST_INT_P (disp
)
6998 && (GET_CODE (disp
) != CONST
6999 || !legitimate_constant_p (disp
))
7000 && (GET_CODE (disp
) != SYMBOL_REF
7001 || !legitimate_constant_p (disp
)))
7003 reason
= "displacement is not constant";
7006 else if (TARGET_64BIT
7007 && !x86_64_immediate_operand (disp
, VOIDmode
))
7009 reason
= "displacement is out of range";
7014 /* Everything looks valid. */
7021 /* Return a unique alias set for the GOT. */
7023 static HOST_WIDE_INT
7024 ix86_GOT_alias_set (void)
/* Lazily-created singleton: -1 marks "not yet allocated".
   NOTE(review): the guard (presumably `if (set == -1)`) and the
   `return set;` line were lost in extraction -- confirm upstream.  */
7026 static HOST_WIDE_INT set
= -1;
7028 set
= new_alias_set ();
7032 /* Return a legitimate reference for ORIG (an address) using the
7033 register REG. If REG is 0, a new pseudo is generated.
7035 There are two types of references that must be handled:
7037 1. Global data references must load the address from the GOT, via
7038 the PIC reg. An insn is emitted to do this load, and the reg is
7041 2. Static data references, constant pool addresses, and code labels
7042 compute the address as an offset from the GOT, whose base is in
7043 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7044 differentiate them from global data objects. The returned
7045 address is the PIC reg + an unspec constant.
7047 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7048 reg also appears in the address. */
7051 legitimize_pic_address (rtx orig
, rtx reg
)
7058 if (TARGET_MACHO
&& !TARGET_64BIT
)
7061 reg
= gen_reg_rtx (Pmode
);
7062 /* Use the generic Mach-O PIC machinery. */
7063 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7067 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7069 else if (TARGET_64BIT
7070 && ix86_cmodel
!= CM_SMALL_PIC
7071 && gotoff_operand (addr
, Pmode
))
7074 /* This symbol may be referenced via a displacement from the PIC
7075 base address (@GOTOFF). */
7077 if (reload_in_progress
)
7078 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7079 if (GET_CODE (addr
) == CONST
)
7080 addr
= XEXP (addr
, 0);
7081 if (GET_CODE (addr
) == PLUS
)
7083 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7085 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7088 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7089 new = gen_rtx_CONST (Pmode
, new);
7091 tmpreg
= gen_reg_rtx (Pmode
);
7094 emit_move_insn (tmpreg
, new);
7098 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7099 tmpreg
, 1, OPTAB_DIRECT
);
7102 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7104 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7106 /* This symbol may be referenced via a displacement from the PIC
7107 base address (@GOTOFF). */
7109 if (reload_in_progress
)
7110 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7111 if (GET_CODE (addr
) == CONST
)
7112 addr
= XEXP (addr
, 0);
7113 if (GET_CODE (addr
) == PLUS
)
7115 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7117 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7120 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7121 new = gen_rtx_CONST (Pmode
, new);
7122 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7126 emit_move_insn (reg
, new);
7130 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7131 /* We can't use @GOTOFF for text labels on VxWorks;
7132 see gotoff_operand. */
7133 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7135 /* Given that we've already handled dllimport variables separately
7136 in legitimize_address, and all other variables should satisfy
7137 legitimate_pic_address_disp_p, we should never arrive here. */
7138 gcc_assert (!TARGET_64BIT_MS_ABI
);
7140 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7142 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7143 new = gen_rtx_CONST (Pmode
, new);
7144 new = gen_const_mem (Pmode
, new);
7145 set_mem_alias_set (new, ix86_GOT_alias_set ());
7148 reg
= gen_reg_rtx (Pmode
);
7149 /* Use directly gen_movsi, otherwise the address is loaded
7150 into register for CSE. We don't want to CSE this addresses,
7151 instead we CSE addresses from the GOT table, so skip this. */
7152 emit_insn (gen_movsi (reg
, new));
7157 /* This symbol must be referenced via a load from the
7158 Global Offset Table (@GOT). */
7160 if (reload_in_progress
)
7161 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7162 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7163 new = gen_rtx_CONST (Pmode
, new);
7165 new = force_reg (Pmode
, new);
7166 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7167 new = gen_const_mem (Pmode
, new);
7168 set_mem_alias_set (new, ix86_GOT_alias_set ());
7171 reg
= gen_reg_rtx (Pmode
);
7172 emit_move_insn (reg
, new);
7178 if (CONST_INT_P (addr
)
7179 && !x86_64_immediate_operand (addr
, VOIDmode
))
7183 emit_move_insn (reg
, addr
);
7187 new = force_reg (Pmode
, addr
);
7189 else if (GET_CODE (addr
) == CONST
)
7191 addr
= XEXP (addr
, 0);
7193 /* We must match stuff we generate before. Assume the only
7194 unspecs that can get here are ours. Not that we could do
7195 anything with them anyway.... */
7196 if (GET_CODE (addr
) == UNSPEC
7197 || (GET_CODE (addr
) == PLUS
7198 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7200 gcc_assert (GET_CODE (addr
) == PLUS
);
7202 if (GET_CODE (addr
) == PLUS
)
7204 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7206 /* Check first to see if this is a constant offset from a @GOTOFF
7207 symbol reference. */
7208 if (gotoff_operand (op0
, Pmode
)
7209 && CONST_INT_P (op1
))
7213 if (reload_in_progress
)
7214 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7215 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7217 new = gen_rtx_PLUS (Pmode
, new, op1
);
7218 new = gen_rtx_CONST (Pmode
, new);
7219 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7223 emit_move_insn (reg
, new);
7229 if (INTVAL (op1
) < -16*1024*1024
7230 || INTVAL (op1
) >= 16*1024*1024)
7232 if (!x86_64_immediate_operand (op1
, Pmode
))
7233 op1
= force_reg (Pmode
, op1
);
7234 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7240 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7241 new = legitimize_pic_address (XEXP (addr
, 1),
7242 base
== reg
? NULL_RTX
: reg
);
7244 if (CONST_INT_P (new))
7245 new = plus_constant (base
, INTVAL (new));
7248 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7250 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7251 new = XEXP (new, 1);
7253 new = gen_rtx_PLUS (Pmode
, base
, new);
7261 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7264 get_thread_pointer (int to_reg
)
/* Build (unspec:P [(const_int 0)] UNSPEC_TP) -- the raw thread-pointer
   rtx.  NOTE(review): the early `if (!to_reg) return tp;` path was lost
   in extraction -- TODO confirm.  */
7268 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
)
/* Copy the thread pointer into a fresh pseudo and emit the SET.  */
7272 reg
= gen_reg_rtx (Pmode
);
7273 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7274 insn
= emit_insn (insn
);
7279 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7280 false if we expect this to be used for a memory address and true if
7281 we expect to load the address into a register. */
7284 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7286 rtx dest
, base
, off
, pic
, tp
;
7291 case TLS_MODEL_GLOBAL_DYNAMIC
:
7292 dest
= gen_reg_rtx (Pmode
);
7293 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7295 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7297 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7300 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7301 insns
= get_insns ();
7304 CONST_OR_PURE_CALL_P (insns
) = 1;
7305 emit_libcall_block (insns
, dest
, rax
, x
);
7307 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7308 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7310 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7312 if (TARGET_GNU2_TLS
)
7314 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7316 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7320 case TLS_MODEL_LOCAL_DYNAMIC
:
7321 base
= gen_reg_rtx (Pmode
);
7322 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7324 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7326 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7329 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7330 insns
= get_insns ();
7333 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7334 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7335 CONST_OR_PURE_CALL_P (insns
) = 1;
7336 emit_libcall_block (insns
, base
, rax
, note
);
7338 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7339 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7341 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7343 if (TARGET_GNU2_TLS
)
7345 rtx x
= ix86_tls_module_base ();
7347 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7348 gen_rtx_MINUS (Pmode
, x
, tp
));
7351 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7352 off
= gen_rtx_CONST (Pmode
, off
);
7354 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7356 if (TARGET_GNU2_TLS
)
7358 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7360 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7365 case TLS_MODEL_INITIAL_EXEC
:
7369 type
= UNSPEC_GOTNTPOFF
;
7373 if (reload_in_progress
)
7374 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7375 pic
= pic_offset_table_rtx
;
7376 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7378 else if (!TARGET_ANY_GNU_TLS
)
7380 pic
= gen_reg_rtx (Pmode
);
7381 emit_insn (gen_set_got (pic
));
7382 type
= UNSPEC_GOTTPOFF
;
7387 type
= UNSPEC_INDNTPOFF
;
7390 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7391 off
= gen_rtx_CONST (Pmode
, off
);
7393 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7394 off
= gen_const_mem (Pmode
, off
);
7395 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7397 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7399 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7400 off
= force_reg (Pmode
, off
);
7401 return gen_rtx_PLUS (Pmode
, base
, off
);
7405 base
= get_thread_pointer (true);
7406 dest
= gen_reg_rtx (Pmode
);
7407 emit_insn (gen_subsi3 (dest
, base
, off
));
7411 case TLS_MODEL_LOCAL_EXEC
:
7412 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7413 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7414 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7415 off
= gen_rtx_CONST (Pmode
, off
);
7417 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7419 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7420 return gen_rtx_PLUS (Pmode
, base
, off
);
7424 base
= get_thread_pointer (true);
7425 dest
= gen_reg_rtx (Pmode
);
7426 emit_insn (gen_subsi3 (dest
, base
, off
));
7437 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7440 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7441 htab_t dllimport_map
;
7444 get_dllimport_decl (tree decl
)
7446 struct tree_map
*h
, in
;
7450 size_t namelen
, prefixlen
;
7456 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7458 in
.hash
= htab_hash_pointer (decl
);
7459 in
.base
.from
= decl
;
7460 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7465 *loc
= h
= ggc_alloc (sizeof (struct tree_map
));
7467 h
->base
.from
= decl
;
7468 h
->to
= to
= build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7469 DECL_ARTIFICIAL (to
) = 1;
7470 DECL_IGNORED_P (to
) = 1;
7471 DECL_EXTERNAL (to
) = 1;
7472 TREE_READONLY (to
) = 1;
7474 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7475 name
= targetm
.strip_name_encoding (name
);
7476 if (name
[0] == FASTCALL_PREFIX
)
7482 prefix
= "*__imp__";
7484 namelen
= strlen (name
);
7485 prefixlen
= strlen (prefix
);
7486 imp_name
= alloca (namelen
+ prefixlen
+ 1);
7487 memcpy (imp_name
, prefix
, prefixlen
);
7488 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
7490 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
7491 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
7492 SET_SYMBOL_REF_DECL (rtl
, to
);
7493 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7495 rtl
= gen_const_mem (Pmode
, rtl
);
7496 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7498 SET_DECL_RTL (to
, rtl
);
7503 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7504 true if we require the result be a register. */
7507 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
/* A dllimport reference must carry its decl so the __imp_ alias can be
   looked up / created.  */
7512 gcc_assert (SYMBOL_REF_DECL (symbol
));
7513 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
/* The import decl's RTL is the (mem (symbol_ref __imp_NAME)) built by
   get_dllimport_decl; optionally force it into a register.  */
7515 x
= DECL_RTL (imp_decl
);
7517 x
= force_reg (Pmode
, x
);
7521 /* Try machine-dependent ways of modifying an illegitimate address
7522 to be legitimate. If we find one, return the new, valid address.
7523 This macro is used in only one place: `memory_address' in explow.c.
7525 OLDX is the address as it was before break_out_memory_refs was called.
7526 In some cases it is useful to look at this to decide what needs to be done.
7528 MODE and WIN are passed so that this macro can use
7529 GO_IF_LEGITIMATE_ADDRESS.
7531 It is always safe for this macro to do nothing. It exists to recognize
7532 opportunities to optimize the output.
7534 For the 80386, we handle X+REG by loading X into a register R and
7535 using R+REG. R will go in a general reg and indexing will be used.
7536 However, if REG is a broken-out memory address or multiplication,
7537 nothing needs to be done because REG can certainly go in a general reg.
7539 When -fpic is used, special handling is needed for symbolic references.
7540 See comments by legitimize_pic_address in i386.c for details. */
7543 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7548 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7550 return legitimize_tls_address (x
, log
, false);
7551 if (GET_CODE (x
) == CONST
7552 && GET_CODE (XEXP (x
, 0)) == PLUS
7553 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7554 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7556 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7557 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7560 if (flag_pic
&& SYMBOLIC_CONST (x
))
7561 return legitimize_pic_address (x
, 0);
7563 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7565 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
7566 return legitimize_dllimport_symbol (x
, true);
7567 if (GET_CODE (x
) == CONST
7568 && GET_CODE (XEXP (x
, 0)) == PLUS
7569 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7570 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7572 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7573 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7577 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7578 if (GET_CODE (x
) == ASHIFT
7579 && CONST_INT_P (XEXP (x
, 1))
7580 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7583 log
= INTVAL (XEXP (x
, 1));
7584 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7585 GEN_INT (1 << log
));
7588 if (GET_CODE (x
) == PLUS
)
7590 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7592 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7593 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7594 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7597 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7598 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7599 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7600 GEN_INT (1 << log
));
7603 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7604 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7605 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7608 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7609 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7610 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7611 GEN_INT (1 << log
));
7614 /* Put multiply first if it isn't already. */
7615 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7617 rtx tmp
= XEXP (x
, 0);
7618 XEXP (x
, 0) = XEXP (x
, 1);
7623 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7624 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7625 created by virtual register instantiation, register elimination, and
7626 similar optimizations. */
7627 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7630 x
= gen_rtx_PLUS (Pmode
,
7631 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7632 XEXP (XEXP (x
, 1), 0)),
7633 XEXP (XEXP (x
, 1), 1));
7637 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7638 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7639 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7640 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7641 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7642 && CONSTANT_P (XEXP (x
, 1)))
7645 rtx other
= NULL_RTX
;
7647 if (CONST_INT_P (XEXP (x
, 1)))
7649 constant
= XEXP (x
, 1);
7650 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7652 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7654 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7655 other
= XEXP (x
, 1);
7663 x
= gen_rtx_PLUS (Pmode
,
7664 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7665 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7666 plus_constant (other
, INTVAL (constant
)));
7670 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7673 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7676 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7679 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7682 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7686 && REG_P (XEXP (x
, 1))
7687 && REG_P (XEXP (x
, 0)))
7690 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7693 x
= legitimize_pic_address (x
, 0);
7696 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7699 if (REG_P (XEXP (x
, 0)))
7701 rtx temp
= gen_reg_rtx (Pmode
);
7702 rtx val
= force_operand (XEXP (x
, 1), temp
);
7704 emit_move_insn (temp
, val
);
7710 else if (REG_P (XEXP (x
, 1)))
7712 rtx temp
= gen_reg_rtx (Pmode
);
7713 rtx val
= force_operand (XEXP (x
, 0), temp
);
7715 emit_move_insn (temp
, val
);
7725 /* Print an integer constant expression in assembler syntax. Addition
7726 and subtraction are the only arithmetic that may appear in these
7727 expressions. FILE is the stdio stream to write to, X is the rtx, and
7728 CODE is the operand print code from the output string. */
7731 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7735 switch (GET_CODE (x
))
7738 gcc_assert (flag_pic
);
7743 if (! TARGET_MACHO
|| TARGET_64BIT
)
7744 output_addr_const (file
, x
);
7747 const char *name
= XSTR (x
, 0);
7749 /* Mark the decl as referenced so that cgraph will
7750 output the function. */
7751 if (SYMBOL_REF_DECL (x
))
7752 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7755 if (MACHOPIC_INDIRECT
7756 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7757 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7759 assemble_name (file
, name
);
7761 if (!TARGET_MACHO
&& !TARGET_64BIT_MS_ABI
7762 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7763 fputs ("@PLT", file
);
7770 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7771 assemble_name (asm_out_file
, buf
);
7775 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7779 /* This used to output parentheses around the expression,
7780 but that does not work on the 386 (either ATT or BSD assembler). */
7781 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7785 if (GET_MODE (x
) == VOIDmode
)
7787 /* We can use %d if the number is <32 bits and positive. */
7788 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7789 fprintf (file
, "0x%lx%08lx",
7790 (unsigned long) CONST_DOUBLE_HIGH (x
),
7791 (unsigned long) CONST_DOUBLE_LOW (x
));
7793 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7796 /* We can't handle floating point constants;
7797 PRINT_OPERAND must handle them. */
7798 output_operand_lossage ("floating constant misused");
7802 /* Some assemblers need integer constants to appear first. */
7803 if (CONST_INT_P (XEXP (x
, 0)))
7805 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7807 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7811 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7812 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7814 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7820 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7821 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7823 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7825 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7829 gcc_assert (XVECLEN (x
, 0) == 1);
7830 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7831 switch (XINT (x
, 1))
7834 fputs ("@GOT", file
);
7837 fputs ("@GOTOFF", file
);
7840 fputs ("@PLTOFF", file
);
7842 case UNSPEC_GOTPCREL
:
7843 fputs ("@GOTPCREL(%rip)", file
);
7845 case UNSPEC_GOTTPOFF
:
7846 /* FIXME: This might be @TPOFF in Sun ld too. */
7847 fputs ("@GOTTPOFF", file
);
7850 fputs ("@TPOFF", file
);
7854 fputs ("@TPOFF", file
);
7856 fputs ("@NTPOFF", file
);
7859 fputs ("@DTPOFF", file
);
7861 case UNSPEC_GOTNTPOFF
:
7863 fputs ("@GOTTPOFF(%rip)", file
);
7865 fputs ("@GOTNTPOFF", file
);
7867 case UNSPEC_INDNTPOFF
:
7868 fputs ("@INDNTPOFF", file
);
7871 output_operand_lossage ("invalid UNSPEC as operand");
7877 output_operand_lossage ("invalid expression as operand");
7881 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7882 We need to emit DTP-relative relocations. */
7884 static void ATTRIBUTE_UNUSED
7885 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
/* Emit ".long <sym>@DTPOFF"; for the wider size a trailing ", 0" pads
   the upper half.  NOTE(review): the size-switch lines were lost in
   extraction -- confirm which sizes are handled.  */
7887 fputs (ASM_LONG
, file
);
7888 output_addr_const (file
, x
);
7889 fputs ("@DTPOFF", file
);
7895 fputs (", 0", file
);
7902 /* In the name of slightly smaller debug output, and to cater to
7903 general assembler lossage, recognize PIC+GOTOFF and turn it back
7904 into a direct symbol reference.
7906 On Darwin, this is necessary to avoid a crash, because Darwin
7907 has a different PIC label for each routine but the DWARF debugging
7908 information is not associated with any particular routine, so it's
7909 necessary to remove references to the PIC label from RTL stored by
7910 the DWARF output code. */
7913 ix86_delegitimize_address (rtx orig_x
)
7916 /* reg_addend is NULL or a multiple of some register. */
7917 rtx reg_addend
= NULL_RTX
;
7918 /* const_addend is NULL or a const_int. */
7919 rtx const_addend
= NULL_RTX
;
7920 /* This is the result, or NULL. */
7921 rtx result
= NULL_RTX
;
7928 if (GET_CODE (x
) != CONST
7929 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7930 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7933 return XVECEXP (XEXP (x
, 0), 0, 0);
7936 if (GET_CODE (x
) != PLUS
7937 || GET_CODE (XEXP (x
, 1)) != CONST
)
7940 if (REG_P (XEXP (x
, 0))
7941 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7942 /* %ebx + GOT/GOTOFF */
7944 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7946 /* %ebx + %reg * scale + GOT/GOTOFF */
7947 reg_addend
= XEXP (x
, 0);
7948 if (REG_P (XEXP (reg_addend
, 0))
7949 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7950 reg_addend
= XEXP (reg_addend
, 1);
7951 else if (REG_P (XEXP (reg_addend
, 1))
7952 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7953 reg_addend
= XEXP (reg_addend
, 0);
7956 if (!REG_P (reg_addend
)
7957 && GET_CODE (reg_addend
) != MULT
7958 && GET_CODE (reg_addend
) != ASHIFT
)
7964 x
= XEXP (XEXP (x
, 1), 0);
7965 if (GET_CODE (x
) == PLUS
7966 && CONST_INT_P (XEXP (x
, 1)))
7968 const_addend
= XEXP (x
, 1);
7972 if (GET_CODE (x
) == UNSPEC
7973 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7974 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7975 result
= XVECEXP (x
, 0, 0);
7977 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7979 result
= XEXP (x
, 0);
7985 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7987 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7991 /* If X is a machine specific address (i.e. a symbol or label being
7992 referenced as a displacement from the GOT implemented using an
7993 UNSPEC), then return the base term. Otherwise return X. */
7996 ix86_find_base_term (rtx x
)
/* Non-CONST rtxes fall through to the delegitimize path below.  */
8002 if (GET_CODE (x
) != CONST
)
/* Strip an integer (or CONST_DOUBLE) addend from (plus term addend).  */
8005 if (GET_CODE (term
) == PLUS
8006 && (CONST_INT_P (XEXP (term
, 1))
8007 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8008 term
= XEXP (term
, 0);
/* Only the GOTPCREL unspec encodes a base term here.  */
8009 if (GET_CODE (term
) != UNSPEC
8010 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8013 term
= XVECEXP (term
, 0, 0);
8015 if (GET_CODE (term
) != SYMBOL_REF
8016 && GET_CODE (term
) != LABEL_REF
)
/* Fallback: undo PIC wrapping and accept only symbol/label results.
   NOTE(review): the return statements between these tests were lost in
   extraction -- confirm against the original file.  */
8022 term
= ix86_delegitimize_address (x
);
8024 if (GET_CODE (term
) != SYMBOL_REF
8025 && GET_CODE (term
) != LABEL_REF
)
8032 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8037 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
8039 enum rtx_code second_code
, bypass_code
;
8040 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8041 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
8042 code
= ix86_fp_compare_code_to_integer (code
);
8046 code
= reverse_condition (code
);
8057 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
8061 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8062 Those same assemblers have the same but opposite lossage on cmov. */
8063 gcc_assert (mode
== CCmode
);
8064 suffix
= fp
? "nbe" : "a";
8084 gcc_assert (mode
== CCmode
);
8106 gcc_assert (mode
== CCmode
);
8107 suffix
= fp
? "nb" : "ae";
8110 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8114 gcc_assert (mode
== CCmode
);
8118 suffix
= fp
? "u" : "p";
8121 suffix
= fp
? "nu" : "np";
8126 fputs (suffix
, file
);
8129 /* Print the name of register X to FILE based on its machine mode and number.
8130 If CODE is 'w', pretend the mode is HImode.
8131 If CODE is 'b', pretend the mode is QImode.
8132 If CODE is 'k', pretend the mode is SImode.
8133 If CODE is 'q', pretend the mode is DImode.
8134 If CODE is 'h', pretend the reg is the 'high' byte register.
8135 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8138 print_reg (rtx x
, int code
, FILE *file
)
8140 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8141 && REGNO (x
) != FRAME_POINTER_REGNUM
8142 && REGNO (x
) != FLAGS_REG
8143 && REGNO (x
) != FPSR_REG
8144 && REGNO (x
) != FPCR_REG
);
8146 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8149 if (code
== 'w' || MMX_REG_P (x
))
8151 else if (code
== 'b')
8153 else if (code
== 'k')
8155 else if (code
== 'q')
8157 else if (code
== 'y')
8159 else if (code
== 'h')
8162 code
= GET_MODE_SIZE (GET_MODE (x
));
8164 /* Irritatingly, AMD extended registers use different naming convention
8165 from the normal registers. */
8166 if (REX_INT_REG_P (x
))
8168 gcc_assert (TARGET_64BIT
);
8172 error ("extended registers have no high halves");
8175 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8178 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8181 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8184 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8187 error ("unsupported operand size for extended register");
8195 if (STACK_TOP_P (x
))
8197 fputs ("st(0)", file
);
8204 if (! ANY_FP_REG_P (x
))
8205 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8210 fputs (hi_reg_name
[REGNO (x
)], file
);
8213 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8215 fputs (qi_reg_name
[REGNO (x
)], file
);
8218 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8220 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8227 /* Locate some local-dynamic symbol still in use by this function
8228 so that we can print its name in some tls_local_dynamic_base
8232 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8236 if (GET_CODE (x
) == SYMBOL_REF
8237 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8239 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8247 get_some_local_dynamic_name (void)
8251 if (cfun
->machine
->some_ld_name
)
8252 return cfun
->machine
->some_ld_name
;
8254 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8256 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8257 return cfun
->machine
->some_ld_name
;
8263 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8264 C -- print opcode suffix for set/cmov insn.
8265 c -- like C, but print reversed condition
8266 F,f -- likewise, but for floating-point.
8267 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8269 R -- print the prefix for register names.
8270 z -- print the opcode suffix for the size of the current operand.
8271 * -- print a star (in certain assembler syntax)
8272 A -- print an absolute memory reference.
8273 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8274 s -- print a shift double count, followed by the assemblers argument
8276 b -- print the QImode name of the register for the indicated operand.
8277 %b0 would print %al if operands[0] is reg 0.
8278 w -- likewise, print the HImode name of the register.
8279 k -- likewise, print the SImode name of the register.
8280 q -- likewise, print the DImode name of the register.
8281 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8282 y -- print "st(0)" instead of "st" as a register.
8283 D -- print condition for SSE cmp instruction.
8284 P -- if PIC, print an @PLT suffix.
8285 X -- don't print any sort of PIC '@' suffix for a symbol.
8286 & -- print some in-use local-dynamic symbol name.
8287 H -- print a memory address offset by 8; used for sse high-parts
8291 print_operand (FILE *file
, rtx x
, int code
)
8298 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8303 assemble_name (file
, get_some_local_dynamic_name ());
8307 switch (ASSEMBLER_DIALECT
)
8314 /* Intel syntax. For absolute addresses, registers should not
8315 be surrounded by braces. */
8319 PRINT_OPERAND (file
, x
, 0);
8329 PRINT_OPERAND (file
, x
, 0);
8334 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8339 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8344 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8349 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8354 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8359 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8364 /* 387 opcodes don't get size suffixes if the operands are
8366 if (STACK_REG_P (x
))
8369 /* Likewise if using Intel opcodes. */
8370 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8373 /* This is the size of op from size of operand. */
8374 switch (GET_MODE_SIZE (GET_MODE (x
)))
8383 #ifdef HAVE_GAS_FILDS_FISTS
8393 if (GET_MODE (x
) == SFmode
)
8408 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8410 #ifdef GAS_MNEMONICS
8436 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8438 PRINT_OPERAND (file
, x
, 0);
8444 /* Little bit of braindamage here. The SSE compare instructions
8445 does use completely different names for the comparisons that the
8446 fp conditional moves. */
8447 switch (GET_CODE (x
))
8462 fputs ("unord", file
);
8466 fputs ("neq", file
);
8470 fputs ("nlt", file
);
8474 fputs ("nle", file
);
8477 fputs ("ord", file
);
8484 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8485 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8487 switch (GET_MODE (x
))
8489 case HImode
: putc ('w', file
); break;
8491 case SFmode
: putc ('l', file
); break;
8493 case DFmode
: putc ('q', file
); break;
8494 default: gcc_unreachable ();
8501 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8504 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8505 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8508 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8511 /* Like above, but reverse condition */
8513 /* Check to see if argument to %c is really a constant
8514 and not a condition code which needs to be reversed. */
8515 if (!COMPARISON_P (x
))
8517 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8520 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8523 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8524 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8527 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8531 /* It doesn't actually matter what mode we use here, as we're
8532 only going to use this for printing. */
8533 x
= adjust_address_nv (x
, DImode
, 8);
8540 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8543 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8546 int pred_val
= INTVAL (XEXP (x
, 0));
8548 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8549 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8551 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8552 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8554 /* Emit hints only in the case default branch prediction
8555 heuristics would fail. */
8556 if (taken
!= cputaken
)
8558 /* We use 3e (DS) prefix for taken branches and
8559 2e (CS) prefix for not taken branches. */
8561 fputs ("ds ; ", file
);
8563 fputs ("cs ; ", file
);
8570 output_operand_lossage ("invalid operand code '%c'", code
);
8575 print_reg (x
, code
, file
);
8579 /* No `byte ptr' prefix for call instructions. */
8580 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8583 switch (GET_MODE_SIZE (GET_MODE (x
)))
8585 case 1: size
= "BYTE"; break;
8586 case 2: size
= "WORD"; break;
8587 case 4: size
= "DWORD"; break;
8588 case 8: size
= "QWORD"; break;
8589 case 12: size
= "XWORD"; break;
8590 case 16: size
= "XMMWORD"; break;
8595 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8598 else if (code
== 'w')
8600 else if (code
== 'k')
8604 fputs (" PTR ", file
);
8608 /* Avoid (%rip) for call operands. */
8609 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8610 && !CONST_INT_P (x
))
8611 output_addr_const (file
, x
);
8612 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8613 output_operand_lossage ("invalid constraints for operand");
8618 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8623 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8624 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8626 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8628 fprintf (file
, "0x%08lx", l
);
8631 /* These float cases don't actually occur as immediate operands. */
8632 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8636 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8637 fprintf (file
, "%s", dstr
);
8640 else if (GET_CODE (x
) == CONST_DOUBLE
8641 && GET_MODE (x
) == XFmode
)
8645 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8646 fprintf (file
, "%s", dstr
);
8651 /* We have patterns that allow zero sets of memory, for instance.
8652 In 64-bit mode, we should probably support all 8-byte vectors,
8653 since we can in fact encode that into an immediate. */
8654 if (GET_CODE (x
) == CONST_VECTOR
)
8656 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8662 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8664 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8667 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8668 || GET_CODE (x
) == LABEL_REF
)
8670 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8673 fputs ("OFFSET FLAT:", file
);
8676 if (CONST_INT_P (x
))
8677 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8679 output_pic_addr_const (file
, x
, code
);
8681 output_addr_const (file
, x
);
8685 /* Print a memory operand whose address is ADDR. */
8688 print_operand_address (FILE *file
, rtx addr
)
8690 struct ix86_address parts
;
8691 rtx base
, index
, disp
;
8693 int ok
= ix86_decompose_address (addr
, &parts
);
8698 index
= parts
.index
;
8700 scale
= parts
.scale
;
8708 if (USER_LABEL_PREFIX
[0] == 0)
8710 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8716 if (!base
&& !index
)
8718 /* Displacement only requires special attention. */
8720 if (CONST_INT_P (disp
))
8722 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8724 if (USER_LABEL_PREFIX
[0] == 0)
8726 fputs ("ds:", file
);
8728 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8731 output_pic_addr_const (file
, disp
, 0);
8733 output_addr_const (file
, disp
);
8735 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8738 if (GET_CODE (disp
) == CONST
8739 && GET_CODE (XEXP (disp
, 0)) == PLUS
8740 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8741 disp
= XEXP (XEXP (disp
, 0), 0);
8742 if (GET_CODE (disp
) == LABEL_REF
8743 || (GET_CODE (disp
) == SYMBOL_REF
8744 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8745 fputs ("(%rip)", file
);
8750 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8755 output_pic_addr_const (file
, disp
, 0);
8756 else if (GET_CODE (disp
) == LABEL_REF
)
8757 output_asm_label (disp
);
8759 output_addr_const (file
, disp
);
8764 print_reg (base
, 0, file
);
8768 print_reg (index
, 0, file
);
8770 fprintf (file
, ",%d", scale
);
8776 rtx offset
= NULL_RTX
;
8780 /* Pull out the offset of a symbol; print any symbol itself. */
8781 if (GET_CODE (disp
) == CONST
8782 && GET_CODE (XEXP (disp
, 0)) == PLUS
8783 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8785 offset
= XEXP (XEXP (disp
, 0), 1);
8786 disp
= gen_rtx_CONST (VOIDmode
,
8787 XEXP (XEXP (disp
, 0), 0));
8791 output_pic_addr_const (file
, disp
, 0);
8792 else if (GET_CODE (disp
) == LABEL_REF
)
8793 output_asm_label (disp
);
8794 else if (CONST_INT_P (disp
))
8797 output_addr_const (file
, disp
);
8803 print_reg (base
, 0, file
);
8806 if (INTVAL (offset
) >= 0)
8808 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8812 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8819 print_reg (index
, 0, file
);
8821 fprintf (file
, "*%d", scale
);
8829 output_addr_const_extra (FILE *file
, rtx x
)
8833 if (GET_CODE (x
) != UNSPEC
)
8836 op
= XVECEXP (x
, 0, 0);
8837 switch (XINT (x
, 1))
8839 case UNSPEC_GOTTPOFF
:
8840 output_addr_const (file
, op
);
8841 /* FIXME: This might be @TPOFF in Sun ld. */
8842 fputs ("@GOTTPOFF", file
);
8845 output_addr_const (file
, op
);
8846 fputs ("@TPOFF", file
);
8849 output_addr_const (file
, op
);
8851 fputs ("@TPOFF", file
);
8853 fputs ("@NTPOFF", file
);
8856 output_addr_const (file
, op
);
8857 fputs ("@DTPOFF", file
);
8859 case UNSPEC_GOTNTPOFF
:
8860 output_addr_const (file
, op
);
8862 fputs ("@GOTTPOFF(%rip)", file
);
8864 fputs ("@GOTNTPOFF", file
);
8866 case UNSPEC_INDNTPOFF
:
8867 output_addr_const (file
, op
);
8868 fputs ("@INDNTPOFF", file
);
8878 /* Split one or more DImode RTL references into pairs of SImode
8879 references. The RTL can be REG, offsettable MEM, integer constant, or
8880 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8881 split and "num" is its length. lo_half and hi_half are output arrays
8882 that parallel "operands". */
8885 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8889 rtx op
= operands
[num
];
8891 /* simplify_subreg refuse to split volatile memory addresses,
8892 but we still have to handle it. */
8895 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8896 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8900 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8901 GET_MODE (op
) == VOIDmode
8902 ? DImode
: GET_MODE (op
), 0);
8903 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8904 GET_MODE (op
) == VOIDmode
8905 ? DImode
: GET_MODE (op
), 4);
8909 /* Split one or more TImode RTL references into pairs of DImode
8910 references. The RTL can be REG, offsettable MEM, integer constant, or
8911 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8912 split and "num" is its length. lo_half and hi_half are output arrays
8913 that parallel "operands". */
8916 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8920 rtx op
= operands
[num
];
8922 /* simplify_subreg refuse to split volatile memory addresses, but we
8923 still have to handle it. */
8926 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8927 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8931 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8932 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8937 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8938 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8939 is the expression of the binary operation. The output may either be
8940 emitted here, or returned to the caller, like all output_* functions.
8942 There is no guarantee that the operands are the same mode, as they
8943 might be within FLOAT or FLOAT_EXTEND expressions. */
8945 #ifndef SYSV386_COMPAT
8946 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8947 wants to fix the assemblers because that causes incompatibility
8948 with gcc. No-one wants to fix gcc because that causes
8949 incompatibility with assemblers... You can use the option of
8950 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8951 #define SYSV386_COMPAT 1
8955 output_387_binary_op (rtx insn
, rtx
*operands
)
8957 static char buf
[30];
8960 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8962 #ifdef ENABLE_CHECKING
8963 /* Even if we do not want to check the inputs, this documents input
8964 constraints. Which helps in understanding the following code. */
8965 if (STACK_REG_P (operands
[0])
8966 && ((REG_P (operands
[1])
8967 && REGNO (operands
[0]) == REGNO (operands
[1])
8968 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8969 || (REG_P (operands
[2])
8970 && REGNO (operands
[0]) == REGNO (operands
[2])
8971 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8972 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8975 gcc_assert (is_sse
);
8978 switch (GET_CODE (operands
[3]))
8981 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8982 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8990 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8991 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8999 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9000 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9008 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9009 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9023 if (GET_MODE (operands
[0]) == SFmode
)
9024 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9026 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9031 switch (GET_CODE (operands
[3]))
9035 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9037 rtx temp
= operands
[2];
9038 operands
[2] = operands
[1];
9042 /* know operands[0] == operands[1]. */
9044 if (MEM_P (operands
[2]))
9050 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9052 if (STACK_TOP_P (operands
[0]))
9053 /* How is it that we are storing to a dead operand[2]?
9054 Well, presumably operands[1] is dead too. We can't
9055 store the result to st(0) as st(0) gets popped on this
9056 instruction. Instead store to operands[2] (which I
9057 think has to be st(1)). st(1) will be popped later.
9058 gcc <= 2.8.1 didn't have this check and generated
9059 assembly code that the Unixware assembler rejected. */
9060 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9062 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9066 if (STACK_TOP_P (operands
[0]))
9067 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9069 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9074 if (MEM_P (operands
[1]))
9080 if (MEM_P (operands
[2]))
9086 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9089 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9090 derived assemblers, confusingly reverse the direction of
9091 the operation for fsub{r} and fdiv{r} when the
9092 destination register is not st(0). The Intel assembler
9093 doesn't have this brain damage. Read !SYSV386_COMPAT to
9094 figure out what the hardware really does. */
9095 if (STACK_TOP_P (operands
[0]))
9096 p
= "{p\t%0, %2|rp\t%2, %0}";
9098 p
= "{rp\t%2, %0|p\t%0, %2}";
9100 if (STACK_TOP_P (operands
[0]))
9101 /* As above for fmul/fadd, we can't store to st(0). */
9102 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9104 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9109 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9112 if (STACK_TOP_P (operands
[0]))
9113 p
= "{rp\t%0, %1|p\t%1, %0}";
9115 p
= "{p\t%1, %0|rp\t%0, %1}";
9117 if (STACK_TOP_P (operands
[0]))
9118 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9120 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9125 if (STACK_TOP_P (operands
[0]))
9127 if (STACK_TOP_P (operands
[1]))
9128 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9130 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9133 else if (STACK_TOP_P (operands
[1]))
9136 p
= "{\t%1, %0|r\t%0, %1}";
9138 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9144 p
= "{r\t%2, %0|\t%0, %2}";
9146 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9159 /* Return needed mode for entity in optimize_mode_switching pass. */
9162 ix86_mode_needed (int entity
, rtx insn
)
9164 enum attr_i387_cw mode
;
9166 /* The mode UNINITIALIZED is used to store control word after a
9167 function call or ASM pattern. The mode ANY specify that function
9168 has no requirements on the control word and make no changes in the
9169 bits we are interested in. */
9172 || (NONJUMP_INSN_P (insn
)
9173 && (asm_noperands (PATTERN (insn
)) >= 0
9174 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9175 return I387_CW_UNINITIALIZED
;
9177 if (recog_memoized (insn
) < 0)
9180 mode
= get_attr_i387_cw (insn
);
9185 if (mode
== I387_CW_TRUNC
)
9190 if (mode
== I387_CW_FLOOR
)
9195 if (mode
== I387_CW_CEIL
)
9200 if (mode
== I387_CW_MASK_PM
)
9211 /* Output code to initialize control word copies used by trunc?f?i and
9212 rounding patterns. CURRENT_MODE is set to current control word,
9213 while NEW_MODE is set to new control word. */
9216 emit_i387_cw_initialization (int mode
)
9218 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9223 rtx reg
= gen_reg_rtx (HImode
);
9225 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9226 emit_move_insn (reg
, copy_rtx (stored_mode
));
9228 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9233 /* round toward zero (truncate) */
9234 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9235 slot
= SLOT_CW_TRUNC
;
9239 /* round down toward -oo */
9240 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9241 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9242 slot
= SLOT_CW_FLOOR
;
9246 /* round up toward +oo */
9247 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9248 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9249 slot
= SLOT_CW_CEIL
;
9252 case I387_CW_MASK_PM
:
9253 /* mask precision exception for nearbyint() */
9254 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9255 slot
= SLOT_CW_MASK_PM
;
9267 /* round toward zero (truncate) */
9268 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9269 slot
= SLOT_CW_TRUNC
;
9273 /* round down toward -oo */
9274 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9275 slot
= SLOT_CW_FLOOR
;
9279 /* round up toward +oo */
9280 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9281 slot
= SLOT_CW_CEIL
;
9284 case I387_CW_MASK_PM
:
9285 /* mask precision exception for nearbyint() */
9286 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9287 slot
= SLOT_CW_MASK_PM
;
9295 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9297 new_mode
= assign_386_stack_local (HImode
, slot
);
9298 emit_move_insn (new_mode
, reg
);
9301 /* Output code for INSN to convert a float to a signed int. OPERANDS
9302 are the insn operands. The output may be [HSD]Imode and the input
9303 operand may be [SDX]Fmode. */
9306 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9308 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9309 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9310 int round_mode
= get_attr_i387_cw (insn
);
9312 /* Jump through a hoop or two for DImode, since the hardware has no
9313 non-popping instruction. We used to do this a different way, but
9314 that was somewhat fragile and broke with post-reload splitters. */
9315 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9316 output_asm_insn ("fld\t%y1", operands
);
9318 gcc_assert (STACK_TOP_P (operands
[1]));
9319 gcc_assert (MEM_P (operands
[0]));
9320 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9323 output_asm_insn ("fisttp%z0\t%0", operands
);
9326 if (round_mode
!= I387_CW_ANY
)
9327 output_asm_insn ("fldcw\t%3", operands
);
9328 if (stack_top_dies
|| dimode_p
)
9329 output_asm_insn ("fistp%z0\t%0", operands
);
9331 output_asm_insn ("fist%z0\t%0", operands
);
9332 if (round_mode
!= I387_CW_ANY
)
9333 output_asm_insn ("fldcw\t%2", operands
);
9339 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9340 have the values zero or one, indicates the ffreep insn's operand
9341 from the OPERANDS array. */
9344 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9346 if (TARGET_USE_FFREEP
)
9347 #if HAVE_AS_IX86_FFREEP
9348 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9351 static char retval
[] = ".word\t0xc_df";
9352 int regno
= REGNO (operands
[opno
]);
9354 gcc_assert (FP_REGNO_P (regno
));
9356 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9361 return opno
? "fstp\t%y1" : "fstp\t%y0";
9365 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9366 should be used. UNORDERED_P is true when fucom should be used. */
9369 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9372 rtx cmp_op0
, cmp_op1
;
9373 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9377 cmp_op0
= operands
[0];
9378 cmp_op1
= operands
[1];
9382 cmp_op0
= operands
[1];
9383 cmp_op1
= operands
[2];
9388 if (GET_MODE (operands
[0]) == SFmode
)
9390 return "ucomiss\t{%1, %0|%0, %1}";
9392 return "comiss\t{%1, %0|%0, %1}";
9395 return "ucomisd\t{%1, %0|%0, %1}";
9397 return "comisd\t{%1, %0|%0, %1}";
9400 gcc_assert (STACK_TOP_P (cmp_op0
));
9402 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9404 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9408 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9409 return output_387_ffreep (operands
, 1);
9412 return "ftst\n\tfnstsw\t%0";
9415 if (STACK_REG_P (cmp_op1
)
9417 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9418 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9420 /* If both the top of the 387 stack dies, and the other operand
9421 is also a stack register that dies, then this must be a
9422 `fcompp' float compare */
9426 /* There is no double popping fcomi variant. Fortunately,
9427 eflags is immune from the fstp's cc clobbering. */
9429 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9431 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9432 return output_387_ffreep (operands
, 0);
9437 return "fucompp\n\tfnstsw\t%0";
9439 return "fcompp\n\tfnstsw\t%0";
9444 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9446 static const char * const alt
[16] =
9448 "fcom%z2\t%y2\n\tfnstsw\t%0",
9449 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9450 "fucom%z2\t%y2\n\tfnstsw\t%0",
9451 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9453 "ficom%z2\t%y2\n\tfnstsw\t%0",
9454 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9458 "fcomi\t{%y1, %0|%0, %y1}",
9459 "fcomip\t{%y1, %0|%0, %y1}",
9460 "fucomi\t{%y1, %0|%0, %y1}",
9461 "fucomip\t{%y1, %0|%0, %y1}",
9472 mask
= eflags_p
<< 3;
9473 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9474 mask
|= unordered_p
<< 1;
9475 mask
|= stack_top_dies
;
9477 gcc_assert (mask
< 16);
9486 ix86_output_addr_vec_elt (FILE *file
, int value
)
9488 const char *directive
= ASM_LONG
;
9492 directive
= ASM_QUAD
;
9494 gcc_assert (!TARGET_64BIT
);
9497 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9501 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9503 const char *directive
= ASM_LONG
;
9506 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9507 directive
= ASM_QUAD
;
9509 gcc_assert (!TARGET_64BIT
);
9511 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9512 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9513 fprintf (file
, "%s%s%d-%s%d\n",
9514 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9515 else if (HAVE_AS_GOTOFF_IN_DATA
)
9516 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9518 else if (TARGET_MACHO
)
9520 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9521 machopic_output_function_base_name (file
);
9522 fprintf(file
, "\n");
9526 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9527 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9530 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9534 ix86_expand_clear (rtx dest
)
9538 /* We play register width games, which are only valid after reload. */
9539 gcc_assert (reload_completed
);
9541 /* Avoid HImode and its attendant prefix byte. */
9542 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9543 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9544 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9546 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9547 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9549 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9550 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9556 /* X is an unchanging MEM. If it is a constant pool reference, return
9557 the constant pool rtx, else NULL. */
9560 maybe_get_pool_constant (rtx x
)
9562 x
= ix86_delegitimize_address (XEXP (x
, 0));
9564 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9565 return get_pool_constant (x
);
9571 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9573 int strict
= (reload_in_progress
|| reload_completed
);
9575 enum tls_model model
;
9580 if (GET_CODE (op1
) == SYMBOL_REF
)
9582 model
= SYMBOL_REF_TLS_MODEL (op1
);
9585 op1
= legitimize_tls_address (op1
, model
, true);
9586 op1
= force_operand (op1
, op0
);
9590 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9591 && SYMBOL_REF_DLLIMPORT_P (op1
))
9592 op1
= legitimize_dllimport_symbol (op1
, false);
9594 else if (GET_CODE (op1
) == CONST
9595 && GET_CODE (XEXP (op1
, 0)) == PLUS
9596 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9598 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9599 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
9602 model
= SYMBOL_REF_TLS_MODEL (symbol
);
9604 tmp
= legitimize_tls_address (symbol
, model
, true);
9605 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9606 && SYMBOL_REF_DLLIMPORT_P (symbol
))
9607 tmp
= legitimize_dllimport_symbol (symbol
, true);
9611 tmp
= force_operand (tmp
, NULL
);
9612 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
9613 op0
, 1, OPTAB_DIRECT
);
9619 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9621 if (TARGET_MACHO
&& !TARGET_64BIT
)
9626 rtx temp
= ((reload_in_progress
9627 || ((op0
&& REG_P (op0
))
9629 ? op0
: gen_reg_rtx (Pmode
));
9630 op1
= machopic_indirect_data_reference (op1
, temp
);
9631 op1
= machopic_legitimize_pic_address (op1
, mode
,
9632 temp
== op1
? 0 : temp
);
9634 else if (MACHOPIC_INDIRECT
)
9635 op1
= machopic_indirect_data_reference (op1
, 0);
9643 op1
= force_reg (Pmode
, op1
);
9644 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9646 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9647 op1
= legitimize_pic_address (op1
, reg
);
9656 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9657 || !push_operand (op0
, mode
))
9659 op1
= force_reg (mode
, op1
);
9661 if (push_operand (op0
, mode
)
9662 && ! general_no_elim_operand (op1
, mode
))
9663 op1
= copy_to_mode_reg (mode
, op1
);
9665 /* Force large constants in 64bit compilation into register
9666 to get them CSEed. */
9667 if (TARGET_64BIT
&& mode
== DImode
9668 && immediate_operand (op1
, mode
)
9669 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9670 && !register_operand (op0
, mode
)
9671 && optimize
&& !reload_completed
&& !reload_in_progress
)
9672 op1
= copy_to_mode_reg (mode
, op1
);
9674 if (FLOAT_MODE_P (mode
))
9676 /* If we are loading a floating point constant to a register,
9677 force the value to memory now, since we'll get better code
9678 out the back end. */
9682 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9684 op1
= validize_mem (force_const_mem (mode
, op1
));
9685 if (!register_operand (op0
, mode
))
9687 rtx temp
= gen_reg_rtx (mode
);
9688 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9689 emit_move_insn (op0
, temp
);
9696 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9700 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9702 rtx op0
= operands
[0], op1
= operands
[1];
9704 /* Force constants other than zero into memory. We do not know how
9705 the instructions used to build constants modify the upper 64 bits
9706 of the register, once we have that information we may be able
9707 to handle some of them more efficiently. */
9708 if ((reload_in_progress
| reload_completed
) == 0
9709 && register_operand (op0
, mode
)
9711 && standard_sse_constant_p (op1
) <= 0)
9712 op1
= validize_mem (force_const_mem (mode
, op1
));
9714 /* Make operand1 a register if it isn't already. */
9716 && !register_operand (op0
, mode
)
9717 && !register_operand (op1
, mode
))
9719 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9723 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9726 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9727 straight to ix86_expand_vector_move. */
9728 /* Code generation for scalar reg-reg moves of single and double precision data:
9729 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9733 if (x86_sse_partial_reg_dependency == true)
9738 Code generation for scalar loads of double precision data:
9739 if (x86_sse_split_regs == true)
9740 movlpd mem, reg (gas syntax)
9744 Code generation for unaligned packed loads of single precision data
9745 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9746 if (x86_sse_unaligned_move_optimal)
9749 if (x86_sse_partial_reg_dependency == true)
9761 Code generation for unaligned packed loads of double precision data
9762 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9763 if (x86_sse_unaligned_move_optimal)
9766 if (x86_sse_split_regs == true)
9779 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9788 /* If we're optimizing for size, movups is the smallest. */
9791 op0
= gen_lowpart (V4SFmode
, op0
);
9792 op1
= gen_lowpart (V4SFmode
, op1
);
9793 emit_insn (gen_sse_movups (op0
, op1
));
9797 /* ??? If we have typed data, then it would appear that using
9798 movdqu is the only way to get unaligned data loaded with
9800 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9802 op0
= gen_lowpart (V16QImode
, op0
);
9803 op1
= gen_lowpart (V16QImode
, op1
);
9804 emit_insn (gen_sse2_movdqu (op0
, op1
));
9808 if (TARGET_SSE2
&& mode
== V2DFmode
)
9812 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9814 op0
= gen_lowpart (V2DFmode
, op0
);
9815 op1
= gen_lowpart (V2DFmode
, op1
);
9816 emit_insn (gen_sse2_movupd (op0
, op1
));
9820 /* When SSE registers are split into halves, we can avoid
9821 writing to the top half twice. */
9822 if (TARGET_SSE_SPLIT_REGS
)
9824 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9829 /* ??? Not sure about the best option for the Intel chips.
9830 The following would seem to satisfy; the register is
9831 entirely cleared, breaking the dependency chain. We
9832 then store to the upper half, with a dependency depth
9833 of one. A rumor has it that Intel recommends two movsd
9834 followed by an unpacklpd, but this is unconfirmed. And
9835 given that the dependency depth of the unpacklpd would
9836 still be one, I'm not sure why this would be better. */
9837 zero
= CONST0_RTX (V2DFmode
);
9840 m
= adjust_address (op1
, DFmode
, 0);
9841 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9842 m
= adjust_address (op1
, DFmode
, 8);
9843 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9847 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9849 op0
= gen_lowpart (V4SFmode
, op0
);
9850 op1
= gen_lowpart (V4SFmode
, op1
);
9851 emit_insn (gen_sse_movups (op0
, op1
));
9855 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9856 emit_move_insn (op0
, CONST0_RTX (mode
));
9858 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9860 if (mode
!= V4SFmode
)
9861 op0
= gen_lowpart (V4SFmode
, op0
);
9862 m
= adjust_address (op1
, V2SFmode
, 0);
9863 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9864 m
= adjust_address (op1
, V2SFmode
, 8);
9865 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9868 else if (MEM_P (op0
))
9870 /* If we're optimizing for size, movups is the smallest. */
9873 op0
= gen_lowpart (V4SFmode
, op0
);
9874 op1
= gen_lowpart (V4SFmode
, op1
);
9875 emit_insn (gen_sse_movups (op0
, op1
));
9879 /* ??? Similar to above, only less clear because of quote
9880 typeless stores unquote. */
9881 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9882 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9884 op0
= gen_lowpart (V16QImode
, op0
);
9885 op1
= gen_lowpart (V16QImode
, op1
);
9886 emit_insn (gen_sse2_movdqu (op0
, op1
));
9890 if (TARGET_SSE2
&& mode
== V2DFmode
)
9892 m
= adjust_address (op0
, DFmode
, 0);
9893 emit_insn (gen_sse2_storelpd (m
, op1
));
9894 m
= adjust_address (op0
, DFmode
, 8);
9895 emit_insn (gen_sse2_storehpd (m
, op1
));
9899 if (mode
!= V4SFmode
)
9900 op1
= gen_lowpart (V4SFmode
, op1
);
9901 m
= adjust_address (op0
, V2SFmode
, 0);
9902 emit_insn (gen_sse_storelps (m
, op1
));
9903 m
= adjust_address (op0
, V2SFmode
, 8);
9904 emit_insn (gen_sse_storehps (m
, op1
));
9911 /* Expand a push in MODE. This is some mode for which we do not support
9912 proper push instructions, at least from the registers that we expect
9913 the value to live in. */
9916 ix86_expand_push (enum machine_mode mode
, rtx x
)
9920 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9921 GEN_INT (-GET_MODE_SIZE (mode
)),
9922 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9923 if (tmp
!= stack_pointer_rtx
)
9924 emit_move_insn (stack_pointer_rtx
, tmp
);
9926 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9927 emit_move_insn (tmp
, x
);
9930 /* Helper function of ix86_fixup_binary_operands to canonicalize
9931 operand order. Returns true if the operands should be swapped. */
9934 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9937 rtx dst
= operands
[0];
9938 rtx src1
= operands
[1];
9939 rtx src2
= operands
[2];
9941 /* If the operation is not commutative, we can't do anything. */
9942 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9945 /* Highest priority is that src1 should match dst. */
9946 if (rtx_equal_p (dst
, src1
))
9948 if (rtx_equal_p (dst
, src2
))
9951 /* Next highest priority is that immediate constants come second. */
9952 if (immediate_operand (src2
, mode
))
9954 if (immediate_operand (src1
, mode
))
9957 /* Lowest priority is that memory references should come second. */
9967 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9968 destination to use for the operation. If different from the true
9969 destination in operands[0], a copy operation will be required. */
9972 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9975 rtx dst
= operands
[0];
9976 rtx src1
= operands
[1];
9977 rtx src2
= operands
[2];
9979 /* Canonicalize operand order. */
9980 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9987 /* Both source operands cannot be in memory. */
9988 if (MEM_P (src1
) && MEM_P (src2
))
9990 /* Optimization: Only read from memory once. */
9991 if (rtx_equal_p (src1
, src2
))
9993 src2
= force_reg (mode
, src2
);
9997 src2
= force_reg (mode
, src2
);
10000 /* If the destination is memory, and we do not have matching source
10001 operands, do things in registers. */
10002 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10003 dst
= gen_reg_rtx (mode
);
10005 /* Source 1 cannot be a constant. */
10006 if (CONSTANT_P (src1
))
10007 src1
= force_reg (mode
, src1
);
10009 /* Source 1 cannot be a non-matching memory. */
10010 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10011 src1
= force_reg (mode
, src1
);
10013 operands
[1] = src1
;
10014 operands
[2] = src2
;
10018 /* Similarly, but assume that the destination has already been
10019 set up properly. */
10022 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10023 enum machine_mode mode
, rtx operands
[])
10025 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10026 gcc_assert (dst
== operands
[0]);
10029 /* Attempt to expand a binary operator. Make the expansion closer to the
10030 actual machine, then just general_operand, which will allow 3 separate
10031 memory references (one output, two input) in a single insn. */
10034 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10037 rtx src1
, src2
, dst
, op
, clob
;
10039 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10040 src1
= operands
[1];
10041 src2
= operands
[2];
10043 /* Emit the instruction. */
10045 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10046 if (reload_in_progress
)
10048 /* Reload doesn't know about the flags register, and doesn't know that
10049 it doesn't want to clobber it. We can only do this with PLUS. */
10050 gcc_assert (code
== PLUS
);
10055 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10056 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10059 /* Fix up the destination if needed. */
10060 if (dst
!= operands
[0])
10061 emit_move_insn (operands
[0], dst
);
10064 /* Return TRUE or FALSE depending on whether the binary operator meets the
10065 appropriate constraints. */
10068 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10071 rtx dst
= operands
[0];
10072 rtx src1
= operands
[1];
10073 rtx src2
= operands
[2];
10075 /* Both source operands cannot be in memory. */
10076 if (MEM_P (src1
) && MEM_P (src2
))
10079 /* Canonicalize operand order for commutative operators. */
10080 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10087 /* If the destination is memory, we must have a matching source operand. */
10088 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10091 /* Source 1 cannot be a constant. */
10092 if (CONSTANT_P (src1
))
10095 /* Source 1 cannot be a non-matching memory. */
10096 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10102 /* Attempt to expand a unary operator. Make the expansion closer to the
10103 actual machine, then just general_operand, which will allow 2 separate
10104 memory references (one output, one input) in a single insn. */
10107 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10110 int matching_memory
;
10111 rtx src
, dst
, op
, clob
;
10116 /* If the destination is memory, and we do not have matching source
10117 operands, do things in registers. */
10118 matching_memory
= 0;
10121 if (rtx_equal_p (dst
, src
))
10122 matching_memory
= 1;
10124 dst
= gen_reg_rtx (mode
);
10127 /* When source operand is memory, destination must match. */
10128 if (MEM_P (src
) && !matching_memory
)
10129 src
= force_reg (mode
, src
);
10131 /* Emit the instruction. */
10133 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10134 if (reload_in_progress
|| code
== NOT
)
10136 /* Reload doesn't know about the flags register, and doesn't know that
10137 it doesn't want to clobber it. */
10138 gcc_assert (code
== NOT
);
10143 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10144 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10147 /* Fix up the destination if needed. */
10148 if (dst
!= operands
[0])
10149 emit_move_insn (operands
[0], dst
);
10152 /* Return TRUE or FALSE depending on whether the unary operator meets the
10153 appropriate constraints. */
10156 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10157 enum machine_mode mode ATTRIBUTE_UNUSED
,
10158 rtx operands
[2] ATTRIBUTE_UNUSED
)
10160 /* If one of operands is memory, source and destination must match. */
10161 if ((MEM_P (operands
[0])
10162 || MEM_P (operands
[1]))
10163 && ! rtx_equal_p (operands
[0], operands
[1]))
10168 /* Post-reload splitter for converting an SF or DFmode value in an
10169 SSE register into an unsigned SImode. */
10172 ix86_split_convert_uns_si_sse (rtx operands
[])
10174 enum machine_mode vecmode
;
10175 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10177 large
= operands
[1];
10178 zero_or_two31
= operands
[2];
10179 input
= operands
[3];
10180 two31
= operands
[4];
10181 vecmode
= GET_MODE (large
);
10182 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10184 /* Load up the value into the low element. We must ensure that the other
10185 elements are valid floats -- zero is the easiest such value. */
10188 if (vecmode
== V4SFmode
)
10189 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10191 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10195 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10196 emit_move_insn (value
, CONST0_RTX (vecmode
));
10197 if (vecmode
== V4SFmode
)
10198 emit_insn (gen_sse_movss (value
, value
, input
));
10200 emit_insn (gen_sse2_movsd (value
, value
, input
));
10203 emit_move_insn (large
, two31
);
10204 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10206 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10207 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10209 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10210 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10212 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10213 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10215 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10216 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10218 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10219 if (vecmode
== V4SFmode
)
10220 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10222 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10225 emit_insn (gen_xorv4si3 (value
, value
, large
));
10228 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10229 Expects the 64-bit DImode to be supplied in a pair of integral
10230 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10231 -mfpmath=sse, !optimize_size only. */
10234 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10236 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10237 rtx int_xmm
, fp_xmm
;
10238 rtx biases
, exponents
;
10241 int_xmm
= gen_reg_rtx (V4SImode
);
10242 if (TARGET_INTER_UNIT_MOVES
)
10243 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10244 else if (TARGET_SSE_SPLIT_REGS
)
10246 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10247 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10251 x
= gen_reg_rtx (V2DImode
);
10252 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10253 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10256 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10257 gen_rtvec (4, GEN_INT (0x43300000UL
),
10258 GEN_INT (0x45300000UL
),
10259 const0_rtx
, const0_rtx
));
10260 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10262 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10263 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10265 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10266 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10267 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10268 (0x1.0p84 + double(fp_value_hi_xmm)).
10269 Note these exponents differ by 32. */
10271 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10273 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10274 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10275 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10276 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10277 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10278 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10279 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10280 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10281 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10283 /* Add the upper and lower DFmode values together. */
10285 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10288 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10289 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10290 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10293 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10296 /* Convert an unsigned SImode value into a DFmode. Only currently used
10297 for SSE, but applicable anywhere. */
10300 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10302 REAL_VALUE_TYPE TWO31r
;
10305 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10306 NULL
, 1, OPTAB_DIRECT
);
10308 fp
= gen_reg_rtx (DFmode
);
10309 emit_insn (gen_floatsidf2 (fp
, x
));
10311 real_ldexp (&TWO31r
, &dconst1
, 31);
10312 x
= const_double_from_real_value (TWO31r
, DFmode
);
10314 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10316 emit_move_insn (target
, x
);
10319 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10320 32-bit mode; otherwise we have a direct convert instruction. */
10323 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10325 REAL_VALUE_TYPE TWO32r
;
10326 rtx fp_lo
, fp_hi
, x
;
10328 fp_lo
= gen_reg_rtx (DFmode
);
10329 fp_hi
= gen_reg_rtx (DFmode
);
10331 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10333 real_ldexp (&TWO32r
, &dconst1
, 32);
10334 x
= const_double_from_real_value (TWO32r
, DFmode
);
10335 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10337 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10339 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10342 emit_move_insn (target
, x
);
10345 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10346 For x86_32, -mfpmath=sse, !optimize_size only. */
10348 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10350 REAL_VALUE_TYPE ONE16r
;
10351 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10353 real_ldexp (&ONE16r
, &dconst1
, 16);
10354 x
= const_double_from_real_value (ONE16r
, SFmode
);
10355 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10356 NULL
, 0, OPTAB_DIRECT
);
10357 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10358 NULL
, 0, OPTAB_DIRECT
);
10359 fp_hi
= gen_reg_rtx (SFmode
);
10360 fp_lo
= gen_reg_rtx (SFmode
);
10361 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10362 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10363 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10365 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10367 if (!rtx_equal_p (target
, fp_hi
))
10368 emit_move_insn (target
, fp_hi
);
10371 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10372 then replicate the value for all elements of the vector
10376 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10383 v
= gen_rtvec (4, value
, value
, value
, value
);
10385 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10386 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10387 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10391 v
= gen_rtvec (2, value
, value
);
10393 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10394 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10397 gcc_unreachable ();
10401 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10402 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10403 true, then replicate the mask for all elements of the vector register.
10404 If INVERT is true, then create a mask excluding the sign bit. */
10407 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10409 enum machine_mode vec_mode
;
10410 HOST_WIDE_INT hi
, lo
;
10415 /* Find the sign bit, sign extended to 2*HWI. */
10416 if (mode
== SFmode
)
10417 lo
= 0x80000000, hi
= lo
< 0;
10418 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10419 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10421 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10424 lo
= ~lo
, hi
= ~hi
;
10426 /* Force this value into the low part of a fp vector constant. */
10427 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10428 mask
= gen_lowpart (mode
, mask
);
10430 v
= ix86_build_const_vector (mode
, vect
, mask
);
10431 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10432 return force_reg (vec_mode
, v
);
10435 /* Generate code for floating point ABS or NEG. */
10438 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10441 rtx mask
, set
, use
, clob
, dst
, src
;
10442 bool matching_memory
;
10443 bool use_sse
= false;
10444 bool vector_mode
= VECTOR_MODE_P (mode
);
10445 enum machine_mode elt_mode
= mode
;
10449 elt_mode
= GET_MODE_INNER (mode
);
10452 else if (TARGET_SSE_MATH
)
10453 use_sse
= SSE_FLOAT_MODE_P (mode
);
10455 /* NEG and ABS performed with SSE use bitwise mask operations.
10456 Create the appropriate mask now. */
10458 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10465 /* If the destination is memory, and we don't have matching source
10466 operands or we're using the x87, do things in registers. */
10467 matching_memory
= false;
10470 if (use_sse
&& rtx_equal_p (dst
, src
))
10471 matching_memory
= true;
10473 dst
= gen_reg_rtx (mode
);
10475 if (MEM_P (src
) && !matching_memory
)
10476 src
= force_reg (mode
, src
);
10480 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10481 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10486 set
= gen_rtx_fmt_e (code
, mode
, src
);
10487 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10490 use
= gen_rtx_USE (VOIDmode
, mask
);
10491 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10492 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10493 gen_rtvec (3, set
, use
, clob
)));
10499 if (dst
!= operands
[0])
10500 emit_move_insn (operands
[0], dst
);
10503 /* Expand a copysign operation. Special case operand 0 being a constant. */
10506 ix86_expand_copysign (rtx operands
[])
10508 enum machine_mode mode
, vmode
;
10509 rtx dest
, op0
, op1
, mask
, nmask
;
10511 dest
= operands
[0];
10515 mode
= GET_MODE (dest
);
10516 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10518 if (GET_CODE (op0
) == CONST_DOUBLE
)
10522 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10523 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10525 if (op0
== CONST0_RTX (mode
))
10526 op0
= CONST0_RTX (vmode
);
10529 if (mode
== SFmode
)
10530 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10531 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10533 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10534 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10537 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10539 if (mode
== SFmode
)
10540 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10542 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10546 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10547 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10549 if (mode
== SFmode
)
10550 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10552 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10556 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10557 be a constant, and so has already been expanded into a vector constant. */
10560 ix86_split_copysign_const (rtx operands
[])
10562 enum machine_mode mode
, vmode
;
10563 rtx dest
, op0
, op1
, mask
, x
;
10565 dest
= operands
[0];
10568 mask
= operands
[3];
10570 mode
= GET_MODE (dest
);
10571 vmode
= GET_MODE (mask
);
10573 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10574 x
= gen_rtx_AND (vmode
, dest
, mask
);
10575 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10577 if (op0
!= CONST0_RTX (vmode
))
10579 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10580 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10584 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10585 so we have to do two masks. */
10588 ix86_split_copysign_var (rtx operands
[])
10590 enum machine_mode mode
, vmode
;
10591 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10593 dest
= operands
[0];
10594 scratch
= operands
[1];
10597 nmask
= operands
[4];
10598 mask
= operands
[5];
10600 mode
= GET_MODE (dest
);
10601 vmode
= GET_MODE (mask
);
10603 if (rtx_equal_p (op0
, op1
))
10605 /* Shouldn't happen often (it's useless, obviously), but when it does
10606 we'd generate incorrect code if we continue below. */
10607 emit_move_insn (dest
, op0
);
10611 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10613 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10615 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10616 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10619 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10620 x
= gen_rtx_NOT (vmode
, dest
);
10621 x
= gen_rtx_AND (vmode
, x
, op0
);
10622 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10626 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10628 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10630 else /* alternative 2,4 */
10632 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10633 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10634 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10636 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10638 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10640 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10641 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10643 else /* alternative 3,4 */
10645 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10647 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10648 x
= gen_rtx_AND (vmode
, dest
, op0
);
10650 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10653 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10654 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10657 /* Return TRUE or FALSE depending on whether the first SET in INSN
10658 has source and destination with matching CC modes, and that the
10659 CC mode is at least as constrained as REQ_MODE. */
10662 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10665 enum machine_mode set_mode
;
10667 set
= PATTERN (insn
);
10668 if (GET_CODE (set
) == PARALLEL
)
10669 set
= XVECEXP (set
, 0, 0);
10670 gcc_assert (GET_CODE (set
) == SET
);
10671 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10673 set_mode
= GET_MODE (SET_DEST (set
));
10677 if (req_mode
!= CCNOmode
10678 && (req_mode
!= CCmode
10679 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10683 if (req_mode
== CCGCmode
)
10687 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10691 if (req_mode
== CCZmode
)
10698 gcc_unreachable ();
10701 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10704 /* Generate insn patterns to do an integer compare of OPERANDS. */
10707 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10709 enum machine_mode cmpmode
;
10712 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10713 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10715 /* This is very simple, but making the interface the same as in the
10716 FP case makes the rest of the code easier. */
10717 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10718 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10720 /* Return the test that should be put into the flags user, i.e.
10721 the bcc, scc, or cmov instruction. */
10722 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10725 /* Figure out whether to use ordered or unordered fp comparisons.
10726 Return the appropriate mode to use. */
10729 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10731 /* ??? In order to make all comparisons reversible, we do all comparisons
10732 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10733 all forms trapping and nontrapping comparisons, we can make inequality
10734 comparisons trapping again, since it results in better code when using
10735 FCOM based compares. */
10736 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10740 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10742 enum machine_mode mode
= GET_MODE (op0
);
10744 if (SCALAR_FLOAT_MODE_P (mode
))
10746 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
10747 return ix86_fp_compare_mode (code
);
10752 /* Only zero flag is needed. */
10753 case EQ
: /* ZF=0 */
10754 case NE
: /* ZF!=0 */
10756 /* Codes needing carry flag. */
10757 case GEU
: /* CF=0 */
10758 case GTU
: /* CF=0 & ZF=0 */
10759 case LTU
: /* CF=1 */
10760 case LEU
: /* CF=1 | ZF=1 */
10762 /* Codes possibly doable only with sign flag when
10763 comparing against zero. */
10764 case GE
: /* SF=OF or SF=0 */
10765 case LT
: /* SF<>OF or SF=1 */
10766 if (op1
== const0_rtx
)
10769 /* For other cases Carry flag is not required. */
10771 /* Codes doable only with sign flag when comparing
10772 against zero, but we miss jump instruction for it
10773 so we need to use relational tests against overflow
10774 that thus needs to be zero. */
10775 case GT
: /* ZF=0 & SF=OF */
10776 case LE
: /* ZF=1 | SF<>OF */
10777 if (op1
== const0_rtx
)
10781 /* strcmp pattern do (use flags) and combine may ask us for proper
10786 gcc_unreachable ();
10790 /* Return the fixed registers used for condition codes. */
10793 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10800 /* If two condition code modes are compatible, return a condition code
10801 mode which is compatible with both. Otherwise, return
10804 static enum machine_mode
10805 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10810 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10813 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10814 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10820 gcc_unreachable ();
10842 /* These are only compatible with themselves, which we already
10848 /* Split comparison code CODE into comparisons we can do using branch
10849 instructions. BYPASS_CODE is comparison code for branch that will
10850 branch around FIRST_CODE and SECOND_CODE. If some of branches
10851 is not required, set value to UNKNOWN.
10852 We never require more than two branches. */
10855 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10856 enum rtx_code
*first_code
,
10857 enum rtx_code
*second_code
)
10859 *first_code
= code
;
10860 *bypass_code
= UNKNOWN
;
10861 *second_code
= UNKNOWN
;
10863 /* The fcomi comparison sets flags as follows:
10873 case GT
: /* GTU - CF=0 & ZF=0 */
10874 case GE
: /* GEU - CF=0 */
10875 case ORDERED
: /* PF=0 */
10876 case UNORDERED
: /* PF=1 */
10877 case UNEQ
: /* EQ - ZF=1 */
10878 case UNLT
: /* LTU - CF=1 */
10879 case UNLE
: /* LEU - CF=1 | ZF=1 */
10880 case LTGT
: /* EQ - ZF=0 */
10882 case LT
: /* LTU - CF=1 - fails on unordered */
10883 *first_code
= UNLT
;
10884 *bypass_code
= UNORDERED
;
10886 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10887 *first_code
= UNLE
;
10888 *bypass_code
= UNORDERED
;
10890 case EQ
: /* EQ - ZF=1 - fails on unordered */
10891 *first_code
= UNEQ
;
10892 *bypass_code
= UNORDERED
;
10894 case NE
: /* NE - ZF=0 - fails on unordered */
10895 *first_code
= LTGT
;
10896 *second_code
= UNORDERED
;
10898 case UNGE
: /* GEU - CF=0 - fails on unordered */
10900 *second_code
= UNORDERED
;
10902 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10904 *second_code
= UNORDERED
;
10907 gcc_unreachable ();
10909 if (!TARGET_IEEE_FP
)
10911 *second_code
= UNKNOWN
;
10912 *bypass_code
= UNKNOWN
;
10916 /* Return cost of comparison done fcom + arithmetics operations on AX.
10917 All following functions do use number of instructions as a cost metrics.
10918 In future this should be tweaked to compute bytes for optimize_size and
10919 take into account performance of various instructions on various CPUs. */
10921 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10923 if (!TARGET_IEEE_FP
)
10925 /* The cost of code output by ix86_expand_fp_compare. */
10949 gcc_unreachable ();
10953 /* Return cost of comparison done using fcomi operation.
10954 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10956 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10958 enum rtx_code bypass_code
, first_code
, second_code
;
10959 /* Return arbitrarily high cost when instruction is not supported - this
10960 prevents gcc from using it. */
10963 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10964 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10967 /* Return cost of comparison done using sahf operation.
10968 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10970 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10972 enum rtx_code bypass_code
, first_code
, second_code
;
10973 /* Return arbitrarily high cost when instruction is not preferred - this
10974 avoids gcc from using it. */
10975 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
10977 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10978 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10981 /* Compute cost of the comparison done using any method.
10982 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10984 ix86_fp_comparison_cost (enum rtx_code code
)
10986 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10989 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10990 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10992 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10993 if (min
> sahf_cost
)
10995 if (min
> fcomi_cost
)
11000 /* Return true if we should use an FCOMI instruction for this
11004 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11006 enum rtx_code swapped_code
= swap_condition (code
);
11008 return ((ix86_fp_comparison_cost (code
)
11009 == ix86_fp_comparison_fcomi_cost (code
))
11010 || (ix86_fp_comparison_cost (swapped_code
)
11011 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11014 /* Swap, force into registers, or otherwise massage the two operands
11015 to a fp comparison. The operands are updated in place; the new
11016 comparison code is returned. */
11018 static enum rtx_code
11019 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11021 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11022 rtx op0
= *pop0
, op1
= *pop1
;
11023 enum machine_mode op_mode
= GET_MODE (op0
);
11024 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11026 /* All of the unordered compare instructions only work on registers.
11027 The same is true of the fcomi compare instructions. The XFmode
11028 compare instructions require registers except when comparing
11029 against zero or when converting operand 1 from fixed point to
11033 && (fpcmp_mode
== CCFPUmode
11034 || (op_mode
== XFmode
11035 && ! (standard_80387_constant_p (op0
) == 1
11036 || standard_80387_constant_p (op1
) == 1)
11037 && GET_CODE (op1
) != FLOAT
)
11038 || ix86_use_fcomi_compare (code
)))
11040 op0
= force_reg (op_mode
, op0
);
11041 op1
= force_reg (op_mode
, op1
);
11045 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11046 things around if they appear profitable, otherwise force op0
11047 into a register. */
11049 if (standard_80387_constant_p (op0
) == 0
11051 && ! (standard_80387_constant_p (op1
) == 0
11055 tmp
= op0
, op0
= op1
, op1
= tmp
;
11056 code
= swap_condition (code
);
11060 op0
= force_reg (op_mode
, op0
);
11062 if (CONSTANT_P (op1
))
11064 int tmp
= standard_80387_constant_p (op1
);
11066 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11070 op1
= force_reg (op_mode
, op1
);
11073 op1
= force_reg (op_mode
, op1
);
11077 /* Try to rearrange the comparison to make it cheaper. */
11078 if (ix86_fp_comparison_cost (code
)
11079 > ix86_fp_comparison_cost (swap_condition (code
))
11080 && (REG_P (op1
) || !no_new_pseudos
))
11083 tmp
= op0
, op0
= op1
, op1
= tmp
;
11084 code
= swap_condition (code
);
11086 op0
= force_reg (op_mode
, op0
);
11094 /* Convert comparison codes we use to represent FP comparison to integer
11095 code that will result in proper branch. Return UNKNOWN if no such code
11099 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11128 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11131 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11132 rtx
*second_test
, rtx
*bypass_test
)
11134 enum machine_mode fpcmp_mode
, intcmp_mode
;
11136 int cost
= ix86_fp_comparison_cost (code
);
11137 enum rtx_code bypass_code
, first_code
, second_code
;
11139 fpcmp_mode
= ix86_fp_compare_mode (code
);
11140 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11143 *second_test
= NULL_RTX
;
11145 *bypass_test
= NULL_RTX
;
11147 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11149 /* Do fcomi/sahf based test when profitable. */
11150 if ((TARGET_CMOVE
|| TARGET_SAHF
)
11151 && (bypass_code
== UNKNOWN
|| bypass_test
)
11152 && (second_code
== UNKNOWN
|| second_test
)
11153 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
11157 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11158 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11164 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11165 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11167 scratch
= gen_reg_rtx (HImode
);
11168 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11169 emit_insn (gen_x86_sahf_1 (scratch
));
11172 /* The FP codes work out to act like unsigned. */
11173 intcmp_mode
= fpcmp_mode
;
11175 if (bypass_code
!= UNKNOWN
)
11176 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11177 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11179 if (second_code
!= UNKNOWN
)
11180 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11181 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11186 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11187 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11188 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11190 scratch
= gen_reg_rtx (HImode
);
11191 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11193 /* In the unordered case, we have to check C2 for NaN's, which
11194 doesn't happen to work out to anything nice combination-wise.
11195 So do some bit twiddling on the value we've got in AH to come
11196 up with an appropriate set of condition codes. */
11198 intcmp_mode
= CCNOmode
;
11203 if (code
== GT
|| !TARGET_IEEE_FP
)
11205 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11210 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11211 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11212 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11213 intcmp_mode
= CCmode
;
11219 if (code
== LT
&& TARGET_IEEE_FP
)
11221 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11222 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11223 intcmp_mode
= CCmode
;
11228 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11234 if (code
== GE
|| !TARGET_IEEE_FP
)
11236 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11241 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11242 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11249 if (code
== LE
&& TARGET_IEEE_FP
)
11251 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11252 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11253 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11254 intcmp_mode
= CCmode
;
11259 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11265 if (code
== EQ
&& TARGET_IEEE_FP
)
11267 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11268 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11269 intcmp_mode
= CCmode
;
11274 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11281 if (code
== NE
&& TARGET_IEEE_FP
)
11283 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11284 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11290 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11296 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11300 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11305 gcc_unreachable ();
11309 /* Return the test that should be put into the flags user, i.e.
11310 the bcc, scc, or cmov instruction. */
11311 return gen_rtx_fmt_ee (code
, VOIDmode
,
11312 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11317 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11320 op0
= ix86_compare_op0
;
11321 op1
= ix86_compare_op1
;
11324 *second_test
= NULL_RTX
;
11326 *bypass_test
= NULL_RTX
;
11328 if (ix86_compare_emitted
)
11330 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11331 ix86_compare_emitted
= NULL_RTX
;
11333 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11335 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11336 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11337 second_test
, bypass_test
);
11340 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11345 /* Return true if the CODE will result in nontrivial jump sequence. */
11347 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11349 enum rtx_code bypass_code
, first_code
, second_code
;
11352 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11353 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11357 ix86_expand_branch (enum rtx_code code
, rtx label
)
11361 /* If we have emitted a compare insn, go straight to simple.
11362 ix86_expand_compare won't emit anything if ix86_compare_emitted
11364 if (ix86_compare_emitted
)
11367 switch (GET_MODE (ix86_compare_op0
))
11373 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11374 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11375 gen_rtx_LABEL_REF (VOIDmode
, label
),
11377 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11386 enum rtx_code bypass_code
, first_code
, second_code
;
11388 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11389 &ix86_compare_op1
);
11391 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11393 /* Check whether we will use the natural sequence with one jump. If
11394 so, we can expand jump early. Otherwise delay expansion by
11395 creating compound insn to not confuse optimizers. */
11396 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11399 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11400 gen_rtx_LABEL_REF (VOIDmode
, label
),
11401 pc_rtx
, NULL_RTX
, NULL_RTX
);
11405 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11406 ix86_compare_op0
, ix86_compare_op1
);
11407 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11408 gen_rtx_LABEL_REF (VOIDmode
, label
),
11410 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11412 use_fcomi
= ix86_use_fcomi_compare (code
);
11413 vec
= rtvec_alloc (3 + !use_fcomi
);
11414 RTVEC_ELT (vec
, 0) = tmp
;
11416 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11418 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11421 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11423 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11432 /* Expand DImode branch into multiple compare+branch. */
11434 rtx lo
[2], hi
[2], label2
;
11435 enum rtx_code code1
, code2
, code3
;
11436 enum machine_mode submode
;
11438 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11440 tmp
= ix86_compare_op0
;
11441 ix86_compare_op0
= ix86_compare_op1
;
11442 ix86_compare_op1
= tmp
;
11443 code
= swap_condition (code
);
11445 if (GET_MODE (ix86_compare_op0
) == DImode
)
11447 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11448 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11453 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11454 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11458 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11459 avoid two branches. This costs one extra insn, so disable when
11460 optimizing for size. */
11462 if ((code
== EQ
|| code
== NE
)
11464 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11469 if (hi
[1] != const0_rtx
)
11470 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11471 NULL_RTX
, 0, OPTAB_WIDEN
);
11474 if (lo
[1] != const0_rtx
)
11475 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11476 NULL_RTX
, 0, OPTAB_WIDEN
);
11478 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11479 NULL_RTX
, 0, OPTAB_WIDEN
);
11481 ix86_compare_op0
= tmp
;
11482 ix86_compare_op1
= const0_rtx
;
11483 ix86_expand_branch (code
, label
);
11487 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11488 op1 is a constant and the low word is zero, then we can just
11489 examine the high word. */
11491 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11494 case LT
: case LTU
: case GE
: case GEU
:
11495 ix86_compare_op0
= hi
[0];
11496 ix86_compare_op1
= hi
[1];
11497 ix86_expand_branch (code
, label
);
11503 /* Otherwise, we need two or three jumps. */
11505 label2
= gen_label_rtx ();
11508 code2
= swap_condition (code
);
11509 code3
= unsigned_condition (code
);
11513 case LT
: case GT
: case LTU
: case GTU
:
11516 case LE
: code1
= LT
; code2
= GT
; break;
11517 case GE
: code1
= GT
; code2
= LT
; break;
11518 case LEU
: code1
= LTU
; code2
= GTU
; break;
11519 case GEU
: code1
= GTU
; code2
= LTU
; break;
11521 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11522 case NE
: code2
= UNKNOWN
; break;
11525 gcc_unreachable ();
11530 * if (hi(a) < hi(b)) goto true;
11531 * if (hi(a) > hi(b)) goto false;
11532 * if (lo(a) < lo(b)) goto true;
11536 ix86_compare_op0
= hi
[0];
11537 ix86_compare_op1
= hi
[1];
11539 if (code1
!= UNKNOWN
)
11540 ix86_expand_branch (code1
, label
);
11541 if (code2
!= UNKNOWN
)
11542 ix86_expand_branch (code2
, label2
);
11544 ix86_compare_op0
= lo
[0];
11545 ix86_compare_op1
= lo
[1];
11546 ix86_expand_branch (code3
, label
);
11548 if (code2
!= UNKNOWN
)
11549 emit_label (label2
);
11554 gcc_unreachable ();
11558 /* Split branch based on floating point condition. */
11560 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11561 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11563 rtx second
, bypass
;
11564 rtx label
= NULL_RTX
;
11566 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11569 if (target2
!= pc_rtx
)
11572 code
= reverse_condition_maybe_unordered (code
);
11577 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11578 tmp
, &second
, &bypass
);
11580 /* Remove pushed operand from stack. */
11582 ix86_free_from_memory (GET_MODE (pushed
));
11584 if (split_branch_probability
>= 0)
11586 /* Distribute the probabilities across the jumps.
11587 Assume the BYPASS and SECOND to be always test
11589 probability
= split_branch_probability
;
11591 /* Value of 1 is low enough to make no need for probability
11592 to be updated. Later we may run some experiments and see
11593 if unordered values are more frequent in practice. */
11595 bypass_probability
= 1;
11597 second_probability
= 1;
11599 if (bypass
!= NULL_RTX
)
11601 label
= gen_label_rtx ();
11602 i
= emit_jump_insn (gen_rtx_SET
11604 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11606 gen_rtx_LABEL_REF (VOIDmode
,
11609 if (bypass_probability
>= 0)
11611 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11612 GEN_INT (bypass_probability
),
11615 i
= emit_jump_insn (gen_rtx_SET
11617 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11618 condition
, target1
, target2
)));
11619 if (probability
>= 0)
11621 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11622 GEN_INT (probability
),
11624 if (second
!= NULL_RTX
)
11626 i
= emit_jump_insn (gen_rtx_SET
11628 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11630 if (second_probability
>= 0)
11632 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11633 GEN_INT (second_probability
),
11636 if (label
!= NULL_RTX
)
11637 emit_label (label
);
11641 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11643 rtx ret
, tmp
, tmpreg
, equiv
;
11644 rtx second_test
, bypass_test
;
11646 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11647 return 0; /* FAIL */
11649 gcc_assert (GET_MODE (dest
) == QImode
);
11651 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11652 PUT_MODE (ret
, QImode
);
11657 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11658 if (bypass_test
|| second_test
)
11660 rtx test
= second_test
;
11662 rtx tmp2
= gen_reg_rtx (QImode
);
11665 gcc_assert (!second_test
);
11666 test
= bypass_test
;
11668 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11670 PUT_MODE (test
, QImode
);
11671 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11674 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11676 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11679 /* Attach a REG_EQUAL note describing the comparison result. */
11680 if (ix86_compare_op0
&& ix86_compare_op1
)
11682 equiv
= simplify_gen_relational (code
, QImode
,
11683 GET_MODE (ix86_compare_op0
),
11684 ix86_compare_op0
, ix86_compare_op1
);
11685 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11688 return 1; /* DONE */
11691 /* Expand comparison setting or clearing carry flag. Return true when
11692 successful and set pop for the operation. */
11694 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11696 enum machine_mode mode
=
11697 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11699 /* Do not handle DImode compares that go through special path.
11700 Also we can't deal with FP compares yet. This is possible to add. */
11701 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11704 if (SCALAR_FLOAT_MODE_P (mode
))
11706 rtx second_test
= NULL
, bypass_test
= NULL
;
11707 rtx compare_op
, compare_seq
;
11709 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11711 /* Shortcut: following common codes never translate
11712 into carry flag compares. */
11713 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11714 || code
== ORDERED
|| code
== UNORDERED
)
11717 /* These comparisons require zero flag; swap operands so they won't. */
11718 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11719 && !TARGET_IEEE_FP
)
11724 code
= swap_condition (code
);
11727 /* Try to expand the comparison and verify that we end up with carry flag
11728 based comparison. This is fails to be true only when we decide to expand
11729 comparison using arithmetic that is not too common scenario. */
11731 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11732 &second_test
, &bypass_test
);
11733 compare_seq
= get_insns ();
11736 if (second_test
|| bypass_test
)
11738 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11739 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11740 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11742 code
= GET_CODE (compare_op
);
11743 if (code
!= LTU
&& code
!= GEU
)
11745 emit_insn (compare_seq
);
11749 if (!INTEGRAL_MODE_P (mode
))
11757 /* Convert a==0 into (unsigned)a<1. */
11760 if (op1
!= const0_rtx
)
11763 code
= (code
== EQ
? LTU
: GEU
);
11766 /* Convert a>b into b<a or a>=b-1. */
11769 if (CONST_INT_P (op1
))
11771 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11772 /* Bail out on overflow. We still can swap operands but that
11773 would force loading of the constant into register. */
11774 if (op1
== const0_rtx
11775 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11777 code
= (code
== GTU
? GEU
: LTU
);
11784 code
= (code
== GTU
? LTU
: GEU
);
11788 /* Convert a>=0 into (unsigned)a<0x80000000. */
11791 if (mode
== DImode
|| op1
!= const0_rtx
)
11793 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11794 code
= (code
== LT
? GEU
: LTU
);
11798 if (mode
== DImode
|| op1
!= constm1_rtx
)
11800 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11801 code
= (code
== LE
? GEU
: LTU
);
11807 /* Swapping operands may cause constant to appear as first operand. */
11808 if (!nonimmediate_operand (op0
, VOIDmode
))
11810 if (no_new_pseudos
)
11812 op0
= force_reg (mode
, op0
);
11814 ix86_compare_op0
= op0
;
11815 ix86_compare_op1
= op1
;
11816 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11817 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11822 ix86_expand_int_movcc (rtx operands
[])
11824 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11825 rtx compare_seq
, compare_op
;
11826 rtx second_test
, bypass_test
;
11827 enum machine_mode mode
= GET_MODE (operands
[0]);
11828 bool sign_bit_compare_p
= false;;
11831 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11832 compare_seq
= get_insns ();
11835 compare_code
= GET_CODE (compare_op
);
11837 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11838 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11839 sign_bit_compare_p
= true;
11841 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11842 HImode insns, we'd be swallowed in word prefix ops. */
11844 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11845 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11846 && CONST_INT_P (operands
[2])
11847 && CONST_INT_P (operands
[3]))
11849 rtx out
= operands
[0];
11850 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11851 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11852 HOST_WIDE_INT diff
;
11855 /* Sign bit compares are better done using shifts than we do by using
11857 if (sign_bit_compare_p
11858 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11859 ix86_compare_op1
, &compare_op
))
11861 /* Detect overlap between destination and compare sources. */
11864 if (!sign_bit_compare_p
)
11866 bool fpcmp
= false;
11868 compare_code
= GET_CODE (compare_op
);
11870 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11871 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11874 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11877 /* To simplify rest of code, restrict to the GEU case. */
11878 if (compare_code
== LTU
)
11880 HOST_WIDE_INT tmp
= ct
;
11883 compare_code
= reverse_condition (compare_code
);
11884 code
= reverse_condition (code
);
11889 PUT_CODE (compare_op
,
11890 reverse_condition_maybe_unordered
11891 (GET_CODE (compare_op
)));
11893 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11897 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11898 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11899 tmp
= gen_reg_rtx (mode
);
11901 if (mode
== DImode
)
11902 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11904 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11908 if (code
== GT
|| code
== GE
)
11909 code
= reverse_condition (code
);
11912 HOST_WIDE_INT tmp
= ct
;
11917 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11918 ix86_compare_op1
, VOIDmode
, 0, -1);
11931 tmp
= expand_simple_binop (mode
, PLUS
,
11933 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11944 tmp
= expand_simple_binop (mode
, IOR
,
11946 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11948 else if (diff
== -1 && ct
)
11958 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11960 tmp
= expand_simple_binop (mode
, PLUS
,
11961 copy_rtx (tmp
), GEN_INT (cf
),
11962 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11970 * andl cf - ct, dest
11980 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11983 tmp
= expand_simple_binop (mode
, AND
,
11985 gen_int_mode (cf
- ct
, mode
),
11986 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11988 tmp
= expand_simple_binop (mode
, PLUS
,
11989 copy_rtx (tmp
), GEN_INT (ct
),
11990 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11993 if (!rtx_equal_p (tmp
, out
))
11994 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11996 return 1; /* DONE */
12001 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12004 tmp
= ct
, ct
= cf
, cf
= tmp
;
12007 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12009 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12011 /* We may be reversing unordered compare to normal compare, that
12012 is not valid in general (we may convert non-trapping condition
12013 to trapping one), however on i386 we currently emit all
12014 comparisons unordered. */
12015 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12016 code
= reverse_condition_maybe_unordered (code
);
12020 compare_code
= reverse_condition (compare_code
);
12021 code
= reverse_condition (code
);
12025 compare_code
= UNKNOWN
;
12026 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12027 && CONST_INT_P (ix86_compare_op1
))
12029 if (ix86_compare_op1
== const0_rtx
12030 && (code
== LT
|| code
== GE
))
12031 compare_code
= code
;
12032 else if (ix86_compare_op1
== constm1_rtx
)
12036 else if (code
== GT
)
12041 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12042 if (compare_code
!= UNKNOWN
12043 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12044 && (cf
== -1 || ct
== -1))
12046 /* If lea code below could be used, only optimize
12047 if it results in a 2 insn sequence. */
12049 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12050 || diff
== 3 || diff
== 5 || diff
== 9)
12051 || (compare_code
== LT
&& ct
== -1)
12052 || (compare_code
== GE
&& cf
== -1))
12055 * notl op1 (if necessary)
12063 code
= reverse_condition (code
);
12066 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12067 ix86_compare_op1
, VOIDmode
, 0, -1);
12069 out
= expand_simple_binop (mode
, IOR
,
12071 out
, 1, OPTAB_DIRECT
);
12072 if (out
!= operands
[0])
12073 emit_move_insn (operands
[0], out
);
12075 return 1; /* DONE */
12080 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12081 || diff
== 3 || diff
== 5 || diff
== 9)
12082 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12084 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12090 * lea cf(dest*(ct-cf)),dest
12094 * This also catches the degenerate setcc-only case.
12100 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12101 ix86_compare_op1
, VOIDmode
, 0, 1);
12104 /* On x86_64 the lea instruction operates on Pmode, so we need
12105 to get arithmetics done in proper mode to match. */
12107 tmp
= copy_rtx (out
);
12111 out1
= copy_rtx (out
);
12112 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12116 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12122 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12125 if (!rtx_equal_p (tmp
, out
))
12128 out
= force_operand (tmp
, copy_rtx (out
));
12130 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12132 if (!rtx_equal_p (out
, operands
[0]))
12133 emit_move_insn (operands
[0], copy_rtx (out
));
12135 return 1; /* DONE */
12139 * General case: Jumpful:
12140 * xorl dest,dest cmpl op1, op2
12141 * cmpl op1, op2 movl ct, dest
12142 * setcc dest jcc 1f
12143 * decl dest movl cf, dest
12144 * andl (cf-ct),dest 1:
12147 * Size 20. Size 14.
12149 * This is reasonably steep, but branch mispredict costs are
12150 * high on modern cpus, so consider failing only if optimizing
12154 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12155 && BRANCH_COST
>= 2)
12159 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12164 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12166 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12168 /* We may be reversing unordered compare to normal compare,
12169 that is not valid in general (we may convert non-trapping
12170 condition to trapping one), however on i386 we currently
12171 emit all comparisons unordered. */
12172 code
= reverse_condition_maybe_unordered (code
);
12176 code
= reverse_condition (code
);
12177 if (compare_code
!= UNKNOWN
)
12178 compare_code
= reverse_condition (compare_code
);
12182 if (compare_code
!= UNKNOWN
)
12184 /* notl op1 (if needed)
12189 For x < 0 (resp. x <= -1) there will be no notl,
12190 so if possible swap the constants to get rid of the
12192 True/false will be -1/0 while code below (store flag
12193 followed by decrement) is 0/-1, so the constants need
12194 to be exchanged once more. */
12196 if (compare_code
== GE
|| !cf
)
12198 code
= reverse_condition (code
);
12203 HOST_WIDE_INT tmp
= cf
;
12208 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12209 ix86_compare_op1
, VOIDmode
, 0, -1);
12213 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12214 ix86_compare_op1
, VOIDmode
, 0, 1);
12216 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12217 copy_rtx (out
), 1, OPTAB_DIRECT
);
12220 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12221 gen_int_mode (cf
- ct
, mode
),
12222 copy_rtx (out
), 1, OPTAB_DIRECT
);
12224 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12225 copy_rtx (out
), 1, OPTAB_DIRECT
);
12226 if (!rtx_equal_p (out
, operands
[0]))
12227 emit_move_insn (operands
[0], copy_rtx (out
));
12229 return 1; /* DONE */
12233 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12235 /* Try a few things more with specific constants and a variable. */
12238 rtx var
, orig_out
, out
, tmp
;
12240 if (BRANCH_COST
<= 2)
12241 return 0; /* FAIL */
12243 /* If one of the two operands is an interesting constant, load a
12244 constant with the above and mask it in with a logical operation. */
12246 if (CONST_INT_P (operands
[2]))
12249 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12250 operands
[3] = constm1_rtx
, op
= and_optab
;
12251 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12252 operands
[3] = const0_rtx
, op
= ior_optab
;
12254 return 0; /* FAIL */
12256 else if (CONST_INT_P (operands
[3]))
12259 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12260 operands
[2] = constm1_rtx
, op
= and_optab
;
12261 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12262 operands
[2] = const0_rtx
, op
= ior_optab
;
12264 return 0; /* FAIL */
12267 return 0; /* FAIL */
12269 orig_out
= operands
[0];
12270 tmp
= gen_reg_rtx (mode
);
12273 /* Recurse to get the constant loaded. */
12274 if (ix86_expand_int_movcc (operands
) == 0)
12275 return 0; /* FAIL */
12277 /* Mask in the interesting variable. */
12278 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12280 if (!rtx_equal_p (out
, orig_out
))
12281 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12283 return 1; /* DONE */
12287 * For comparison with above,
12297 if (! nonimmediate_operand (operands
[2], mode
))
12298 operands
[2] = force_reg (mode
, operands
[2]);
12299 if (! nonimmediate_operand (operands
[3], mode
))
12300 operands
[3] = force_reg (mode
, operands
[3]);
12302 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12304 rtx tmp
= gen_reg_rtx (mode
);
12305 emit_move_insn (tmp
, operands
[3]);
12308 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12310 rtx tmp
= gen_reg_rtx (mode
);
12311 emit_move_insn (tmp
, operands
[2]);
12315 if (! register_operand (operands
[2], VOIDmode
)
12317 || ! register_operand (operands
[3], VOIDmode
)))
12318 operands
[2] = force_reg (mode
, operands
[2]);
12321 && ! register_operand (operands
[3], VOIDmode
))
12322 operands
[3] = force_reg (mode
, operands
[3]);
12324 emit_insn (compare_seq
);
12325 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12326 gen_rtx_IF_THEN_ELSE (mode
,
12327 compare_op
, operands
[2],
12330 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12331 gen_rtx_IF_THEN_ELSE (mode
,
12333 copy_rtx (operands
[3]),
12334 copy_rtx (operands
[0]))));
12336 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12337 gen_rtx_IF_THEN_ELSE (mode
,
12339 copy_rtx (operands
[2]),
12340 copy_rtx (operands
[0]))));
12342 return 1; /* DONE */
12345 /* Swap, force into registers, or otherwise massage the two operands
12346 to an sse comparison with a mask result. Thus we differ a bit from
12347 ix86_prepare_fp_compare_args which expects to produce a flags result.
12349 The DEST operand exists to help determine whether to commute commutative
12350 operators. The POP0/POP1 operands are updated in place. The new
12351 comparison code is returned, or UNKNOWN if not implementable. */
12353 static enum rtx_code
12354 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12355 rtx
*pop0
, rtx
*pop1
)
12363 /* We have no LTGT as an operator. We could implement it with
12364 NE & ORDERED, but this requires an extra temporary. It's
12365 not clear that it's worth it. */
12372 /* These are supported directly. */
12379 /* For commutative operators, try to canonicalize the destination
12380 operand to be first in the comparison - this helps reload to
12381 avoid extra moves. */
12382 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12390 /* These are not supported directly. Swap the comparison operands
12391 to transform into something that is supported. */
12395 code
= swap_condition (code
);
12399 gcc_unreachable ();
12405 /* Detect conditional moves that exactly match min/max operational
12406 semantics. Note that this is IEEE safe, as long as we don't
12407 interchange the operands.
12409 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12410 and TRUE if the operation is successful and instructions are emitted. */
12413 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12414 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12416 enum machine_mode mode
;
12422 else if (code
== UNGE
)
12425 if_true
= if_false
;
12431 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12433 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12438 mode
= GET_MODE (dest
);
12440 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12441 but MODE may be a vector mode and thus not appropriate. */
12442 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12444 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12447 if_true
= force_reg (mode
, if_true
);
12448 v
= gen_rtvec (2, if_true
, if_false
);
12449 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12453 code
= is_min
? SMIN
: SMAX
;
12454 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12457 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12461 /* Expand an sse vector comparison. Return the register with the result. */
12464 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12465 rtx op_true
, rtx op_false
)
12467 enum machine_mode mode
= GET_MODE (dest
);
12470 cmp_op0
= force_reg (mode
, cmp_op0
);
12471 if (!nonimmediate_operand (cmp_op1
, mode
))
12472 cmp_op1
= force_reg (mode
, cmp_op1
);
12475 || reg_overlap_mentioned_p (dest
, op_true
)
12476 || reg_overlap_mentioned_p (dest
, op_false
))
12477 dest
= gen_reg_rtx (mode
);
12479 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12480 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12485 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12486 operations. This is used for both scalar and vector conditional moves. */
12489 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12491 enum machine_mode mode
= GET_MODE (dest
);
12494 if (op_false
== CONST0_RTX (mode
))
12496 op_true
= force_reg (mode
, op_true
);
12497 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12498 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12500 else if (op_true
== CONST0_RTX (mode
))
12502 op_false
= force_reg (mode
, op_false
);
12503 x
= gen_rtx_NOT (mode
, cmp
);
12504 x
= gen_rtx_AND (mode
, x
, op_false
);
12505 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12509 op_true
= force_reg (mode
, op_true
);
12510 op_false
= force_reg (mode
, op_false
);
12512 t2
= gen_reg_rtx (mode
);
12514 t3
= gen_reg_rtx (mode
);
12518 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12519 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12521 x
= gen_rtx_NOT (mode
, cmp
);
12522 x
= gen_rtx_AND (mode
, x
, op_false
);
12523 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12525 x
= gen_rtx_IOR (mode
, t3
, t2
);
12526 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12530 /* Expand a floating-point conditional move. Return true if successful. */
12533 ix86_expand_fp_movcc (rtx operands
[])
12535 enum machine_mode mode
= GET_MODE (operands
[0]);
12536 enum rtx_code code
= GET_CODE (operands
[1]);
12537 rtx tmp
, compare_op
, second_test
, bypass_test
;
12539 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12541 enum machine_mode cmode
;
12543 /* Since we've no cmove for sse registers, don't force bad register
12544 allocation just to gain access to it. Deny movcc when the
12545 comparison mode doesn't match the move mode. */
12546 cmode
= GET_MODE (ix86_compare_op0
);
12547 if (cmode
== VOIDmode
)
12548 cmode
= GET_MODE (ix86_compare_op1
);
12552 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12554 &ix86_compare_op1
);
12555 if (code
== UNKNOWN
)
12558 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12559 ix86_compare_op1
, operands
[2],
12563 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12564 ix86_compare_op1
, operands
[2], operands
[3]);
12565 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12569 /* The floating point conditional move instructions don't directly
12570 support conditions resulting from a signed integer comparison. */
12572 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12574 /* The floating point conditional move instructions don't directly
12575 support signed integer comparisons. */
12577 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12579 gcc_assert (!second_test
&& !bypass_test
);
12580 tmp
= gen_reg_rtx (QImode
);
12581 ix86_expand_setcc (code
, tmp
);
12583 ix86_compare_op0
= tmp
;
12584 ix86_compare_op1
= const0_rtx
;
12585 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12587 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12589 tmp
= gen_reg_rtx (mode
);
12590 emit_move_insn (tmp
, operands
[3]);
12593 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12595 tmp
= gen_reg_rtx (mode
);
12596 emit_move_insn (tmp
, operands
[2]);
12600 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12601 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12602 operands
[2], operands
[3])));
12604 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12605 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12606 operands
[3], operands
[0])));
12608 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12609 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12610 operands
[2], operands
[0])));
12615 /* Expand a floating-point vector conditional move; a vcond operation
12616 rather than a movcc operation. */
12619 ix86_expand_fp_vcond (rtx operands
[])
12621 enum rtx_code code
= GET_CODE (operands
[3]);
12624 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12625 &operands
[4], &operands
[5]);
12626 if (code
== UNKNOWN
)
12629 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12630 operands
[5], operands
[1], operands
[2]))
12633 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12634 operands
[1], operands
[2]);
12635 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12639 /* Expand a signed integral vector conditional move. */
12642 ix86_expand_int_vcond (rtx operands
[])
12644 enum machine_mode mode
= GET_MODE (operands
[0]);
12645 enum rtx_code code
= GET_CODE (operands
[3]);
12646 bool negate
= false;
12649 cop0
= operands
[4];
12650 cop1
= operands
[5];
12652 /* Canonicalize the comparison to EQ, GT, GTU. */
12663 code
= reverse_condition (code
);
12669 code
= reverse_condition (code
);
12675 code
= swap_condition (code
);
12676 x
= cop0
, cop0
= cop1
, cop1
= x
;
12680 gcc_unreachable ();
12683 /* Unsigned parallel compare is not supported by the hardware. Play some
12684 tricks to turn this into a signed comparison against 0. */
12687 cop0
= force_reg (mode
, cop0
);
12695 /* Perform a parallel modulo subtraction. */
12696 t1
= gen_reg_rtx (mode
);
12697 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12699 /* Extract the original sign bit of op0. */
12700 mask
= GEN_INT (-0x80000000);
12701 mask
= gen_rtx_CONST_VECTOR (mode
,
12702 gen_rtvec (4, mask
, mask
, mask
, mask
));
12703 mask
= force_reg (mode
, mask
);
12704 t2
= gen_reg_rtx (mode
);
12705 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12707 /* XOR it back into the result of the subtraction. This results
12708 in the sign bit set iff we saw unsigned underflow. */
12709 x
= gen_reg_rtx (mode
);
12710 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12718 /* Perform a parallel unsigned saturating subtraction. */
12719 x
= gen_reg_rtx (mode
);
12720 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12721 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12728 gcc_unreachable ();
12732 cop1
= CONST0_RTX (mode
);
12735 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12736 operands
[1+negate
], operands
[2-negate
]);
12738 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12739 operands
[2-negate
]);
12743 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12744 true if we should do zero extension, else sign extension. HIGH_P is
12745 true if we want the N/2 high elements, else the low elements. */
12748 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12750 enum machine_mode imode
= GET_MODE (operands
[1]);
12751 rtx (*unpack
)(rtx
, rtx
, rtx
);
12758 unpack
= gen_vec_interleave_highv16qi
;
12760 unpack
= gen_vec_interleave_lowv16qi
;
12764 unpack
= gen_vec_interleave_highv8hi
;
12766 unpack
= gen_vec_interleave_lowv8hi
;
12770 unpack
= gen_vec_interleave_highv4si
;
12772 unpack
= gen_vec_interleave_lowv4si
;
12775 gcc_unreachable ();
12778 dest
= gen_lowpart (imode
, operands
[0]);
12781 se
= force_reg (imode
, CONST0_RTX (imode
));
12783 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12784 operands
[1], pc_rtx
, pc_rtx
);
12786 emit_insn (unpack (dest
, operands
[1], se
));
12789 /* Expand conditional increment or decrement using adb/sbb instructions.
12790 The default case using setcc followed by the conditional move can be
12791 done by generic code. */
12793 ix86_expand_int_addcc (rtx operands
[])
12795 enum rtx_code code
= GET_CODE (operands
[1]);
12797 rtx val
= const0_rtx
;
12798 bool fpcmp
= false;
12799 enum machine_mode mode
= GET_MODE (operands
[0]);
12801 if (operands
[3] != const1_rtx
12802 && operands
[3] != constm1_rtx
)
12804 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12805 ix86_compare_op1
, &compare_op
))
12807 code
= GET_CODE (compare_op
);
12809 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12810 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12813 code
= ix86_fp_compare_code_to_integer (code
);
12820 PUT_CODE (compare_op
,
12821 reverse_condition_maybe_unordered
12822 (GET_CODE (compare_op
)));
12824 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12826 PUT_MODE (compare_op
, mode
);
12828 /* Construct either adc or sbb insn. */
12829 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12831 switch (GET_MODE (operands
[0]))
12834 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12837 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12840 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12843 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12846 gcc_unreachable ();
12851 switch (GET_MODE (operands
[0]))
12854 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12857 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12860 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12863 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12866 gcc_unreachable ();
12869 return 1; /* DONE */
12873 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12874 works for floating pointer parameters and nonoffsetable memories.
12875 For pushes, it returns just stack offsets; the values will be saved
12876 in the right order. Maximally three parts are generated. */
12879 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12884 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12886 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12888 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12889 gcc_assert (size
>= 2 && size
<= 3);
12891 /* Optimize constant pool reference to immediates. This is used by fp
12892 moves, that force all constants to memory to allow combining. */
12893 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12895 rtx tmp
= maybe_get_pool_constant (operand
);
12900 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12902 /* The only non-offsetable memories we handle are pushes. */
12903 int ok
= push_operand (operand
, VOIDmode
);
12907 operand
= copy_rtx (operand
);
12908 PUT_MODE (operand
, Pmode
);
12909 parts
[0] = parts
[1] = parts
[2] = operand
;
12913 if (GET_CODE (operand
) == CONST_VECTOR
)
12915 enum machine_mode imode
= int_mode_for_mode (mode
);
12916 /* Caution: if we looked through a constant pool memory above,
12917 the operand may actually have a different mode now. That's
12918 ok, since we want to pun this all the way back to an integer. */
12919 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12920 gcc_assert (operand
!= NULL
);
12926 if (mode
== DImode
)
12927 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12930 if (REG_P (operand
))
12932 gcc_assert (reload_completed
);
12933 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12934 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12936 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12938 else if (offsettable_memref_p (operand
))
12940 operand
= adjust_address (operand
, SImode
, 0);
12941 parts
[0] = operand
;
12942 parts
[1] = adjust_address (operand
, SImode
, 4);
12944 parts
[2] = adjust_address (operand
, SImode
, 8);
12946 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12951 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12955 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12956 parts
[2] = gen_int_mode (l
[2], SImode
);
12959 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12962 gcc_unreachable ();
12964 parts
[1] = gen_int_mode (l
[1], SImode
);
12965 parts
[0] = gen_int_mode (l
[0], SImode
);
12968 gcc_unreachable ();
12973 if (mode
== TImode
)
12974 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12975 if (mode
== XFmode
|| mode
== TFmode
)
12977 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12978 if (REG_P (operand
))
12980 gcc_assert (reload_completed
);
12981 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12982 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12984 else if (offsettable_memref_p (operand
))
12986 operand
= adjust_address (operand
, DImode
, 0);
12987 parts
[0] = operand
;
12988 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12990 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12995 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12996 real_to_target (l
, &r
, mode
);
12998 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12999 if (HOST_BITS_PER_WIDE_INT
>= 64)
13002 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13003 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13006 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13008 if (upper_mode
== SImode
)
13009 parts
[1] = gen_int_mode (l
[2], SImode
);
13010 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13013 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13014 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13017 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13020 gcc_unreachable ();
13027 /* Emit insns to perform a move or push of DI, DF, and XF values.
13028 Return false when normal moves are needed; true when all required
13029 insns have been emitted. Operands 2-4 contain the input values
13030 int the correct order; operands 5-7 contain the output values. */
13033 ix86_split_long_move (rtx operands
[])
13038 int collisions
= 0;
13039 enum machine_mode mode
= GET_MODE (operands
[0]);
13041 /* The DFmode expanders may ask us to move double.
13042 For 64bit target this is single move. By hiding the fact
13043 here we simplify i386.md splitters. */
13044 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13046 /* Optimize constant pool reference to immediates. This is used by
13047 fp moves, that force all constants to memory to allow combining. */
13049 if (MEM_P (operands
[1])
13050 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13051 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13052 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13053 if (push_operand (operands
[0], VOIDmode
))
13055 operands
[0] = copy_rtx (operands
[0]);
13056 PUT_MODE (operands
[0], Pmode
);
13059 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13060 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13061 emit_move_insn (operands
[0], operands
[1]);
13065 /* The only non-offsettable memory we handle is push. */
13066 if (push_operand (operands
[0], VOIDmode
))
13069 gcc_assert (!MEM_P (operands
[0])
13070 || offsettable_memref_p (operands
[0]));
13072 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13073 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13075 /* When emitting push, take care for source operands on the stack. */
13076 if (push
&& MEM_P (operands
[1])
13077 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13080 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13081 XEXP (part
[1][2], 0));
13082 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13083 XEXP (part
[1][1], 0));
13086 /* We need to do copy in the right order in case an address register
13087 of the source overlaps the destination. */
13088 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13090 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13092 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13095 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13098 /* Collision in the middle part can be handled by reordering. */
13099 if (collisions
== 1 && nparts
== 3
13100 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13103 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13104 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13107 /* If there are more collisions, we can't handle it by reordering.
13108 Do an lea to the last part and use only one colliding move. */
13109 else if (collisions
> 1)
13115 base
= part
[0][nparts
- 1];
13117 /* Handle the case when the last part isn't valid for lea.
13118 Happens in 64-bit mode storing the 12-byte XFmode. */
13119 if (GET_MODE (base
) != Pmode
)
13120 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13122 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13123 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13124 part
[1][1] = replace_equiv_address (part
[1][1],
13125 plus_constant (base
, UNITS_PER_WORD
));
13127 part
[1][2] = replace_equiv_address (part
[1][2],
13128 plus_constant (base
, 8));
13138 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13139 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13140 emit_move_insn (part
[0][2], part
[1][2]);
13145 /* In 64bit mode we don't have 32bit push available. In case this is
13146 register, it is OK - we will just use larger counterpart. We also
13147 retype memory - these comes from attempt to avoid REX prefix on
13148 moving of second half of TFmode value. */
13149 if (GET_MODE (part
[1][1]) == SImode
)
13151 switch (GET_CODE (part
[1][1]))
13154 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13158 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13162 gcc_unreachable ();
13165 if (GET_MODE (part
[1][0]) == SImode
)
13166 part
[1][0] = part
[1][1];
13169 emit_move_insn (part
[0][1], part
[1][1]);
13170 emit_move_insn (part
[0][0], part
[1][0]);
13174 /* Choose correct order to not overwrite the source before it is copied. */
13175 if ((REG_P (part
[0][0])
13176 && REG_P (part
[1][1])
13177 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13179 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13181 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13185 operands
[2] = part
[0][2];
13186 operands
[3] = part
[0][1];
13187 operands
[4] = part
[0][0];
13188 operands
[5] = part
[1][2];
13189 operands
[6] = part
[1][1];
13190 operands
[7] = part
[1][0];
13194 operands
[2] = part
[0][1];
13195 operands
[3] = part
[0][0];
13196 operands
[5] = part
[1][1];
13197 operands
[6] = part
[1][0];
13204 operands
[2] = part
[0][0];
13205 operands
[3] = part
[0][1];
13206 operands
[4] = part
[0][2];
13207 operands
[5] = part
[1][0];
13208 operands
[6] = part
[1][1];
13209 operands
[7] = part
[1][2];
13213 operands
[2] = part
[0][0];
13214 operands
[3] = part
[0][1];
13215 operands
[5] = part
[1][0];
13216 operands
[6] = part
[1][1];
13220 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13223 if (CONST_INT_P (operands
[5])
13224 && operands
[5] != const0_rtx
13225 && REG_P (operands
[2]))
13227 if (CONST_INT_P (operands
[6])
13228 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13229 operands
[6] = operands
[2];
13232 && CONST_INT_P (operands
[7])
13233 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13234 operands
[7] = operands
[2];
13238 && CONST_INT_P (operands
[6])
13239 && operands
[6] != const0_rtx
13240 && REG_P (operands
[3])
13241 && CONST_INT_P (operands
[7])
13242 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13243 operands
[7] = operands
[3];
13246 emit_move_insn (operands
[2], operands
[5]);
13247 emit_move_insn (operands
[3], operands
[6]);
13249 emit_move_insn (operands
[4], operands
[7]);
13254 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13255 left shift by a constant, either using a single shift or
13256 a sequence of add instructions. */
13259 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13263 emit_insn ((mode
== DImode
13265 : gen_adddi3
) (operand
, operand
, operand
));
13267 else if (!optimize_size
13268 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13271 for (i
=0; i
<count
; i
++)
13273 emit_insn ((mode
== DImode
13275 : gen_adddi3
) (operand
, operand
, operand
));
13279 emit_insn ((mode
== DImode
13281 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13285 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13287 rtx low
[2], high
[2];
13289 const int single_width
= mode
== DImode
? 32 : 64;
13291 if (CONST_INT_P (operands
[2]))
13293 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13294 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13296 if (count
>= single_width
)
13298 emit_move_insn (high
[0], low
[1]);
13299 emit_move_insn (low
[0], const0_rtx
);
13301 if (count
> single_width
)
13302 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13306 if (!rtx_equal_p (operands
[0], operands
[1]))
13307 emit_move_insn (operands
[0], operands
[1]);
13308 emit_insn ((mode
== DImode
13310 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13311 ix86_expand_ashl_const (low
[0], count
, mode
);
13316 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13318 if (operands
[1] == const1_rtx
)
13320 /* Assuming we've chosen a QImode capable registers, then 1 << N
13321 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13322 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13324 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13326 ix86_expand_clear (low
[0]);
13327 ix86_expand_clear (high
[0]);
13328 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13330 d
= gen_lowpart (QImode
, low
[0]);
13331 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13332 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13333 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13335 d
= gen_lowpart (QImode
, high
[0]);
13336 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13337 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13338 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13341 /* Otherwise, we can get the same results by manually performing
13342 a bit extract operation on bit 5/6, and then performing the two
13343 shifts. The two methods of getting 0/1 into low/high are exactly
13344 the same size. Avoiding the shift in the bit extract case helps
13345 pentium4 a bit; no one else seems to care much either way. */
13350 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13351 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13353 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13354 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13356 emit_insn ((mode
== DImode
13358 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13359 emit_insn ((mode
== DImode
13361 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13362 emit_move_insn (low
[0], high
[0]);
13363 emit_insn ((mode
== DImode
13365 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13368 emit_insn ((mode
== DImode
13370 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13371 emit_insn ((mode
== DImode
13373 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13377 if (operands
[1] == constm1_rtx
)
13379 /* For -1 << N, we can avoid the shld instruction, because we
13380 know that we're shifting 0...31/63 ones into a -1. */
13381 emit_move_insn (low
[0], constm1_rtx
);
13383 emit_move_insn (high
[0], low
[0]);
13385 emit_move_insn (high
[0], constm1_rtx
);
13389 if (!rtx_equal_p (operands
[0], operands
[1]))
13390 emit_move_insn (operands
[0], operands
[1]);
13392 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13393 emit_insn ((mode
== DImode
13395 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13398 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13400 if (TARGET_CMOVE
&& scratch
)
13402 ix86_expand_clear (scratch
);
13403 emit_insn ((mode
== DImode
13404 ? gen_x86_shift_adj_1
13405 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13408 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13412 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13414 rtx low
[2], high
[2];
13416 const int single_width
= mode
== DImode
? 32 : 64;
13418 if (CONST_INT_P (operands
[2]))
13420 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13421 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13423 if (count
== single_width
* 2 - 1)
13425 emit_move_insn (high
[0], high
[1]);
13426 emit_insn ((mode
== DImode
13428 : gen_ashrdi3
) (high
[0], high
[0],
13429 GEN_INT (single_width
- 1)));
13430 emit_move_insn (low
[0], high
[0]);
13433 else if (count
>= single_width
)
13435 emit_move_insn (low
[0], high
[1]);
13436 emit_move_insn (high
[0], low
[0]);
13437 emit_insn ((mode
== DImode
13439 : gen_ashrdi3
) (high
[0], high
[0],
13440 GEN_INT (single_width
- 1)));
13441 if (count
> single_width
)
13442 emit_insn ((mode
== DImode
13444 : gen_ashrdi3
) (low
[0], low
[0],
13445 GEN_INT (count
- single_width
)));
13449 if (!rtx_equal_p (operands
[0], operands
[1]))
13450 emit_move_insn (operands
[0], operands
[1]);
13451 emit_insn ((mode
== DImode
13453 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13454 emit_insn ((mode
== DImode
13456 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13461 if (!rtx_equal_p (operands
[0], operands
[1]))
13462 emit_move_insn (operands
[0], operands
[1]);
13464 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13466 emit_insn ((mode
== DImode
13468 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13469 emit_insn ((mode
== DImode
13471 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13473 if (TARGET_CMOVE
&& scratch
)
13475 emit_move_insn (scratch
, high
[0]);
13476 emit_insn ((mode
== DImode
13478 : gen_ashrdi3
) (scratch
, scratch
,
13479 GEN_INT (single_width
- 1)));
13480 emit_insn ((mode
== DImode
13481 ? gen_x86_shift_adj_1
13482 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13486 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13491 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13493 rtx low
[2], high
[2];
13495 const int single_width
= mode
== DImode
? 32 : 64;
13497 if (CONST_INT_P (operands
[2]))
13499 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13500 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13502 if (count
>= single_width
)
13504 emit_move_insn (low
[0], high
[1]);
13505 ix86_expand_clear (high
[0]);
13507 if (count
> single_width
)
13508 emit_insn ((mode
== DImode
13510 : gen_lshrdi3
) (low
[0], low
[0],
13511 GEN_INT (count
- single_width
)));
13515 if (!rtx_equal_p (operands
[0], operands
[1]))
13516 emit_move_insn (operands
[0], operands
[1]);
13517 emit_insn ((mode
== DImode
13519 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13520 emit_insn ((mode
== DImode
13522 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13527 if (!rtx_equal_p (operands
[0], operands
[1]))
13528 emit_move_insn (operands
[0], operands
[1]);
13530 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13532 emit_insn ((mode
== DImode
13534 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13535 emit_insn ((mode
== DImode
13537 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13539 /* Heh. By reversing the arguments, we can reuse this pattern. */
13540 if (TARGET_CMOVE
&& scratch
)
13542 ix86_expand_clear (scratch
);
13543 emit_insn ((mode
== DImode
13544 ? gen_x86_shift_adj_1
13545 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13549 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13553 /* Predict just emitted jump instruction to be taken with probability PROB. */
13555 predict_jump (int prob
)
13557 rtx insn
= get_last_insn ();
13558 gcc_assert (JUMP_P (insn
));
13560 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13565 /* Helper function for the string operations below. Dest VARIABLE whether
13566 it is aligned to VALUE bytes. If true, jump to the label. */
13568 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13570 rtx label
= gen_label_rtx ();
13571 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13572 if (GET_MODE (variable
) == DImode
)
13573 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13575 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13576 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13579 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13581 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13585 /* Adjust COUNTER by the VALUE. */
13587 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13589 if (GET_MODE (countreg
) == DImode
)
13590 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13592 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13595 /* Zero extend possibly SImode EXP to Pmode register. */
13597 ix86_zero_extend_to_Pmode (rtx exp
)
13600 if (GET_MODE (exp
) == VOIDmode
)
13601 return force_reg (Pmode
, exp
);
13602 if (GET_MODE (exp
) == Pmode
)
13603 return copy_to_mode_reg (Pmode
, exp
);
13604 r
= gen_reg_rtx (Pmode
);
13605 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13609 /* Divide COUNTREG by SCALE. */
13611 scale_counter (rtx countreg
, int scale
)
13614 rtx piece_size_mask
;
13618 if (CONST_INT_P (countreg
))
13619 return GEN_INT (INTVAL (countreg
) / scale
);
13620 gcc_assert (REG_P (countreg
));
13622 piece_size_mask
= GEN_INT (scale
- 1);
13623 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13624 GEN_INT (exact_log2 (scale
)),
13625 NULL
, 1, OPTAB_DIRECT
);
13629 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13630 DImode for constant loop counts. */
13632 static enum machine_mode
13633 counter_mode (rtx count_exp
)
13635 if (GET_MODE (count_exp
) != VOIDmode
)
13636 return GET_MODE (count_exp
);
13637 if (GET_CODE (count_exp
) != CONST_INT
)
13639 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13644 /* When SRCPTR is non-NULL, output simple loop to move memory
13645 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13646 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13647 equivalent loop to set memory by VALUE (supposed to be in MODE).
13649 The size is rounded down to whole number of chunk size moved at once.
13650 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13654 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13655 rtx destptr
, rtx srcptr
, rtx value
,
13656 rtx count
, enum machine_mode mode
, int unroll
,
13659 rtx out_label
, top_label
, iter
, tmp
;
13660 enum machine_mode iter_mode
= counter_mode (count
);
13661 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13662 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13668 top_label
= gen_label_rtx ();
13669 out_label
= gen_label_rtx ();
13670 iter
= gen_reg_rtx (iter_mode
);
13672 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13673 NULL
, 1, OPTAB_DIRECT
);
13674 /* Those two should combine. */
13675 if (piece_size
== const1_rtx
)
13677 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13679 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13681 emit_move_insn (iter
, const0_rtx
);
13683 emit_label (top_label
);
13685 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13686 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13687 destmem
= change_address (destmem
, mode
, x_addr
);
13691 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13692 srcmem
= change_address (srcmem
, mode
, y_addr
);
13694 /* When unrolling for chips that reorder memory reads and writes,
13695 we can save registers by using single temporary.
13696 Also using 4 temporaries is overkill in 32bit mode. */
13697 if (!TARGET_64BIT
&& 0)
13699 for (i
= 0; i
< unroll
; i
++)
13704 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13706 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13708 emit_move_insn (destmem
, srcmem
);
13714 gcc_assert (unroll
<= 4);
13715 for (i
= 0; i
< unroll
; i
++)
13717 tmpreg
[i
] = gen_reg_rtx (mode
);
13721 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13723 emit_move_insn (tmpreg
[i
], srcmem
);
13725 for (i
= 0; i
< unroll
; i
++)
13730 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13732 emit_move_insn (destmem
, tmpreg
[i
]);
13737 for (i
= 0; i
< unroll
; i
++)
13741 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13742 emit_move_insn (destmem
, value
);
13745 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13746 true, OPTAB_LIB_WIDEN
);
13748 emit_move_insn (iter
, tmp
);
13750 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13752 if (expected_size
!= -1)
13754 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13755 if (expected_size
== 0)
13757 else if (expected_size
> REG_BR_PROB_BASE
)
13758 predict_jump (REG_BR_PROB_BASE
- 1);
13760 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13763 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13764 iter
= ix86_zero_extend_to_Pmode (iter
);
13765 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13766 true, OPTAB_LIB_WIDEN
);
13767 if (tmp
!= destptr
)
13768 emit_move_insn (destptr
, tmp
);
13771 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13772 true, OPTAB_LIB_WIDEN
);
13774 emit_move_insn (srcptr
, tmp
);
13776 emit_label (out_label
);
13779 /* Output "rep; mov" instruction.
13780 Arguments have same meaning as for previous function */
13782 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13783 rtx destptr
, rtx srcptr
,
13785 enum machine_mode mode
)
13791 /* If the size is known, it is shorter to use rep movs. */
13792 if (mode
== QImode
&& CONST_INT_P (count
)
13793 && !(INTVAL (count
) & 3))
13796 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13797 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13798 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13799 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13800 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13801 if (mode
!= QImode
)
13803 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13804 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13805 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13806 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13807 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13808 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13812 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13813 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13815 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13819 /* Output "rep; stos" instruction.
13820 Arguments have same meaning as for previous function */
13822 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13824 enum machine_mode mode
)
13829 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13830 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13831 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13832 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13833 if (mode
!= QImode
)
13835 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13836 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13837 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13840 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13841 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13845 emit_strmov (rtx destmem
, rtx srcmem
,
13846 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13848 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13849 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13850 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13853 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13855 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13856 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13859 if (CONST_INT_P (count
))
13861 HOST_WIDE_INT countval
= INTVAL (count
);
13864 if ((countval
& 0x10) && max_size
> 16)
13868 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13869 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13872 gcc_unreachable ();
13875 if ((countval
& 0x08) && max_size
> 8)
13878 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13881 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13882 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13886 if ((countval
& 0x04) && max_size
> 4)
13888 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13891 if ((countval
& 0x02) && max_size
> 2)
13893 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13896 if ((countval
& 0x01) && max_size
> 1)
13898 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13905 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13906 count
, 1, OPTAB_DIRECT
);
13907 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13908 count
, QImode
, 1, 4);
13912 /* When there are stringops, we can cheaply increase dest and src pointers.
13913 Otherwise we save code size by maintaining offset (zero is readily
13914 available from preceding rep operation) and using x86 addressing modes.
13916 if (TARGET_SINGLE_STRINGOP
)
13920 rtx label
= ix86_expand_aligntest (count
, 4, true);
13921 src
= change_address (srcmem
, SImode
, srcptr
);
13922 dest
= change_address (destmem
, SImode
, destptr
);
13923 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13924 emit_label (label
);
13925 LABEL_NUSES (label
) = 1;
13929 rtx label
= ix86_expand_aligntest (count
, 2, true);
13930 src
= change_address (srcmem
, HImode
, srcptr
);
13931 dest
= change_address (destmem
, HImode
, destptr
);
13932 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13933 emit_label (label
);
13934 LABEL_NUSES (label
) = 1;
13938 rtx label
= ix86_expand_aligntest (count
, 1, true);
13939 src
= change_address (srcmem
, QImode
, srcptr
);
13940 dest
= change_address (destmem
, QImode
, destptr
);
13941 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13942 emit_label (label
);
13943 LABEL_NUSES (label
) = 1;
13948 rtx offset
= force_reg (Pmode
, const0_rtx
);
13953 rtx label
= ix86_expand_aligntest (count
, 4, true);
13954 src
= change_address (srcmem
, SImode
, srcptr
);
13955 dest
= change_address (destmem
, SImode
, destptr
);
13956 emit_move_insn (dest
, src
);
13957 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13958 true, OPTAB_LIB_WIDEN
);
13960 emit_move_insn (offset
, tmp
);
13961 emit_label (label
);
13962 LABEL_NUSES (label
) = 1;
13966 rtx label
= ix86_expand_aligntest (count
, 2, true);
13967 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13968 src
= change_address (srcmem
, HImode
, tmp
);
13969 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13970 dest
= change_address (destmem
, HImode
, tmp
);
13971 emit_move_insn (dest
, src
);
13972 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13973 true, OPTAB_LIB_WIDEN
);
13975 emit_move_insn (offset
, tmp
);
13976 emit_label (label
);
13977 LABEL_NUSES (label
) = 1;
13981 rtx label
= ix86_expand_aligntest (count
, 1, true);
13982 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13983 src
= change_address (srcmem
, QImode
, tmp
);
13984 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13985 dest
= change_address (destmem
, QImode
, tmp
);
13986 emit_move_insn (dest
, src
);
13987 emit_label (label
);
13988 LABEL_NUSES (label
) = 1;
13993 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13995 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13996 rtx count
, int max_size
)
13999 expand_simple_binop (counter_mode (count
), AND
, count
,
14000 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14001 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14002 gen_lowpart (QImode
, value
), count
, QImode
,
14006 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14008 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
14012 if (CONST_INT_P (count
))
14014 HOST_WIDE_INT countval
= INTVAL (count
);
14017 if ((countval
& 0x10) && max_size
> 16)
14021 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14022 emit_insn (gen_strset (destptr
, dest
, value
));
14023 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14024 emit_insn (gen_strset (destptr
, dest
, value
));
14027 gcc_unreachable ();
14030 if ((countval
& 0x08) && max_size
> 8)
14034 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14035 emit_insn (gen_strset (destptr
, dest
, value
));
14039 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14040 emit_insn (gen_strset (destptr
, dest
, value
));
14041 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14042 emit_insn (gen_strset (destptr
, dest
, value
));
14046 if ((countval
& 0x04) && max_size
> 4)
14048 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14049 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14052 if ((countval
& 0x02) && max_size
> 2)
14054 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14055 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14058 if ((countval
& 0x01) && max_size
> 1)
14060 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14061 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14068 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
14073 rtx label
= ix86_expand_aligntest (count
, 16, true);
14076 dest
= change_address (destmem
, DImode
, destptr
);
14077 emit_insn (gen_strset (destptr
, dest
, value
));
14078 emit_insn (gen_strset (destptr
, dest
, value
));
14082 dest
= change_address (destmem
, SImode
, destptr
);
14083 emit_insn (gen_strset (destptr
, dest
, value
));
14084 emit_insn (gen_strset (destptr
, dest
, value
));
14085 emit_insn (gen_strset (destptr
, dest
, value
));
14086 emit_insn (gen_strset (destptr
, dest
, value
));
14088 emit_label (label
);
14089 LABEL_NUSES (label
) = 1;
14093 rtx label
= ix86_expand_aligntest (count
, 8, true);
14096 dest
= change_address (destmem
, DImode
, destptr
);
14097 emit_insn (gen_strset (destptr
, dest
, value
));
14101 dest
= change_address (destmem
, SImode
, destptr
);
14102 emit_insn (gen_strset (destptr
, dest
, value
));
14103 emit_insn (gen_strset (destptr
, dest
, value
));
14105 emit_label (label
);
14106 LABEL_NUSES (label
) = 1;
14110 rtx label
= ix86_expand_aligntest (count
, 4, true);
14111 dest
= change_address (destmem
, SImode
, destptr
);
14112 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14113 emit_label (label
);
14114 LABEL_NUSES (label
) = 1;
14118 rtx label
= ix86_expand_aligntest (count
, 2, true);
14119 dest
= change_address (destmem
, HImode
, destptr
);
14120 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14121 emit_label (label
);
14122 LABEL_NUSES (label
) = 1;
14126 rtx label
= ix86_expand_aligntest (count
, 1, true);
14127 dest
= change_address (destmem
, QImode
, destptr
);
14128 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14129 emit_label (label
);
14130 LABEL_NUSES (label
) = 1;
14134 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14135 DESIRED_ALIGNMENT. */
14137 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14138 rtx destptr
, rtx srcptr
, rtx count
,
14139 int align
, int desired_alignment
)
14141 if (align
<= 1 && desired_alignment
> 1)
14143 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14144 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14145 destmem
= change_address (destmem
, QImode
, destptr
);
14146 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14147 ix86_adjust_counter (count
, 1);
14148 emit_label (label
);
14149 LABEL_NUSES (label
) = 1;
14151 if (align
<= 2 && desired_alignment
> 2)
14153 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14154 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14155 destmem
= change_address (destmem
, HImode
, destptr
);
14156 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14157 ix86_adjust_counter (count
, 2);
14158 emit_label (label
);
14159 LABEL_NUSES (label
) = 1;
14161 if (align
<= 4 && desired_alignment
> 4)
14163 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14164 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14165 destmem
= change_address (destmem
, SImode
, destptr
);
14166 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14167 ix86_adjust_counter (count
, 4);
14168 emit_label (label
);
14169 LABEL_NUSES (label
) = 1;
14171 gcc_assert (desired_alignment
<= 8);
14174 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14175 DESIRED_ALIGNMENT. */
14177 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14178 int align
, int desired_alignment
)
14180 if (align
<= 1 && desired_alignment
> 1)
14182 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14183 destmem
= change_address (destmem
, QImode
, destptr
);
14184 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14185 ix86_adjust_counter (count
, 1);
14186 emit_label (label
);
14187 LABEL_NUSES (label
) = 1;
14189 if (align
<= 2 && desired_alignment
> 2)
14191 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14192 destmem
= change_address (destmem
, HImode
, destptr
);
14193 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14194 ix86_adjust_counter (count
, 2);
14195 emit_label (label
);
14196 LABEL_NUSES (label
) = 1;
14198 if (align
<= 4 && desired_alignment
> 4)
14200 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14201 destmem
= change_address (destmem
, SImode
, destptr
);
14202 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14203 ix86_adjust_counter (count
, 4);
14204 emit_label (label
);
14205 LABEL_NUSES (label
) = 1;
14207 gcc_assert (desired_alignment
<= 8);
14210 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14211 static enum stringop_alg
14212 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14213 int *dynamic_check
)
14215 const struct stringop_algs
* algs
;
14217 *dynamic_check
= -1;
14219 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14221 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14222 if (stringop_alg
!= no_stringop
)
14223 return stringop_alg
;
14224 /* rep; movq or rep; movl is the smallest variant. */
14225 else if (optimize_size
)
14227 if (!count
|| (count
& 3))
14228 return rep_prefix_1_byte
;
14230 return rep_prefix_4_byte
;
14232 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14234 else if (expected_size
!= -1 && expected_size
< 4)
14235 return loop_1_byte
;
14236 else if (expected_size
!= -1)
14239 enum stringop_alg alg
= libcall
;
14240 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14242 gcc_assert (algs
->size
[i
].max
);
14243 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14245 if (algs
->size
[i
].alg
!= libcall
)
14246 alg
= algs
->size
[i
].alg
;
14247 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14248 last non-libcall inline algorithm. */
14249 if (TARGET_INLINE_ALL_STRINGOPS
)
14251 /* When the current size is best to be copied by a libcall,
14252 but we are still forced to inline, run the heuristic bellow
14253 that will pick code for medium sized blocks. */
14254 if (alg
!= libcall
)
14259 return algs
->size
[i
].alg
;
14262 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14264 /* When asked to inline the call anyway, try to pick meaningful choice.
14265 We look for maximal size of block that is faster to copy by hand and
14266 take blocks of at most of that size guessing that average size will
14267 be roughly half of the block.
14269 If this turns out to be bad, we might simply specify the preferred
14270 choice in ix86_costs. */
14271 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14272 && algs
->unknown_size
== libcall
)
14275 enum stringop_alg alg
;
14278 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14279 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14280 max
= algs
->size
[i
].max
;
14283 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14284 gcc_assert (*dynamic_check
== -1);
14285 gcc_assert (alg
!= libcall
);
14286 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14287 *dynamic_check
= max
;
14290 return algs
->unknown_size
;
14293 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14294 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14296 decide_alignment (int align
,
14297 enum stringop_alg alg
,
14300 int desired_align
= 0;
14304 gcc_unreachable ();
14306 case unrolled_loop
:
14307 desired_align
= GET_MODE_SIZE (Pmode
);
14309 case rep_prefix_8_byte
:
14312 case rep_prefix_4_byte
:
14313 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14314 copying whole cacheline at once. */
14315 if (TARGET_PENTIUMPRO
)
14320 case rep_prefix_1_byte
:
14321 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14322 copying whole cacheline at once. */
14323 if (TARGET_PENTIUMPRO
)
14337 if (desired_align
< align
)
14338 desired_align
= align
;
14339 if (expected_size
!= -1 && expected_size
< 4)
14340 desired_align
= align
;
14341 return desired_align
;
14344 /* Return the smallest power of 2 greater than VAL. */
14346 smallest_pow2_greater_than (int val
)
14354 /* Expand string move (memcpy) operation. Use i386 string operations when
14355 profitable. expand_clrmem contains similar code. The code depends upon
14356 architecture, block size and alignment, but always has the same
14359 1) Prologue guard: Conditional that jumps up to epilogues for small
14360 blocks that can be handled by epilogue alone. This is faster but
14361 also needed for correctness, since prologue assume the block is larger
14362 than the desired alignment.
14364 Optional dynamic check for size and libcall for large
14365 blocks is emitted here too, with -minline-stringops-dynamically.
14367 2) Prologue: copy first few bytes in order to get destination aligned
14368 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14369 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14370 We emit either a jump tree on power of two sized blocks, or a byte loop.
14372 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14373 with specified algorithm.
14375 4) Epilogue: code copying tail of the block that is too small to be
14376 handled by main body (or up to size guarded by prologue guard). */
14379 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14380 rtx expected_align_exp
, rtx expected_size_exp
)
14386 rtx jump_around_label
= NULL
;
14387 HOST_WIDE_INT align
= 1;
14388 unsigned HOST_WIDE_INT count
= 0;
14389 HOST_WIDE_INT expected_size
= -1;
14390 int size_needed
= 0, epilogue_size_needed
;
14391 int desired_align
= 0;
14392 enum stringop_alg alg
;
14395 if (CONST_INT_P (align_exp
))
14396 align
= INTVAL (align_exp
);
14397 /* i386 can do misaligned access on reasonably increased cost. */
14398 if (CONST_INT_P (expected_align_exp
)
14399 && INTVAL (expected_align_exp
) > align
)
14400 align
= INTVAL (expected_align_exp
);
14401 if (CONST_INT_P (count_exp
))
14402 count
= expected_size
= INTVAL (count_exp
);
14403 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14404 expected_size
= INTVAL (expected_size_exp
);
14406 /* Step 0: Decide on preferred algorithm, desired alignment and
14407 size of chunks to be copied by main loop. */
14409 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14410 desired_align
= decide_alignment (align
, alg
, expected_size
);
14412 if (!TARGET_ALIGN_STRINGOPS
)
14413 align
= desired_align
;
14415 if (alg
== libcall
)
14417 gcc_assert (alg
!= no_stringop
);
14419 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14420 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14421 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14426 gcc_unreachable ();
14428 size_needed
= GET_MODE_SIZE (Pmode
);
14430 case unrolled_loop
:
14431 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14433 case rep_prefix_8_byte
:
14436 case rep_prefix_4_byte
:
14439 case rep_prefix_1_byte
:
14445 epilogue_size_needed
= size_needed
;
14447 /* Step 1: Prologue guard. */
14449 /* Alignment code needs count to be in register. */
14450 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14452 enum machine_mode mode
= SImode
;
14453 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14455 count_exp
= force_reg (mode
, count_exp
);
14457 gcc_assert (desired_align
>= 1 && align
>= 1);
14459 /* Ensure that alignment prologue won't copy past end of block. */
14460 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14462 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14463 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14464 Make sure it is power of 2. */
14465 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14467 label
= gen_label_rtx ();
14468 emit_cmp_and_jump_insns (count_exp
,
14469 GEN_INT (epilogue_size_needed
),
14470 LTU
, 0, counter_mode (count_exp
), 1, label
);
14471 if (GET_CODE (count_exp
) == CONST_INT
)
14473 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14474 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14476 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14478 /* Emit code to decide on runtime whether library call or inline should be
14480 if (dynamic_check
!= -1)
14482 rtx hot_label
= gen_label_rtx ();
14483 jump_around_label
= gen_label_rtx ();
14484 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14485 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14486 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14487 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14488 emit_jump (jump_around_label
);
14489 emit_label (hot_label
);
14492 /* Step 2: Alignment prologue. */
14494 if (desired_align
> align
)
14496 /* Except for the first move in epilogue, we no longer know
14497 constant offset in aliasing info. It don't seems to worth
14498 the pain to maintain it for the first move, so throw away
14500 src
= change_address (src
, BLKmode
, srcreg
);
14501 dst
= change_address (dst
, BLKmode
, destreg
);
14502 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14505 if (label
&& size_needed
== 1)
14507 emit_label (label
);
14508 LABEL_NUSES (label
) = 1;
14512 /* Step 3: Main loop. */
14518 gcc_unreachable ();
14520 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14521 count_exp
, QImode
, 1, expected_size
);
14524 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14525 count_exp
, Pmode
, 1, expected_size
);
14527 case unrolled_loop
:
14528 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14529 registers for 4 temporaries anyway. */
14530 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14531 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14534 case rep_prefix_8_byte
:
14535 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14538 case rep_prefix_4_byte
:
14539 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14542 case rep_prefix_1_byte
:
14543 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14547 /* Adjust properly the offset of src and dest memory for aliasing. */
14548 if (CONST_INT_P (count_exp
))
14550 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14551 (count
/ size_needed
) * size_needed
);
14552 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14553 (count
/ size_needed
) * size_needed
);
14557 src
= change_address (src
, BLKmode
, srcreg
);
14558 dst
= change_address (dst
, BLKmode
, destreg
);
14561 /* Step 4: Epilogue to copy the remaining bytes. */
14565 /* When the main loop is done, COUNT_EXP might hold original count,
14566 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14567 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14568 bytes. Compensate if needed. */
14570 if (size_needed
< epilogue_size_needed
)
14573 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14574 GEN_INT (size_needed
- 1), count_exp
, 1,
14576 if (tmp
!= count_exp
)
14577 emit_move_insn (count_exp
, tmp
);
14579 emit_label (label
);
14580 LABEL_NUSES (label
) = 1;
14583 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14584 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14585 epilogue_size_needed
);
14586 if (jump_around_label
)
14587 emit_label (jump_around_label
);
14591 /* Helper function for memcpy. For QImode value 0xXY produce
14592 0xXYXYXYXY of wide specified by MODE. This is essentially
14593 a * 0x10101010, but we can do slightly better than
14594 synth_mult by unwinding the sequence by hand on CPUs with
14597 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14599 enum machine_mode valmode
= GET_MODE (val
);
14601 int nops
= mode
== DImode
? 3 : 2;
14603 gcc_assert (mode
== SImode
|| mode
== DImode
);
14604 if (val
== const0_rtx
)
14605 return copy_to_mode_reg (mode
, const0_rtx
);
14606 if (CONST_INT_P (val
))
14608 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14612 if (mode
== DImode
)
14613 v
|= (v
<< 16) << 16;
14614 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14617 if (valmode
== VOIDmode
)
14619 if (valmode
!= QImode
)
14620 val
= gen_lowpart (QImode
, val
);
14621 if (mode
== QImode
)
14623 if (!TARGET_PARTIAL_REG_STALL
)
14625 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14626 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14627 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14628 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14630 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14631 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14632 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14637 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14639 if (!TARGET_PARTIAL_REG_STALL
)
14640 if (mode
== SImode
)
14641 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14643 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14646 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14647 NULL
, 1, OPTAB_DIRECT
);
14649 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14651 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14652 NULL
, 1, OPTAB_DIRECT
);
14653 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14654 if (mode
== SImode
)
14656 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14657 NULL
, 1, OPTAB_DIRECT
);
14658 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14663 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14664 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14665 alignment from ALIGN to DESIRED_ALIGN. */
14667 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14672 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14673 promoted_val
= promote_duplicated_reg (DImode
, val
);
14674 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14675 promoted_val
= promote_duplicated_reg (SImode
, val
);
14676 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14677 promoted_val
= promote_duplicated_reg (HImode
, val
);
14679 promoted_val
= val
;
14681 return promoted_val
;
14684 /* Expand string clear operation (bzero). Use i386 string operations when
14685 profitable. See expand_movmem comment for explanation of individual
14686 steps performed. */
14688 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14689 rtx expected_align_exp
, rtx expected_size_exp
)
14694 rtx jump_around_label
= NULL
;
14695 HOST_WIDE_INT align
= 1;
14696 unsigned HOST_WIDE_INT count
= 0;
14697 HOST_WIDE_INT expected_size
= -1;
14698 int size_needed
= 0, epilogue_size_needed
;
14699 int desired_align
= 0;
14700 enum stringop_alg alg
;
14701 rtx promoted_val
= NULL
;
14702 bool force_loopy_epilogue
= false;
14705 if (CONST_INT_P (align_exp
))
14706 align
= INTVAL (align_exp
);
14707 /* i386 can do misaligned access on reasonably increased cost. */
14708 if (CONST_INT_P (expected_align_exp
)
14709 && INTVAL (expected_align_exp
) > align
)
14710 align
= INTVAL (expected_align_exp
);
14711 if (CONST_INT_P (count_exp
))
14712 count
= expected_size
= INTVAL (count_exp
);
14713 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14714 expected_size
= INTVAL (expected_size_exp
);
14716 /* Step 0: Decide on preferred algorithm, desired alignment and
14717 size of chunks to be copied by main loop. */
14719 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14720 desired_align
= decide_alignment (align
, alg
, expected_size
);
14722 if (!TARGET_ALIGN_STRINGOPS
)
14723 align
= desired_align
;
14725 if (alg
== libcall
)
14727 gcc_assert (alg
!= no_stringop
);
14729 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14730 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14735 gcc_unreachable ();
14737 size_needed
= GET_MODE_SIZE (Pmode
);
14739 case unrolled_loop
:
14740 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14742 case rep_prefix_8_byte
:
14745 case rep_prefix_4_byte
:
14748 case rep_prefix_1_byte
:
14753 epilogue_size_needed
= size_needed
;
14755 /* Step 1: Prologue guard. */
14757 /* Alignment code needs count to be in register. */
14758 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14760 enum machine_mode mode
= SImode
;
14761 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14763 count_exp
= force_reg (mode
, count_exp
);
14765 /* Do the cheap promotion to allow better CSE across the
14766 main loop and epilogue (ie one load of the big constant in the
14767 front of all code. */
14768 if (CONST_INT_P (val_exp
))
14769 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14770 desired_align
, align
);
14771 /* Ensure that alignment prologue won't copy past end of block. */
14772 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14774 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14775 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14776 Make sure it is power of 2. */
14777 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14779 /* To improve performance of small blocks, we jump around the VAL
14780 promoting mode. This mean that if the promoted VAL is not constant,
14781 we might not use it in the epilogue and have to use byte
14783 if (epilogue_size_needed
> 2 && !promoted_val
)
14784 force_loopy_epilogue
= true;
14785 label
= gen_label_rtx ();
14786 emit_cmp_and_jump_insns (count_exp
,
14787 GEN_INT (epilogue_size_needed
),
14788 LTU
, 0, counter_mode (count_exp
), 1, label
);
14789 if (GET_CODE (count_exp
) == CONST_INT
)
14791 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14792 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14794 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14796 if (dynamic_check
!= -1)
14798 rtx hot_label
= gen_label_rtx ();
14799 jump_around_label
= gen_label_rtx ();
14800 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14801 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14802 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14803 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14804 emit_jump (jump_around_label
);
14805 emit_label (hot_label
);
14808 /* Step 2: Alignment prologue. */
14810 /* Do the expensive promotion once we branched off the small blocks. */
14812 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14813 desired_align
, align
);
14814 gcc_assert (desired_align
>= 1 && align
>= 1);
14816 if (desired_align
> align
)
14818 /* Except for the first move in epilogue, we no longer know
14819 constant offset in aliasing info. It don't seems to worth
14820 the pain to maintain it for the first move, so throw away
14822 dst
= change_address (dst
, BLKmode
, destreg
);
14823 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14826 if (label
&& size_needed
== 1)
14828 emit_label (label
);
14829 LABEL_NUSES (label
) = 1;
14833 /* Step 3: Main loop. */
14839 gcc_unreachable ();
14841 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14842 count_exp
, QImode
, 1, expected_size
);
14845 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14846 count_exp
, Pmode
, 1, expected_size
);
14848 case unrolled_loop
:
14849 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14850 count_exp
, Pmode
, 4, expected_size
);
14852 case rep_prefix_8_byte
:
14853 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14856 case rep_prefix_4_byte
:
14857 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14860 case rep_prefix_1_byte
:
14861 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14865 /* Adjust properly the offset of src and dest memory for aliasing. */
14866 if (CONST_INT_P (count_exp
))
14867 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14868 (count
/ size_needed
) * size_needed
);
14870 dst
= change_address (dst
, BLKmode
, destreg
);
14872 /* Step 4: Epilogue to copy the remaining bytes. */
14876 /* When the main loop is done, COUNT_EXP might hold original count,
14877 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14878 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14879 bytes. Compensate if needed. */
14881 if (size_needed
< desired_align
- align
)
14884 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14885 GEN_INT (size_needed
- 1), count_exp
, 1,
14887 size_needed
= desired_align
- align
+ 1;
14888 if (tmp
!= count_exp
)
14889 emit_move_insn (count_exp
, tmp
);
14891 emit_label (label
);
14892 LABEL_NUSES (label
) = 1;
14894 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14896 if (force_loopy_epilogue
)
14897 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14900 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14903 if (jump_around_label
)
14904 emit_label (jump_around_label
);
14908 /* Expand the appropriate insns for doing strlen if not just doing
14911 out = result, initialized with the start address
14912 align_rtx = alignment of the address.
14913 scratch = scratch register, initialized with the startaddress when
14914 not aligned, otherwise undefined
14916 This is just the body. It needs the initializations mentioned above and
14917 some address computing at the end. These things are done in i386.md. */
14920 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14924 rtx align_2_label
= NULL_RTX
;
14925 rtx align_3_label
= NULL_RTX
;
14926 rtx align_4_label
= gen_label_rtx ();
14927 rtx end_0_label
= gen_label_rtx ();
14929 rtx tmpreg
= gen_reg_rtx (SImode
);
14930 rtx scratch
= gen_reg_rtx (SImode
);
14934 if (CONST_INT_P (align_rtx
))
14935 align
= INTVAL (align_rtx
);
14937 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14939 /* Is there a known alignment and is it less than 4? */
14942 rtx scratch1
= gen_reg_rtx (Pmode
);
14943 emit_move_insn (scratch1
, out
);
14944 /* Is there a known alignment and is it not 2? */
14947 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14948 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14950 /* Leave just the 3 lower bits. */
14951 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14952 NULL_RTX
, 0, OPTAB_WIDEN
);
14954 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14955 Pmode
, 1, align_4_label
);
14956 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14957 Pmode
, 1, align_2_label
);
14958 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14959 Pmode
, 1, align_3_label
);
14963 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14964 check if is aligned to 4 - byte. */
14966 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14967 NULL_RTX
, 0, OPTAB_WIDEN
);
14969 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14970 Pmode
, 1, align_4_label
);
14973 mem
= change_address (src
, QImode
, out
);
14975 /* Now compare the bytes. */
14977 /* Compare the first n unaligned byte on a byte per byte basis. */
14978 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14979 QImode
, 1, end_0_label
);
14981 /* Increment the address. */
14983 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14985 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14987 /* Not needed with an alignment of 2 */
14990 emit_label (align_2_label
);
14992 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14996 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14998 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15000 emit_label (align_3_label
);
15003 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15007 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15009 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15012 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15013 align this loop. It gives only huge programs, but does not help to
15015 emit_label (align_4_label
);
15017 mem
= change_address (src
, SImode
, out
);
15018 emit_move_insn (scratch
, mem
);
15020 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15022 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
15024 /* This formula yields a nonzero result iff one of the bytes is zero.
15025 This saves three branches inside loop and many cycles. */
15027 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15028 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15029 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15030 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15031 gen_int_mode (0x80808080, SImode
)));
15032 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15037 rtx reg
= gen_reg_rtx (SImode
);
15038 rtx reg2
= gen_reg_rtx (Pmode
);
15039 emit_move_insn (reg
, tmpreg
);
15040 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15042 /* If zero is not in the first two bytes, move two bytes forward. */
15043 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15044 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15045 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15046 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15047 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15050 /* Emit lea manually to avoid clobbering of flags. */
15051 emit_insn (gen_rtx_SET (SImode
, reg2
,
15052 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15054 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15055 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15056 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15057 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15064 rtx end_2_label
= gen_label_rtx ();
15065 /* Is zero in the first two bytes? */
15067 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15068 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15069 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15070 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15071 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15073 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15074 JUMP_LABEL (tmp
) = end_2_label
;
15076 /* Not in the first two. Move two bytes forward. */
15077 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15079 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15081 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15083 emit_label (end_2_label
);
15087 /* Avoid branch in fixing the byte. */
15088 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15089 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15090 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
15092 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15094 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15096 emit_label (end_0_label
);
15099 /* Expand strlen. */
15102 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
15104 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
15106 /* The generic case of strlen expander is long. Avoid it's
15107 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
15109 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15110 && !TARGET_INLINE_ALL_STRINGOPS
15112 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
15115 addr
= force_reg (Pmode
, XEXP (src
, 0));
15116 scratch1
= gen_reg_rtx (Pmode
);
15118 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15121 /* Well it seems that some optimizer does not combine a call like
15122 foo(strlen(bar), strlen(bar));
15123 when the move and the subtraction is done here. It does calculate
15124 the length just once when these instructions are done inside of
15125 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15126 often used and I use one fewer register for the lifetime of
15127 output_strlen_unroll() this is better. */
15129 emit_move_insn (out
, addr
);
15131 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
15133 /* strlensi_unroll_1 returns the address of the zero at the end of
15134 the string, like memchr(), so compute the length by subtracting
15135 the start address. */
15137 emit_insn (gen_subdi3 (out
, out
, addr
));
15139 emit_insn (gen_subsi3 (out
, out
, addr
));
15144 scratch2
= gen_reg_rtx (Pmode
);
15145 scratch3
= gen_reg_rtx (Pmode
);
15146 scratch4
= force_reg (Pmode
, constm1_rtx
);
15148 emit_move_insn (scratch3
, addr
);
15149 eoschar
= force_reg (QImode
, eoschar
);
15151 src
= replace_equiv_address_nv (src
, scratch3
);
15153 /* If .md starts supporting :P, this can be done in .md. */
15154 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
15155 scratch4
), UNSPEC_SCAS
);
15156 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
15159 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
15160 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
15164 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
15165 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
15171 /* For given symbol (function) construct code to compute address of it's PLT
15172 entry in large x86-64 PIC model. */
15174 construct_plt_address (rtx symbol
)
15176 rtx tmp
= gen_reg_rtx (Pmode
);
15177 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15179 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15180 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15182 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15183 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15188 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15189 rtx callarg2 ATTRIBUTE_UNUSED
,
15190 rtx pop
, int sibcall
)
15192 rtx use
= NULL
, call
;
15194 if (pop
== const0_rtx
)
15196 gcc_assert (!TARGET_64BIT
|| !pop
);
15198 if (TARGET_MACHO
&& !TARGET_64BIT
)
15201 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15202 fnaddr
= machopic_indirect_call_target (fnaddr
);
15207 /* Static functions and indirect calls don't need the pic register. */
15208 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15209 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15210 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15211 use_reg (&use
, pic_offset_table_rtx
);
15214 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15216 rtx al
= gen_rtx_REG (QImode
, 0);
15217 emit_move_insn (al
, callarg2
);
15218 use_reg (&use
, al
);
15221 if (ix86_cmodel
== CM_LARGE_PIC
15222 && GET_CODE (fnaddr
) == MEM
15223 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15224 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15225 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15226 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15228 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15229 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15231 if (sibcall
&& TARGET_64BIT
15232 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15235 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15236 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15237 emit_move_insn (fnaddr
, addr
);
15238 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15241 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15243 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15246 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15247 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15248 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15251 call
= emit_call_insn (call
);
15253 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15257 /* Clear stack slot assignments remembered from previous functions.
15258 This is called from INIT_EXPANDERS once before RTL is emitted for each
15261 static struct machine_function
*
15262 ix86_init_machine_status (void)
15264 struct machine_function
*f
;
15266 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
15267 f
->use_fast_prologue_epilogue_nregs
= -1;
15268 f
->tls_descriptor_call_expanded_p
= 0;
15273 /* Return a MEM corresponding to a stack slot with mode MODE.
15274 Allocate a new slot if necessary.
15276 The RTL for a function can have several slots available: N is
15277 which slot to use. */
15280 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15282 struct stack_local_entry
*s
;
15284 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15286 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15287 if (s
->mode
== mode
&& s
->n
== n
)
15288 return copy_rtx (s
->rtl
);
15290 s
= (struct stack_local_entry
*)
15291 ggc_alloc (sizeof (struct stack_local_entry
));
15294 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15296 s
->next
= ix86_stack_locals
;
15297 ix86_stack_locals
= s
;
15301 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15303 static GTY(()) rtx ix86_tls_symbol
;
15305 ix86_tls_get_addr (void)
15308 if (!ix86_tls_symbol
)
15310 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15311 (TARGET_ANY_GNU_TLS
15313 ? "___tls_get_addr"
15314 : "__tls_get_addr");
15317 return ix86_tls_symbol
;
15320 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15322 static GTY(()) rtx ix86_tls_module_base_symbol
;
15324 ix86_tls_module_base (void)
15327 if (!ix86_tls_module_base_symbol
)
15329 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15330 "_TLS_MODULE_BASE_");
15331 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15332 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15335 return ix86_tls_module_base_symbol
;
15338 /* Calculate the length of the memory address in the instruction
15339 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15342 memory_address_length (rtx addr
)
15344 struct ix86_address parts
;
15345 rtx base
, index
, disp
;
15349 if (GET_CODE (addr
) == PRE_DEC
15350 || GET_CODE (addr
) == POST_INC
15351 || GET_CODE (addr
) == PRE_MODIFY
15352 || GET_CODE (addr
) == POST_MODIFY
)
15355 ok
= ix86_decompose_address (addr
, &parts
);
15358 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15359 parts
.base
= SUBREG_REG (parts
.base
);
15360 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15361 parts
.index
= SUBREG_REG (parts
.index
);
15364 index
= parts
.index
;
15369 - esp as the base always wants an index,
15370 - ebp as the base always wants a displacement. */
15372 /* Register Indirect. */
15373 if (base
&& !index
&& !disp
)
15375 /* esp (for its index) and ebp (for its displacement) need
15376 the two-byte modrm form. */
15377 if (addr
== stack_pointer_rtx
15378 || addr
== arg_pointer_rtx
15379 || addr
== frame_pointer_rtx
15380 || addr
== hard_frame_pointer_rtx
)
15384 /* Direct Addressing. */
15385 else if (disp
&& !base
&& !index
)
15390 /* Find the length of the displacement constant. */
15393 if (base
&& satisfies_constraint_K (disp
))
15398 /* ebp always wants a displacement. */
15399 else if (base
== hard_frame_pointer_rtx
)
15402 /* An index requires the two-byte modrm form.... */
15404 /* ...like esp, which always wants an index. */
15405 || base
== stack_pointer_rtx
15406 || base
== arg_pointer_rtx
15407 || base
== frame_pointer_rtx
)
15414 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15415 is set, expect that insn have 8bit immediate alternative. */
15417 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15421 extract_insn_cached (insn
);
15422 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15423 if (CONSTANT_P (recog_data
.operand
[i
]))
15426 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15430 switch (get_attr_mode (insn
))
15441 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15446 fatal_insn ("unknown insn mode", insn
);
15452 /* Compute default value for "length_address" attribute. */
15454 ix86_attr_length_address_default (rtx insn
)
15458 if (get_attr_type (insn
) == TYPE_LEA
)
15460 rtx set
= PATTERN (insn
);
15462 if (GET_CODE (set
) == PARALLEL
)
15463 set
= XVECEXP (set
, 0, 0);
15465 gcc_assert (GET_CODE (set
) == SET
);
15467 return memory_address_length (SET_SRC (set
));
15470 extract_insn_cached (insn
);
15471 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15472 if (MEM_P (recog_data
.operand
[i
]))
15474 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15480 /* Return the maximum number of instructions a cpu can issue. */
15483 ix86_issue_rate (void)
15487 case PROCESSOR_PENTIUM
:
15491 case PROCESSOR_PENTIUMPRO
:
15492 case PROCESSOR_PENTIUM4
:
15493 case PROCESSOR_ATHLON
:
15495 case PROCESSOR_AMDFAM10
:
15496 case PROCESSOR_NOCONA
:
15497 case PROCESSOR_GENERIC32
:
15498 case PROCESSOR_GENERIC64
:
15501 case PROCESSOR_CORE2
:
15509 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15510 by DEP_INSN and nothing set by DEP_INSN. */
15513 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15517 /* Simplify the test for uninteresting insns. */
15518 if (insn_type
!= TYPE_SETCC
15519 && insn_type
!= TYPE_ICMOV
15520 && insn_type
!= TYPE_FCMOV
15521 && insn_type
!= TYPE_IBR
)
15524 if ((set
= single_set (dep_insn
)) != 0)
15526 set
= SET_DEST (set
);
15529 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15530 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15531 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15532 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15534 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15535 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15540 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15543 /* This test is true if the dependent insn reads the flags but
15544 not any other potentially set register. */
15545 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15548 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15554 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15555 address with operands set by DEP_INSN. */
15558 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15562 if (insn_type
== TYPE_LEA
15565 addr
= PATTERN (insn
);
15567 if (GET_CODE (addr
) == PARALLEL
)
15568 addr
= XVECEXP (addr
, 0, 0);
15570 gcc_assert (GET_CODE (addr
) == SET
);
15572 addr
= SET_SRC (addr
);
15577 extract_insn_cached (insn
);
15578 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15579 if (MEM_P (recog_data
.operand
[i
]))
15581 addr
= XEXP (recog_data
.operand
[i
], 0);
15588 return modified_in_p (addr
, dep_insn
);
15592 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15594 enum attr_type insn_type
, dep_insn_type
;
15595 enum attr_memory memory
;
15597 int dep_insn_code_number
;
15599 /* Anti and output dependencies have zero cost on all CPUs. */
15600 if (REG_NOTE_KIND (link
) != 0)
15603 dep_insn_code_number
= recog_memoized (dep_insn
);
15605 /* If we can't recognize the insns, we can't really do anything. */
15606 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15609 insn_type
= get_attr_type (insn
);
15610 dep_insn_type
= get_attr_type (dep_insn
);
15614 case PROCESSOR_PENTIUM
:
15615 /* Address Generation Interlock adds a cycle of latency. */
15616 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15619 /* ??? Compares pair with jump/setcc. */
15620 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15623 /* Floating point stores require value to be ready one cycle earlier. */
15624 if (insn_type
== TYPE_FMOV
15625 && get_attr_memory (insn
) == MEMORY_STORE
15626 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15630 case PROCESSOR_PENTIUMPRO
:
15631 memory
= get_attr_memory (insn
);
15633 /* INT->FP conversion is expensive. */
15634 if (get_attr_fp_int_src (dep_insn
))
15637 /* There is one cycle extra latency between an FP op and a store. */
15638 if (insn_type
== TYPE_FMOV
15639 && (set
= single_set (dep_insn
)) != NULL_RTX
15640 && (set2
= single_set (insn
)) != NULL_RTX
15641 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15642 && MEM_P (SET_DEST (set2
)))
15645 /* Show ability of reorder buffer to hide latency of load by executing
15646 in parallel with previous instruction in case
15647 previous instruction is not needed to compute the address. */
15648 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15649 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15651 /* Claim moves to take one cycle, as core can issue one load
15652 at time and the next load can start cycle later. */
15653 if (dep_insn_type
== TYPE_IMOV
15654 || dep_insn_type
== TYPE_FMOV
)
15662 memory
= get_attr_memory (insn
);
15664 /* The esp dependency is resolved before the instruction is really
15666 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15667 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15670 /* INT->FP conversion is expensive. */
15671 if (get_attr_fp_int_src (dep_insn
))
15674 /* Show ability of reorder buffer to hide latency of load by executing
15675 in parallel with previous instruction in case
15676 previous instruction is not needed to compute the address. */
15677 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15678 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15680 /* Claim moves to take one cycle, as core can issue one load
15681 at time and the next load can start cycle later. */
15682 if (dep_insn_type
== TYPE_IMOV
15683 || dep_insn_type
== TYPE_FMOV
)
15692 case PROCESSOR_ATHLON
:
15694 case PROCESSOR_AMDFAM10
:
15695 case PROCESSOR_GENERIC32
:
15696 case PROCESSOR_GENERIC64
:
15697 memory
= get_attr_memory (insn
);
15699 /* Show ability of reorder buffer to hide latency of load by executing
15700 in parallel with previous instruction in case
15701 previous instruction is not needed to compute the address. */
15702 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15703 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15705 enum attr_unit unit
= get_attr_unit (insn
);
15708 /* Because of the difference between the length of integer and
15709 floating unit pipeline preparation stages, the memory operands
15710 for floating point are cheaper.
15712 ??? For Athlon it the difference is most probably 2. */
15713 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15716 loadcost
= TARGET_ATHLON
? 2 : 0;
15718 if (cost
>= loadcost
)
15731 /* How many alternative schedules to try. This should be as wide as the
15732 scheduling freedom in the DFA, but no wider. Making this value too
15733 large results extra work for the scheduler. */
15736 ia32_multipass_dfa_lookahead (void)
15738 if (ix86_tune
== PROCESSOR_PENTIUM
)
15741 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15742 || ix86_tune
== PROCESSOR_K6
)
15750 /* Compute the alignment given to a constant that is being placed in memory.
15751 EXP is the constant and ALIGN is the alignment that the object would
15753 The value of this function is used instead of that alignment to align
15757 ix86_constant_alignment (tree exp
, int align
)
15759 if (TREE_CODE (exp
) == REAL_CST
)
15761 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15763 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15766 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15767 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15768 return BITS_PER_WORD
;
15773 /* Compute the alignment for a static variable.
15774 TYPE is the data type, and ALIGN is the alignment that
15775 the object would ordinarily have. The value of this function is used
15776 instead of that alignment to align the object. */
15779 ix86_data_alignment (tree type
, int align
)
15781 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15783 if (AGGREGATE_TYPE_P (type
)
15784 && TYPE_SIZE (type
)
15785 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15786 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15787 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15788 && align
< max_align
)
15791 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15792 to 16byte boundary. */
15795 if (AGGREGATE_TYPE_P (type
)
15796 && TYPE_SIZE (type
)
15797 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15798 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15799 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15803 if (TREE_CODE (type
) == ARRAY_TYPE
)
15805 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15807 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15810 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15813 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15815 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15818 else if ((TREE_CODE (type
) == RECORD_TYPE
15819 || TREE_CODE (type
) == UNION_TYPE
15820 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15821 && TYPE_FIELDS (type
))
15823 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15825 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15828 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15829 || TREE_CODE (type
) == INTEGER_TYPE
)
15831 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15833 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15840 /* Compute the alignment for a local variable.
15841 TYPE is the data type, and ALIGN is the alignment that
15842 the object would ordinarily have. The value of this macro is used
15843 instead of that alignment to align the object. */
15846 ix86_local_alignment (tree type
, int align
)
15848 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15849 to 16byte boundary. */
15852 if (AGGREGATE_TYPE_P (type
)
15853 && TYPE_SIZE (type
)
15854 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15855 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15856 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15859 if (TREE_CODE (type
) == ARRAY_TYPE
)
15861 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15863 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15866 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15868 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15870 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15873 else if ((TREE_CODE (type
) == RECORD_TYPE
15874 || TREE_CODE (type
) == UNION_TYPE
15875 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15876 && TYPE_FIELDS (type
))
15878 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15880 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15883 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15884 || TREE_CODE (type
) == INTEGER_TYPE
)
15887 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15889 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15895 /* Emit RTL insns to initialize the variable parts of a trampoline.
15896 FNADDR is an RTX for the address of the function's pure code.
15897 CXT is an RTX for the static chain value for the function. */
15899 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15903 /* Compute offset from the end of the jmp to the target function. */
15904 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15905 plus_constant (tramp
, 10),
15906 NULL_RTX
, 1, OPTAB_DIRECT
);
15907 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15908 gen_int_mode (0xb9, QImode
));
15909 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15910 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15911 gen_int_mode (0xe9, QImode
));
15912 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15917 /* Try to load address using shorter movl instead of movabs.
15918 We may want to support movq for kernel mode, but kernel does not use
15919 trampolines at the moment. */
15920 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15922 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15923 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15924 gen_int_mode (0xbb41, HImode
));
15925 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15926 gen_lowpart (SImode
, fnaddr
));
15931 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15932 gen_int_mode (0xbb49, HImode
));
15933 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15937 /* Load static chain using movabs to r10. */
15938 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15939 gen_int_mode (0xba49, HImode
));
15940 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15943 /* Jump to the r11 */
15944 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15945 gen_int_mode (0xff49, HImode
));
15946 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15947 gen_int_mode (0xe3, QImode
));
15949 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15952 #ifdef ENABLE_EXECUTE_STACK
15953 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15954 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15958 /* Codes for all the SSE/MMX builtins. */
15961 IX86_BUILTIN_ADDPS
,
15962 IX86_BUILTIN_ADDSS
,
15963 IX86_BUILTIN_DIVPS
,
15964 IX86_BUILTIN_DIVSS
,
15965 IX86_BUILTIN_MULPS
,
15966 IX86_BUILTIN_MULSS
,
15967 IX86_BUILTIN_SUBPS
,
15968 IX86_BUILTIN_SUBSS
,
15970 IX86_BUILTIN_CMPEQPS
,
15971 IX86_BUILTIN_CMPLTPS
,
15972 IX86_BUILTIN_CMPLEPS
,
15973 IX86_BUILTIN_CMPGTPS
,
15974 IX86_BUILTIN_CMPGEPS
,
15975 IX86_BUILTIN_CMPNEQPS
,
15976 IX86_BUILTIN_CMPNLTPS
,
15977 IX86_BUILTIN_CMPNLEPS
,
15978 IX86_BUILTIN_CMPNGTPS
,
15979 IX86_BUILTIN_CMPNGEPS
,
15980 IX86_BUILTIN_CMPORDPS
,
15981 IX86_BUILTIN_CMPUNORDPS
,
15982 IX86_BUILTIN_CMPEQSS
,
15983 IX86_BUILTIN_CMPLTSS
,
15984 IX86_BUILTIN_CMPLESS
,
15985 IX86_BUILTIN_CMPNEQSS
,
15986 IX86_BUILTIN_CMPNLTSS
,
15987 IX86_BUILTIN_CMPNLESS
,
15988 IX86_BUILTIN_CMPNGTSS
,
15989 IX86_BUILTIN_CMPNGESS
,
15990 IX86_BUILTIN_CMPORDSS
,
15991 IX86_BUILTIN_CMPUNORDSS
,
15993 IX86_BUILTIN_COMIEQSS
,
15994 IX86_BUILTIN_COMILTSS
,
15995 IX86_BUILTIN_COMILESS
,
15996 IX86_BUILTIN_COMIGTSS
,
15997 IX86_BUILTIN_COMIGESS
,
15998 IX86_BUILTIN_COMINEQSS
,
15999 IX86_BUILTIN_UCOMIEQSS
,
16000 IX86_BUILTIN_UCOMILTSS
,
16001 IX86_BUILTIN_UCOMILESS
,
16002 IX86_BUILTIN_UCOMIGTSS
,
16003 IX86_BUILTIN_UCOMIGESS
,
16004 IX86_BUILTIN_UCOMINEQSS
,
16006 IX86_BUILTIN_CVTPI2PS
,
16007 IX86_BUILTIN_CVTPS2PI
,
16008 IX86_BUILTIN_CVTSI2SS
,
16009 IX86_BUILTIN_CVTSI642SS
,
16010 IX86_BUILTIN_CVTSS2SI
,
16011 IX86_BUILTIN_CVTSS2SI64
,
16012 IX86_BUILTIN_CVTTPS2PI
,
16013 IX86_BUILTIN_CVTTSS2SI
,
16014 IX86_BUILTIN_CVTTSS2SI64
,
16016 IX86_BUILTIN_MAXPS
,
16017 IX86_BUILTIN_MAXSS
,
16018 IX86_BUILTIN_MINPS
,
16019 IX86_BUILTIN_MINSS
,
16021 IX86_BUILTIN_LOADUPS
,
16022 IX86_BUILTIN_STOREUPS
,
16023 IX86_BUILTIN_MOVSS
,
16025 IX86_BUILTIN_MOVHLPS
,
16026 IX86_BUILTIN_MOVLHPS
,
16027 IX86_BUILTIN_LOADHPS
,
16028 IX86_BUILTIN_LOADLPS
,
16029 IX86_BUILTIN_STOREHPS
,
16030 IX86_BUILTIN_STORELPS
,
16032 IX86_BUILTIN_MASKMOVQ
,
16033 IX86_BUILTIN_MOVMSKPS
,
16034 IX86_BUILTIN_PMOVMSKB
,
16036 IX86_BUILTIN_MOVNTPS
,
16037 IX86_BUILTIN_MOVNTQ
,
16039 IX86_BUILTIN_LOADDQU
,
16040 IX86_BUILTIN_STOREDQU
,
16042 IX86_BUILTIN_PACKSSWB
,
16043 IX86_BUILTIN_PACKSSDW
,
16044 IX86_BUILTIN_PACKUSWB
,
16046 IX86_BUILTIN_PADDB
,
16047 IX86_BUILTIN_PADDW
,
16048 IX86_BUILTIN_PADDD
,
16049 IX86_BUILTIN_PADDQ
,
16050 IX86_BUILTIN_PADDSB
,
16051 IX86_BUILTIN_PADDSW
,
16052 IX86_BUILTIN_PADDUSB
,
16053 IX86_BUILTIN_PADDUSW
,
16054 IX86_BUILTIN_PSUBB
,
16055 IX86_BUILTIN_PSUBW
,
16056 IX86_BUILTIN_PSUBD
,
16057 IX86_BUILTIN_PSUBQ
,
16058 IX86_BUILTIN_PSUBSB
,
16059 IX86_BUILTIN_PSUBSW
,
16060 IX86_BUILTIN_PSUBUSB
,
16061 IX86_BUILTIN_PSUBUSW
,
16064 IX86_BUILTIN_PANDN
,
16068 IX86_BUILTIN_PAVGB
,
16069 IX86_BUILTIN_PAVGW
,
16071 IX86_BUILTIN_PCMPEQB
,
16072 IX86_BUILTIN_PCMPEQW
,
16073 IX86_BUILTIN_PCMPEQD
,
16074 IX86_BUILTIN_PCMPGTB
,
16075 IX86_BUILTIN_PCMPGTW
,
16076 IX86_BUILTIN_PCMPGTD
,
16078 IX86_BUILTIN_PMADDWD
,
16080 IX86_BUILTIN_PMAXSW
,
16081 IX86_BUILTIN_PMAXUB
,
16082 IX86_BUILTIN_PMINSW
,
16083 IX86_BUILTIN_PMINUB
,
16085 IX86_BUILTIN_PMULHUW
,
16086 IX86_BUILTIN_PMULHW
,
16087 IX86_BUILTIN_PMULLW
,
16089 IX86_BUILTIN_PSADBW
,
16090 IX86_BUILTIN_PSHUFW
,
16092 IX86_BUILTIN_PSLLW
,
16093 IX86_BUILTIN_PSLLD
,
16094 IX86_BUILTIN_PSLLQ
,
16095 IX86_BUILTIN_PSRAW
,
16096 IX86_BUILTIN_PSRAD
,
16097 IX86_BUILTIN_PSRLW
,
16098 IX86_BUILTIN_PSRLD
,
16099 IX86_BUILTIN_PSRLQ
,
16100 IX86_BUILTIN_PSLLWI
,
16101 IX86_BUILTIN_PSLLDI
,
16102 IX86_BUILTIN_PSLLQI
,
16103 IX86_BUILTIN_PSRAWI
,
16104 IX86_BUILTIN_PSRADI
,
16105 IX86_BUILTIN_PSRLWI
,
16106 IX86_BUILTIN_PSRLDI
,
16107 IX86_BUILTIN_PSRLQI
,
16109 IX86_BUILTIN_PUNPCKHBW
,
16110 IX86_BUILTIN_PUNPCKHWD
,
16111 IX86_BUILTIN_PUNPCKHDQ
,
16112 IX86_BUILTIN_PUNPCKLBW
,
16113 IX86_BUILTIN_PUNPCKLWD
,
16114 IX86_BUILTIN_PUNPCKLDQ
,
16116 IX86_BUILTIN_SHUFPS
,
16118 IX86_BUILTIN_RCPPS
,
16119 IX86_BUILTIN_RCPSS
,
16120 IX86_BUILTIN_RSQRTPS
,
16121 IX86_BUILTIN_RSQRTSS
,
16122 IX86_BUILTIN_SQRTPS
,
16123 IX86_BUILTIN_SQRTSS
,
16125 IX86_BUILTIN_UNPCKHPS
,
16126 IX86_BUILTIN_UNPCKLPS
,
16128 IX86_BUILTIN_ANDPS
,
16129 IX86_BUILTIN_ANDNPS
,
16131 IX86_BUILTIN_XORPS
,
16134 IX86_BUILTIN_LDMXCSR
,
16135 IX86_BUILTIN_STMXCSR
,
16136 IX86_BUILTIN_SFENCE
,
16138 /* 3DNow! Original */
16139 IX86_BUILTIN_FEMMS
,
16140 IX86_BUILTIN_PAVGUSB
,
16141 IX86_BUILTIN_PF2ID
,
16142 IX86_BUILTIN_PFACC
,
16143 IX86_BUILTIN_PFADD
,
16144 IX86_BUILTIN_PFCMPEQ
,
16145 IX86_BUILTIN_PFCMPGE
,
16146 IX86_BUILTIN_PFCMPGT
,
16147 IX86_BUILTIN_PFMAX
,
16148 IX86_BUILTIN_PFMIN
,
16149 IX86_BUILTIN_PFMUL
,
16150 IX86_BUILTIN_PFRCP
,
16151 IX86_BUILTIN_PFRCPIT1
,
16152 IX86_BUILTIN_PFRCPIT2
,
16153 IX86_BUILTIN_PFRSQIT1
,
16154 IX86_BUILTIN_PFRSQRT
,
16155 IX86_BUILTIN_PFSUB
,
16156 IX86_BUILTIN_PFSUBR
,
16157 IX86_BUILTIN_PI2FD
,
16158 IX86_BUILTIN_PMULHRW
,
16160 /* 3DNow! Athlon Extensions */
16161 IX86_BUILTIN_PF2IW
,
16162 IX86_BUILTIN_PFNACC
,
16163 IX86_BUILTIN_PFPNACC
,
16164 IX86_BUILTIN_PI2FW
,
16165 IX86_BUILTIN_PSWAPDSI
,
16166 IX86_BUILTIN_PSWAPDSF
,
16169 IX86_BUILTIN_ADDPD
,
16170 IX86_BUILTIN_ADDSD
,
16171 IX86_BUILTIN_DIVPD
,
16172 IX86_BUILTIN_DIVSD
,
16173 IX86_BUILTIN_MULPD
,
16174 IX86_BUILTIN_MULSD
,
16175 IX86_BUILTIN_SUBPD
,
16176 IX86_BUILTIN_SUBSD
,
16178 IX86_BUILTIN_CMPEQPD
,
16179 IX86_BUILTIN_CMPLTPD
,
16180 IX86_BUILTIN_CMPLEPD
,
16181 IX86_BUILTIN_CMPGTPD
,
16182 IX86_BUILTIN_CMPGEPD
,
16183 IX86_BUILTIN_CMPNEQPD
,
16184 IX86_BUILTIN_CMPNLTPD
,
16185 IX86_BUILTIN_CMPNLEPD
,
16186 IX86_BUILTIN_CMPNGTPD
,
16187 IX86_BUILTIN_CMPNGEPD
,
16188 IX86_BUILTIN_CMPORDPD
,
16189 IX86_BUILTIN_CMPUNORDPD
,
16190 IX86_BUILTIN_CMPEQSD
,
16191 IX86_BUILTIN_CMPLTSD
,
16192 IX86_BUILTIN_CMPLESD
,
16193 IX86_BUILTIN_CMPNEQSD
,
16194 IX86_BUILTIN_CMPNLTSD
,
16195 IX86_BUILTIN_CMPNLESD
,
16196 IX86_BUILTIN_CMPORDSD
,
16197 IX86_BUILTIN_CMPUNORDSD
,
16199 IX86_BUILTIN_COMIEQSD
,
16200 IX86_BUILTIN_COMILTSD
,
16201 IX86_BUILTIN_COMILESD
,
16202 IX86_BUILTIN_COMIGTSD
,
16203 IX86_BUILTIN_COMIGESD
,
16204 IX86_BUILTIN_COMINEQSD
,
16205 IX86_BUILTIN_UCOMIEQSD
,
16206 IX86_BUILTIN_UCOMILTSD
,
16207 IX86_BUILTIN_UCOMILESD
,
16208 IX86_BUILTIN_UCOMIGTSD
,
16209 IX86_BUILTIN_UCOMIGESD
,
16210 IX86_BUILTIN_UCOMINEQSD
,
16212 IX86_BUILTIN_MAXPD
,
16213 IX86_BUILTIN_MAXSD
,
16214 IX86_BUILTIN_MINPD
,
16215 IX86_BUILTIN_MINSD
,
16217 IX86_BUILTIN_ANDPD
,
16218 IX86_BUILTIN_ANDNPD
,
16220 IX86_BUILTIN_XORPD
,
16222 IX86_BUILTIN_SQRTPD
,
16223 IX86_BUILTIN_SQRTSD
,
16225 IX86_BUILTIN_UNPCKHPD
,
16226 IX86_BUILTIN_UNPCKLPD
,
16228 IX86_BUILTIN_SHUFPD
,
16230 IX86_BUILTIN_LOADUPD
,
16231 IX86_BUILTIN_STOREUPD
,
16232 IX86_BUILTIN_MOVSD
,
16234 IX86_BUILTIN_LOADHPD
,
16235 IX86_BUILTIN_LOADLPD
,
16237 IX86_BUILTIN_CVTDQ2PD
,
16238 IX86_BUILTIN_CVTDQ2PS
,
16240 IX86_BUILTIN_CVTPD2DQ
,
16241 IX86_BUILTIN_CVTPD2PI
,
16242 IX86_BUILTIN_CVTPD2PS
,
16243 IX86_BUILTIN_CVTTPD2DQ
,
16244 IX86_BUILTIN_CVTTPD2PI
,
16246 IX86_BUILTIN_CVTPI2PD
,
16247 IX86_BUILTIN_CVTSI2SD
,
16248 IX86_BUILTIN_CVTSI642SD
,
16250 IX86_BUILTIN_CVTSD2SI
,
16251 IX86_BUILTIN_CVTSD2SI64
,
16252 IX86_BUILTIN_CVTSD2SS
,
16253 IX86_BUILTIN_CVTSS2SD
,
16254 IX86_BUILTIN_CVTTSD2SI
,
16255 IX86_BUILTIN_CVTTSD2SI64
,
16257 IX86_BUILTIN_CVTPS2DQ
,
16258 IX86_BUILTIN_CVTPS2PD
,
16259 IX86_BUILTIN_CVTTPS2DQ
,
16261 IX86_BUILTIN_MOVNTI
,
16262 IX86_BUILTIN_MOVNTPD
,
16263 IX86_BUILTIN_MOVNTDQ
,
16266 IX86_BUILTIN_MASKMOVDQU
,
16267 IX86_BUILTIN_MOVMSKPD
,
16268 IX86_BUILTIN_PMOVMSKB128
,
16270 IX86_BUILTIN_PACKSSWB128
,
16271 IX86_BUILTIN_PACKSSDW128
,
16272 IX86_BUILTIN_PACKUSWB128
,
16274 IX86_BUILTIN_PADDB128
,
16275 IX86_BUILTIN_PADDW128
,
16276 IX86_BUILTIN_PADDD128
,
16277 IX86_BUILTIN_PADDQ128
,
16278 IX86_BUILTIN_PADDSB128
,
16279 IX86_BUILTIN_PADDSW128
,
16280 IX86_BUILTIN_PADDUSB128
,
16281 IX86_BUILTIN_PADDUSW128
,
16282 IX86_BUILTIN_PSUBB128
,
16283 IX86_BUILTIN_PSUBW128
,
16284 IX86_BUILTIN_PSUBD128
,
16285 IX86_BUILTIN_PSUBQ128
,
16286 IX86_BUILTIN_PSUBSB128
,
16287 IX86_BUILTIN_PSUBSW128
,
16288 IX86_BUILTIN_PSUBUSB128
,
16289 IX86_BUILTIN_PSUBUSW128
,
16291 IX86_BUILTIN_PAND128
,
16292 IX86_BUILTIN_PANDN128
,
16293 IX86_BUILTIN_POR128
,
16294 IX86_BUILTIN_PXOR128
,
16296 IX86_BUILTIN_PAVGB128
,
16297 IX86_BUILTIN_PAVGW128
,
16299 IX86_BUILTIN_PCMPEQB128
,
16300 IX86_BUILTIN_PCMPEQW128
,
16301 IX86_BUILTIN_PCMPEQD128
,
16302 IX86_BUILTIN_PCMPGTB128
,
16303 IX86_BUILTIN_PCMPGTW128
,
16304 IX86_BUILTIN_PCMPGTD128
,
16306 IX86_BUILTIN_PMADDWD128
,
16308 IX86_BUILTIN_PMAXSW128
,
16309 IX86_BUILTIN_PMAXUB128
,
16310 IX86_BUILTIN_PMINSW128
,
16311 IX86_BUILTIN_PMINUB128
,
16313 IX86_BUILTIN_PMULUDQ
,
16314 IX86_BUILTIN_PMULUDQ128
,
16315 IX86_BUILTIN_PMULHUW128
,
16316 IX86_BUILTIN_PMULHW128
,
16317 IX86_BUILTIN_PMULLW128
,
16319 IX86_BUILTIN_PSADBW128
,
16320 IX86_BUILTIN_PSHUFHW
,
16321 IX86_BUILTIN_PSHUFLW
,
16322 IX86_BUILTIN_PSHUFD
,
16324 IX86_BUILTIN_PSLLDQI128
,
16325 IX86_BUILTIN_PSLLWI128
,
16326 IX86_BUILTIN_PSLLDI128
,
16327 IX86_BUILTIN_PSLLQI128
,
16328 IX86_BUILTIN_PSRAWI128
,
16329 IX86_BUILTIN_PSRADI128
,
16330 IX86_BUILTIN_PSRLDQI128
,
16331 IX86_BUILTIN_PSRLWI128
,
16332 IX86_BUILTIN_PSRLDI128
,
16333 IX86_BUILTIN_PSRLQI128
,
16335 IX86_BUILTIN_PSLLDQ128
,
16336 IX86_BUILTIN_PSLLW128
,
16337 IX86_BUILTIN_PSLLD128
,
16338 IX86_BUILTIN_PSLLQ128
,
16339 IX86_BUILTIN_PSRAW128
,
16340 IX86_BUILTIN_PSRAD128
,
16341 IX86_BUILTIN_PSRLW128
,
16342 IX86_BUILTIN_PSRLD128
,
16343 IX86_BUILTIN_PSRLQ128
,
16345 IX86_BUILTIN_PUNPCKHBW128
,
16346 IX86_BUILTIN_PUNPCKHWD128
,
16347 IX86_BUILTIN_PUNPCKHDQ128
,
16348 IX86_BUILTIN_PUNPCKHQDQ128
,
16349 IX86_BUILTIN_PUNPCKLBW128
,
16350 IX86_BUILTIN_PUNPCKLWD128
,
16351 IX86_BUILTIN_PUNPCKLDQ128
,
16352 IX86_BUILTIN_PUNPCKLQDQ128
,
16354 IX86_BUILTIN_CLFLUSH
,
16355 IX86_BUILTIN_MFENCE
,
16356 IX86_BUILTIN_LFENCE
,
16358 /* Prescott New Instructions. */
16359 IX86_BUILTIN_ADDSUBPS
,
16360 IX86_BUILTIN_HADDPS
,
16361 IX86_BUILTIN_HSUBPS
,
16362 IX86_BUILTIN_MOVSHDUP
,
16363 IX86_BUILTIN_MOVSLDUP
,
16364 IX86_BUILTIN_ADDSUBPD
,
16365 IX86_BUILTIN_HADDPD
,
16366 IX86_BUILTIN_HSUBPD
,
16367 IX86_BUILTIN_LDDQU
,
16369 IX86_BUILTIN_MONITOR
,
16370 IX86_BUILTIN_MWAIT
,
16373 IX86_BUILTIN_PHADDW
,
16374 IX86_BUILTIN_PHADDD
,
16375 IX86_BUILTIN_PHADDSW
,
16376 IX86_BUILTIN_PHSUBW
,
16377 IX86_BUILTIN_PHSUBD
,
16378 IX86_BUILTIN_PHSUBSW
,
16379 IX86_BUILTIN_PMADDUBSW
,
16380 IX86_BUILTIN_PMULHRSW
,
16381 IX86_BUILTIN_PSHUFB
,
16382 IX86_BUILTIN_PSIGNB
,
16383 IX86_BUILTIN_PSIGNW
,
16384 IX86_BUILTIN_PSIGND
,
16385 IX86_BUILTIN_PALIGNR
,
16386 IX86_BUILTIN_PABSB
,
16387 IX86_BUILTIN_PABSW
,
16388 IX86_BUILTIN_PABSD
,
16390 IX86_BUILTIN_PHADDW128
,
16391 IX86_BUILTIN_PHADDD128
,
16392 IX86_BUILTIN_PHADDSW128
,
16393 IX86_BUILTIN_PHSUBW128
,
16394 IX86_BUILTIN_PHSUBD128
,
16395 IX86_BUILTIN_PHSUBSW128
,
16396 IX86_BUILTIN_PMADDUBSW128
,
16397 IX86_BUILTIN_PMULHRSW128
,
16398 IX86_BUILTIN_PSHUFB128
,
16399 IX86_BUILTIN_PSIGNB128
,
16400 IX86_BUILTIN_PSIGNW128
,
16401 IX86_BUILTIN_PSIGND128
,
16402 IX86_BUILTIN_PALIGNR128
,
16403 IX86_BUILTIN_PABSB128
,
16404 IX86_BUILTIN_PABSW128
,
16405 IX86_BUILTIN_PABSD128
,
16407 /* AMDFAM10 - SSE4A New Instructions. */
16408 IX86_BUILTIN_MOVNTSD
,
16409 IX86_BUILTIN_MOVNTSS
,
16410 IX86_BUILTIN_EXTRQI
,
16411 IX86_BUILTIN_EXTRQ
,
16412 IX86_BUILTIN_INSERTQI
,
16413 IX86_BUILTIN_INSERTQ
,
16415 IX86_BUILTIN_VEC_INIT_V2SI
,
16416 IX86_BUILTIN_VEC_INIT_V4HI
,
16417 IX86_BUILTIN_VEC_INIT_V8QI
,
16418 IX86_BUILTIN_VEC_EXT_V2DF
,
16419 IX86_BUILTIN_VEC_EXT_V2DI
,
16420 IX86_BUILTIN_VEC_EXT_V4SF
,
16421 IX86_BUILTIN_VEC_EXT_V4SI
,
16422 IX86_BUILTIN_VEC_EXT_V8HI
,
16423 IX86_BUILTIN_VEC_EXT_V2SI
,
16424 IX86_BUILTIN_VEC_EXT_V4HI
,
16425 IX86_BUILTIN_VEC_SET_V8HI
,
16426 IX86_BUILTIN_VEC_SET_V4HI
,
16431 /* Table for the ix86 builtin decls. */
16432 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16434 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16435 * if the target_flags include one of MASK. Stores the function decl
16436 * in the ix86_builtins array.
16437 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16440 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16442 tree decl
= NULL_TREE
;
16444 if (mask
& target_flags
16445 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16447 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16449 ix86_builtins
[(int) code
] = decl
;
16455 /* Like def_builtin, but also marks the function decl "const". */
16458 def_builtin_const (int mask
, const char *name
, tree type
,
16459 enum ix86_builtins code
)
16461 tree decl
= def_builtin (mask
, name
, type
, code
);
16463 TREE_READONLY (decl
) = 1;
16467 /* Bits for builtin_description.flag. */
16469 /* Set when we don't support the comparison natively, and should
16470 swap_comparison in order to support it. */
16471 #define BUILTIN_DESC_SWAP_OPERANDS 1
16473 struct builtin_description
16475 const unsigned int mask
;
16476 const enum insn_code icode
;
16477 const char *const name
;
16478 const enum ix86_builtins code
;
16479 const enum rtx_code comparison
;
16480 const unsigned int flag
;
16483 static const struct builtin_description bdesc_comi
[] =
16485 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16486 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16487 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16488 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16489 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16490 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16491 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16492 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16493 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16494 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16495 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16496 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16497 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16498 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16499 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16500 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16501 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16502 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16503 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16504 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16505 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16506 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16507 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16508 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16511 static const struct builtin_description bdesc_2arg
[] =
16514 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16515 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16516 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16517 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16518 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16519 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16520 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16521 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16523 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16524 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16525 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16526 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16527 BUILTIN_DESC_SWAP_OPERANDS
},
16528 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16529 BUILTIN_DESC_SWAP_OPERANDS
},
16530 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16531 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16532 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16533 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16534 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16535 BUILTIN_DESC_SWAP_OPERANDS
},
16536 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16537 BUILTIN_DESC_SWAP_OPERANDS
},
16538 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16539 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16540 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16541 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16542 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16543 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16544 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16545 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16546 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16547 BUILTIN_DESC_SWAP_OPERANDS
},
16548 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16549 BUILTIN_DESC_SWAP_OPERANDS
},
16550 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, 0 },
16552 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16553 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16554 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16555 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16557 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16558 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16559 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16560 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16562 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16563 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16564 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16565 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16566 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16569 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16570 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16571 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16572 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16573 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16574 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16575 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16576 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16578 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16579 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16580 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16581 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16582 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16583 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16584 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16585 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16587 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16588 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16589 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16591 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16592 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16593 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16594 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16596 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16597 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16599 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16600 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16601 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16602 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16603 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16604 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16606 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16607 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16608 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16609 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16611 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16612 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16613 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16614 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16615 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16616 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16619 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16620 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16621 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16623 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16624 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16625 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16627 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16628 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16629 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16630 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16631 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16632 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16634 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16635 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16636 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16637 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16638 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16639 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16641 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16642 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16643 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16644 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16646 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16647 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16650 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16651 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16652 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16653 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16654 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16655 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16656 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16657 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16659 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16660 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16661 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16662 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16663 BUILTIN_DESC_SWAP_OPERANDS
},
16664 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16665 BUILTIN_DESC_SWAP_OPERANDS
},
16666 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16667 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16668 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16669 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16670 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16671 BUILTIN_DESC_SWAP_OPERANDS
},
16672 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16673 BUILTIN_DESC_SWAP_OPERANDS
},
16674 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16675 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16676 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16677 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16678 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16679 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16680 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16681 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16682 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16684 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16685 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16686 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16687 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16689 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16690 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16691 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16692 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16694 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16695 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16696 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16699 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16700 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16701 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16702 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16703 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16704 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16705 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16706 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16708 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16709 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16710 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16711 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16712 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16713 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16714 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16715 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16717 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16718 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16720 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16721 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16722 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16723 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16725 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16726 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16728 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16729 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16730 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16731 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16732 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16733 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16735 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16736 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16737 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16738 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16740 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16741 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16742 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16743 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16744 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16745 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16746 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16747 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16749 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16750 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16751 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16753 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16754 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16756 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16757 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16759 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16760 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16761 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16763 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16764 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16765 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16767 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16768 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16770 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16772 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16773 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16774 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16775 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16778 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16779 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16780 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16781 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16782 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16783 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16786 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16787 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16788 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16789 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16790 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16791 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16792 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16793 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16794 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16795 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16796 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16797 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16798 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16799 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16800 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16801 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16802 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16803 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16804 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16805 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16806 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16807 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16808 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16809 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16812 static const struct builtin_description bdesc_1arg
[] =
16814 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16815 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16817 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16818 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16819 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16821 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16822 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16823 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16824 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16825 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16826 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16828 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16829 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16831 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16833 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16834 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16836 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16837 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16838 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16839 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16840 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16842 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16844 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16845 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16846 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16847 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16849 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16850 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16851 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16854 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16855 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16858 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16859 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16860 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16861 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16862 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16863 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16866 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16867 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16870 ix86_init_mmx_sse_builtins (void)
16872 const struct builtin_description
* d
;
16875 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16876 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16877 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16878 tree V2DI_type_node
16879 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16880 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16881 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16882 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16883 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16884 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16885 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16887 tree pchar_type_node
= build_pointer_type (char_type_node
);
16888 tree pcchar_type_node
= build_pointer_type (
16889 build_type_variant (char_type_node
, 1, 0));
16890 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16891 tree pcfloat_type_node
= build_pointer_type (
16892 build_type_variant (float_type_node
, 1, 0));
16893 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16894 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16895 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16898 tree int_ftype_v4sf_v4sf
16899 = build_function_type_list (integer_type_node
,
16900 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16901 tree v4si_ftype_v4sf_v4sf
16902 = build_function_type_list (V4SI_type_node
,
16903 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16904 /* MMX/SSE/integer conversions. */
16905 tree int_ftype_v4sf
16906 = build_function_type_list (integer_type_node
,
16907 V4SF_type_node
, NULL_TREE
);
16908 tree int64_ftype_v4sf
16909 = build_function_type_list (long_long_integer_type_node
,
16910 V4SF_type_node
, NULL_TREE
);
16911 tree int_ftype_v8qi
16912 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16913 tree v4sf_ftype_v4sf_int
16914 = build_function_type_list (V4SF_type_node
,
16915 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16916 tree v4sf_ftype_v4sf_int64
16917 = build_function_type_list (V4SF_type_node
,
16918 V4SF_type_node
, long_long_integer_type_node
,
16920 tree v4sf_ftype_v4sf_v2si
16921 = build_function_type_list (V4SF_type_node
,
16922 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16924 /* Miscellaneous. */
16925 tree v8qi_ftype_v4hi_v4hi
16926 = build_function_type_list (V8QI_type_node
,
16927 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16928 tree v4hi_ftype_v2si_v2si
16929 = build_function_type_list (V4HI_type_node
,
16930 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16931 tree v4sf_ftype_v4sf_v4sf_int
16932 = build_function_type_list (V4SF_type_node
,
16933 V4SF_type_node
, V4SF_type_node
,
16934 integer_type_node
, NULL_TREE
);
16935 tree v2si_ftype_v4hi_v4hi
16936 = build_function_type_list (V2SI_type_node
,
16937 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16938 tree v4hi_ftype_v4hi_int
16939 = build_function_type_list (V4HI_type_node
,
16940 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16941 tree v4hi_ftype_v4hi_di
16942 = build_function_type_list (V4HI_type_node
,
16943 V4HI_type_node
, long_long_unsigned_type_node
,
16945 tree v2si_ftype_v2si_di
16946 = build_function_type_list (V2SI_type_node
,
16947 V2SI_type_node
, long_long_unsigned_type_node
,
16949 tree void_ftype_void
16950 = build_function_type (void_type_node
, void_list_node
);
16951 tree void_ftype_unsigned
16952 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16953 tree void_ftype_unsigned_unsigned
16954 = build_function_type_list (void_type_node
, unsigned_type_node
,
16955 unsigned_type_node
, NULL_TREE
);
16956 tree void_ftype_pcvoid_unsigned_unsigned
16957 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16958 unsigned_type_node
, unsigned_type_node
,
16960 tree unsigned_ftype_void
16961 = build_function_type (unsigned_type_node
, void_list_node
);
16962 tree v2si_ftype_v4sf
16963 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16964 /* Loads/stores. */
16965 tree void_ftype_v8qi_v8qi_pchar
16966 = build_function_type_list (void_type_node
,
16967 V8QI_type_node
, V8QI_type_node
,
16968 pchar_type_node
, NULL_TREE
);
16969 tree v4sf_ftype_pcfloat
16970 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16971 /* @@@ the type is bogus */
16972 tree v4sf_ftype_v4sf_pv2si
16973 = build_function_type_list (V4SF_type_node
,
16974 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16975 tree void_ftype_pv2si_v4sf
16976 = build_function_type_list (void_type_node
,
16977 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16978 tree void_ftype_pfloat_v4sf
16979 = build_function_type_list (void_type_node
,
16980 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16981 tree void_ftype_pdi_di
16982 = build_function_type_list (void_type_node
,
16983 pdi_type_node
, long_long_unsigned_type_node
,
16985 tree void_ftype_pv2di_v2di
16986 = build_function_type_list (void_type_node
,
16987 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16988 /* Normal vector unops. */
16989 tree v4sf_ftype_v4sf
16990 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16991 tree v16qi_ftype_v16qi
16992 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16993 tree v8hi_ftype_v8hi
16994 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16995 tree v4si_ftype_v4si
16996 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16997 tree v8qi_ftype_v8qi
16998 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16999 tree v4hi_ftype_v4hi
17000 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17002 /* Normal vector binops. */
17003 tree v4sf_ftype_v4sf_v4sf
17004 = build_function_type_list (V4SF_type_node
,
17005 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17006 tree v8qi_ftype_v8qi_v8qi
17007 = build_function_type_list (V8QI_type_node
,
17008 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17009 tree v4hi_ftype_v4hi_v4hi
17010 = build_function_type_list (V4HI_type_node
,
17011 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17012 tree v2si_ftype_v2si_v2si
17013 = build_function_type_list (V2SI_type_node
,
17014 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17015 tree di_ftype_di_di
17016 = build_function_type_list (long_long_unsigned_type_node
,
17017 long_long_unsigned_type_node
,
17018 long_long_unsigned_type_node
, NULL_TREE
);
17020 tree di_ftype_di_di_int
17021 = build_function_type_list (long_long_unsigned_type_node
,
17022 long_long_unsigned_type_node
,
17023 long_long_unsigned_type_node
,
17024 integer_type_node
, NULL_TREE
);
17026 tree v2si_ftype_v2sf
17027 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
17028 tree v2sf_ftype_v2si
17029 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
17030 tree v2si_ftype_v2si
17031 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17032 tree v2sf_ftype_v2sf
17033 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17034 tree v2sf_ftype_v2sf_v2sf
17035 = build_function_type_list (V2SF_type_node
,
17036 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17037 tree v2si_ftype_v2sf_v2sf
17038 = build_function_type_list (V2SI_type_node
,
17039 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17040 tree pint_type_node
= build_pointer_type (integer_type_node
);
17041 tree pdouble_type_node
= build_pointer_type (double_type_node
);
17042 tree pcdouble_type_node
= build_pointer_type (
17043 build_type_variant (double_type_node
, 1, 0));
17044 tree int_ftype_v2df_v2df
17045 = build_function_type_list (integer_type_node
,
17046 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17048 tree void_ftype_pcvoid
17049 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
17050 tree v4sf_ftype_v4si
17051 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
17052 tree v4si_ftype_v4sf
17053 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
17054 tree v2df_ftype_v4si
17055 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
17056 tree v4si_ftype_v2df
17057 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
17058 tree v2si_ftype_v2df
17059 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
17060 tree v4sf_ftype_v2df
17061 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17062 tree v2df_ftype_v2si
17063 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
17064 tree v2df_ftype_v4sf
17065 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17066 tree int_ftype_v2df
17067 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
17068 tree int64_ftype_v2df
17069 = build_function_type_list (long_long_integer_type_node
,
17070 V2DF_type_node
, NULL_TREE
);
17071 tree v2df_ftype_v2df_int
17072 = build_function_type_list (V2DF_type_node
,
17073 V2DF_type_node
, integer_type_node
, NULL_TREE
);
17074 tree v2df_ftype_v2df_int64
17075 = build_function_type_list (V2DF_type_node
,
17076 V2DF_type_node
, long_long_integer_type_node
,
17078 tree v4sf_ftype_v4sf_v2df
17079 = build_function_type_list (V4SF_type_node
,
17080 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17081 tree v2df_ftype_v2df_v4sf
17082 = build_function_type_list (V2DF_type_node
,
17083 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17084 tree v2df_ftype_v2df_v2df_int
17085 = build_function_type_list (V2DF_type_node
,
17086 V2DF_type_node
, V2DF_type_node
,
17089 tree v2df_ftype_v2df_pcdouble
17090 = build_function_type_list (V2DF_type_node
,
17091 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17092 tree void_ftype_pdouble_v2df
17093 = build_function_type_list (void_type_node
,
17094 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
17095 tree void_ftype_pint_int
17096 = build_function_type_list (void_type_node
,
17097 pint_type_node
, integer_type_node
, NULL_TREE
);
17098 tree void_ftype_v16qi_v16qi_pchar
17099 = build_function_type_list (void_type_node
,
17100 V16QI_type_node
, V16QI_type_node
,
17101 pchar_type_node
, NULL_TREE
);
17102 tree v2df_ftype_pcdouble
17103 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17104 tree v2df_ftype_v2df_v2df
17105 = build_function_type_list (V2DF_type_node
,
17106 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17107 tree v16qi_ftype_v16qi_v16qi
17108 = build_function_type_list (V16QI_type_node
,
17109 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17110 tree v8hi_ftype_v8hi_v8hi
17111 = build_function_type_list (V8HI_type_node
,
17112 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17113 tree v4si_ftype_v4si_v4si
17114 = build_function_type_list (V4SI_type_node
,
17115 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17116 tree v2di_ftype_v2di_v2di
17117 = build_function_type_list (V2DI_type_node
,
17118 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17119 tree v2di_ftype_v2df_v2df
17120 = build_function_type_list (V2DI_type_node
,
17121 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17122 tree v2df_ftype_v2df
17123 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17124 tree v2di_ftype_v2di_int
17125 = build_function_type_list (V2DI_type_node
,
17126 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17127 tree v2di_ftype_v2di_v2di_int
17128 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17129 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17130 tree v4si_ftype_v4si_int
17131 = build_function_type_list (V4SI_type_node
,
17132 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17133 tree v8hi_ftype_v8hi_int
17134 = build_function_type_list (V8HI_type_node
,
17135 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17136 tree v4si_ftype_v8hi_v8hi
17137 = build_function_type_list (V4SI_type_node
,
17138 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17139 tree di_ftype_v8qi_v8qi
17140 = build_function_type_list (long_long_unsigned_type_node
,
17141 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17142 tree di_ftype_v2si_v2si
17143 = build_function_type_list (long_long_unsigned_type_node
,
17144 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17145 tree v2di_ftype_v16qi_v16qi
17146 = build_function_type_list (V2DI_type_node
,
17147 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17148 tree v2di_ftype_v4si_v4si
17149 = build_function_type_list (V2DI_type_node
,
17150 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17151 tree int_ftype_v16qi
17152 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17153 tree v16qi_ftype_pcchar
17154 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17155 tree void_ftype_pchar_v16qi
17156 = build_function_type_list (void_type_node
,
17157 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17159 tree v2di_ftype_v2di_unsigned_unsigned
17160 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17161 unsigned_type_node
, unsigned_type_node
,
17163 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17164 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17165 unsigned_type_node
, unsigned_type_node
,
17167 tree v2di_ftype_v2di_v16qi
17168 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17172 tree float128_type
;
17175 /* The __float80 type. */
17176 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17177 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17181 /* The __float80 type. */
17182 float80_type
= make_node (REAL_TYPE
);
17183 TYPE_PRECISION (float80_type
) = 80;
17184 layout_type (float80_type
);
17185 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
17190 float128_type
= make_node (REAL_TYPE
);
17191 TYPE_PRECISION (float128_type
) = 128;
17192 layout_type (float128_type
);
17193 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
17196 /* Add all builtins that are more or less simple operations on two
17198 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17200 /* Use one of the operands; the target can have a different mode for
17201 mask-generating compares. */
17202 enum machine_mode mode
;
17207 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17212 type
= v16qi_ftype_v16qi_v16qi
;
17215 type
= v8hi_ftype_v8hi_v8hi
;
17218 type
= v4si_ftype_v4si_v4si
;
17221 type
= v2di_ftype_v2di_v2di
;
17224 type
= v2df_ftype_v2df_v2df
;
17227 type
= v4sf_ftype_v4sf_v4sf
;
17230 type
= v8qi_ftype_v8qi_v8qi
;
17233 type
= v4hi_ftype_v4hi_v4hi
;
17236 type
= v2si_ftype_v2si_v2si
;
17239 type
= di_ftype_di_di
;
17243 gcc_unreachable ();
17246 /* Override for comparisons. */
17247 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17248 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17249 type
= v4si_ftype_v4sf_v4sf
;
17251 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17252 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17253 type
= v2di_ftype_v2df_v2df
;
17255 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17258 /* Add all builtins that are more or less simple operations on 1 operand. */
17259 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17261 enum machine_mode mode
;
17266 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17271 type
= v16qi_ftype_v16qi
;
17274 type
= v8hi_ftype_v8hi
;
17277 type
= v4si_ftype_v4si
;
17280 type
= v2df_ftype_v2df
;
17283 type
= v4sf_ftype_v4sf
;
17286 type
= v8qi_ftype_v8qi
;
17289 type
= v4hi_ftype_v4hi
;
17292 type
= v2si_ftype_v2si
;
17299 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17302 /* Add the remaining MMX insns with somewhat more complicated types. */
17303 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
17304 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
17305 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
17306 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
17308 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
17309 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
17310 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
17312 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17313 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17315 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17316 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17318 /* comi/ucomi insns. */
17319 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17320 if (d
->mask
== MASK_SSE2
)
17321 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17323 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17325 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17326 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17327 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17329 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17330 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17331 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17332 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17333 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17334 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17335 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17336 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17337 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17338 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17339 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17341 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17343 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17344 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17346 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17347 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17348 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17349 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17351 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17352 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17353 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17354 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17356 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17358 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17360 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17361 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17362 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17363 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17364 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17365 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17367 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17369 /* Original 3DNow! */
17370 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17371 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17372 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17373 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17374 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17375 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17376 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17377 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17378 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17379 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17380 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17381 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17382 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17383 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17384 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17385 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17386 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17387 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17388 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17389 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17391 /* 3DNow! extension as used in the Athlon CPU. */
17392 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17393 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17394 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17395 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17396 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17397 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17400 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17402 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17403 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17405 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17406 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17408 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17409 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17410 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17411 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17412 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17414 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17415 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17416 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17417 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17419 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17420 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17422 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17424 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17425 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17427 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17428 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17429 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17430 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17431 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17433 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17435 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17436 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17437 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17438 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17440 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17441 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17442 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17444 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17445 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17446 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17447 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17449 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17450 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17451 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17453 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17454 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17456 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17457 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17459 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17460 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17461 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17462 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17463 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
17464 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
17465 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17467 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17468 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17469 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17470 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17471 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
17472 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
17473 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17475 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17476 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17477 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
17478 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
17480 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17482 /* Prescott New Instructions. */
17483 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17484 void_ftype_pcvoid_unsigned_unsigned
,
17485 IX86_BUILTIN_MONITOR
);
17486 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17487 void_ftype_unsigned_unsigned
,
17488 IX86_BUILTIN_MWAIT
);
17489 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17490 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17493 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17494 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17495 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17496 IX86_BUILTIN_PALIGNR
);
17498 /* AMDFAM10 SSE4A New built-ins */
17499 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17500 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17501 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17502 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17503 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17504 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17505 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17506 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17507 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17508 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17509 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17510 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17512 /* Access to the vec_init patterns. */
17513 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17514 integer_type_node
, NULL_TREE
);
17515 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17516 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17518 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17519 short_integer_type_node
,
17520 short_integer_type_node
,
17521 short_integer_type_node
, NULL_TREE
);
17522 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17523 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17525 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17526 char_type_node
, char_type_node
,
17527 char_type_node
, char_type_node
,
17528 char_type_node
, char_type_node
,
17529 char_type_node
, NULL_TREE
);
17530 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17531 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17533 /* Access to the vec_extract patterns. */
17534 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17535 integer_type_node
, NULL_TREE
);
17536 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17537 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17539 ftype
= build_function_type_list (long_long_integer_type_node
,
17540 V2DI_type_node
, integer_type_node
,
17542 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17543 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17545 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17546 integer_type_node
, NULL_TREE
);
17547 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17548 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17550 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17551 integer_type_node
, NULL_TREE
);
17552 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17553 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17555 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17556 integer_type_node
, NULL_TREE
);
17557 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17558 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17560 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17561 integer_type_node
, NULL_TREE
);
17562 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17563 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17565 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17566 integer_type_node
, NULL_TREE
);
17567 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17568 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17570 /* Access to the vec_set patterns. */
17571 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17573 integer_type_node
, NULL_TREE
);
17574 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17575 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17577 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17579 integer_type_node
, NULL_TREE
);
17580 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17581 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
/* Target hook that registers all ix86 builtin functions at startup.
   In the lines visible here it only delegates to the MMX/SSE builtin
   registration routine; any other statements are outside this view.  */
17585 ix86_init_builtins (void)
17588   ix86_init_mmx_sse_builtins ();
17591 /* Errors in the source file can cause expand_expr to return const0_rtx
17592 where we expect a vector. To avoid crashing, use one of the vector
17593 clear instructions. */
/* safe_vector_operand: X is an operand that should be a vector of machine
   mode MODE.  If error recovery produced a scalar const0_rtx, substitute
   the proper zero-vector constant for MODE instead.
   NOTE(review): the `return x;` line and braces are elided from this
   view; the visible body is only the replacement assignment.  */
17595 safe_vector_operand (rtx x
, enum machine_mode mode
)
17597 if (x
== const0_rtx
)
17598 x
= CONST0_RTX (mode
);
17602 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-argument builtin call EXP through insn pattern ICODE,
   putting the result in TARGET when that is usable.  The output/input
   modes (tmode, mode0, mode1) come from the pattern's insn_data entry.
   NOTE(review): several original lines are elided from this view,
   including local declarations (e.g. `rtx pat;`), the xops[] setup
   around lines 17645-17649, and the final emit/return.  */
17605 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17608 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17609 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17610 rtx op0
= expand_normal (arg0
);
17611 rtx op1
= expand_normal (arg1
);
17612 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17613 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17614 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
/* Guard against error-recovery const0_rtx operands; see
   safe_vector_operand above.  */
17616 if (VECTOR_MODE_P (mode0
))
17617 op0
= safe_vector_operand (op0
, mode0
);
17618 if (VECTOR_MODE_P (mode1
))
17619 op1
= safe_vector_operand (op1
, mode1
);
/* Use a fresh pseudo when optimizing, or when TARGET is absent or fails
   the pattern's output mode/predicate check.  */
17621 if (optimize
|| !target
17622 || GET_MODE (target
) != tmode
17623 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17624 target
= gen_reg_rtx (tmode
);
/* An SImode second operand fed to a TImode pattern: load it into the
   low element of a V4SI register and reinterpret that as TImode.  */
17626 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17628 rtx x
= gen_reg_rtx (V4SImode
);
17629 emit_insn (gen_sse2_loadd (x
, op1
));
17630 op1
= gen_lowpart (TImode
, x
);
17633 /* The insn must want input operands in the same modes as the
17635 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17636 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
/* Force each input into a register if it fails its operand predicate.  */
17638 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17639 op0
= copy_to_mode_reg (mode0
, op0
);
17640 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17641 op1
= copy_to_mode_reg (mode1
, op1
);
17643 /* ??? Using ix86_fixup_binary_operands is problematic when
17644 we've got mismatched modes. Fake it. */
/* NOTE(review): the xops[] array referenced below is populated on lines
   elided from this view — presumably { target, op0, op1 }; confirm
   against the full file.  */
17650 if (tmode
== mode0
&& tmode
== mode1
)
17652 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17656 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17658 op0
= force_reg (mode0
, op0
);
17659 op1
= force_reg (mode1
, op1
);
17660 target
= gen_reg_rtx (tmode
);
/* Generate the insn; emission and the return of TARGET are elided.  */
17663 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17670 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store-style builtin EXP through ICODE: argument 0 is a
   pointer that becomes the destination MEM, argument 1 is the value
   stored (forced into a register).
   NOTE(review): local declarations (`rtx pat;`), the emit of PAT and
   the return are elided from this view.  */
17673 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17676 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17677 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17678 rtx op0
= expand_normal (arg0
);
17679 rtx op1
= expand_normal (arg1
);
17680 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17681 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
/* Replace an error-recovery const0_rtx value with a zero vector.  */
17683 if (VECTOR_MODE_P (mode1
))
17684 op1
= safe_vector_operand (op1
, mode1
);
/* Turn the pointer argument into a MEM of the store's mode, with the
   address forced into a Pmode register.  */
17686 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17687 op1
= copy_to_mode_reg (mode1
, op1
);
17689 pat
= GEN_FCN (icode
) (op0
, op1
);
17695 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-argument builtin EXP through ICODE into TARGET.  When
   DO_LOAD is nonzero the single argument is a pointer and is converted
   to a MEM of the pattern's input mode.
   NOTE(review): the `if (do_load)` guard around line 17712 appears to be
   on an elided line — confirm against the full file; the emit of PAT and
   the return are likewise elided.  */
17698 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17699 rtx target
, int do_load
)
17702 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17703 rtx op0
= expand_normal (arg0
);
17704 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17705 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* Use a fresh pseudo when TARGET cannot hold the result directly.  */
17707 if (optimize
|| !target
17708 || GET_MODE (target
) != tmode
17709 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17710 target
= gen_reg_rtx (tmode
);
/* do_load path: dereference the pointer argument.  */
17712 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17715 if (VECTOR_MODE_P (mode0
))
17716 op0
= safe_vector_operand (op0
, mode0
);
/* Force the input into a register if optimizing or if it fails the
   pattern's input predicate.  */
17718 if ((optimize
&& !register_operand (op0
, mode0
))
17719 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17720 op0
= copy_to_mode_reg (mode0
, op0
);
17723 pat
= GEN_FCN (icode
) (target
, op0
);
17730 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17731 sqrtss, rsqrtss, rcpss. */
/* These vm* patterns take two inputs (the operation source and the
   vector supplying the untouched upper elements), so the single builtin
   argument is used for both operands.
   NOTE(review): the `op1 = op0;` copy (original line ~17754) is elided
   from this view, which is why op1 is checked below without a visible
   assignment — confirm against the full file.  The emit of PAT and the
   return are also elided.  */
17734 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17737 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17738 rtx op1
, op0
= expand_normal (arg0
);
17739 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17740 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* Use a fresh pseudo when TARGET cannot hold the result directly.  */
17742 if (optimize
|| !target
17743 || GET_MODE (target
) != tmode
17744 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17745 target
= gen_reg_rtx (tmode
);
17747 if (VECTOR_MODE_P (mode0
))
17748 op0
= safe_vector_operand (op0
, mode0
);
17750 if ((optimize
&& !register_operand (op0
, mode0
))
17751 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17752 op0
= copy_to_mode_reg (mode0
, op0
);
/* op1 (the pass-through vector, presumably a copy of op0 made on an
   elided line) must also satisfy its operand predicate.  */
17755 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17756 op1
= copy_to_mode_reg (mode0
, op1
);
17758 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17765 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE comparison builtin described by D (builtin_description:
   icode, comparison code, flags).  The comparison rtx OP2 is passed as
   the pattern's final operand.
   NOTE(review): the parameter list continues on an elided line (TARGET),
   as do local declarations, the tail of the operand-swap block, the emit
   of PAT and the return — confirm against the full file.  */
17768 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17772 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17773 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17774 rtx op0
= expand_normal (arg0
);
17775 rtx op1
= expand_normal (arg1
);
17777 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17778 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17779 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17780 enum rtx_code comparison
= d
->comparison
;
17782 if (VECTOR_MODE_P (mode0
))
17783 op0
= safe_vector_operand (op0
, mode0
)
;
17784 if (VECTOR_MODE_P (mode1
))
17785 op1
= safe_vector_operand (op1
, mode1
);
17787 /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): only the start of the swap block is visible; the rest of
   the exchange is on elided lines.  */
17789 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17791 rtx tmp
= gen_reg_rtx (mode1
);
17792 emit_move_insn (tmp
, op1
);
/* Use a fresh pseudo when TARGET cannot hold the result directly.  */
17797 if (optimize
|| !target
17798 || GET_MODE (target
) != tmode
17799 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17800 target
= gen_reg_rtx (tmode
);
17802 if ((optimize
&& !register_operand (op0
, mode0
))
17803 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17804 op0
= copy_to_mode_reg (mode0
, op0
);
17805 if ((optimize
&& !register_operand (op1
, mode1
))
17806 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17807 op1
= copy_to_mode_reg (mode1
, op1
);
/* Build the comparison rtx and hand it to the pattern as its last
   operand.  */
17809 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17810 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17817 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/comisd-style builtin described by D.  The result is
   materialized as a 0/1 integer: an SImode pseudo is zeroed, its low
   QImode part is set from the flags comparison via STRICT_LOW_PART, and
   the SImode register is returned.
   NOTE(review): the parameter list continues on an elided line (TARGET),
   and the body of the operand-swap block, the `pat` check and emission
   details are partially elided — confirm against the full file.  */
17820 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17824 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17825 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17826 rtx op0
= expand_normal (arg0
);
17827 rtx op1
= expand_normal (arg1
);
17828 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17829 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17830 enum rtx_code comparison
= d
->comparison
;
17832 if (VECTOR_MODE_P (mode0
))
17833 op0
= safe_vector_operand (op0
, mode0
);
17834 if (VECTOR_MODE_P (mode1
))
17835 op1
= safe_vector_operand (op1
, mode1
);
17837 /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): the swap body itself is on elided lines.  */
17839 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
/* Zero an SImode pseudo, then work on its low byte as a QImode subreg
   so only the low 8 bits are written by the set below.  */
17846 target
= gen_reg_rtx (SImode
);
17847 emit_move_insn (target
, const0_rtx
);
17848 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17850 if ((optimize
&& !register_operand (op0
, mode0
))
17851 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17852 op0
= copy_to_mode_reg (mode0
, op0
);
17853 if ((optimize
&& !register_operand (op1
, mode1
))
17854 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17855 op1
= copy_to_mode_reg (mode1
, op1
);
/* Emit the flag-setting comparison insn.  */
17857 pat
= GEN_FCN (d
->icode
)
 (op0
, op1
);
/* Set only the low byte of TARGET from the flags comparison.  */
17861 emit_insn (gen_rtx_SET (VOIDmode
,
17862 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17863 gen_rtx_fmt_ee (comparison
, QImode
,
/* Return the full SImode register underlying the QImode subreg.  */
17867 return SUBREG_REG (target
);
17870 /* Return the integer constant in ARG. Constrain it to be in the range
17871 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the return statements (valid-selector and error paths)
   are on lines elided from this view.  */
17874 get_element_number (tree vec_type
, tree arg
)
17876 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
/* Reject ARG unless it is a non-negative integer constant that fits in
   a host word and is at most MAX (the last valid lane index).  */
17878 if (!host_integerp (arg
, 1)
17879 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17881 error ("selector must be an integer constant in the range 0..%wi", max
);
17888 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17889 ix86_expand_vector_init. We DO have language-level syntax for this, in
17890 the form of (type){ init-list }. Except that since we can't place emms
17891 instructions from inside the compiler, we can't allow the use of MMX
17892 registers unless the user explicitly asks for it. So we do *not* define
17893 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17894 we have builtins invoked by mmintrin.h that gives us license to emit
17895 these sorts of instructions. */
/* Build TYPE's vector value from the call's per-element arguments and
   expand it into TARGET.  NOTE(review): loop braces and the final return
   of TARGET are on elided lines.  */
17898 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17900 enum machine_mode tmode
= TYPE_MODE (type
);
17901 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17902 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17903 rtvec v
= rtvec_alloc (n_elt
);
/* The builtin must supply exactly one argument per vector lane.  */
17905 gcc_assert (VECTOR_MODE_P (tmode
));
17906 gcc_assert (call_expr_nargs (exp
) == n_elt
);
/* Expand each argument and view it in the element mode.  */
17908 for (i
= 0; i
< n_elt
; ++i
)
17910 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17911 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17914 if (!target
|| !register_operand (target
, tmode
))
17915 target
= gen_reg_rtx (tmode
);
17917 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17921 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17922 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17923 had a language-level syntax for referencing vector elements. */
/* Extract element ARG1 (a compile-time constant selector validated by
   get_element_number) from vector argument ARG0 into TARGET.
   NOTE(review): declarations of arg0/arg1/op0/elt and the final return
   of TARGET are on elided lines.  */
17926 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17928 enum machine_mode tmode
, mode0
;
17933 arg0
= CALL_EXPR_ARG (exp
, 0);
17934 arg1
= CALL_EXPR_ARG (exp
, 1);
17936 op0
= expand_normal (arg0
);
17937 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
/* tmode is the element mode, mode0 the whole-vector mode.  */
17939 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17940 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17941 gcc_assert (VECTOR_MODE_P (mode0
));
17943 op0
= force_reg (mode0
, op0
);
17945 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17946 target
= gen_reg_rtx (tmode
);
17948 ix86_expand_vector_extract (true, target
, op0
, elt
);
17953 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17954 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17955 a language-level syntax for referencing vector elements. */
/* Insert scalar ARG1 at constant lane ARG2 of vector ARG0, operating on
   a fresh copy so the source vector is not modified.
   NOTE(review): the declaration of `elt` and the final return of TARGET
   are on elided lines.  */
17958 ix86_expand_vec_set_builtin (tree exp
)
17960 enum machine_mode tmode
, mode1
;
17961 tree arg0
, arg1
, arg2
;
17963 rtx op0
, op1
, target
;
17965 arg0
= CALL_EXPR_ARG (exp
, 0);
17966 arg1
= CALL_EXPR_ARG (exp
, 1);
17967 arg2
= CALL_EXPR_ARG (exp
, 2);
/* tmode is the whole-vector mode, mode1 the element mode.  */
17969 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17970 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17971 gcc_assert (VECTOR_MODE_P (tmode
));
17973 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17974 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17975 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
/* Coerce the scalar to the element mode if it came out differently
   (VOIDmode constants are left alone).  */
17977 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17978 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17980 op0
= force_reg (tmode
, op0
);
17981 op1
= force_reg (mode1
, op1
);
17983 /* OP0 is the source of these builtin functions and shouldn't be
17984 modified. Create a copy, use it and return it as target. */
17985 target
= gen_reg_rtx (tmode
);
17986 emit_move_insn (target
, op0
);
17987 ix86_expand_vector_set (true, target
, op1
, elt
);
17992 /* Expand an expression EXP that calls a built-in function,
17993 with result going to TARGET if that's convenient
17994 (and in mode MODE if that's convenient).
17995 SUBTARGET may be used as the target for computing one of EXP's operands.
17996 IGNORE is nonzero if the value is to be ignored. */
17999 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
18000 enum machine_mode mode ATTRIBUTE_UNUSED
,
18001 int ignore ATTRIBUTE_UNUSED
)
18003 const struct builtin_description
*d
;
18005 enum insn_code icode
;
18006 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
18007 tree arg0
, arg1
, arg2
, arg3
;
18008 rtx op0
, op1
, op2
, op3
, pat
;
18009 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
18010 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
18014 case IX86_BUILTIN_EMMS
:
18015 emit_insn (gen_mmx_emms ());
18018 case IX86_BUILTIN_SFENCE
:
18019 emit_insn (gen_sse_sfence ());
18022 case IX86_BUILTIN_MASKMOVQ
:
18023 case IX86_BUILTIN_MASKMOVDQU
:
18024 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
18025 ? CODE_FOR_mmx_maskmovq
18026 : CODE_FOR_sse2_maskmovdqu
);
18027 /* Note the arg order is different from the operand order. */
18028 arg1
= CALL_EXPR_ARG (exp
, 0);
18029 arg2
= CALL_EXPR_ARG (exp
, 1);
18030 arg0
= CALL_EXPR_ARG (exp
, 2);
18031 op0
= expand_normal (arg0
);
18032 op1
= expand_normal (arg1
);
18033 op2
= expand_normal (arg2
);
18034 mode0
= insn_data
[icode
].operand
[0].mode
;
18035 mode1
= insn_data
[icode
].operand
[1].mode
;
18036 mode2
= insn_data
[icode
].operand
[2].mode
;
18038 op0
= force_reg (Pmode
, op0
);
18039 op0
= gen_rtx_MEM (mode1
, op0
);
18041 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
18042 op0
= copy_to_mode_reg (mode0
, op0
);
18043 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
18044 op1
= copy_to_mode_reg (mode1
, op1
);
18045 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
18046 op2
= copy_to_mode_reg (mode2
, op2
);
18047 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
18053 case IX86_BUILTIN_SQRTSS
:
18054 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
18055 case IX86_BUILTIN_RSQRTSS
:
18056 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
18057 case IX86_BUILTIN_RCPSS
:
18058 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
18060 case IX86_BUILTIN_LOADUPS
:
18061 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
18063 case IX86_BUILTIN_STOREUPS
:
18064 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
18066 case IX86_BUILTIN_LOADHPS
:
18067 case IX86_BUILTIN_LOADLPS
:
18068 case IX86_BUILTIN_LOADHPD
:
18069 case IX86_BUILTIN_LOADLPD
:
18070 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
18071 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
18072 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
18073 : CODE_FOR_sse2_loadlpd
);
18074 arg0
= CALL_EXPR_ARG (exp
, 0);
18075 arg1
= CALL_EXPR_ARG (exp
, 1);
18076 op0
= expand_normal (arg0
);
18077 op1
= expand_normal (arg1
);
18078 tmode
= insn_data
[icode
].operand
[0].mode
;
18079 mode0
= insn_data
[icode
].operand
[1].mode
;
18080 mode1
= insn_data
[icode
].operand
[2].mode
;
18082 op0
= force_reg (mode0
, op0
);
18083 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
18084 if (optimize
|| target
== 0
18085 || GET_MODE (target
) != tmode
18086 || !register_operand (target
, tmode
))
18087 target
= gen_reg_rtx (tmode
);
18088 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18094 case IX86_BUILTIN_STOREHPS
:
18095 case IX86_BUILTIN_STORELPS
:
18096 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
18097 : CODE_FOR_sse_storelps
);
18098 arg0
= CALL_EXPR_ARG (exp
, 0);
18099 arg1
= CALL_EXPR_ARG (exp
, 1);
18100 op0
= expand_normal (arg0
);
18101 op1
= expand_normal (arg1
);
18102 mode0
= insn_data
[icode
].operand
[0].mode
;
18103 mode1
= insn_data
[icode
].operand
[1].mode
;
18105 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18106 op1
= force_reg (mode1
, op1
);
18108 pat
= GEN_FCN (icode
) (op0
, op1
);
18114 case IX86_BUILTIN_MOVNTPS
:
18115 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
18116 case IX86_BUILTIN_MOVNTQ
:
18117 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
18119 case IX86_BUILTIN_LDMXCSR
:
18120 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
18121 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18122 emit_move_insn (target
, op0
);
18123 emit_insn (gen_sse_ldmxcsr (target
));
18126 case IX86_BUILTIN_STMXCSR
:
18127 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18128 emit_insn (gen_sse_stmxcsr (target
));
18129 return copy_to_mode_reg (SImode
, target
);
18131 case IX86_BUILTIN_SHUFPS
:
18132 case IX86_BUILTIN_SHUFPD
:
18133 icode
= (fcode
== IX86_BUILTIN_SHUFPS
18134 ? CODE_FOR_sse_shufps
18135 : CODE_FOR_sse2_shufpd
);
18136 arg0
= CALL_EXPR_ARG (exp
, 0);
18137 arg1
= CALL_EXPR_ARG (exp
, 1);
18138 arg2
= CALL_EXPR_ARG (exp
, 2);
18139 op0
= expand_normal (arg0
);
18140 op1
= expand_normal (arg1
);
18141 op2
= expand_normal (arg2
);
18142 tmode
= insn_data
[icode
].operand
[0].mode
;
18143 mode0
= insn_data
[icode
].operand
[1].mode
;
18144 mode1
= insn_data
[icode
].operand
[2].mode
;
18145 mode2
= insn_data
[icode
].operand
[3].mode
;
18147 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18148 op0
= copy_to_mode_reg (mode0
, op0
);
18149 if ((optimize
&& !register_operand (op1
, mode1
))
18150 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18151 op1
= copy_to_mode_reg (mode1
, op1
);
18152 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18154 /* @@@ better error message */
18155 error ("mask must be an immediate");
18156 return gen_reg_rtx (tmode
);
18158 if (optimize
|| target
== 0
18159 || GET_MODE (target
) != tmode
18160 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18161 target
= gen_reg_rtx (tmode
);
18162 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18168 case IX86_BUILTIN_PSHUFW
:
18169 case IX86_BUILTIN_PSHUFD
:
18170 case IX86_BUILTIN_PSHUFHW
:
18171 case IX86_BUILTIN_PSHUFLW
:
18172 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
18173 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
18174 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
18175 : CODE_FOR_mmx_pshufw
);
18176 arg0
= CALL_EXPR_ARG (exp
, 0);
18177 arg1
= CALL_EXPR_ARG (exp
, 1);
18178 op0
= expand_normal (arg0
);
18179 op1
= expand_normal (arg1
);
18180 tmode
= insn_data
[icode
].operand
[0].mode
;
18181 mode1
= insn_data
[icode
].operand
[1].mode
;
18182 mode2
= insn_data
[icode
].operand
[2].mode
;
18184 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18185 op0
= copy_to_mode_reg (mode1
, op0
);
18186 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18188 /* @@@ better error message */
18189 error ("mask must be an immediate");
18193 || GET_MODE (target
) != tmode
18194 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18195 target
= gen_reg_rtx (tmode
);
18196 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18202 case IX86_BUILTIN_PSLLWI128
:
18203 icode
= CODE_FOR_ashlv8hi3
;
18205 case IX86_BUILTIN_PSLLDI128
:
18206 icode
= CODE_FOR_ashlv4si3
;
18208 case IX86_BUILTIN_PSLLQI128
:
18209 icode
= CODE_FOR_ashlv2di3
;
18211 case IX86_BUILTIN_PSRAWI128
:
18212 icode
= CODE_FOR_ashrv8hi3
;
18214 case IX86_BUILTIN_PSRADI128
:
18215 icode
= CODE_FOR_ashrv4si3
;
18217 case IX86_BUILTIN_PSRLWI128
:
18218 icode
= CODE_FOR_lshrv8hi3
;
18220 case IX86_BUILTIN_PSRLDI128
:
18221 icode
= CODE_FOR_lshrv4si3
;
18223 case IX86_BUILTIN_PSRLQI128
:
18224 icode
= CODE_FOR_lshrv2di3
;
18227 arg0
= CALL_EXPR_ARG (exp
, 0);
18228 arg1
= CALL_EXPR_ARG (exp
, 1);
18229 op0
= expand_normal (arg0
);
18230 op1
= expand_normal (arg1
);
18232 if (!CONST_INT_P (op1
))
18234 error ("shift must be an immediate");
18237 if (INTVAL (op1
) < 0 || INTVAL (op1
) > 255)
18238 op1
= GEN_INT (255);
18240 tmode
= insn_data
[icode
].operand
[0].mode
;
18241 mode1
= insn_data
[icode
].operand
[1].mode
;
18242 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18243 op0
= copy_to_reg (op0
);
18245 target
= gen_reg_rtx (tmode
);
18246 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18252 case IX86_BUILTIN_PSLLW128
:
18253 icode
= CODE_FOR_ashlv8hi3
;
18255 case IX86_BUILTIN_PSLLD128
:
18256 icode
= CODE_FOR_ashlv4si3
;
18258 case IX86_BUILTIN_PSLLQ128
:
18259 icode
= CODE_FOR_ashlv2di3
;
18261 case IX86_BUILTIN_PSRAW128
:
18262 icode
= CODE_FOR_ashrv8hi3
;
18264 case IX86_BUILTIN_PSRAD128
:
18265 icode
= CODE_FOR_ashrv4si3
;
18267 case IX86_BUILTIN_PSRLW128
:
18268 icode
= CODE_FOR_lshrv8hi3
;
18270 case IX86_BUILTIN_PSRLD128
:
18271 icode
= CODE_FOR_lshrv4si3
;
18273 case IX86_BUILTIN_PSRLQ128
:
18274 icode
= CODE_FOR_lshrv2di3
;
18277 arg0
= CALL_EXPR_ARG (exp
, 0);
18278 arg1
= CALL_EXPR_ARG (exp
, 1);
18279 op0
= expand_normal (arg0
);
18280 op1
= expand_normal (arg1
);
18282 tmode
= insn_data
[icode
].operand
[0].mode
;
18283 mode1
= insn_data
[icode
].operand
[1].mode
;
18285 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18286 op0
= copy_to_reg (op0
);
18288 op1
= simplify_gen_subreg (TImode
, op1
, GET_MODE (op1
), 0);
18289 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, TImode
))
18290 op1
= copy_to_reg (op1
);
18292 target
= gen_reg_rtx (tmode
);
18293 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18299 case IX86_BUILTIN_PSLLDQI128
:
18300 case IX86_BUILTIN_PSRLDQI128
:
18301 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
18302 : CODE_FOR_sse2_lshrti3
);
18303 arg0
= CALL_EXPR_ARG (exp
, 0);
18304 arg1
= CALL_EXPR_ARG (exp
, 1);
18305 op0
= expand_normal (arg0
);
18306 op1
= expand_normal (arg1
);
18307 tmode
= insn_data
[icode
].operand
[0].mode
;
18308 mode1
= insn_data
[icode
].operand
[1].mode
;
18309 mode2
= insn_data
[icode
].operand
[2].mode
;
18311 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18313 op0
= copy_to_reg (op0
);
18314 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18316 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18318 error ("shift must be an immediate");
18321 target
= gen_reg_rtx (V2DImode
);
18322 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
18329 case IX86_BUILTIN_FEMMS
:
18330 emit_insn (gen_mmx_femms ());
18333 case IX86_BUILTIN_PAVGUSB
:
18334 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
18336 case IX86_BUILTIN_PF2ID
:
18337 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
18339 case IX86_BUILTIN_PFACC
:
18340 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
18342 case IX86_BUILTIN_PFADD
:
18343 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
18345 case IX86_BUILTIN_PFCMPEQ
:
18346 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
18348 case IX86_BUILTIN_PFCMPGE
:
18349 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
18351 case IX86_BUILTIN_PFCMPGT
:
18352 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
18354 case IX86_BUILTIN_PFMAX
:
18355 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
18357 case IX86_BUILTIN_PFMIN
:
18358 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
18360 case IX86_BUILTIN_PFMUL
:
18361 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
18363 case IX86_BUILTIN_PFRCP
:
18364 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
18366 case IX86_BUILTIN_PFRCPIT1
:
18367 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
18369 case IX86_BUILTIN_PFRCPIT2
:
18370 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
18372 case IX86_BUILTIN_PFRSQIT1
:
18373 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
18375 case IX86_BUILTIN_PFRSQRT
:
18376 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
18378 case IX86_BUILTIN_PFSUB
:
18379 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
18381 case IX86_BUILTIN_PFSUBR
:
18382 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
18384 case IX86_BUILTIN_PI2FD
:
18385 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
18387 case IX86_BUILTIN_PMULHRW
:
18388 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
18390 case IX86_BUILTIN_PF2IW
:
18391 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18393 case IX86_BUILTIN_PFNACC
:
18394 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18396 case IX86_BUILTIN_PFPNACC
:
18397 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18399 case IX86_BUILTIN_PI2FW
:
18400 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18402 case IX86_BUILTIN_PSWAPDSI
:
18403 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18405 case IX86_BUILTIN_PSWAPDSF
:
18406 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18408 case IX86_BUILTIN_SQRTSD
:
18409 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18410 case IX86_BUILTIN_LOADUPD
:
18411 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18412 case IX86_BUILTIN_STOREUPD
:
18413 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18415 case IX86_BUILTIN_MFENCE
:
18416 emit_insn (gen_sse2_mfence ());
18418 case IX86_BUILTIN_LFENCE
:
18419 emit_insn (gen_sse2_lfence ());
18422 case IX86_BUILTIN_CLFLUSH
:
18423 arg0
= CALL_EXPR_ARG (exp
, 0);
18424 op0
= expand_normal (arg0
);
18425 icode
= CODE_FOR_sse2_clflush
;
18426 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18427 op0
= copy_to_mode_reg (Pmode
, op0
);
18429 emit_insn (gen_sse2_clflush (op0
));
18432 case IX86_BUILTIN_MOVNTPD
:
18433 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18434 case IX86_BUILTIN_MOVNTDQ
:
18435 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18436 case IX86_BUILTIN_MOVNTI
:
18437 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18439 case IX86_BUILTIN_LOADDQU
:
18440 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18441 case IX86_BUILTIN_STOREDQU
:
18442 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18444 case IX86_BUILTIN_MONITOR
:
18445 arg0
= CALL_EXPR_ARG (exp
, 0);
18446 arg1
= CALL_EXPR_ARG (exp
, 1);
18447 arg2
= CALL_EXPR_ARG (exp
, 2);
18448 op0
= expand_normal (arg0
);
18449 op1
= expand_normal (arg1
);
18450 op2
= expand_normal (arg2
);
18452 op0
= copy_to_mode_reg (Pmode
, op0
);
18454 op1
= copy_to_mode_reg (SImode
, op1
);
18456 op2
= copy_to_mode_reg (SImode
, op2
);
18458 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18460 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18463 case IX86_BUILTIN_MWAIT
:
18464 arg0
= CALL_EXPR_ARG (exp
, 0);
18465 arg1
= CALL_EXPR_ARG (exp
, 1);
18466 op0
= expand_normal (arg0
);
18467 op1
= expand_normal (arg1
);
18469 op0
= copy_to_mode_reg (SImode
, op0
);
18471 op1
= copy_to_mode_reg (SImode
, op1
);
18472 emit_insn (gen_sse3_mwait (op0
, op1
));
18475 case IX86_BUILTIN_LDDQU
:
18476 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18479 case IX86_BUILTIN_PALIGNR
:
18480 case IX86_BUILTIN_PALIGNR128
:
18481 if (fcode
== IX86_BUILTIN_PALIGNR
)
18483 icode
= CODE_FOR_ssse3_palignrdi
;
18488 icode
= CODE_FOR_ssse3_palignrti
;
18491 arg0
= CALL_EXPR_ARG (exp
, 0);
18492 arg1
= CALL_EXPR_ARG (exp
, 1);
18493 arg2
= CALL_EXPR_ARG (exp
, 2);
18494 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18495 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18496 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18497 tmode
= insn_data
[icode
].operand
[0].mode
;
18498 mode1
= insn_data
[icode
].operand
[1].mode
;
18499 mode2
= insn_data
[icode
].operand
[2].mode
;
18500 mode3
= insn_data
[icode
].operand
[3].mode
;
18502 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18504 op0
= copy_to_reg (op0
);
18505 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18507 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18509 op1
= copy_to_reg (op1
);
18510 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18512 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18514 error ("shift must be an immediate");
18517 target
= gen_reg_rtx (mode
);
18518 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18525 case IX86_BUILTIN_MOVNTSD
:
18526 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18528 case IX86_BUILTIN_MOVNTSS
:
18529 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18531 case IX86_BUILTIN_INSERTQ
:
18532 case IX86_BUILTIN_EXTRQ
:
18533 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18534 ? CODE_FOR_sse4a_extrq
18535 : CODE_FOR_sse4a_insertq
);
18536 arg0
= CALL_EXPR_ARG (exp
, 0);
18537 arg1
= CALL_EXPR_ARG (exp
, 1);
18538 op0
= expand_normal (arg0
);
18539 op1
= expand_normal (arg1
);
18540 tmode
= insn_data
[icode
].operand
[0].mode
;
18541 mode1
= insn_data
[icode
].operand
[1].mode
;
18542 mode2
= insn_data
[icode
].operand
[2].mode
;
18543 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18544 op0
= copy_to_mode_reg (mode1
, op0
);
18545 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18546 op1
= copy_to_mode_reg (mode2
, op1
);
18547 if (optimize
|| target
== 0
18548 || GET_MODE (target
) != tmode
18549 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18550 target
= gen_reg_rtx (tmode
);
18551 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18557 case IX86_BUILTIN_EXTRQI
:
18558 icode
= CODE_FOR_sse4a_extrqi
;
18559 arg0
= CALL_EXPR_ARG (exp
, 0);
18560 arg1
= CALL_EXPR_ARG (exp
, 1);
18561 arg2
= CALL_EXPR_ARG (exp
, 2);
18562 op0
= expand_normal (arg0
);
18563 op1
= expand_normal (arg1
);
18564 op2
= expand_normal (arg2
);
18565 tmode
= insn_data
[icode
].operand
[0].mode
;
18566 mode1
= insn_data
[icode
].operand
[1].mode
;
18567 mode2
= insn_data
[icode
].operand
[2].mode
;
18568 mode3
= insn_data
[icode
].operand
[3].mode
;
18569 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18570 op0
= copy_to_mode_reg (mode1
, op0
);
18571 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18573 error ("index mask must be an immediate");
18574 return gen_reg_rtx (tmode
);
18576 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18578 error ("length mask must be an immediate");
18579 return gen_reg_rtx (tmode
);
18581 if (optimize
|| target
== 0
18582 || GET_MODE (target
) != tmode
18583 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18584 target
= gen_reg_rtx (tmode
);
18585 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18591 case IX86_BUILTIN_INSERTQI
:
18592 icode
= CODE_FOR_sse4a_insertqi
;
18593 arg0
= CALL_EXPR_ARG (exp
, 0);
18594 arg1
= CALL_EXPR_ARG (exp
, 1);
18595 arg2
= CALL_EXPR_ARG (exp
, 2);
18596 arg3
= CALL_EXPR_ARG (exp
, 3);
18597 op0
= expand_normal (arg0
);
18598 op1
= expand_normal (arg1
);
18599 op2
= expand_normal (arg2
);
18600 op3
= expand_normal (arg3
);
18601 tmode
= insn_data
[icode
].operand
[0].mode
;
18602 mode1
= insn_data
[icode
].operand
[1].mode
;
18603 mode2
= insn_data
[icode
].operand
[2].mode
;
18604 mode3
= insn_data
[icode
].operand
[3].mode
;
18605 mode4
= insn_data
[icode
].operand
[4].mode
;
18607 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18608 op0
= copy_to_mode_reg (mode1
, op0
);
18610 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18611 op1
= copy_to_mode_reg (mode2
, op1
);
18613 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18615 error ("index mask must be an immediate");
18616 return gen_reg_rtx (tmode
);
18618 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18620 error ("length mask must be an immediate");
18621 return gen_reg_rtx (tmode
);
18623 if (optimize
|| target
== 0
18624 || GET_MODE (target
) != tmode
18625 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18626 target
= gen_reg_rtx (tmode
);
18627 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18633 case IX86_BUILTIN_VEC_INIT_V2SI
:
18634 case IX86_BUILTIN_VEC_INIT_V4HI
:
18635 case IX86_BUILTIN_VEC_INIT_V8QI
:
18636 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18638 case IX86_BUILTIN_VEC_EXT_V2DF
:
18639 case IX86_BUILTIN_VEC_EXT_V2DI
:
18640 case IX86_BUILTIN_VEC_EXT_V4SF
:
18641 case IX86_BUILTIN_VEC_EXT_V4SI
:
18642 case IX86_BUILTIN_VEC_EXT_V8HI
:
18643 case IX86_BUILTIN_VEC_EXT_V2SI
:
18644 case IX86_BUILTIN_VEC_EXT_V4HI
:
18645 return ix86_expand_vec_ext_builtin (exp
, target
);
18647 case IX86_BUILTIN_VEC_SET_V8HI
:
18648 case IX86_BUILTIN_VEC_SET_V4HI
:
18649 return ix86_expand_vec_set_builtin (exp
);
18655 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18656 if (d
->code
== fcode
)
18658 /* Compares are treated specially. */
18659 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18660 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18661 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18662 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18663 return ix86_expand_sse_compare (d
, exp
, target
);
18665 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18668 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18669 if (d
->code
== fcode
)
18670 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18672 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18673 if (d
->code
== fcode
)
18674 return ix86_expand_sse_comi (d
, exp
, target
);
18676 gcc_unreachable ();
18679 /* Returns a function decl for a vectorized version of the builtin function
18680 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18681 if it is not available. */
18684 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18687 enum machine_mode in_mode
, out_mode
;
18690 if (TREE_CODE (type_out
) != VECTOR_TYPE
18691 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18694 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18695 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18696 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18697 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18701 case BUILT_IN_SQRT
:
18702 if (out_mode
== DFmode
&& out_n
== 2
18703 && in_mode
== DFmode
&& in_n
== 2)
18704 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18707 case BUILT_IN_SQRTF
:
18708 if (out_mode
== SFmode
&& out_n
== 4
18709 && in_mode
== SFmode
&& in_n
== 4)
18710 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18713 case BUILT_IN_LRINTF
:
18714 if (out_mode
== SImode
&& out_n
== 4
18715 && in_mode
== SFmode
&& in_n
== 4)
18716 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18726 /* Returns a decl of a function that implements conversion of the
18727 input vector of type TYPE, or NULL_TREE if it is not available. */
18730 ix86_builtin_conversion (enum tree_code code
, tree type
)
18732 if (TREE_CODE (type
) != VECTOR_TYPE
)
18738 switch (TYPE_MODE (type
))
18741 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
18746 case FIX_TRUNC_EXPR
:
18747 switch (TYPE_MODE (type
))
18750 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18760 /* Store OPERAND to the memory after reload is completed. This means
18761 that we can't easily use assign_stack_local. */
18763 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18767 gcc_assert (reload_completed
);
18768 if (TARGET_RED_ZONE
)
18770 result
= gen_rtx_MEM (mode
,
18771 gen_rtx_PLUS (Pmode
,
18773 GEN_INT (-RED_ZONE_SIZE
)));
18774 emit_move_insn (result
, operand
);
18776 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18782 operand
= gen_lowpart (DImode
, operand
);
18786 gen_rtx_SET (VOIDmode
,
18787 gen_rtx_MEM (DImode
,
18788 gen_rtx_PRE_DEC (DImode
,
18789 stack_pointer_rtx
)),
18793 gcc_unreachable ();
18795 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18804 split_di (&operand
, 1, operands
, operands
+ 1);
18806 gen_rtx_SET (VOIDmode
,
18807 gen_rtx_MEM (SImode
,
18808 gen_rtx_PRE_DEC (Pmode
,
18809 stack_pointer_rtx
)),
18812 gen_rtx_SET (VOIDmode
,
18813 gen_rtx_MEM (SImode
,
18814 gen_rtx_PRE_DEC (Pmode
,
18815 stack_pointer_rtx
)),
18820 /* Store HImodes as SImodes. */
18821 operand
= gen_lowpart (SImode
, operand
);
18825 gen_rtx_SET (VOIDmode
,
18826 gen_rtx_MEM (GET_MODE (operand
),
18827 gen_rtx_PRE_DEC (SImode
,
18828 stack_pointer_rtx
)),
18832 gcc_unreachable ();
18834 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18839 /* Free operand from the memory. */
18841 ix86_free_from_memory (enum machine_mode mode
)
18843 if (!TARGET_RED_ZONE
)
18847 if (mode
== DImode
|| TARGET_64BIT
)
18851 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18852 to pop or add instruction if registers are available. */
18853 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18854 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18859 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18860 QImode must go into class Q_REGS.
18861 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18862 movdf to do mem-to-mem moves through integer regs. */
18864 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18866 enum machine_mode mode
= GET_MODE (x
);
18868 /* We're only allowed to return a subclass of CLASS. Many of the
18869 following checks fail for NO_REGS, so eliminate that early. */
18870 if (class == NO_REGS
)
18873 /* All classes can load zeros. */
18874 if (x
== CONST0_RTX (mode
))
18877 /* Force constants into memory if we are loading a (nonzero) constant into
18878 an MMX or SSE register. This is because there are no MMX/SSE instructions
18879 to load from a constant. */
18881 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18884 /* Prefer SSE regs only, if we can use them for math. */
18885 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18886 return SSE_CLASS_P (class) ? class : NO_REGS
;
18888 /* Floating-point constants need more complex checks. */
18889 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18891 /* General regs can load everything. */
18892 if (reg_class_subset_p (class, GENERAL_REGS
))
18895 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18896 zero above. We only want to wind up preferring 80387 registers if
18897 we plan on doing computation with them. */
18899 && standard_80387_constant_p (x
))
18901 /* Limit class to non-sse. */
18902 if (class == FLOAT_SSE_REGS
)
18904 if (class == FP_TOP_SSE_REGS
)
18906 if (class == FP_SECOND_SSE_REGS
)
18907 return FP_SECOND_REG
;
18908 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18915 /* Generally when we see PLUS here, it's the function invariant
18916 (plus soft-fp const_int). Which can only be computed into general
18918 if (GET_CODE (x
) == PLUS
)
18919 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18921 /* QImode constants are easy to load, but non-constant QImode data
18922 must go into Q_REGS. */
18923 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18925 if (reg_class_subset_p (class, Q_REGS
))
18927 if (reg_class_subset_p (Q_REGS
, class))
18935 /* Discourage putting floating-point values in SSE registers unless
18936 SSE math is being used, and likewise for the 387 registers. */
18938 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18940 enum machine_mode mode
= GET_MODE (x
);
18942 /* Restrict the output reload class to the register bank that we are doing
18943 math on. If we would like not to return a subset of CLASS, reject this
18944 alternative: if reload cannot do this, it will still use its choice. */
18945 mode
= GET_MODE (x
);
18946 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18947 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18949 if (X87_FLOAT_MODE_P (mode
))
18951 if (class == FP_TOP_SSE_REGS
)
18953 else if (class == FP_SECOND_SSE_REGS
)
18954 return FP_SECOND_REG
;
18956 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18962 /* If we are copying between general and FP registers, we need a memory
18963 location. The same is true for SSE and MMX registers.
18965 The macro can't work reliably when one of the CLASSES is class containing
18966 registers from multiple units (SSE, MMX, integer). We avoid this by never
18967 combining those units in single alternative in the machine description.
18968 Ensure that this constraint holds to avoid unexpected surprises.
18970 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18971 enforce these sanity checks. */
18974 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18975 enum machine_mode mode
, int strict
)
18977 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18978 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18979 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18980 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18981 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18982 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18984 gcc_assert (!strict
);
18988 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18991 /* ??? This is a lie. We do have moves between mmx/general, and for
18992 mmx/sse2. But by saying we need secondary memory we discourage the
18993 register allocator from using the mmx registers unless needed. */
18994 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18997 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18999 /* SSE1 doesn't have any direct moves from other classes. */
19003 /* If the target says that inter-unit moves are more expensive
19004 than moving through memory, then don't generate them. */
19005 if (!TARGET_INTER_UNIT_MOVES
)
19008 /* Between SSE and general, we have moves no larger than word size. */
19009 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19016 /* Return true if the registers in CLASS cannot represent the change from
19017 modes FROM to TO. */
19020 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
19021 enum reg_class
class)
19026 /* x87 registers can't do subreg at all, as all values are reformatted
19027 to extended precision. */
19028 if (MAYBE_FLOAT_CLASS_P (class))
19031 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19033 /* Vector registers do not support QI or HImode loads. If we don't
19034 disallow a change to these modes, reload will assume it's ok to
19035 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19036 the vec_dupv4hi pattern. */
19037 if (GET_MODE_SIZE (from
) < 4)
19040 /* Vector registers do not support subreg with nonzero offsets, which
19041 are otherwise valid for integer registers. Since we can't see
19042 whether we have a nonzero offset from here, prohibit all
19043 nonparadoxical subregs changing size. */
19044 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
19051 /* Return the cost of moving data from a register in class CLASS1 to
19052 one in class CLASS2.
19054 It is not required that the cost always equal 2 when FROM is the same as TO;
19055 on some machines it is expensive to move between registers if they are not
19056 general registers. */
19059 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
19060 enum reg_class class2
)
19062 /* In case we require secondary memory, compute cost of the store followed
19063 by load. In order to avoid bad register allocation choices, we need
19064 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19066 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
19070 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
19071 MEMORY_MOVE_COST (mode
, class1
, 1));
19072 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
19073 MEMORY_MOVE_COST (mode
, class2
, 1));
19075 /* In case of copying from general_purpose_register we may emit multiple
19076 stores followed by single load causing memory size mismatch stall.
19077 Count this as arbitrarily high cost of 20. */
19078 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
19081 /* In the case of FP/MMX moves, the registers actually overlap, and we
19082 have to switch modes in order to treat them differently. */
19083 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
19084 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
19090 /* Moves between SSE/MMX and integer unit are expensive. */
19091 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
19092 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
19093 return ix86_cost
->mmxsse_to_integer
;
19094 if (MAYBE_FLOAT_CLASS_P (class1
))
19095 return ix86_cost
->fp_move
;
19096 if (MAYBE_SSE_CLASS_P (class1
))
19097 return ix86_cost
->sse_move
;
19098 if (MAYBE_MMX_CLASS_P (class1
))
19099 return ix86_cost
->mmx_move
;
19103 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19106 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
19108 /* Flags and only flags can only hold CCmode values. */
19109 if (CC_REGNO_P (regno
))
19110 return GET_MODE_CLASS (mode
) == MODE_CC
;
19111 if (GET_MODE_CLASS (mode
) == MODE_CC
19112 || GET_MODE_CLASS (mode
) == MODE_RANDOM
19113 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
19115 if (FP_REGNO_P (regno
))
19116 return VALID_FP_MODE_P (mode
);
19117 if (SSE_REGNO_P (regno
))
19119 /* We implement the move patterns for all vector modes into and
19120 out of SSE registers, even when no operation instructions
19122 return (VALID_SSE_REG_MODE (mode
)
19123 || VALID_SSE2_REG_MODE (mode
)
19124 || VALID_MMX_REG_MODE (mode
)
19125 || VALID_MMX_REG_MODE_3DNOW (mode
));
19127 if (MMX_REGNO_P (regno
))
19129 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19130 so if the register is available at all, then we can move data of
19131 the given mode into or out of it. */
19132 return (VALID_MMX_REG_MODE (mode
)
19133 || VALID_MMX_REG_MODE_3DNOW (mode
));
19136 if (mode
== QImode
)
19138 /* Take care for QImode values - they can be in non-QI regs,
19139 but then they do cause partial register stalls. */
19140 if (regno
< 4 || TARGET_64BIT
)
19142 if (!TARGET_PARTIAL_REG_STALL
)
19144 return reload_in_progress
|| reload_completed
;
19146 /* We handle both integer and floats in the general purpose registers. */
19147 else if (VALID_INT_MODE_P (mode
))
19149 else if (VALID_FP_MODE_P (mode
))
19151 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19152 on to use that value in smaller contexts, this can easily force a
19153 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19154 supporting DImode, allow it. */
19155 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
19161 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19162 tieable integer mode. */
19165 ix86_tieable_integer_mode_p (enum machine_mode mode
)
19174 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
19177 return TARGET_64BIT
;
19184 /* Return true if MODE1 is accessible in a register that can hold MODE2
19185 without copying. That is, all register classes that can hold MODE2
19186 can also hold MODE1. */
19189 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
19191 if (mode1
== mode2
)
19194 if (ix86_tieable_integer_mode_p (mode1
)
19195 && ix86_tieable_integer_mode_p (mode2
))
19198 /* MODE2 being XFmode implies fp stack or general regs, which means we
19199 can tie any smaller floating point modes to it. Note that we do not
19200 tie this with TFmode. */
19201 if (mode2
== XFmode
)
19202 return mode1
== SFmode
|| mode1
== DFmode
;
19204 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19205 that we can tie it with SFmode. */
19206 if (mode2
== DFmode
)
19207 return mode1
== SFmode
;
19209 /* If MODE2 is only appropriate for an SSE register, then tie with
19210 any other mode acceptable to SSE registers. */
19211 if (GET_MODE_SIZE (mode2
) == 16
19212 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
19213 return (GET_MODE_SIZE (mode1
) == 16
19214 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
19216 /* If MODE2 is appropriate for an MMX register, then tie
19217 with any other mode acceptable to MMX registers. */
19218 if (GET_MODE_SIZE (mode2
) == 8
19219 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
19220 return (GET_MODE_SIZE (mode1
) == 8
19221 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
19226 /* Return the cost of moving data of mode M between a
19227 register and memory. A value of 2 is the default; this cost is
19228 relative to those in `REGISTER_MOVE_COST'.
19230 If moving between registers and memory is more expensive than
19231 between two registers, you should define this macro to express the
19234 Model also increased moving costs of QImode registers in non
19238 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
19240 if (FLOAT_CLASS_P (class))
19257 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
19259 if (SSE_CLASS_P (class))
19262 switch (GET_MODE_SIZE (mode
))
19276 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
19278 if (MMX_CLASS_P (class))
19281 switch (GET_MODE_SIZE (mode
))
19292 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
19294 switch (GET_MODE_SIZE (mode
))
19298 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
19299 : ix86_cost
->movzbl_load
);
19301 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
19302 : ix86_cost
->int_store
[0] + 4);
19305 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
19307 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19308 if (mode
== TFmode
)
19310 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
19311 * (((int) GET_MODE_SIZE (mode
)
19312 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
19316 /* Compute a (partial) cost for rtx X. Return true if the complete
19317 cost has been computed, and false if subexpressions should be
19318 scanned. In either case, *TOTAL contains the cost result. */
19321 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
19323 enum machine_mode mode
= GET_MODE (x
);
19331 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
19333 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
19335 else if (flag_pic
&& SYMBOLIC_CONST (x
)
19337 || (!GET_CODE (x
) != LABEL_REF
19338 && (GET_CODE (x
) != SYMBOL_REF
19339 || !SYMBOL_REF_LOCAL_P (x
)))))
19346 if (mode
== VOIDmode
)
19349 switch (standard_80387_constant_p (x
))
19354 default: /* Other constants */
19359 /* Start with (MEM (SYMBOL_REF)), since that's where
19360 it'll probably end up. Add a penalty for size. */
19361 *total
= (COSTS_N_INSNS (1)
19362 + (flag_pic
!= 0 && !TARGET_64BIT
)
19363 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
19369 /* The zero extensions is often completely free on x86_64, so make
19370 it as cheap as possible. */
19371 if (TARGET_64BIT
&& mode
== DImode
19372 && GET_MODE (XEXP (x
, 0)) == SImode
)
19374 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19375 *total
= ix86_cost
->add
;
19377 *total
= ix86_cost
->movzx
;
19381 *total
= ix86_cost
->movsx
;
19385 if (CONST_INT_P (XEXP (x
, 1))
19386 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
19388 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19391 *total
= ix86_cost
->add
;
19394 if ((value
== 2 || value
== 3)
19395 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19397 *total
= ix86_cost
->lea
;
19407 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19409 if (CONST_INT_P (XEXP (x
, 1)))
19411 if (INTVAL (XEXP (x
, 1)) > 32)
19412 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19414 *total
= ix86_cost
->shift_const
* 2;
19418 if (GET_CODE (XEXP (x
, 1)) == AND
)
19419 *total
= ix86_cost
->shift_var
* 2;
19421 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19426 if (CONST_INT_P (XEXP (x
, 1)))
19427 *total
= ix86_cost
->shift_const
;
19429 *total
= ix86_cost
->shift_var
;
19434 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19436 /* ??? SSE scalar cost should be used here. */
19437 *total
= ix86_cost
->fmul
;
19440 else if (X87_FLOAT_MODE_P (mode
))
19442 *total
= ix86_cost
->fmul
;
19445 else if (FLOAT_MODE_P (mode
))
19447 /* ??? SSE vector cost should be used here. */
19448 *total
= ix86_cost
->fmul
;
19453 rtx op0
= XEXP (x
, 0);
19454 rtx op1
= XEXP (x
, 1);
19456 if (CONST_INT_P (XEXP (x
, 1)))
19458 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19459 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19463 /* This is arbitrary. */
19466 /* Compute costs correctly for widening multiplication. */
19467 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19468 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19469 == GET_MODE_SIZE (mode
))
19471 int is_mulwiden
= 0;
19472 enum machine_mode inner_mode
= GET_MODE (op0
);
19474 if (GET_CODE (op0
) == GET_CODE (op1
))
19475 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19476 else if (CONST_INT_P (op1
))
19478 if (GET_CODE (op0
) == SIGN_EXTEND
)
19479 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19482 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19486 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19489 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19490 + nbits
* ix86_cost
->mult_bit
19491 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19500 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19501 /* ??? SSE cost should be used here. */
19502 *total
= ix86_cost
->fdiv
;
19503 else if (X87_FLOAT_MODE_P (mode
))
19504 *total
= ix86_cost
->fdiv
;
19505 else if (FLOAT_MODE_P (mode
))
19506 /* ??? SSE vector cost should be used here. */
19507 *total
= ix86_cost
->fdiv
;
19509 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19513 if (GET_MODE_CLASS (mode
) == MODE_INT
19514 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19516 if (GET_CODE (XEXP (x
, 0)) == PLUS
19517 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19518 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19519 && CONSTANT_P (XEXP (x
, 1)))
19521 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19522 if (val
== 2 || val
== 4 || val
== 8)
19524 *total
= ix86_cost
->lea
;
19525 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19526 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19528 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19532 else if (GET_CODE (XEXP (x
, 0)) == MULT
19533 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19535 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19536 if (val
== 2 || val
== 4 || val
== 8)
19538 *total
= ix86_cost
->lea
;
19539 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19540 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19544 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19546 *total
= ix86_cost
->lea
;
19547 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19548 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19549 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19556 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19558 /* ??? SSE cost should be used here. */
19559 *total
= ix86_cost
->fadd
;
19562 else if (X87_FLOAT_MODE_P (mode
))
19564 *total
= ix86_cost
->fadd
;
19567 else if (FLOAT_MODE_P (mode
))
19569 /* ??? SSE vector cost should be used here. */
19570 *total
= ix86_cost
->fadd
;
19578 if (!TARGET_64BIT
&& mode
== DImode
)
19580 *total
= (ix86_cost
->add
* 2
19581 + (rtx_cost (XEXP (x
, 0), outer_code
)
19582 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19583 + (rtx_cost (XEXP (x
, 1), outer_code
)
19584 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19590 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19592 /* ??? SSE cost should be used here. */
19593 *total
= ix86_cost
->fchs
;
19596 else if (X87_FLOAT_MODE_P (mode
))
19598 *total
= ix86_cost
->fchs
;
19601 else if (FLOAT_MODE_P (mode
))
19603 /* ??? SSE vector cost should be used here. */
19604 *total
= ix86_cost
->fchs
;
19610 if (!TARGET_64BIT
&& mode
== DImode
)
19611 *total
= ix86_cost
->add
* 2;
19613 *total
= ix86_cost
->add
;
19617 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19618 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19619 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19620 && XEXP (x
, 1) == const0_rtx
)
19622 /* This kind of construct is implemented using test[bwl].
19623 Treat it as if we had an AND. */
19624 *total
= (ix86_cost
->add
19625 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19626 + rtx_cost (const1_rtx
, outer_code
));
19632 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
19637 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19638 /* ??? SSE cost should be used here. */
19639 *total
= ix86_cost
->fabs
;
19640 else if (X87_FLOAT_MODE_P (mode
))
19641 *total
= ix86_cost
->fabs
;
19642 else if (FLOAT_MODE_P (mode
))
19643 /* ??? SSE vector cost should be used here. */
19644 *total
= ix86_cost
->fabs
;
19648 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19649 /* ??? SSE cost should be used here. */
19650 *total
= ix86_cost
->fsqrt
;
19651 else if (X87_FLOAT_MODE_P (mode
))
19652 *total
= ix86_cost
->fsqrt
;
19653 else if (FLOAT_MODE_P (mode
))
19654 /* ??? SSE vector cost should be used here. */
19655 *total
= ix86_cost
->fsqrt
;
19659 if (XINT (x
, 1) == UNSPEC_TP
)
19670 static int current_machopic_label_num
;
19672 /* Given a symbol name and its associated stub, write out the
19673 definition of the stub. */
19676 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19678 unsigned int length
;
19679 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19680 int label
= ++current_machopic_label_num
;
19682 /* For 64-bit we shouldn't get here. */
19683 gcc_assert (!TARGET_64BIT
);
19685 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19686 symb
= (*targetm
.strip_name_encoding
) (symb
);
19688 length
= strlen (stub
);
19689 binder_name
= alloca (length
+ 32);
19690 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19692 length
= strlen (symb
);
19693 symbol_name
= alloca (length
+ 32);
19694 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19696 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19699 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19701 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19703 fprintf (file
, "%s:\n", stub
);
19704 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19708 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19709 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19710 fprintf (file
, "\tjmp\t*%%edx\n");
19713 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19715 fprintf (file
, "%s:\n", binder_name
);
19719 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19720 fprintf (file
, "\tpushl\t%%eax\n");
19723 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19725 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19727 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19728 fprintf (file
, "%s:\n", lazy_ptr_name
);
19729 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19730 fprintf (file
, "\t.long %s\n", binder_name
);
19734 darwin_x86_file_end (void)
19736 darwin_file_end ();
19739 #endif /* TARGET_MACHO */
19741 /* Order the registers for register allocator. */
19744 x86_order_regs_for_local_alloc (void)
19749 /* First allocate the local general purpose registers. */
19750 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19751 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19752 reg_alloc_order
[pos
++] = i
;
19754 /* Global general purpose registers. */
19755 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19756 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19757 reg_alloc_order
[pos
++] = i
;
19759 /* x87 registers come first in case we are doing FP math
19761 if (!TARGET_SSE_MATH
)
19762 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19763 reg_alloc_order
[pos
++] = i
;
19765 /* SSE registers. */
19766 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19767 reg_alloc_order
[pos
++] = i
;
19768 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19769 reg_alloc_order
[pos
++] = i
;
19771 /* x87 registers. */
19772 if (TARGET_SSE_MATH
)
19773 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19774 reg_alloc_order
[pos
++] = i
;
19776 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19777 reg_alloc_order
[pos
++] = i
;
19779 /* Initialize the rest of array as we do not allocate some registers
19781 while (pos
< FIRST_PSEUDO_REGISTER
)
19782 reg_alloc_order
[pos
++] = 0;
19785 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19786 struct attribute_spec.handler. */
19788 ix86_handle_struct_attribute (tree
*node
, tree name
,
19789 tree args ATTRIBUTE_UNUSED
,
19790 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19793 if (DECL_P (*node
))
19795 if (TREE_CODE (*node
) == TYPE_DECL
)
19796 type
= &TREE_TYPE (*node
);
19801 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19802 || TREE_CODE (*type
) == UNION_TYPE
)))
19804 warning (OPT_Wattributes
, "%qs attribute ignored",
19805 IDENTIFIER_POINTER (name
));
19806 *no_add_attrs
= true;
19809 else if ((is_attribute_p ("ms_struct", name
)
19810 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19811 || ((is_attribute_p ("gcc_struct", name
)
19812 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19814 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19815 IDENTIFIER_POINTER (name
));
19816 *no_add_attrs
= true;
19823 ix86_ms_bitfield_layout_p (tree record_type
)
19825 return (TARGET_MS_BITFIELD_LAYOUT
&&
19826 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19827 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19830 /* Returns an expression indicating where the this parameter is
19831 located on entry to the FUNCTION. */
19834 x86_this_parameter (tree function
)
19836 tree type
= TREE_TYPE (function
);
19837 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19841 const int *parm_regs
;
19843 if (TARGET_64BIT_MS_ABI
)
19844 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
19846 parm_regs
= x86_64_int_parameter_registers
;
19847 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
19850 if (ix86_function_regparm (type
, function
) > 0
19851 && !type_has_variadic_args_p (type
))
19854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19856 return gen_rtx_REG (SImode
, regno
);
19859 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
19862 /* Determine whether x86_output_mi_thunk can succeed. */
19865 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19866 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19867 HOST_WIDE_INT vcall_offset
, tree function
)
19869 /* 64-bit can handle anything. */
19873 /* For 32-bit, everything's fine if we have one free register. */
19874 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19877 /* Need a free register for vcall_offset. */
19881 /* Need a free register for GOT references. */
19882 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19885 /* Otherwise ok. */
19889 /* Output the assembler code for a thunk function. THUNK_DECL is the
19890 declaration for the thunk function itself, FUNCTION is the decl for
19891 the target function. DELTA is an immediate constant offset to be
19892 added to THIS. If VCALL_OFFSET is nonzero, the word at
19893 *(*this + vcall_offset) should be added to THIS. */
19896 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19897 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19898 HOST_WIDE_INT vcall_offset
, tree function
)
19901 rtx
this = x86_this_parameter (function
);
19904 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19905 pull it in now and let DELTA benefit. */
19908 else if (vcall_offset
)
19910 /* Put the this parameter into %eax. */
19912 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19913 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19916 this_reg
= NULL_RTX
;
19918 /* Adjust the this parameter by a fixed constant. */
19921 xops
[0] = GEN_INT (delta
);
19922 xops
[1] = this_reg
? this_reg
: this;
19925 if (!x86_64_general_operand (xops
[0], DImode
))
19927 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19929 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19933 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19936 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19939 /* Adjust the this parameter by a value stored in the vtable. */
19943 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19946 int tmp_regno
= 2 /* ECX */;
19947 if (lookup_attribute ("fastcall",
19948 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19949 tmp_regno
= 0 /* EAX */;
19950 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19953 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19956 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19958 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19960 /* Adjust the this parameter. */
19961 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19962 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19964 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19965 xops
[0] = GEN_INT (vcall_offset
);
19967 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19968 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19970 xops
[1] = this_reg
;
19972 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19974 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19977 /* If necessary, drop THIS back to its stack slot. */
19978 if (this_reg
&& this_reg
!= this)
19980 xops
[0] = this_reg
;
19982 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19985 xops
[0] = XEXP (DECL_RTL (function
), 0);
19988 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19989 output_asm_insn ("jmp\t%P0", xops
);
19990 /* All thunks should be in the same object as their target,
19991 and thus binds_local_p should be true. */
19992 else if (TARGET_64BIT_MS_ABI
)
19993 gcc_unreachable ();
19996 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19997 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19998 tmp
= gen_rtx_MEM (QImode
, tmp
);
20000 output_asm_insn ("jmp\t%A0", xops
);
20005 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
20006 output_asm_insn ("jmp\t%P0", xops
);
20011 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
20012 tmp
= (gen_rtx_SYMBOL_REF
20014 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
20015 tmp
= gen_rtx_MEM (QImode
, tmp
);
20017 output_asm_insn ("jmp\t%0", xops
);
20020 #endif /* TARGET_MACHO */
20022 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
20023 output_set_got (tmp
, NULL_RTX
);
20026 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
20027 output_asm_insn ("jmp\t{*}%1", xops
);
20033 x86_file_start (void)
20035 default_file_start ();
20037 darwin_file_start ();
20039 if (X86_FILE_START_VERSION_DIRECTIVE
)
20040 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
20041 if (X86_FILE_START_FLTUSED
)
20042 fputs ("\t.global\t__fltused\n", asm_out_file
);
20043 if (ix86_asm_dialect
== ASM_INTEL
)
20044 fputs ("\t.intel_syntax\n", asm_out_file
);
20048 x86_field_alignment (tree field
, int computed
)
20050 enum machine_mode mode
;
20051 tree type
= TREE_TYPE (field
);
20053 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
20055 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
20056 ? get_inner_array_type (type
) : type
);
20057 if (mode
== DFmode
|| mode
== DCmode
20058 || GET_MODE_CLASS (mode
) == MODE_INT
20059 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
20060 return MIN (32, computed
);
20064 /* Output assembler code to FILE to increment profiler label # LABELNO
20065 for profiling a function entry. */
20067 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
20071 #ifndef NO_PROFILE_COUNTERS
20072 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
20075 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
20076 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
20078 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
20082 #ifndef NO_PROFILE_COUNTERS
20083 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20084 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
20086 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
20090 #ifndef NO_PROFILE_COUNTERS
20091 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
20092 PROFILE_COUNT_REGISTER
);
20094 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
20098 /* We don't have exact information about the insn sizes, but we may assume
20099 quite safely that we are informed about all 1 byte insns and memory
20100 address sizes. This is enough to eliminate unnecessary padding in
20104 min_insn_size (rtx insn
)
20108 if (!INSN_P (insn
) || !active_insn_p (insn
))
20111 /* Discard alignments we've emit and jump instructions. */
20112 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
20113 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
20116 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
20117 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
20120 /* Important case - calls are always 5 bytes.
20121 It is common to have many calls in the row. */
20123 && symbolic_reference_mentioned_p (PATTERN (insn
))
20124 && !SIBLING_CALL_P (insn
))
20126 if (get_attr_length (insn
) <= 1)
20129 /* For normal instructions we may rely on the sizes of addresses
20130 and the presence of symbol to require 4 bytes of encoding.
20131 This is not the case for jumps where references are PC relative. */
20132 if (!JUMP_P (insn
))
20134 l
= get_attr_length_address (insn
);
20135 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
20144 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20148 ix86_avoid_jump_misspredicts (void)
20150 rtx insn
, start
= get_insns ();
20151 int nbytes
= 0, njumps
= 0;
20154 /* Look for all minimal intervals of instructions containing 4 jumps.
20155 The intervals are bounded by START and INSN. NBYTES is the total
20156 size of instructions in the interval including INSN and not including
20157 START. When the NBYTES is smaller than 16 bytes, it is possible
20158 that the end of START and INSN ends up in the same 16byte page.
20160 The smallest offset in the page INSN can start is the case where START
20161 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20162 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
20164 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
20167 nbytes
+= min_insn_size (insn
);
20169 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
20170 INSN_UID (insn
), min_insn_size (insn
));
20172 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
20173 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
20181 start
= NEXT_INSN (start
);
20182 if ((JUMP_P (start
)
20183 && GET_CODE (PATTERN (start
)) != ADDR_VEC
20184 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
20186 njumps
--, isjump
= 1;
20189 nbytes
-= min_insn_size (start
);
20191 gcc_assert (njumps
>= 0);
20193 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
20194 INSN_UID (start
), INSN_UID (insn
), nbytes
);
20196 if (njumps
== 3 && isjump
&& nbytes
< 16)
20198 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
20201 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
20202 INSN_UID (insn
), padsize
);
20203 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
20208 /* AMD Athlon works faster
20209 when RET is not destination of conditional jump or directly preceded
20210 by other jump instruction. We avoid the penalty by inserting NOP just
20211 before the RET instructions in such cases. */
20213 ix86_pad_returns (void)
20218 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
20220 basic_block bb
= e
->src
;
20221 rtx ret
= BB_END (bb
);
20223 bool replace
= false;
20225 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
20226 || !maybe_hot_bb_p (bb
))
20228 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
20229 if (active_insn_p (prev
) || LABEL_P (prev
))
20231 if (prev
&& LABEL_P (prev
))
20236 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
20237 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
20238 && !(e
->flags
& EDGE_FALLTHRU
))
20243 prev
= prev_active_insn (ret
);
20245 && ((JUMP_P (prev
) && any_condjump_p (prev
))
20248 /* Empty functions get branch mispredict even when the jump destination
20249 is not visible to us. */
20250 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
20255 emit_insn_before (gen_return_internal_long (), ret
);
20261 /* Implement machine specific optimizations. We implement padding of returns
20262 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20266 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
20267 ix86_pad_returns ();
20268 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
20269 ix86_avoid_jump_misspredicts ();
20272 /* Return nonzero when QImode register that must be represented via REX prefix
20275 x86_extended_QIreg_mentioned_p (rtx insn
)
20278 extract_insn_cached (insn
);
20279 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20280 if (REG_P (recog_data
.operand
[i
])
20281 && REGNO (recog_data
.operand
[i
]) >= 4)
20286 /* Return nonzero when P points to register encoded via REX prefix.
20287 Called via for_each_rtx. */
20289 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
20291 unsigned int regno
;
20294 regno
= REGNO (*p
);
20295 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
20298 /* Return true when INSN mentions register that must be encoded using REX
20301 x86_extended_reg_mentioned_p (rtx insn
)
20303 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
20306 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20307 optabs would emit if we didn't have TFmode patterns. */
20310 x86_emit_floatuns (rtx operands
[2])
20312 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
20313 enum machine_mode mode
, inmode
;
20315 inmode
= GET_MODE (operands
[1]);
20316 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20319 in
= force_reg (inmode
, operands
[1]);
20320 mode
= GET_MODE (out
);
20321 neglab
= gen_label_rtx ();
20322 donelab
= gen_label_rtx ();
20323 f0
= gen_reg_rtx (mode
);
20325 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20327 expand_float (out
, in
, 0);
20329 emit_jump_insn (gen_jump (donelab
));
20332 emit_label (neglab
);
20334 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20336 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20338 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20340 expand_float (f0
, i0
, 0);
20342 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20344 emit_label (donelab
);
20347 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20348 with all elements equal to VAR. Return true if successful. */
20351 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
20352 rtx target
, rtx val
)
20354 enum machine_mode smode
, wsmode
, wvmode
;
20369 val
= force_reg (GET_MODE_INNER (mode
), val
);
20370 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20371 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20377 if (TARGET_SSE
|| TARGET_3DNOW_A
)
20379 val
= gen_lowpart (SImode
, val
);
20380 x
= gen_rtx_TRUNCATE (HImode
, val
);
20381 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
20382 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20404 /* Extend HImode to SImode using a paradoxical SUBREG. */
20405 tmp1
= gen_reg_rtx (SImode
);
20406 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20407 /* Insert the SImode value as low element of V4SImode vector. */
20408 tmp2
= gen_reg_rtx (V4SImode
);
20409 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20410 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20411 CONST0_RTX (V4SImode
),
20413 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20414 /* Cast the V4SImode vector back to a V8HImode vector. */
20415 tmp1
= gen_reg_rtx (V8HImode
);
20416 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
20417 /* Duplicate the low short through the whole low SImode word. */
20418 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
20419 /* Cast the V8HImode vector back to a V4SImode vector. */
20420 tmp2
= gen_reg_rtx (V4SImode
);
20421 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20422 /* Replicate the low element of the V4SImode vector. */
20423 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20424 /* Cast the V2SImode back to V8HImode, and store in target. */
20425 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20436 /* Extend QImode to SImode using a paradoxical SUBREG. */
20437 tmp1
= gen_reg_rtx (SImode
);
20438 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20439 /* Insert the SImode value as low element of V4SImode vector. */
20440 tmp2
= gen_reg_rtx (V4SImode
);
20441 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20442 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20443 CONST0_RTX (V4SImode
),
20445 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20446 /* Cast the V4SImode vector back to a V16QImode vector. */
20447 tmp1
= gen_reg_rtx (V16QImode
);
20448 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20449 /* Duplicate the low byte through the whole low SImode word. */
20450 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20451 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20452 /* Cast the V16QImode vector back to a V4SImode vector. */
20453 tmp2
= gen_reg_rtx (V4SImode
);
20454 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20455 /* Replicate the low element of the V4SImode vector. */
20456 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20457 /* Cast the V2SImode back to V16QImode, and store in target. */
20458 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20466 /* Replicate the value once into the next wider mode and recurse. */
20467 val
= convert_modes (wsmode
, smode
, val
, true);
20468 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20469 GEN_INT (GET_MODE_BITSIZE (smode
)),
20470 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20471 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20473 x
= gen_reg_rtx (wvmode
);
20474 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20475 gcc_unreachable ();
20476 emit_move_insn (target
, gen_lowpart (mode
, x
));
20484 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20485 whose ONE_VAR element is VAR, and other elements are zero. Return true
20489 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20490 rtx target
, rtx var
, int one_var
)
20492 enum machine_mode vsimode
;
20508 var
= force_reg (GET_MODE_INNER (mode
), var
);
20509 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20510 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20515 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20516 new_target
= gen_reg_rtx (mode
);
20518 new_target
= target
;
20519 var
= force_reg (GET_MODE_INNER (mode
), var
);
20520 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20521 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20522 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20525 /* We need to shuffle the value to the correct position, so
20526 create a new pseudo to store the intermediate result. */
20528 /* With SSE2, we can use the integer shuffle insns. */
20529 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20531 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20533 GEN_INT (one_var
== 1 ? 0 : 1),
20534 GEN_INT (one_var
== 2 ? 0 : 1),
20535 GEN_INT (one_var
== 3 ? 0 : 1)));
20536 if (target
!= new_target
)
20537 emit_move_insn (target
, new_target
);
20541 /* Otherwise convert the intermediate result to V4SFmode and
20542 use the SSE1 shuffle instructions. */
20543 if (mode
!= V4SFmode
)
20545 tmp
= gen_reg_rtx (V4SFmode
);
20546 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20551 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20553 GEN_INT (one_var
== 1 ? 0 : 1),
20554 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20555 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20557 if (mode
!= V4SFmode
)
20558 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20559 else if (tmp
!= target
)
20560 emit_move_insn (target
, tmp
);
20562 else if (target
!= new_target
)
20563 emit_move_insn (target
, new_target
);
20568 vsimode
= V4SImode
;
20574 vsimode
= V2SImode
;
20580 /* Zero extend the variable element to SImode and recurse. */
20581 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20583 x
= gen_reg_rtx (vsimode
);
20584 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20586 gcc_unreachable ();
20588 emit_move_insn (target
, gen_lowpart (mode
, x
));
20596 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20597 consisting of the values in VALS. It is known that all elements
20598 except ONE_VAR are constants. Return true if successful. */
20601 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20602 rtx target
, rtx vals
, int one_var
)
20604 rtx var
= XVECEXP (vals
, 0, one_var
);
20605 enum machine_mode wmode
;
20608 const_vec
= copy_rtx (vals
);
20609 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20610 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20618 /* For the two element vectors, it's just as easy to use
20619 the general case. */
20635 /* There's no way to set one QImode entry easily. Combine
20636 the variable value with its adjacent constant value, and
20637 promote to an HImode set. */
20638 x
= XVECEXP (vals
, 0, one_var
^ 1);
20641 var
= convert_modes (HImode
, QImode
, var
, true);
20642 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20643 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20644 x
= GEN_INT (INTVAL (x
) & 0xff);
20648 var
= convert_modes (HImode
, QImode
, var
, true);
20649 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20651 if (x
!= const0_rtx
)
20652 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20653 1, OPTAB_LIB_WIDEN
);
20655 x
= gen_reg_rtx (wmode
);
20656 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20657 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20659 emit_move_insn (target
, gen_lowpart (mode
, x
));
20666 emit_move_insn (target
, const_vec
);
20667 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20671 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20672 all values variable, and none identical. */
20675 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20676 rtx target
, rtx vals
)
20678 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20679 rtx op0
= NULL
, op1
= NULL
;
20680 bool use_vec_concat
= false;
20686 if (!mmx_ok
&& !TARGET_SSE
)
20692 /* For the two element vectors, we always implement VEC_CONCAT. */
20693 op0
= XVECEXP (vals
, 0, 0);
20694 op1
= XVECEXP (vals
, 0, 1);
20695 use_vec_concat
= true;
20699 half_mode
= V2SFmode
;
20702 half_mode
= V2SImode
;
20708 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20709 Recurse to load the two halves. */
20711 op0
= gen_reg_rtx (half_mode
);
20712 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20713 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20715 op1
= gen_reg_rtx (half_mode
);
20716 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20717 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20719 use_vec_concat
= true;
20730 gcc_unreachable ();
20733 if (use_vec_concat
)
20735 if (!register_operand (op0
, half_mode
))
20736 op0
= force_reg (half_mode
, op0
);
20737 if (!register_operand (op1
, half_mode
))
20738 op1
= force_reg (half_mode
, op1
);
20740 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20741 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20745 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20746 enum machine_mode inner_mode
;
20747 rtx words
[4], shift
;
20749 inner_mode
= GET_MODE_INNER (mode
);
20750 n_elts
= GET_MODE_NUNITS (mode
);
20751 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20752 n_elt_per_word
= n_elts
/ n_words
;
20753 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20755 for (i
= 0; i
< n_words
; ++i
)
20757 rtx word
= NULL_RTX
;
20759 for (j
= 0; j
< n_elt_per_word
; ++j
)
20761 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20762 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20768 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20769 word
, 1, OPTAB_LIB_WIDEN
);
20770 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20771 word
, 1, OPTAB_LIB_WIDEN
);
20779 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20780 else if (n_words
== 2)
20782 rtx tmp
= gen_reg_rtx (mode
);
20783 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20784 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20785 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20786 emit_move_insn (target
, tmp
);
20788 else if (n_words
== 4)
20790 rtx tmp
= gen_reg_rtx (V4SImode
);
20791 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20792 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20793 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20796 gcc_unreachable ();
20800 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20801 instructions unless MMX_OK is true. */
20804 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20806 enum machine_mode mode
= GET_MODE (target
);
20807 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20808 int n_elts
= GET_MODE_NUNITS (mode
);
20809 int n_var
= 0, one_var
= -1;
20810 bool all_same
= true, all_const_zero
= true;
20814 for (i
= 0; i
< n_elts
; ++i
)
20816 x
= XVECEXP (vals
, 0, i
);
20817 if (!CONSTANT_P (x
))
20818 n_var
++, one_var
= i
;
20819 else if (x
!= CONST0_RTX (inner_mode
))
20820 all_const_zero
= false;
20821 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20825 /* Constants are best loaded from the constant pool. */
20828 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20832 /* If all values are identical, broadcast the value. */
20834 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20835 XVECEXP (vals
, 0, 0)))
20838 /* Values where only one field is non-constant are best loaded from
20839 the pool and overwritten via move later. */
20843 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20844 XVECEXP (vals
, 0, one_var
),
20848 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20852 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20856 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20858 enum machine_mode mode
= GET_MODE (target
);
20859 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20860 bool use_vec_merge
= false;
20869 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20870 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20872 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20874 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20875 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20885 /* For the two element vectors, we implement a VEC_CONCAT with
20886 the extraction of the other element. */
20888 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20889 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20892 op0
= val
, op1
= tmp
;
20894 op0
= tmp
, op1
= val
;
20896 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20897 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20905 use_vec_merge
= true;
20909 /* tmp = target = A B C D */
20910 tmp
= copy_to_reg (target
);
20911 /* target = A A B B */
20912 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20913 /* target = X A B B */
20914 ix86_expand_vector_set (false, target
, val
, 0);
20915 /* target = A X C D */
20916 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20917 GEN_INT (1), GEN_INT (0),
20918 GEN_INT (2+4), GEN_INT (3+4)));
20922 /* tmp = target = A B C D */
20923 tmp
= copy_to_reg (target
);
20924 /* tmp = X B C D */
20925 ix86_expand_vector_set (false, tmp
, val
, 0);
20926 /* target = A B X D */
20927 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20928 GEN_INT (0), GEN_INT (1),
20929 GEN_INT (0+4), GEN_INT (3+4)));
20933 /* tmp = target = A B C D */
20934 tmp
= copy_to_reg (target
);
20935 /* tmp = X B C D */
20936 ix86_expand_vector_set (false, tmp
, val
, 0);
20937 /* target = A B X D */
20938 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20939 GEN_INT (0), GEN_INT (1),
20940 GEN_INT (2+4), GEN_INT (0+4)));
20944 gcc_unreachable ();
20949 /* Element 0 handled by vec_merge below. */
20952 use_vec_merge
= true;
20958 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20959 store into element 0, then shuffle them back. */
20963 order
[0] = GEN_INT (elt
);
20964 order
[1] = const1_rtx
;
20965 order
[2] = const2_rtx
;
20966 order
[3] = GEN_INT (3);
20967 order
[elt
] = const0_rtx
;
20969 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20970 order
[1], order
[2], order
[3]));
20972 ix86_expand_vector_set (false, target
, val
, 0);
20974 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20975 order
[1], order
[2], order
[3]));
20979 /* For SSE1, we have to reuse the V4SF code. */
20980 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20981 gen_lowpart (SFmode
, val
), elt
);
20986 use_vec_merge
= TARGET_SSE2
;
20989 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
21000 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
21001 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
21002 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
21006 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
21008 emit_move_insn (mem
, target
);
21010 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
21011 emit_move_insn (tmp
, val
);
21013 emit_move_insn (target
, mem
);
21018 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
21020 enum machine_mode mode
= GET_MODE (vec
);
21021 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
21022 bool use_vec_extr
= false;
21035 use_vec_extr
= true;
21047 tmp
= gen_reg_rtx (mode
);
21048 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
21049 GEN_INT (elt
), GEN_INT (elt
),
21050 GEN_INT (elt
+4), GEN_INT (elt
+4)));
21054 tmp
= gen_reg_rtx (mode
);
21055 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
21059 gcc_unreachable ();
21062 use_vec_extr
= true;
21077 tmp
= gen_reg_rtx (mode
);
21078 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
21079 GEN_INT (elt
), GEN_INT (elt
),
21080 GEN_INT (elt
), GEN_INT (elt
)));
21084 tmp
= gen_reg_rtx (mode
);
21085 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
21089 gcc_unreachable ();
21092 use_vec_extr
= true;
21097 /* For SSE1, we have to reuse the V4SF code. */
21098 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
21099 gen_lowpart (V4SFmode
, vec
), elt
);
21105 use_vec_extr
= TARGET_SSE2
;
21108 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
21113 /* ??? Could extract the appropriate HImode element and shift. */
21120 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
21121 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
21123 /* Let the rtl optimizers know about the zero extension performed. */
21124 if (inner_mode
== HImode
)
21126 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
21127 target
= gen_lowpart (SImode
, target
);
21130 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
21134 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
21136 emit_move_insn (mem
, vec
);
21138 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
21139 emit_move_insn (target
, tmp
);
21143 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21144 pattern to reduce; DEST is the destination; IN is the input vector. */
21147 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
21149 rtx tmp1
, tmp2
, tmp3
;
21151 tmp1
= gen_reg_rtx (V4SFmode
);
21152 tmp2
= gen_reg_rtx (V4SFmode
);
21153 tmp3
= gen_reg_rtx (V4SFmode
);
21155 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
21156 emit_insn (fn (tmp2
, tmp1
, in
));
21158 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
21159 GEN_INT (1), GEN_INT (1),
21160 GEN_INT (1+4), GEN_INT (1+4)));
21161 emit_insn (fn (dest
, tmp2
, tmp3
));
21164 /* Target hook for scalar_mode_supported_p. */
21166 ix86_scalar_mode_supported_p (enum machine_mode mode
)
21168 if (DECIMAL_FLOAT_MODE_P (mode
))
21170 else if (mode
== TFmode
)
21171 return TARGET_64BIT
;
21173 return default_scalar_mode_supported_p (mode
);
21176 /* Implements target hook vector_mode_supported_p. */
21178 ix86_vector_mode_supported_p (enum machine_mode mode
)
21180 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
21182 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
21184 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
21186 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
21191 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21193 We do this in the new i386 backend to maintain source compatibility
21194 with the old cc0-based compiler. */
21197 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
21198 tree inputs ATTRIBUTE_UNUSED
,
21201 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
21203 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
21208 /* Implements target vector targetm.asm.encode_section_info. This
21209 is not used by netware. */
21211 static void ATTRIBUTE_UNUSED
21212 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
21214 default_encode_section_info (decl
, rtl
, first
);
21216 if (TREE_CODE (decl
) == VAR_DECL
21217 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
21218 && ix86_in_large_data_p (decl
))
21219 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
21222 /* Worker function for REVERSE_CONDITION. */
21225 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
21227 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
21228 ? reverse_condition (code
)
21229 : reverse_condition_maybe_unordered (code
));
21232 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21236 output_387_reg_move (rtx insn
, rtx
*operands
)
21238 if (REG_P (operands
[0]))
21240 if (REG_P (operands
[1])
21241 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
21243 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
21244 return output_387_ffreep (operands
, 0);
21245 return "fstp\t%y0";
21247 if (STACK_TOP_P (operands
[0]))
21248 return "fld%z1\t%y1";
21251 else if (MEM_P (operands
[0]))
21253 gcc_assert (REG_P (operands
[1]));
21254 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
21255 return "fstp%z0\t%y0";
21258 /* There is no non-popping store to memory for XFmode.
21259 So if we need one, follow the store with a load. */
21260 if (GET_MODE (operands
[0]) == XFmode
)
21261 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21263 return "fst%z0\t%y0";
21270 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21271 FP status register is set. */
21274 ix86_emit_fp_unordered_jump (rtx label
)
21276 rtx reg
= gen_reg_rtx (HImode
);
21279 emit_insn (gen_x86_fnstsw_1 (reg
));
21281 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
21283 emit_insn (gen_x86_sahf_1 (reg
));
21285 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
21286 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
21290 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
21292 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21293 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
21296 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
21297 gen_rtx_LABEL_REF (VOIDmode
, label
),
21299 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
21301 emit_jump_insn (temp
);
21302 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21305 /* Output code to perform a log1p XFmode calculation. */
21307 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
21309 rtx label1
= gen_label_rtx ();
21310 rtx label2
= gen_label_rtx ();
21312 rtx tmp
= gen_reg_rtx (XFmode
);
21313 rtx tmp2
= gen_reg_rtx (XFmode
);
21315 emit_insn (gen_absxf2 (tmp
, op1
));
21316 emit_insn (gen_cmpxf (tmp
,
21317 CONST_DOUBLE_FROM_REAL_VALUE (
21318 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
21320 emit_jump_insn (gen_bge (label1
));
21322 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21323 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
21324 emit_jump (label2
);
21326 emit_label (label1
);
21327 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
21328 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
21329 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21330 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
21332 emit_label (label2
);
21335 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21337 static void ATTRIBUTE_UNUSED
21338 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
21341 /* With Binutils 2.15, the "@unwind" marker must be specified on
21342 every occurrence of the ".eh_frame" section, not just the first
21345 && strcmp (name
, ".eh_frame") == 0)
21347 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
21348 flags
& SECTION_WRITE
? "aw" : "a");
21351 default_elf_asm_named_section (name
, flags
, decl
);
21354 /* Return the mangling of TYPE if it is an extended fundamental type. */
21356 static const char *
21357 ix86_mangle_fundamental_type (tree type
)
21359 switch (TYPE_MODE (type
))
21362 /* __float128 is "g". */
21365 /* "long double" or __float80 is "e". */
21372 /* For 32-bit code we can save PIC register setup by using
21373 __stack_chk_fail_local hidden function instead of calling
21374 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21375 register, so it is better to call __stack_chk_fail directly. */
21378 ix86_stack_protect_fail (void)
21380 return TARGET_64BIT
21381 ? default_external_stack_protect_fail ()
21382 : default_hidden_stack_protect_fail ();
21385 /* Select a format to encode pointers in exception handling data. CODE
21386 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21387 true if the symbol may be affected by dynamic relocations.
21389 ??? All x86 object file formats are capable of representing this.
21390 After all, the relocation needed is the same as for the call insn.
21391 Whether or not a particular assembler allows us to enter such, I
21392 guess we'll have to see. */
21394 asm_preferred_eh_data_format (int code
, int global
)
21398 int type
= DW_EH_PE_sdata8
;
21400 || ix86_cmodel
== CM_SMALL_PIC
21401 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21402 type
= DW_EH_PE_sdata4
;
21403 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21405 if (ix86_cmodel
== CM_SMALL
21406 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21407 return DW_EH_PE_udata4
;
21408 return DW_EH_PE_absptr
;
21411 /* Expand copysign from SIGN to the positive value ABS_VALUE
21412 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21415 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
21417 enum machine_mode mode
= GET_MODE (sign
);
21418 rtx sgn
= gen_reg_rtx (mode
);
21419 if (mask
== NULL_RTX
)
21421 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
21422 if (!VECTOR_MODE_P (mode
))
21424 /* We need to generate a scalar mode mask in this case. */
21425 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21426 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21427 mask
= gen_reg_rtx (mode
);
21428 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21432 mask
= gen_rtx_NOT (mode
, mask
);
21433 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
21434 gen_rtx_AND (mode
, mask
, sign
)));
21435 emit_insn (gen_rtx_SET (VOIDmode
, result
,
21436 gen_rtx_IOR (mode
, abs_value
, sgn
)));
21439 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21440 mask for masking out the sign-bit is stored in *SMASK, if that is
21443 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
21445 enum machine_mode mode
= GET_MODE (op0
);
21448 xa
= gen_reg_rtx (mode
);
21449 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
21450 if (!VECTOR_MODE_P (mode
))
21452 /* We need to generate a scalar mode mask in this case. */
21453 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21454 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21455 mask
= gen_reg_rtx (mode
);
21456 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21458 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21459 gen_rtx_AND (mode
, op0
, mask
)));
21467 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21468 swapping the operands if SWAP_OPERANDS is true. The expanded
21469 code is a forward jump to a newly created label in case the
21470 comparison is true. The generated label rtx is returned. */
21472 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21473 bool swap_operands
)
21484 label
= gen_label_rtx ();
21485 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21486 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21487 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21488 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21489 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21490 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21491 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21492 JUMP_LABEL (tmp
) = label
;
21497 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21498 using comparison code CODE. Operands are swapped for the comparison if
21499 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21501 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21502 bool swap_operands
)
21504 enum machine_mode mode
= GET_MODE (op0
);
21505 rtx mask
= gen_reg_rtx (mode
);
21514 if (mode
== DFmode
)
21515 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21516 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21518 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21519 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21524 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21525 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21527 ix86_gen_TWO52 (enum machine_mode mode
)
21529 REAL_VALUE_TYPE TWO52r
;
21532 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21533 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21534 TWO52
= force_reg (mode
, TWO52
);
21539 /* Expand SSE sequence for computing lround from OP1 storing
21542 ix86_expand_lround (rtx op0
, rtx op1
)
21544 /* C code for the stuff we're doing below:
21545 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21548 enum machine_mode mode
= GET_MODE (op1
);
21549 const struct real_format
*fmt
;
21550 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21553 /* load nextafter (0.5, 0.0) */
21554 fmt
= REAL_MODE_FORMAT (mode
);
21555 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21556 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21558 /* adj = copysign (0.5, op1) */
21559 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21560 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21562 /* adj = op1 + adj */
21563 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21565 /* op0 = (imode)adj */
21566 expand_fix (op0
, adj
, 0);
21569 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21572 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21574 /* C code for the stuff we're doing below (for do_floor):
21576 xi -= (double)xi > op1 ? 1 : 0;
21579 enum machine_mode fmode
= GET_MODE (op1
);
21580 enum machine_mode imode
= GET_MODE (op0
);
21581 rtx ireg
, freg
, label
, tmp
;
21583 /* reg = (long)op1 */
21584 ireg
= gen_reg_rtx (imode
);
21585 expand_fix (ireg
, op1
, 0);
21587 /* freg = (double)reg */
21588 freg
= gen_reg_rtx (fmode
);
21589 expand_float (freg
, ireg
, 0);
21591 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21592 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21593 freg
, op1
, !do_floor
);
21594 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21595 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21596 emit_move_insn (ireg
, tmp
);
21598 emit_label (label
);
21599 LABEL_NUSES (label
) = 1;
21601 emit_move_insn (op0
, ireg
);
21604 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21605 result in OPERAND0. */
21607 ix86_expand_rint (rtx operand0
, rtx operand1
)
21609 /* C code for the stuff we're doing below:
21610 xa = fabs (operand1);
21611 if (!isless (xa, 2**52))
21613 xa = xa + 2**52 - 2**52;
21614 return copysign (xa, operand1);
21616 enum machine_mode mode
= GET_MODE (operand0
);
21617 rtx res
, xa
, label
, TWO52
, mask
;
21619 res
= gen_reg_rtx (mode
);
21620 emit_move_insn (res
, operand1
);
21622 /* xa = abs (operand1) */
21623 xa
= ix86_expand_sse_fabs (res
, &mask
);
21625 /* if (!isless (xa, TWO52)) goto label; */
21626 TWO52
= ix86_gen_TWO52 (mode
);
21627 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21629 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21630 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21632 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21634 emit_label (label
);
21635 LABEL_NUSES (label
) = 1;
21637 emit_move_insn (operand0
, res
);
21640 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21643 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21645 /* C code for the stuff we expand below.
21646 double xa = fabs (x), x2;
21647 if (!isless (xa, TWO52))
21649 xa = xa + TWO52 - TWO52;
21650 x2 = copysign (xa, x);
21659 enum machine_mode mode
= GET_MODE (operand0
);
21660 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21662 TWO52
= ix86_gen_TWO52 (mode
);
21664 /* Temporary for holding the result, initialized to the input
21665 operand to ease control flow. */
21666 res
= gen_reg_rtx (mode
);
21667 emit_move_insn (res
, operand1
);
21669 /* xa = abs (operand1) */
21670 xa
= ix86_expand_sse_fabs (res
, &mask
);
21672 /* if (!isless (xa, TWO52)) goto label; */
21673 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21675 /* xa = xa + TWO52 - TWO52; */
21676 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21677 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21679 /* xa = copysign (xa, operand1) */
21680 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21682 /* generate 1.0 or -1.0 */
21683 one
= force_reg (mode
,
21684 const_double_from_real_value (do_floor
21685 ? dconst1
: dconstm1
, mode
));
21687 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21688 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21689 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21690 gen_rtx_AND (mode
, one
, tmp
)));
21691 /* We always need to subtract here to preserve signed zero. */
21692 tmp
= expand_simple_binop (mode
, MINUS
,
21693 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21694 emit_move_insn (res
, tmp
);
21696 emit_label (label
);
21697 LABEL_NUSES (label
) = 1;
21699 emit_move_insn (operand0
, res
);
21702 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21705 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21707 /* C code for the stuff we expand below.
21708 double xa = fabs (x), x2;
21709 if (!isless (xa, TWO52))
21711 x2 = (double)(long)x;
21718 if (HONOR_SIGNED_ZEROS (mode))
21719 return copysign (x2, x);
21722 enum machine_mode mode
= GET_MODE (operand0
);
21723 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21725 TWO52
= ix86_gen_TWO52 (mode
);
21727 /* Temporary for holding the result, initialized to the input
21728 operand to ease control flow. */
21729 res
= gen_reg_rtx (mode
);
21730 emit_move_insn (res
, operand1
);
21732 /* xa = abs (operand1) */
21733 xa
= ix86_expand_sse_fabs (res
, &mask
);
21735 /* if (!isless (xa, TWO52)) goto label; */
21736 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21738 /* xa = (double)(long)x */
21739 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21740 expand_fix (xi
, res
, 0);
21741 expand_float (xa
, xi
, 0);
21744 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21746 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21747 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21748 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21749 gen_rtx_AND (mode
, one
, tmp
)));
21750 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21751 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21752 emit_move_insn (res
, tmp
);
21754 if (HONOR_SIGNED_ZEROS (mode
))
21755 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21757 emit_label (label
);
21758 LABEL_NUSES (label
) = 1;
21760 emit_move_insn (operand0
, res
);
21763 /* Expand SSE sequence for computing round from OPERAND1 storing
21764 into OPERAND0. Sequence that works without relying on DImode truncation
21765 via cvttsd2siq that is only available on 64bit targets. */
21767 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21769 /* C code for the stuff we expand below.
21770 double xa = fabs (x), xa2, x2;
21771 if (!isless (xa, TWO52))
21773 Using the absolute value and copying back sign makes
21774 -0.0 -> -0.0 correct.
21775 xa2 = xa + TWO52 - TWO52;
21780 else if (dxa > 0.5)
21782 x2 = copysign (xa2, x);
21785 enum machine_mode mode
= GET_MODE (operand0
);
21786 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21788 TWO52
= ix86_gen_TWO52 (mode
);
21790 /* Temporary for holding the result, initialized to the input
21791 operand to ease control flow. */
21792 res
= gen_reg_rtx (mode
);
21793 emit_move_insn (res
, operand1
);
21795 /* xa = abs (operand1) */
21796 xa
= ix86_expand_sse_fabs (res
, &mask
);
21798 /* if (!isless (xa, TWO52)) goto label; */
21799 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21801 /* xa2 = xa + TWO52 - TWO52; */
21802 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21803 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21805 /* dxa = xa2 - xa; */
21806 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21808 /* generate 0.5, 1.0 and -0.5 */
21809 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21810 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21811 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21815 tmp
= gen_reg_rtx (mode
);
21816 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21817 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21818 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21819 gen_rtx_AND (mode
, one
, tmp
)));
21820 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21821 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21822 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21823 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21824 gen_rtx_AND (mode
, one
, tmp
)));
21825 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21827 /* res = copysign (xa2, operand1) */
21828 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21830 emit_label (label
);
21831 LABEL_NUSES (label
) = 1;
21833 emit_move_insn (operand0
, res
);
21836 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21839 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21841 /* C code for SSE variant we expand below.
21842 double xa = fabs (x), x2;
21843 if (!isless (xa, TWO52))
21845 x2 = (double)(long)x;
21846 if (HONOR_SIGNED_ZEROS (mode))
21847 return copysign (x2, x);
21850 enum machine_mode mode
= GET_MODE (operand0
);
21851 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21853 TWO52
= ix86_gen_TWO52 (mode
);
21855 /* Temporary for holding the result, initialized to the input
21856 operand to ease control flow. */
21857 res
= gen_reg_rtx (mode
);
21858 emit_move_insn (res
, operand1
);
21860 /* xa = abs (operand1) */
21861 xa
= ix86_expand_sse_fabs (res
, &mask
);
21863 /* if (!isless (xa, TWO52)) goto label; */
21864 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21866 /* x = (double)(long)x */
21867 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21868 expand_fix (xi
, res
, 0);
21869 expand_float (res
, xi
, 0);
21871 if (HONOR_SIGNED_ZEROS (mode
))
21872 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21874 emit_label (label
);
21875 LABEL_NUSES (label
) = 1;
21877 emit_move_insn (operand0
, res
);
21880 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21883 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21885 enum machine_mode mode
= GET_MODE (operand0
);
21886 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21888 /* C code for SSE variant we expand below.
21889 double xa = fabs (x), x2;
21890 if (!isless (xa, TWO52))
21892 xa2 = xa + TWO52 - TWO52;
21896 x2 = copysign (xa2, x);
21900 TWO52
= ix86_gen_TWO52 (mode
);
21902 /* Temporary for holding the result, initialized to the input
21903 operand to ease control flow. */
21904 res
= gen_reg_rtx (mode
);
21905 emit_move_insn (res
, operand1
);
21907 /* xa = abs (operand1) */
21908 xa
= ix86_expand_sse_fabs (res
, &smask
);
21910 /* if (!isless (xa, TWO52)) goto label; */
21911 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21913 /* res = xa + TWO52 - TWO52; */
21914 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21915 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21916 emit_move_insn (res
, tmp
);
21919 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21921 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21922 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21923 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21924 gen_rtx_AND (mode
, mask
, one
)));
21925 tmp
= expand_simple_binop (mode
, MINUS
,
21926 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21927 emit_move_insn (res
, tmp
);
21929 /* res = copysign (res, operand1) */
21930 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21932 emit_label (label
);
21933 LABEL_NUSES (label
) = 1;
21935 emit_move_insn (operand0
, res
);
21938 /* Expand SSE sequence for computing round from OPERAND1 storing
21941 ix86_expand_round (rtx operand0
, rtx operand1
)
21943 /* C code for the stuff we're doing below:
21944 double xa = fabs (x);
21945 if (!isless (xa, TWO52))
21947 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21948 return copysign (xa, x);
21950 enum machine_mode mode
= GET_MODE (operand0
);
21951 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21952 const struct real_format
*fmt
;
21953 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21955 /* Temporary for holding the result, initialized to the input
21956 operand to ease control flow. */
21957 res
= gen_reg_rtx (mode
);
21958 emit_move_insn (res
, operand1
);
21960 TWO52
= ix86_gen_TWO52 (mode
);
21961 xa
= ix86_expand_sse_fabs (res
, &mask
);
21962 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21964 /* load nextafter (0.5, 0.0) */
21965 fmt
= REAL_MODE_FORMAT (mode
);
21966 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21967 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21969 /* xa = xa + 0.5 */
21970 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21971 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21973 /* xa = (double)(int64_t)xa */
21974 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21975 expand_fix (xi
, xa
, 0);
21976 expand_float (xa
, xi
, 0);
21978 /* res = copysign (xa, operand1) */
21979 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21981 emit_label (label
);
21982 LABEL_NUSES (label
) = 1;
21984 emit_move_insn (operand0
, res
);
21988 /* Table of valid machine attributes. */
21989 static const struct attribute_spec ix86_attribute_table
[] =
21991 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
21992 /* Stdcall attribute says callee is responsible for popping arguments
21993 if they are not variable. */
21994 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
21995 /* Fastcall attribute says callee is responsible for popping arguments
21996 if they are not variable. */
21997 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
21998 /* Cdecl attribute says the callee is a normal C declaration */
21999 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
22000 /* Regparm attribute specifies how many integer arguments are to be
22001 passed in registers. */
22002 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
22003 /* Sseregparm attribute says we are using x86_64 calling conventions
22004 for FP arguments. */
22005 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
22006 /* force_align_arg_pointer says this function realigns the stack at entry. */
22007 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
22008 false, true, true, ix86_handle_cconv_attribute
},
22009 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22010 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
22011 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
22012 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
22014 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
22015 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
22016 #ifdef SUBTARGET_ATTRIBUTE_TABLE
22017 SUBTARGET_ATTRIBUTE_TABLE
,
22019 { NULL
, 0, 0, false, false, false, NULL
}
22022 /* Initialize the GCC target structure. */
22023 #undef TARGET_ATTRIBUTE_TABLE
22024 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22025 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22026 # undef TARGET_MERGE_DECL_ATTRIBUTES
22027 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22030 #undef TARGET_COMP_TYPE_ATTRIBUTES
22031 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22033 #undef TARGET_INIT_BUILTINS
22034 #define TARGET_INIT_BUILTINS ix86_init_builtins
22035 #undef TARGET_EXPAND_BUILTIN
22036 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22038 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22039 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
22040 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
22041 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
22043 #undef TARGET_ASM_FUNCTION_EPILOGUE
22044 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22046 #undef TARGET_ENCODE_SECTION_INFO
22047 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22048 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22050 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22053 #undef TARGET_ASM_OPEN_PAREN
22054 #define TARGET_ASM_OPEN_PAREN ""
22055 #undef TARGET_ASM_CLOSE_PAREN
22056 #define TARGET_ASM_CLOSE_PAREN ""
22058 #undef TARGET_ASM_ALIGNED_HI_OP
22059 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22060 #undef TARGET_ASM_ALIGNED_SI_OP
22061 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22063 #undef TARGET_ASM_ALIGNED_DI_OP
22064 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22067 #undef TARGET_ASM_UNALIGNED_HI_OP
22068 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22069 #undef TARGET_ASM_UNALIGNED_SI_OP
22070 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22071 #undef TARGET_ASM_UNALIGNED_DI_OP
22072 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22074 #undef TARGET_SCHED_ADJUST_COST
22075 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22076 #undef TARGET_SCHED_ISSUE_RATE
22077 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22078 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22079 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22080 ia32_multipass_dfa_lookahead
22082 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22083 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22086 #undef TARGET_HAVE_TLS
22087 #define TARGET_HAVE_TLS true
22089 #undef TARGET_CANNOT_FORCE_CONST_MEM
22090 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22091 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22092 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22094 #undef TARGET_DELEGITIMIZE_ADDRESS
22095 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22097 #undef TARGET_MS_BITFIELD_LAYOUT_P
22098 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22101 #undef TARGET_BINDS_LOCAL_P
22102 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22104 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22105 #undef TARGET_BINDS_LOCAL_P
22106 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22109 #undef TARGET_ASM_OUTPUT_MI_THUNK
22110 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22111 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22112 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22114 #undef TARGET_ASM_FILE_START
22115 #define TARGET_ASM_FILE_START x86_file_start
22117 #undef TARGET_DEFAULT_TARGET_FLAGS
22118 #define TARGET_DEFAULT_TARGET_FLAGS \
22120 | TARGET_64BIT_DEFAULT \
22121 | TARGET_SUBTARGET_DEFAULT \
22122 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22124 #undef TARGET_HANDLE_OPTION
22125 #define TARGET_HANDLE_OPTION ix86_handle_option
22127 #undef TARGET_RTX_COSTS
22128 #define TARGET_RTX_COSTS ix86_rtx_costs
22129 #undef TARGET_ADDRESS_COST
22130 #define TARGET_ADDRESS_COST ix86_address_cost
22132 #undef TARGET_FIXED_CONDITION_CODE_REGS
22133 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22134 #undef TARGET_CC_MODES_COMPATIBLE
22135 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22137 #undef TARGET_MACHINE_DEPENDENT_REORG
22138 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22140 #undef TARGET_BUILD_BUILTIN_VA_LIST
22141 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22143 #undef TARGET_MD_ASM_CLOBBERS
22144 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22146 #undef TARGET_PROMOTE_PROTOTYPES
22147 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22148 #undef TARGET_STRUCT_VALUE_RTX
22149 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22150 #undef TARGET_SETUP_INCOMING_VARARGS
22151 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22152 #undef TARGET_MUST_PASS_IN_STACK
22153 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22154 #undef TARGET_PASS_BY_REFERENCE
22155 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22156 #undef TARGET_INTERNAL_ARG_POINTER
22157 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22158 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22159 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22160 #undef TARGET_STRICT_ARGUMENT_NAMING
22161 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22163 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22164 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22166 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22167 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22169 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22170 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22173 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22174 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22177 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22178 #undef TARGET_INSERT_ATTRIBUTES
22179 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22182 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22183 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22185 #undef TARGET_STACK_PROTECT_FAIL
22186 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22188 #undef TARGET_FUNCTION_VALUE
22189 #define TARGET_FUNCTION_VALUE ix86_function_value
22191 struct gcc_target targetm
= TARGET_INITIALIZER
;
22193 #include "gt-i386.h"