1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* NOTE(review): this extracted chunk embeds the original file's line numbers
   at the start of each line, and that numbering jumps (56 -> 59, 64 -> 67),
   so the "#endif" closing the CHECK_STACK_LIMIT guard and the final ": 4)"
   arm of MODE_INDEX were lost in extraction -- restore from the pristine
   i386.c before compiling.  */
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop descriptor: always fall back to a library call.  */
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for size (-Os): entries are instruction-size
   estimates in bytes (COSTS_N_BYTES), not latencies, so the optimizers
   minimize code bytes rather than cycles.
   NOTE(review): jumps in the embedded original line numbers (91 -> 94,
   116 -> 118, and after 127) show that a few fields (e.g. "large" insn /
   MOVE_RATIO, branch cost) and the closing "};" were dropped by the
   extraction -- verify against the pristine i386.c.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy/memset strategy tables: rep movsb/stosb is the smallest code.  */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* Latency cost table (relative to an add, via COSTS_N_INSNS) for the
   original 80386.
   NOTE(review): gaps in the embedded original line numbers (150 -> 152,
   174 -> 176, after 185) indicate fields (MOVE_RATIO, branch cost) and the
   terminating "};" were lost in extraction -- verify against pristine
   i386.c.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: byte-wide rep prefix, 32-bit only (dummy for
   the 64-bit slot -- the 386 has no 64-bit mode).  */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Latency cost table for the 80486.
   NOTE(review): extraction dropped interior fields (embedded numbering
   jumps 207 -> 209, 231 -> 233) and everything after the memset
   initializer at orig. line 241 (the memset DUMMY entry and "};") --
   verify against pristine i386.c.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: dword-wide rep prefix for all sizes.  */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Latency cost table for the Pentium (P5).
   NOTE(review): extraction dropped interior fields (numbering jumps
   264 -> 266, 288 -> 290) and the tail after the memset initializer at
   orig. line 298 -- verify against pristine i386.c.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then library call.  */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Latency cost table for the PentiumPro / P6 family.
   NOTE(review): extraction dropped interior fields (numbering jumps
   321 -> 323, 345 -> 347), the closing "*" "/" of the rep-instruction
   comment at orig. 353-356 (line 357 is missing), and the tail after
   orig. line 362 -- verify against pristine i386.c.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Latency cost table for the AMD Geode.
   NOTE(review): extraction dropped interior lines (numbering jumps
   385 -> 387, 396 -> 398, 410 -> 412) and the tail after orig. line 420
   -- verify against pristine i386.c.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl up to 256 bytes, then library call.  */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Latency cost table for the AMD K6.
   NOTE(review): extraction dropped interior fields (numbering jumps
   443 -> 445, 467 -> 469) and the tail after orig. line 477 -- verify
   against pristine i386.c.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl up to 256 bytes, then library call.  */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Latency cost table for the AMD Athlon (K7).
   NOTE(review): extraction dropped interior fields (numbering jumps
   500 -> 502, 524 -> 526) and the tail after orig. line 537 -- verify
   against pristine i386.c.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Latency cost table for the AMD K8 (Opteron/Athlon 64); includes
   separate 32-bit and 64-bit stringop strategy tables.
   NOTE(review): extraction dropped interior fields (numbering jumps
   560 -> 562, 587 -> 589, 589 -> 591), including the closing "*" "/" of
   the prefetch comment at orig. 584-587 (line 588 missing), and any
   trailing ";" after orig. 604 -- verify against pristine i386.c.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Latency cost table for the Pentium 4 (NetBurst); note the very high
   SSE move/load costs reflecting slow inter-unit transfers.
   NOTE(review): extraction dropped interior fields (numbering jumps
   626 -> 628, 650 -> 652, 660 -> 662) and the tail after orig. line 662
   -- verify against pristine i386.c.  */
608 struct processor_costs pentium4_cost
= {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (3), /* cost of a lea instruction */
611 COSTS_N_INSNS (4), /* variable shift costs */
612 COSTS_N_INSNS (4), /* constant shift costs */
613 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (15), /* HI */
615 COSTS_N_INSNS (15), /* SI */
616 COSTS_N_INSNS (15), /* DI */
617 COSTS_N_INSNS (15)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (56), /* HI */
621 COSTS_N_INSNS (56), /* SI */
622 COSTS_N_INSNS (56), /* DI */
623 COSTS_N_INSNS (56)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 16, /* "large" insn */
628 2, /* cost for loading QImode using movzbl */
629 {4, 5, 4}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {2, 3, 2}, /* cost of storing integer registers */
633 2, /* cost of reg,reg fld/fst */
634 {2, 2, 6}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {4, 4, 6}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {2, 2}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {2, 2}, /* cost of storing MMX registers
642 in SImode and DImode */
643 12, /* cost of moving SSE register */
644 {12, 12, 12}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {2, 2, 8}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 10, /* MMX or SSE register to integer */
649 64, /* size of prefetch block */
650 6, /* number of parallel prefetches */
652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
658 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
659 DUMMY_STRINGOP_ALGS
},
660 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
662 DUMMY_STRINGOP_ALGS
},
/* Latency cost table for Nocona (64-bit Pentium 4 / Prescott); has real
   64-bit stringop strategy tables instead of DUMMY placeholders.
   NOTE(review): extraction dropped interior fields (numbering jumps
   684 -> 686, 708 -> 710, 719 -> 721) and any trailing ";" -- verify
   against pristine i386.c.  */
666 struct processor_costs nocona_cost
= {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (1), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (10), /* HI */
673 COSTS_N_INSNS (10), /* SI */
674 COSTS_N_INSNS (10), /* DI */
675 COSTS_N_INSNS (10)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (66), /* HI */
679 COSTS_N_INSNS (66), /* SI */
680 COSTS_N_INSNS (66), /* DI */
681 COSTS_N_INSNS (66)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 16, /* "large" insn */
686 4, /* cost for loading QImode using movzbl */
687 {4, 4, 4}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {4, 4, 4}, /* cost of storing integer registers */
691 3, /* cost of reg,reg fld/fst */
692 {12, 12, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {4, 4, 4}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 6, /* cost of moving MMX register */
697 {12, 12}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {12, 12}, /* cost of storing MMX registers
700 in SImode and DImode */
701 6, /* cost of moving SSE register */
702 {12, 12, 12}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {12, 12, 12}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 8, /* MMX or SSE register to integer */
707 128, /* size of prefetch block */
708 8, /* number of parallel prefetches */
710 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
711 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
712 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
713 COSTS_N_INSNS (3), /* cost of FABS instruction. */
714 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
715 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
716 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
717 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
718 {100000, unrolled_loop
}, {-1, libcall
}}}},
719 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
721 {libcall
, {{24, loop
}, {64, unrolled_loop
},
722 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Latency cost table for the Intel Core 2.
   NOTE(review): extraction dropped interior fields (numbering jumps
   744 -> 746, 767 -> 769) and any trailing ";" -- verify against
   pristine i386.c.  */
726 struct processor_costs core2_cost
= {
727 COSTS_N_INSNS (1), /* cost of an add instruction */
728 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
729 COSTS_N_INSNS (1), /* variable shift costs */
730 COSTS_N_INSNS (1), /* constant shift costs */
731 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
732 COSTS_N_INSNS (3), /* HI */
733 COSTS_N_INSNS (3), /* SI */
734 COSTS_N_INSNS (3), /* DI */
735 COSTS_N_INSNS (3)}, /* other */
736 0, /* cost of multiply per each bit set */
737 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
738 COSTS_N_INSNS (22), /* HI */
739 COSTS_N_INSNS (22), /* SI */
740 COSTS_N_INSNS (22), /* DI */
741 COSTS_N_INSNS (22)}, /* other */
742 COSTS_N_INSNS (1), /* cost of movsx */
743 COSTS_N_INSNS (1), /* cost of movzx */
744 8, /* "large" insn */
746 2, /* cost for loading QImode using movzbl */
747 {6, 6, 6}, /* cost of loading integer registers
748 in QImode, HImode and SImode.
749 Relative to reg-reg move (2). */
750 {4, 4, 4}, /* cost of storing integer registers */
751 2, /* cost of reg,reg fld/fst */
752 {6, 6, 6}, /* cost of loading fp registers
753 in SFmode, DFmode and XFmode */
754 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode (the original comment said "loading
integer registers" -- a copy-paste error; positionally this is the
fp-store field) */
755 2, /* cost of moving MMX register */
756 {6, 6}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {4, 4}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {6, 6, 6}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 4, 4}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 2, /* MMX or SSE register to integer */
766 128, /* size of prefetch block */
767 8, /* number of parallel prefetches */
769 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
770 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
771 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
772 COSTS_N_INSNS (1), /* cost of FABS instruction. */
773 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
774 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
775 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
776 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
777 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
778 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
779 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
780 {libcall
, {{24, loop
}, {32, unrolled_loop
},
781 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
784 /* Generic64 should produce code tuned for Nocona and K8. */
/* NOTE(review): extraction dropped interior fields (numbering jumps
   784 -> 786, 808 -> 810, 834 -> 836 -- the latter removing the
   branch-cost value the preceding comment refers to) and any trailing
   ";" -- verify against pristine i386.c.  */
786 struct processor_costs generic64_cost
= {
787 COSTS_N_INSNS (1), /* cost of an add instruction */
788 /* On all chips taken into consideration lea is 2 cycles and more. With
789 this cost however our current implementation of synth_mult results in
790 use of unnecessary temporary registers causing regression on several
791 SPECfp benchmarks. */
792 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
793 COSTS_N_INSNS (1), /* variable shift costs */
794 COSTS_N_INSNS (1), /* constant shift costs */
795 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
796 COSTS_N_INSNS (4), /* HI */
797 COSTS_N_INSNS (3), /* SI */
798 COSTS_N_INSNS (4), /* DI */
799 COSTS_N_INSNS (2)}, /* other */
800 0, /* cost of multiply per each bit set */
801 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
802 COSTS_N_INSNS (26), /* HI */
803 COSTS_N_INSNS (42), /* SI */
804 COSTS_N_INSNS (74), /* DI */
805 COSTS_N_INSNS (74)}, /* other */
806 COSTS_N_INSNS (1), /* cost of movsx */
807 COSTS_N_INSNS (1), /* cost of movzx */
808 8, /* "large" insn */
810 4, /* cost for loading QImode using movzbl */
811 {4, 4, 4}, /* cost of loading integer registers
812 in QImode, HImode and SImode.
813 Relative to reg-reg move (2). */
814 {4, 4, 4}, /* cost of storing integer registers */
815 4, /* cost of reg,reg fld/fst */
816 {12, 12, 12}, /* cost of loading fp registers
817 in SFmode, DFmode and XFmode */
818 {6, 6, 8}, /* cost of storing fp registers
819 in SFmode, DFmode and XFmode */
820 2, /* cost of moving MMX register */
821 {8, 8}, /* cost of loading MMX registers
822 in SImode and DImode */
823 {8, 8}, /* cost of storing MMX registers
824 in SImode and DImode */
825 2, /* cost of moving SSE register */
826 {8, 8, 8}, /* cost of loading SSE registers
827 in SImode, DImode and TImode */
828 {8, 8, 8}, /* cost of storing SSE registers
829 in SImode, DImode and TImode */
830 5, /* MMX or SSE register to integer */
831 64, /* size of prefetch block */
832 6, /* number of parallel prefetches */
833 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
834 is increased to perhaps more appropriate value of 5. */
836 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
837 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
838 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
839 COSTS_N_INSNS (8), /* cost of FABS instruction. */
840 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
841 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* 32-bit slots are DUMMY: generic64 is only used when compiling 64-bit.  */
842 {DUMMY_STRINGOP_ALGS
,
843 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
844 {DUMMY_STRINGOP_ALGS
,
845 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* NOTE(review): lossy extraction -- leading numeric tokens are fused
   original line numbers; some initializer fields (e.g. after "large insn"
   and after "number of parallel prefetches") and the closing "};" appear
   to be missing.  Code left byte-identical.  */
848 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
850 struct processor_costs generic32_cost
= {
851 COSTS_N_INSNS (1), /* cost of an add instruction */
852 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
853 COSTS_N_INSNS (1), /* variable shift costs */
854 COSTS_N_INSNS (1), /* constant shift costs */
855 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
856 COSTS_N_INSNS (4), /* HI */
857 COSTS_N_INSNS (3), /* SI */
858 COSTS_N_INSNS (4), /* DI */
859 COSTS_N_INSNS (2)}, /* other */
860 0, /* cost of multiply per each bit set */
861 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
862 COSTS_N_INSNS (26), /* HI */
863 COSTS_N_INSNS (42), /* SI */
864 COSTS_N_INSNS (74), /* DI */
865 COSTS_N_INSNS (74)}, /* other */
866 COSTS_N_INSNS (1), /* cost of movsx */
867 COSTS_N_INSNS (1), /* cost of movzx */
868 8, /* "large" insn */
870 4, /* cost for loading QImode using movzbl */
871 {4, 4, 4}, /* cost of loading integer registers
872 in QImode, HImode and SImode.
873 Relative to reg-reg move (2). */
874 {4, 4, 4}, /* cost of storing integer registers */
875 4, /* cost of reg,reg fld/fst */
876 {12, 12, 12}, /* cost of loading fp registers
877 in SFmode, DFmode and XFmode */
878 {6, 6, 8}, /* cost of storing fp registers
879 in SFmode, DFmode and XFmode */
880 2, /* cost of moving MMX register */
881 {8, 8}, /* cost of loading MMX registers
882 in SImode and DImode */
883 {8, 8}, /* cost of storing MMX registers
884 in SImode and DImode */
885 2, /* cost of moving SSE register */
886 {8, 8, 8}, /* cost of loading SSE registers
887 in SImode, DImode and TImode */
888 {8, 8, 8}, /* cost of storing SSE registers
889 in SImode, DImode and TImode */
890 5, /* MMX or SSE register to integer */
891 64, /* size of prefetch block */
892 6, /* number of parallel prefetches */
894 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
895 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
896 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
897 COSTS_N_INSNS (8), /* cost of FABS instruction. */
898 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
899 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
900 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
901 DUMMY_STRINGOP_ALGS
},
902 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
903 DUMMY_STRINGOP_ALGS
},
906 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  Each m_* mask selects one
   PROCESSOR_* value from the processor_type enumeration; masks are
   OR-ed together below to state which CPUs a tuning flag applies to.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
926 /* Generic instruction choice should be common subset of supported CPUs
927 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
929 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
930 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
931 generic because it is not working well with PPro base chips. */
932 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8
| m_CORE2
| m_GENERIC64
;
933 const int x86_push_memory
= m_386
| m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
934 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
935 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */;
936 const int x86_double_with_add
= ~m_386
;
937 const int x86_use_bit_test
= m_386
;
938 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
| m_CORE2
| m_GENERIC
;
939 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
940 const int x86_3dnow_a
= m_ATHLON_K8
;
941 const int x86_deep_branch
= m_PPRO
| m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
942 /* Branch hints were put in P4 based on simulation result. But
943 after P4 was made, no performance benefit was observed with
944 branch hints. It also increases the code size. As the result,
945 icc never generates branch hints. */
946 const int x86_branch_hints
= 0;
947 const int x86_use_sahf
= m_PPRO
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_GENERIC32
; /*m_GENERIC | m_ATHLON_K8 ? */
948 /* We probably ought to watch for partial register stalls on Generic32
949 compilation setting as well. However in current implementation the
950 partial register stalls are not eliminated very well - they can
951 be introduced via subregs synthesized by combine and can happen
952 in caller/callee saving sequences.
953 Because this option pays back little on PPro based chips and is in conflict
954 with partial reg. dependencies used by Athlon/P4 based chips, it is better
955 to leave it off for generic32 for now. */
956 const int x86_partial_reg_stall
= m_PPRO
;
957 const int x86_partial_flag_reg_stall
= m_CORE2
| m_GENERIC
;
958 const int x86_use_himode_fiop
= m_386
| m_486
| m_K6_GEODE
;
959 const int x86_use_simode_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
| m_CORE2
| m_GENERIC
);
960 const int x86_use_mov0
= m_K6
;
961 const int x86_use_cltd
= ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
);
962 const int x86_read_modify_write
= ~m_PENT
;
963 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
964 const int x86_split_long_moves
= m_PPRO
;
965 const int x86_promote_QImode
= m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
| m_CORE2
| m_GENERIC
; /* m_PENT4 ? */
966 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
967 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
968 const int x86_qimode_math
= ~(0);
969 const int x86_promote_qi_regs
= 0;
970 /* On PPro this flag is meant to avoid partial register stalls. Just like
971 the x86_partial_reg_stall this option might be considered for Generic32
972 if our scheme for avoiding partial stalls was more effective. */
973 const int x86_himode_math
= ~(m_PPRO
);
974 const int x86_promote_hi_regs
= m_PPRO
;
975 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
976 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
977 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
978 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6_GEODE
| m_386
| m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
979 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_GEODE
);
980 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
981 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
982 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
983 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
984 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
985 const int x86_shift1
= ~m_486
;
986 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
987 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
988 that thread 128bit SSE registers as single units versus K8 based chips that
989 divide SSE registers to two 64bit halves.
990 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
991 to allow register renaming on 128bit SSE units, but usually results in one
992 extra microop on 64bit SSE units. Experimental results shows that disabling
993 this option on P4 brings over 20% SPECfp regression, while enabling it on
994 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
996 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
997 /* Set for machines where the type and dependencies are resolved on SSE
998 register parts instead of whole registers, so we may maintain just
999 lower part of scalar values in proper format leaving the upper part
1001 const int x86_sse_split_regs
= m_ATHLON_K8
;
1002 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
1003 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
1004 const int x86_use_ffreep
= m_ATHLON_K8
;
1005 const int x86_use_incdec
= ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
);
1007 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1008 integer data in xmm registers. Which results in pretty abysmal code. */
1009 const int x86_inter_unit_moves
= 0 /* ~(m_ATHLON_K8) */;
1011 const int x86_ext_80387_constants
= m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
1012 /* Some CPU cores are not able to predict more than 4 branch instructions in
1013 the 16 byte window. */
1014 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1015 const int x86_schedule
= m_PPRO
| m_ATHLON_K8
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
;
1016 const int x86_use_bt
= m_ATHLON_K8
;
1017 /* Compare and exchange was added for 80486. */
1018 const int x86_cmpxchg
= ~m_386
;
1019 /* Compare and exchange 8 bytes was added for pentium. */
1020 const int x86_cmpxchg8b
= ~(m_386
| m_486
);
1021 /* Exchange and add was added for 80486. */
1022 const int x86_xadd
= ~m_386
;
1023 /* Byteswap was added for 80486. */
1024 const int x86_bswap
= ~m_386
;
1025 const int x86_pad_returns
= m_ATHLON_K8
| m_CORE2
| m_GENERIC
;
1027 static enum stringop_alg stringop_alg
= no_stringop
;
1029 /* In case the average insn count for single function invocation is
1030 lower than this constant, emit fast (but longer) prologue and
1032 #define FAST_PROLOGUE_INSN_COUNT 20
1034 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1035 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1036 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1037 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
/* NOTE(review): lossy extraction.  This is a positional array indexed by
   hard register number; the opening "{", the arg-pointer entry, the last
   two entries of the SSE/MMX/extended-SSE rows (rows appear to be 8 wide
   elsewhere in the file, only 6 survive here) and the closing "};" are
   missing.  Left byte-identical -- do not rebuild by guessing.  */
1039 /* Array of the smallest class containing reg number REGNO, indexed by
1040 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1042 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1044 /* ax, dx, cx, bx */
1045 AREG
, DREG
, CREG
, BREG
,
1046 /* si, di, bp, sp */
1047 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1049 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1050 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1053 /* flags, fpsr, fpcr, frame */
1054 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1055 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1057 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1059 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1060 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1061 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1065 /* The "default" register map used in 32bit mode. */
1067 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1069 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1070 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1071 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1072 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1073 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1074 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1075 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1078 static int const x86_64_int_parameter_registers
[6] =
1080 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1081 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1084 static int const x86_64_int_return_registers
[4] =
1086 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1089 /* The "default" register map used in 64bit mode. */
1090 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1092 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1093 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1094 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1095 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1096 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1097 8,9,10,11,12,13,14,15, /* extended integer registers */
1098 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1101 /* Define the register numbers to be used in Dwarf debugging information.
1102 The SVR4 reference port C compiler uses the following register numbers
1103 in its Dwarf output code:
1104 0 for %eax (gcc regno = 0)
1105 1 for %ecx (gcc regno = 2)
1106 2 for %edx (gcc regno = 1)
1107 3 for %ebx (gcc regno = 3)
1108 4 for %esp (gcc regno = 7)
1109 5 for %ebp (gcc regno = 6)
1110 6 for %esi (gcc regno = 4)
1111 7 for %edi (gcc regno = 5)
1112 The following three DWARF register numbers are never generated by
1113 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1114 believes these numbers have these meanings.
1115 8 for %eip (no gcc equivalent)
1116 9 for %eflags (gcc regno = 17)
1117 10 for %trapno (no gcc equivalent)
1118 It is not at all clear how we should number the FP stack registers
1119 for the x86 architecture. If the version of SDB on x86/svr4 were
1120 a bit less brain dead with respect to floating-point then we would
1121 have a precedent to follow with respect to DWARF register numbers
1122 for x86 FP registers, but the SDB on x86/svr4 is so completely
1123 broken with respect to FP registers that it is hardly worth thinking
1124 of it as something to strive for compatibility with.
1125 The version of x86/svr4 SDB I have at the moment does (partially)
1126 seem to believe that DWARF register number 11 is associated with
1127 the x86 register %st(0), but that's about all. Higher DWARF
1128 register numbers don't seem to be associated with anything in
1129 particular, and even for DWARF regno 11, SDB only seems to under-
1130 stand that it should say that a variable lives in %st(0) (when
1131 asked via an `=' command) if we said it was in DWARF regno 11,
1132 but SDB still prints garbage when asked for the value of the
1133 variable in question (via a `/' command).
1134 (Also note that the labels SDB prints for various FP stack regs
1135 when doing an `x' command are all wrong.)
1136 Note that these problems generally don't affect the native SVR4
1137 C compiler because it doesn't allow the use of -O with -g and
1138 because when it is *not* optimizing, it allocates a memory
1139 location for each floating-point variable, and the memory
1140 location is what gets described in the DWARF AT_location
1141 attribute for the variable in question.
1142 Regardless of the severe mental illness of the x86/svr4 SDB, we
1143 do something sensible here and we use the following DWARF
1144 register numbers. Note that these are all stack-top-relative
1146 11 for %st(0) (gcc regno = 8)
1147 12 for %st(1) (gcc regno = 9)
1148 13 for %st(2) (gcc regno = 10)
1149 14 for %st(3) (gcc regno = 11)
1150 15 for %st(4) (gcc regno = 12)
1151 16 for %st(5) (gcc regno = 13)
1152 17 for %st(6) (gcc regno = 14)
1153 18 for %st(7) (gcc regno = 15)
1155 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1157 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1158 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1159 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1160 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1161 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1162 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1163 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1166 /* Test and compare insns in i386.md store the information needed to
1167 generate branch and scc insns here. */
1169 rtx ix86_compare_op0
= NULL_RTX
;
1170 rtx ix86_compare_op1
= NULL_RTX
;
1171 rtx ix86_compare_emitted
= NULL_RTX
;
1173 /* Size of the register save area. */
1174 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1176 /* Define the structure for the machine field in struct function. */
/* NOTE(review): lossy extraction -- the struct's opening "{", at least
   one intervening field (original lines 1181-1182 absent) and the
   closing "};" are missing.  Left byte-identical.  */
1178 struct stack_local_entry
GTY(())
1180 unsigned short mode
;
1183 struct stack_local_entry
*next
;
/* NOTE(review): lossy extraction -- most of the stack-layout diagram in
   this comment, the "struct ix86_frame {" opening (original line ~1208),
   several fields and the closing "};" are missing.  The surviving fields
   are left byte-identical.  */
1186 /* Structure describing stack frame layout.
1187 Stack grows downward:
1193 saved frame pointer if frame_pointer_needed
1194 <- HARD_FRAME_POINTER
1199 [va_arg registers] (
1200 > to_allocate <- FRAME_POINTER
1210 HOST_WIDE_INT frame
;
1212 int outgoing_arguments_size
;
1215 HOST_WIDE_INT to_allocate
;
1216 /* The offsets relative to ARG_POINTER. */
1217 HOST_WIDE_INT frame_pointer_offset
;
1218 HOST_WIDE_INT hard_frame_pointer_offset
;
1219 HOST_WIDE_INT stack_pointer_offset
;
1221 /* When save_regs_using_mov is set, emit prologue using
1222 move instead of push instructions. */
1223 bool save_regs_using_mov
;
1226 /* Code model option. */
1227 enum cmodel ix86_cmodel
;
1229 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1231 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1233 /* Which unit we are generating floating point math for. */
1234 enum fpmath_unit ix86_fpmath
;
1236 /* Which cpu are we scheduling for. */
1237 enum processor_type ix86_tune
;
1238 /* Which instruction set architecture to use. */
1239 enum processor_type ix86_arch
;
1241 /* true if sse prefetch instruction is not NOOP. */
1242 int x86_prefetch_sse
;
1244 /* true if cmpxchg16b is supported. */
1247 /* ix86_regparm_string as a number */
1248 static int ix86_regparm
;
1250 /* -mstackrealign option */
1251 extern int ix86_force_align_arg_pointer
;
1252 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1254 /* Preferred alignment for stack boundary in bits. */
1255 unsigned int ix86_preferred_stack_boundary
;
1257 /* Values 1-5: see jump.c */
1258 int ix86_branch_cost
;
1260 /* Variables which are this size or smaller are put in the data/bss
1261 or ldata/lbss sections. */
1263 int ix86_section_threshold
= 65536;
1265 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1266 char internal_label_prefix
[16];
1267 int internal_label_prefix_len
;
1269 static bool ix86_handle_option (size_t, const char *, int);
1270 static void output_pic_addr_const (FILE *, rtx
, int);
1271 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1273 static const char *get_some_local_dynamic_name (void);
1274 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1275 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1276 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1278 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1279 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1281 static rtx
get_thread_pointer (int);
1282 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1283 static void get_pc_thunk_name (char [32], unsigned int);
1284 static rtx
gen_push (rtx
);
1285 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1286 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1287 static struct machine_function
* ix86_init_machine_status (void);
1288 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1289 static int ix86_nsaved_regs (void);
1290 static void ix86_emit_save_regs (void);
1291 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1292 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1293 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1294 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1295 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1296 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1297 static int ix86_issue_rate (void);
1298 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1299 static int ia32_multipass_dfa_lookahead (void);
1300 static void ix86_init_mmx_sse_builtins (void);
1301 static rtx
x86_this_parameter (tree
);
1302 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1303 HOST_WIDE_INT
, tree
);
1304 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1305 static void x86_file_start (void);
1306 static void ix86_reorg (void);
1307 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1308 static tree
ix86_build_builtin_va_list (void);
1309 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1311 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1312 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1313 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1315 static int ix86_address_cost (rtx
);
1316 static bool ix86_cannot_force_const_mem (rtx
);
1317 static rtx
ix86_delegitimize_address (rtx
);
1319 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1321 struct builtin_description
;
1322 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1324 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1326 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1327 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1328 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1329 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1330 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1331 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1332 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1333 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1334 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1335 static int ix86_fp_comparison_cost (enum rtx_code code
);
1336 static unsigned int ix86_select_alt_pic_regnum (void);
1337 static int ix86_save_reg (unsigned int, int);
1338 static void ix86_compute_frame_layout (struct ix86_frame
*);
1339 static int ix86_comp_type_attributes (tree
, tree
);
1340 static int ix86_function_regparm (tree
, tree
);
1341 const struct attribute_spec ix86_attribute_table
[];
1342 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1343 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1344 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1345 static bool contains_128bit_aligned_vector_p (tree
);
1346 static rtx
ix86_struct_value_rtx (tree
, int);
1347 static bool ix86_ms_bitfield_layout_p (tree
);
1348 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1349 static int extended_reg_mentioned_1 (rtx
*, void *);
1350 static bool ix86_rtx_costs (rtx
, int, int, int *);
1351 static int min_insn_size (rtx
);
1352 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1353 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1354 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1356 static void ix86_init_builtins (void);
1357 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1358 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
);
1359 static const char *ix86_mangle_fundamental_type (tree
);
1360 static tree
ix86_stack_protect_fail (void);
1361 static rtx
ix86_internal_arg_pointer (void);
1362 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1364 /* This function is only used on Solaris. */
1365 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
/* NOTE(review): lossy extraction -- the enum's opening "{", most of its
   enumerators (only three survive; the name table below lists eleven
   classes), its closing "};", and the closing "};" of the name array are
   missing.  Left byte-identical.  */
1368 /* Register class used for passing given 64bit part of the argument.
1369 These represent classes as documented by the PS ABI, with the exception
1370 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1371 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1374 whenever possible (upper half does contain padding).
1376 enum x86_64_reg_class
1379 X86_64_INTEGER_CLASS
,
1380 X86_64_INTEGERSI_CLASS
,
1387 X86_64_COMPLEX_X87_CLASS
,
1390 static const char * const x86_64_reg_class_name
[] = {
1391 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1392 "sseup", "x87", "x87up", "cplx87", "no"
1395 #define MAX_CLASSES 4
1397 /* Table of constants used by fldpi, fldln2, etc.... */
1398 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1399 static bool ext_80387_constants_init
= 0;
1400 static void init_ext_80387_constants (void);
1401 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1402 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1403 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1404 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1405 unsigned HOST_WIDE_INT align
)
1408 /* Initialize the GCC target structure. */
1409 #undef TARGET_ATTRIBUTE_TABLE
1410 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1411 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1412 # undef TARGET_MERGE_DECL_ATTRIBUTES
1413 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1416 #undef TARGET_COMP_TYPE_ATTRIBUTES
1417 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1419 #undef TARGET_INIT_BUILTINS
1420 #define TARGET_INIT_BUILTINS ix86_init_builtins
1421 #undef TARGET_EXPAND_BUILTIN
1422 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1423 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1424 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1426 #undef TARGET_ASM_FUNCTION_EPILOGUE
1427 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1429 #undef TARGET_ENCODE_SECTION_INFO
1430 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1431 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1433 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1436 #undef TARGET_ASM_OPEN_PAREN
1437 #define TARGET_ASM_OPEN_PAREN ""
1438 #undef TARGET_ASM_CLOSE_PAREN
1439 #define TARGET_ASM_CLOSE_PAREN ""
1441 #undef TARGET_ASM_ALIGNED_HI_OP
1442 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1443 #undef TARGET_ASM_ALIGNED_SI_OP
1444 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1446 #undef TARGET_ASM_ALIGNED_DI_OP
1447 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1450 #undef TARGET_ASM_UNALIGNED_HI_OP
1451 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1452 #undef TARGET_ASM_UNALIGNED_SI_OP
1453 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1454 #undef TARGET_ASM_UNALIGNED_DI_OP
1455 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1457 #undef TARGET_SCHED_ADJUST_COST
1458 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1459 #undef TARGET_SCHED_ISSUE_RATE
1460 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1461 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1462 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1463 ia32_multipass_dfa_lookahead
1465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1466 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1469 #undef TARGET_HAVE_TLS
1470 #define TARGET_HAVE_TLS true
1472 #undef TARGET_CANNOT_FORCE_CONST_MEM
1473 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1474 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1475 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1477 #undef TARGET_DELEGITIMIZE_ADDRESS
1478 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1480 #undef TARGET_MS_BITFIELD_LAYOUT_P
1481 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1484 #undef TARGET_BINDS_LOCAL_P
1485 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1488 #undef TARGET_ASM_OUTPUT_MI_THUNK
1489 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1490 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1491 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1493 #undef TARGET_ASM_FILE_START
1494 #define TARGET_ASM_FILE_START x86_file_start
1496 #undef TARGET_DEFAULT_TARGET_FLAGS
1497 #define TARGET_DEFAULT_TARGET_FLAGS \
1499 | TARGET_64BIT_DEFAULT \
1500 | TARGET_SUBTARGET_DEFAULT \
1501 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1503 #undef TARGET_HANDLE_OPTION
1504 #define TARGET_HANDLE_OPTION ix86_handle_option
1506 #undef TARGET_RTX_COSTS
1507 #define TARGET_RTX_COSTS ix86_rtx_costs
1508 #undef TARGET_ADDRESS_COST
1509 #define TARGET_ADDRESS_COST ix86_address_cost
1511 #undef TARGET_FIXED_CONDITION_CODE_REGS
/* Target hook vector setup: each TARGET_* macro is redefined to the i386
   implementation before the TARGET_INITIALIZER expansion below collects
   them into `targetm'.  NOTE(review): this chunk is an extraction
   artifact -- statements are split across physical lines and the
   original file's line numbers are fused into the text; code is left
   byte-identical.  */
1512 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1513 #undef TARGET_CC_MODES_COMPATIBLE
1514 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1516 #undef TARGET_MACHINE_DEPENDENT_REORG
1517 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1519 #undef TARGET_BUILD_BUILTIN_VA_LIST
1520 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1522 #undef TARGET_MD_ASM_CLOBBERS
1523 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1525 #undef TARGET_PROMOTE_PROTOTYPES
1526 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1527 #undef TARGET_STRUCT_VALUE_RTX
1528 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1529 #undef TARGET_SETUP_INCOMING_VARARGS
1530 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1531 #undef TARGET_MUST_PASS_IN_STACK
1532 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1533 #undef TARGET_PASS_BY_REFERENCE
1534 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1535 #undef TARGET_INTERNAL_ARG_POINTER
1536 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1537 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1538 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1540 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1541 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1543 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1544 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1546 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1547 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1550 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1551 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Subtargets may inject their own attribute-insertion hook.  */
1554 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1555 #undef TARGET_INSERT_ATTRIBUTES
1556 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1559 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1560 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1562 #undef TARGET_STACK_PROTECT_FAIL
1563 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1565 #undef TARGET_FUNCTION_VALUE
1566 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The one global target-hook vector, built from the macros above.  */
1568 struct gcc_target targetm
= TARGET_INITIALIZER
;
1571 /* The svr4 ABI for the i386 says that records and unions are returned
1573 #ifndef DEFAULT_PCC_STRUCT_RETURN
1574 #define DEFAULT_PCC_STRUCT_RETURN 1
1577 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the enclosing `switch (code)' and its `case OPT_m*'
   labels are missing from this extracted fragment; only the flag
   updates survive.  Each pair below clears a mask from target_flags
   and records it as explicitly set in target_flags_explicit --
   presumably these run when the corresponding -m option is negated,
   so dependent ISA extensions are turned off too (TODO confirm against
   the full source).  Code left byte-identical.  */
1580 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
/* 3dNOW! Athlon extensions depend on base 3dNOW!.  */
1587 target_flags
&= ~MASK_3DNOW_A
;
1588 target_flags_explicit
|= MASK_3DNOW_A
;
1595 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1596 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
/* SSE2/SSE3 are cleared together when SSE is disabled.  */
1603 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
);
1604 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
;
1611 target_flags
&= ~MASK_SSE3
;
1612 target_flags_explicit
|= MASK_SSE3
;
1621 /* Sometimes certain combinations of command options do not make
1622 sense on a particular target machine. You can define a macro
1623 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1624 defined, is executed once just after all the command options have
1627 Don't use this macro to turn on various extra optimizations for
1628 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* NOTE(review): this function is an extraction artifact -- many
   interior lines (braces, some `if' heads, struct/enum keywords) are
   missing and statements are split across physical lines.  Comments
   below mark the surviving sections; code is left byte-identical.  */
1631 override_options (void)
1634 int ix86_tune_defaulted
= 0;
1636 /* Comes from final.c -- no real reason to change it. */
1637 #define MAX_CODE_ALIGN 16
/* Per-processor tuning record: costs plus default alignments.  */
1641 const struct processor_costs
*cost
; /* Processor costs */
1642 const int target_enable
; /* Target flags to enable. */
1643 const int target_disable
; /* Target flags to disable. */
1644 const int align_loop
; /* Default alignments. */
1645 const int align_loop_max_skip
;
1646 const int align_jump
;
1647 const int align_jump_max_skip
;
1648 const int align_func
;
/* One entry per PROCESSOR_* value; indexed by ix86_tune below.  */
1650 const processor_target_table
[PROCESSOR_max
] =
1652 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1653 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1654 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1655 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1656 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1657 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1658 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1659 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1660 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1661 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1662 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1663 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1664 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16}
1667 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
/* Alias table mapping -march=/-mtune= names to processors + PTA_* ISA
   capability flags.  */
1670 const char *const name
; /* processor name or nickname. */
1671 const enum processor_type processor
;
1672 const enum pta_flags
1678 PTA_PREFETCH_SSE
= 16,
1686 const processor_alias_table
[] =
1688 {"i386", PROCESSOR_I386
, 0},
1689 {"i486", PROCESSOR_I486
, 0},
1690 {"i586", PROCESSOR_PENTIUM
, 0},
1691 {"pentium", PROCESSOR_PENTIUM
, 0},
1692 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1693 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1694 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1695 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1696 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1697 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1698 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1699 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1700 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1701 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1702 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1703 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1704 | PTA_MMX
| PTA_PREFETCH_SSE
},
1705 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1706 | PTA_MMX
| PTA_PREFETCH_SSE
},
1707 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1708 | PTA_MMX
| PTA_PREFETCH_SSE
},
1709 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1710 | PTA_MMX
| PTA_PREFETCH_SSE
| PTA_CX16
},
1711 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1712 | PTA_64BIT
| PTA_MMX
1713 | PTA_PREFETCH_SSE
| PTA_CX16
},
1714 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1716 {"k6", PROCESSOR_K6
, PTA_MMX
},
1717 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1718 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1719 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1721 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1722 | PTA_3DNOW
| PTA_3DNOW_A
},
1723 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1724 | PTA_3DNOW_A
| PTA_SSE
},
1725 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1726 | PTA_3DNOW_A
| PTA_SSE
},
1727 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1728 | PTA_3DNOW_A
| PTA_SSE
},
1729 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1730 | PTA_SSE
| PTA_SSE2
},
1731 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1732 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1733 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1734 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1735 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1736 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1737 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1738 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1739 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1740 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1743 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1745 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1746 SUBTARGET_OVERRIDE_OPTIONS
;
1749 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1750 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1753 /* -fPIC is the default for x86_64. */
1754 if (TARGET_MACHO
&& TARGET_64BIT
)
1757 /* Set the default values for switches whose default depends on TARGET_64BIT
1758 in case they weren't overwritten by command line options. */
1761 /* Mach-O doesn't support omitting the frame pointer for now. */
1762 if (flag_omit_frame_pointer
== 2)
1763 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1764 if (flag_asynchronous_unwind_tables
== 2)
1765 flag_asynchronous_unwind_tables
= 1;
1766 if (flag_pcc_struct_return
== 2)
1767 flag_pcc_struct_return
= 0;
1771 if (flag_omit_frame_pointer
== 2)
1772 flag_omit_frame_pointer
= 0;
1773 if (flag_asynchronous_unwind_tables
== 2)
1774 flag_asynchronous_unwind_tables
= 0;
1775 if (flag_pcc_struct_return
== 2)
1776 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
/* Section: resolve -mtune= / -march= strings, defaulting tune from
   arch and mapping "generic"/"native" onto generic32/generic64.  */
1779 /* Need to check -mtune=generic first. */
1780 if (ix86_tune_string
)
1782 if (!strcmp (ix86_tune_string
, "generic")
1783 || !strcmp (ix86_tune_string
, "i686")
1784 /* As special support for cross compilers we read -mtune=native
1785 as -mtune=generic. With native compilers we won't see the
1786 -mtune=native, as it was changed by the driver. */
1787 || !strcmp (ix86_tune_string
, "native"))
1790 ix86_tune_string
= "generic64";
1792 ix86_tune_string
= "generic32";
1794 else if (!strncmp (ix86_tune_string
, "generic", 7))
1795 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1799 if (ix86_arch_string
)
1800 ix86_tune_string
= ix86_arch_string
;
1801 if (!ix86_tune_string
)
1803 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1804 ix86_tune_defaulted
= 1;
1807 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1808 need to use a sensible tune option. */
1809 if (!strcmp (ix86_tune_string
, "generic")
1810 || !strcmp (ix86_tune_string
, "x86-64")
1811 || !strcmp (ix86_tune_string
, "i686"))
1814 ix86_tune_string
= "generic64";
1816 ix86_tune_string
= "generic32";
/* Section: parse -mstringop-strategy= into stringop_alg.  */
1819 if (ix86_stringop_string
)
1821 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1822 stringop_alg
= rep_prefix_1_byte
;
1823 else if (!strcmp (ix86_stringop_string
, "libcall"))
1824 stringop_alg
= libcall
;
1825 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
1826 stringop_alg
= rep_prefix_4_byte
;
1827 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
1828 stringop_alg
= rep_prefix_8_byte
;
1829 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
1830 stringop_alg
= loop_1_byte
;
1831 else if (!strcmp (ix86_stringop_string
, "loop"))
1832 stringop_alg
= loop
;
1833 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
1834 stringop_alg
= unrolled_loop
;
1836 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
1838 if (!strcmp (ix86_tune_string
, "x86-64"))
1839 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1840 "-mtune=generic instead as appropriate.");
1842 if (!ix86_arch_string
)
1843 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1844 if (!strcmp (ix86_arch_string
, "generic"))
1845 error ("generic CPU can be used only for -mtune= switch");
1846 if (!strncmp (ix86_arch_string
, "generic", 7))
1847 error ("bad value (%s) for -march= switch", ix86_arch_string
);
/* Section: parse -mcmodel= (PIC-sensitive) into ix86_cmodel.  */
1849 if (ix86_cmodel_string
!= 0)
1851 if (!strcmp (ix86_cmodel_string
, "small"))
1852 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1853 else if (!strcmp (ix86_cmodel_string
, "medium"))
1854 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1856 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1857 else if (!strcmp (ix86_cmodel_string
, "32"))
1858 ix86_cmodel
= CM_32
;
1859 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1860 ix86_cmodel
= CM_KERNEL
;
1861 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1862 ix86_cmodel
= CM_LARGE
;
1864 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1868 ix86_cmodel
= CM_32
;
1870 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1872 if (ix86_asm_string
!= 0)
1875 && !strcmp (ix86_asm_string
, "intel"))
1876 ix86_asm_dialect
= ASM_INTEL
;
1877 else if (!strcmp (ix86_asm_string
, "att"))
1878 ix86_asm_dialect
= ASM_ATT
;
1880 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1882 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1883 error ("code model %qs not supported in the %s bit mode",
1884 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1885 if (ix86_cmodel
== CM_LARGE
)
1886 sorry ("code model %<large%> not supported yet");
1887 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1888 sorry ("%i-bit mode not compiled in",
1889 (target_flags
& MASK_64BIT
) ? 64 : 32);
/* Section: look up -march= in the alias table; seed ix86_arch/ix86_tune
   and enable ISA masks the CPU implies, unless explicitly overridden.  */
1891 for (i
= 0; i
< pta_size
; i
++)
1892 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1894 ix86_arch
= processor_alias_table
[i
].processor
;
1895 /* Default cpu tuning to the architecture. */
1896 ix86_tune
= ix86_arch
;
1897 if (processor_alias_table
[i
].flags
& PTA_MMX
1898 && !(target_flags_explicit
& MASK_MMX
))
1899 target_flags
|= MASK_MMX
;
1900 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1901 && !(target_flags_explicit
& MASK_3DNOW
))
1902 target_flags
|= MASK_3DNOW
;
1903 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1904 && !(target_flags_explicit
& MASK_3DNOW_A
))
1905 target_flags
|= MASK_3DNOW_A
;
1906 if (processor_alias_table
[i
].flags
& PTA_SSE
1907 && !(target_flags_explicit
& MASK_SSE
))
1908 target_flags
|= MASK_SSE
;
1909 if (processor_alias_table
[i
].flags
& PTA_SSE2
1910 && !(target_flags_explicit
& MASK_SSE2
))
1911 target_flags
|= MASK_SSE2
;
1912 if (processor_alias_table
[i
].flags
& PTA_SSE3
1913 && !(target_flags_explicit
& MASK_SSE3
))
1914 target_flags
|= MASK_SSE3
;
1915 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1916 && !(target_flags_explicit
& MASK_SSSE3
))
1917 target_flags
|= MASK_SSSE3
;
1918 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1919 x86_prefetch_sse
= true;
1920 if (processor_alias_table
[i
].flags
& PTA_CX16
)
1921 x86_cmpxchg16b
= true;
1922 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1923 error ("CPU you selected does not support x86-64 "
1929 error ("bad value (%s) for -march= switch", ix86_arch_string
);
/* Section: look up -mtune= in the alias table; fall back to "x86-64"
   when a defaulted tune CPU lacks 64-bit support.  */
1931 for (i
= 0; i
< pta_size
; i
++)
1932 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1934 ix86_tune
= processor_alias_table
[i
].processor
;
1935 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1937 if (ix86_tune_defaulted
)
1939 ix86_tune_string
= "x86-64";
1940 for (i
= 0; i
< pta_size
; i
++)
1941 if (! strcmp (ix86_tune_string
,
1942 processor_alias_table
[i
].name
))
1944 ix86_tune
= processor_alias_table
[i
].processor
;
1947 error ("CPU you selected does not support x86-64 "
1950 /* Intel CPUs have always interpreted SSE prefetch instructions as
1951 NOPs; so, we can enable SSE prefetch instructions even when
1952 -mtune (rather than -march) points us to a processor that has them.
1953 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1954 higher processors. */
1955 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1956 x86_prefetch_sse
= true;
1960 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
/* Select cost table: size_cost when optimizing for size, otherwise
   the tuned processor's table; apply its enable/disable flags.  */
1963 ix86_cost
= &size_cost
;
1965 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1966 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1967 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1969 /* Arrange to set up i386_stack_locals for all functions. */
1970 init_machine_status
= ix86_init_machine_status
;
1972 /* Validate -mregparm= value. */
1973 if (ix86_regparm_string
)
1975 i
= atoi (ix86_regparm_string
);
1976 if (i
< 0 || i
> REGPARM_MAX
)
1977 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1983 ix86_regparm
= REGPARM_MAX
;
1985 /* If the user has provided any of the -malign-* options,
1986 warn and use that value only if -falign-* is not set.
1987 Remove this code in GCC 3.2 or later. */
1988 if (ix86_align_loops_string
)
1990 warning (0, "-malign-loops is obsolete, use -falign-loops");
1991 if (align_loops
== 0)
1993 i
= atoi (ix86_align_loops_string
);
1994 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1995 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1997 align_loops
= 1 << i
;
2001 if (ix86_align_jumps_string
)
2003 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2004 if (align_jumps
== 0)
2006 i
= atoi (ix86_align_jumps_string
);
2007 if (i
< 0 || i
> MAX_CODE_ALIGN
)
/* NOTE(review): copy-paste defect -- this is the -malign-jumps path but
   the message says "-malign-loops"; same issue below for
   -malign-functions.  Left as-is (changing a diagnostic string is a
   behavior change this fragment cannot safely verify).  */
2008 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2010 align_jumps
= 1 << i
;
2014 if (ix86_align_funcs_string
)
2016 warning (0, "-malign-functions is obsolete, use -falign-functions");
2017 if (align_functions
== 0)
2019 i
= atoi (ix86_align_funcs_string
);
2020 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2021 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2023 align_functions
= 1 << i
;
2027 /* Default align_* from the processor table. */
2028 if (align_loops
== 0)
2030 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2031 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2033 if (align_jumps
== 0)
2035 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2036 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2038 if (align_functions
== 0)
2040 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2043 /* Validate -mbranch-cost= value, or provide default. */
2044 ix86_branch_cost
= ix86_cost
->branch_cost
;
2045 if (ix86_branch_cost_string
)
2047 i
= atoi (ix86_branch_cost_string
);
2049 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2051 ix86_branch_cost
= i
;
2053 if (ix86_section_threshold_string
)
2055 i
= atoi (ix86_section_threshold_string
);
2057 error ("-mlarge-data-threshold=%d is negative", i
);
2059 ix86_section_threshold
= i
;
/* Section: parse -mtls-dialect=.  */
2062 if (ix86_tls_dialect_string
)
2064 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2065 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2066 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2067 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2068 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2069 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2071 error ("bad value (%s) for -mtls-dialect= switch",
2072 ix86_tls_dialect_string
);
2075 /* Keep nonleaf frame pointers. */
2076 if (flag_omit_frame_pointer
)
2077 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2078 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2079 flag_omit_frame_pointer
= 1;
2081 /* If we're doing fast math, we don't care about comparison order
2082 wrt NaNs. This lets us use a shorter comparison sequence. */
2083 if (flag_finite_math_only
)
2084 target_flags
&= ~MASK_IEEE_FP
;
2086 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2087 since the insns won't need emulation. */
2088 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
2089 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2091 /* Likewise, if the target doesn't have a 387, or we've specified
2092 software floating point, don't use 387 inline intrinsics. */
2094 target_flags
|= MASK_NO_FANCY_MATH_387
;
/* Section: ISA implication chain -- each higher SSE level pulls in the
   one below it (guarding `if's lost in extraction).  */
2096 /* Turn on SSE3 builtins for -mssse3. */
2098 target_flags
|= MASK_SSE3
;
2100 /* Turn on SSE2 builtins for -msse3. */
2102 target_flags
|= MASK_SSE2
;
2104 /* Turn on SSE builtins for -msse2. */
2106 target_flags
|= MASK_SSE
;
2108 /* Turn on MMX builtins for -msse. */
2111 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2112 x86_prefetch_sse
= true;
2115 /* Turn on MMX builtins for 3Dnow. */
2117 target_flags
|= MASK_MMX
;
2121 if (TARGET_ALIGN_DOUBLE
)
2122 error ("-malign-double makes no sense in the 64bit mode");
2124 error ("-mrtd calling convention not supported in the 64bit mode");
2126 /* Enable by default the SSE and MMX builtins. Do allow the user to
2127 explicitly disable any of these. In particular, disabling SSE and
2128 MMX for kernel code is extremely useful. */
2130 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2131 & ~target_flags_explicit
);
2135 /* i386 ABI does not specify red zone. It still makes sense to use it
2136 when programmer takes care to stack from being destroyed. */
2137 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2138 target_flags
|= MASK_NO_RED_ZONE
;
2141 /* Validate -mpreferred-stack-boundary= value, or provide default.
2142 The default of 128 bits is for Pentium III's SSE __m128. We can't
2143 change it because of optimize_size. Otherwise, we can't mix object
2144 files compiled with -Os and -On. */
2145 ix86_preferred_stack_boundary
= 128;
2146 if (ix86_preferred_stack_boundary_string
)
2148 i
= atoi (ix86_preferred_stack_boundary_string
);
2149 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2150 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2151 TARGET_64BIT
? 4 : 2);
2153 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2156 /* Accept -msseregparm only if at least SSE support is enabled. */
2157 if (TARGET_SSEREGPARM
2159 error ("-msseregparm used without SSE enabled");
/* Section: parse -mfpmath= (387, sse, or both), degrading gracefully
   when the requested unit is disabled.  */
2161 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2163 if (ix86_fpmath_string
!= 0)
2165 if (! strcmp (ix86_fpmath_string
, "387"))
2166 ix86_fpmath
= FPMATH_387
;
2167 else if (! strcmp (ix86_fpmath_string
, "sse"))
2171 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2172 ix86_fpmath
= FPMATH_387
;
2175 ix86_fpmath
= FPMATH_SSE
;
2177 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2178 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2182 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2183 ix86_fpmath
= FPMATH_387
;
2185 else if (!TARGET_80387
)
2187 warning (0, "387 instruction set disabled, using SSE arithmetics");
2188 ix86_fpmath
= FPMATH_SSE
;
2191 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2194 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2197 /* If the i387 is disabled, then do not return values in it. */
2199 target_flags
&= ~MASK_FLOAT_RETURNS
;
2201 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2202 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2204 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2206 /* ??? Unwind info is not correct around the CFG unless either a frame
2207 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2208 unwind info generation to be aware of the CFG and propagating states
2210 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2211 || flag_exceptions
|| flag_non_call_exceptions
)
2212 && flag_omit_frame_pointer
2213 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2215 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2216 warning (0, "unwind tables currently require either a frame pointer "
2217 "or -maccumulate-outgoing-args for correctness")
;
2218 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2221 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2224 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2225 p
= strchr (internal_label_prefix
, 'X');
2226 internal_label_prefix_len
= p
- internal_label_prefix
;
2230 /* When scheduling description is not available, disable scheduler pass
2231 so it won't slow down the compilation and make x87 code slower. */
2232 if (!TARGET_SCHEDULE
)
2233 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2235 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2236 set_param_value ("simultaneous-prefetches",
2237 ix86_cost
->simultaneous_prefetches
);
2238 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2239 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2242 /* switch to the appropriate section for output of DECL.
2243 DECL is either a `VAR_DECL' node or a constant of some sort.
2244 RELOC indicates whether forming the initial value of DECL requires
2245 link-time relocations. */
/* NOTE(review): extraction artifact -- several `case'/`break' lines
   and the function's return type are missing; code left byte-identical.
   For the x86-64 medium code model, large data objects go into
   ".ldata.*" sections; everything else falls through to the default
   ELF section selection.  */
2248 x86_64_elf_select_section (tree decl
, int reloc
,
2249 unsigned HOST_WIDE_INT align
)
2251 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2252 && ix86_in_large_data_p (decl
))
2254 const char *sname
= NULL
;
2255 unsigned int flags
= SECTION_WRITE
;
2256 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2261 case SECCAT_DATA_REL
:
2262 sname
= ".ldata.rel";
2264 case SECCAT_DATA_REL_LOCAL
:
2265 sname
= ".ldata.rel.local";
2267 case SECCAT_DATA_REL_RO
:
2268 sname
= ".ldata.rel.ro";
2270 case SECCAT_DATA_REL_RO_LOCAL
:
2271 sname
= ".ldata.rel.ro.local";
2275 flags
|= SECTION_BSS
;
2278 case SECCAT_RODATA_MERGE_STR
:
2279 case SECCAT_RODATA_MERGE_STR_INIT
:
2280 case SECCAT_RODATA_MERGE_CONST
:
2284 case SECCAT_SRODATA
:
2291 /* We don't split these for medium model. Place them into
2292 default sections and hope for best. */
2297 /* We might get called with string constants, but get_named_section
2298 doesn't like them as they are not DECLs. Also, we need to set
2299 flags in that case. */
2301 return get_section (sname
, flags
, NULL
);
2302 return get_named_section (decl
, sname
, reloc
);
2305 return default_elf_select_section (decl
, reloc
, align
);
2308 /* Build up a unique section name, expressed as a
2309 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2310 RELOC indicates whether the initial value of EXP requires
2311 link-time relocations. */
/* NOTE(review): extraction artifact -- `break's, some `case' labels,
   and local declarations (plen/nlen/name/string) are missing from this
   fragment; code left byte-identical.  Mirrors
   x86_64_elf_select_section: medium-model large data gets an
   ".ldata."/".lbss."/".lrodata." (or .gnu.linkonce.l*) prefixed unique
   section name built from the decl's assembler name.  */
2314 x86_64_elf_unique_section (tree decl
, int reloc
)
2316 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2317 && ix86_in_large_data_p (decl
))
2319 const char *prefix
= NULL
;
2320 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2321 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2323 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2326 case SECCAT_DATA_REL
:
2327 case SECCAT_DATA_REL_LOCAL
:
2328 case SECCAT_DATA_REL_RO
:
2329 case SECCAT_DATA_REL_RO_LOCAL
:
2330 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2333 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2336 case SECCAT_RODATA_MERGE_STR
:
2337 case SECCAT_RODATA_MERGE_STR_INIT
:
2338 case SECCAT_RODATA_MERGE_CONST
:
2339 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2341 case SECCAT_SRODATA
:
2348 /* We don't split these for medium model. Place them into
2349 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer.  */
2357 plen
= strlen (prefix
);
2359 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2360 name
= targetm
.strip_name_encoding (name
);
2361 nlen
= strlen (name
);
2363 string
= alloca (nlen
+ plen
+ 1);
2364 memcpy (string
, prefix
, plen
);
2365 memcpy (string
+ plen
, name
, nlen
+ 1);
2367 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2371 default_unique_section (decl
, reloc
);
2374 #ifdef COMMON_ASM_OP
2375 /* This says how to output assembler code to declare an
2376 uninitialized external linkage data object.
2378 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium code model, object larger than the
   -mlarge-data-threshold) or the normal COMMON_ASM_OP, followed by
   "name,size,alignment-in-bytes".  NOTE(review): the alignment
   parameter line is missing from this extracted fragment; code left
   byte-identical.  */
2381 x86_elf_aligned_common (FILE *file
,
2382 const char *name
, unsigned HOST_WIDE_INT size
,
2385 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2386 && size
> (unsigned int)ix86_section_threshold
)
2387 fprintf (file
, ".largecomm\t");
2389 fprintf (file
, "%s", COMMON_ASM_OP
);
2390 assemble_name (file
, name
);
2391 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2392 size
, align
/ BITS_PER_UNIT
);
2395 /* Utility function for targets to use in implementing
2396 ASM_OUTPUT_ALIGNED_BSS. */
/* Places the object in ".lbss" for medium-model large data, otherwise
   in the regular bss section, then emits alignment, the object label
   (via ASM_DECLARE_OBJECT_NAME when available), and a skip of SIZE
   bytes (minimum 1 so the label refers to real storage).  */
2399 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2400 const char *name
, unsigned HOST_WIDE_INT size
,
2403 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2404 && size
> (unsigned int)ix86_section_threshold
)
2405 switch_to_section (get_named_section (decl
, ".lbss", 0));
2407 switch_to_section (bss_section
);
2408 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2409 #ifdef ASM_DECLARE_OBJECT_NAME
2410 last_assemble_variable_decl
= decl
;
2411 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2413 /* Standard thing is just output label for the object. */
2414 ASM_OUTPUT_LABEL (file
, name
);
2415 #endif /* ASM_DECLARE_OBJECT_NAME */
2416 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* Per -O level option tweaks, run before override_options.
   NOTE(review): the guarding `if (level > 1)' style conditions appear
   to be lost in extraction -- TODO confirm against the full source;
   code left byte-identical.  */
2420 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2422 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2423 make the problem with not enough registers even worse. */
2424 #ifdef INSN_SCHEDULING
2426 flag_schedule_insns
= 0;
2430 /* The Darwin libraries never set errno, so we might as well
2431 avoid calling them when that's the only reason we would. */
2432 flag_errno_math
= 0;
2434 /* The default values of these switches depend on the TARGET_64BIT
2435 that is not known at this moment. Mark these values with 2 and
2436 let user the to override these. In case there is no command line option
2437 specifying them, we will set the defaults in override_options. */
2439 flag_omit_frame_pointer
= 2;
2440 flag_pcc_struct_return
= 2;
2441 flag_asynchronous_unwind_tables
= 2;
2442 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2443 SUBTARGET_OPTIMIZATION_OPTIONS
;
2447 /* Table of valid machine attributes. */
2448 const struct attribute_spec ix86_attribute_table
[] =
2450 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2451 /* Stdcall attribute says callee is responsible for popping arguments
2452 if they are not variable. */
2453 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2454 /* Fastcall attribute says callee is responsible for popping arguments
2455 if they are not variable. */
2456 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2457 /* Cdecl attribute says the callee is a normal C declaration */
2458 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2459 /* Regparm attribute specifies how many integer arguments are to be
2460 passed in registers. */
2461 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2462 /* Sseregparm attribute says we are using x86_64 calling conventions
2463 for FP arguments. */
2464 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2465 /* force_align_arg_pointer says this function realigns the stack at entry. */
2466 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2467 false, true, true, ix86_handle_cconv_attribute
},
2468 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2469 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2470 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2471 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2473 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2474 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2475 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2476 SUBTARGET_ATTRIBUTE_TABLE
,
/* Sentinel entry terminating the table.  */
2478 { NULL
, 0, 0, false, false, false, NULL
}
2481 /* Decide whether we can make a sibling call to a function. DECL is the
2482 declaration of the function being targeted by the call and EXP is the
2483 CALL_EXPR representing the call. */
/* NOTE(review): extraction artifact -- local declarations (a, b, func,
   type), `return false'/`return true' lines, and some braces are
   missing; code left byte-identical.  Rejections visible here: PIC
   calls needing %ebx, mismatched return-value registers, indirect
   calls with >= 3 regparm args, dllimport targets, and forced stack
   realignment.  */
2486 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2491 /* If we are generating position-independent code, we cannot sibcall
2492 optimize any indirect call, or a direct call to a global function,
2493 as the PLT requires %ebx be live. */
2494 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2501 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2502 if (POINTER_TYPE_P (func
))
2503 func
= TREE_TYPE (func
);
2506 /* Check that the return value locations are the same. Like
2507 if we are returning floats on the 80387 register stack, we cannot
2508 make a sibcall from a function that doesn't return a float to a
2509 function that does or, conversely, from a function that does return
2510 a float to a function that doesn't; the necessary stack adjustment
2511 would not be executed. This is also the place we notice
2512 differences in the return value ABI. Note that it is ok for one
2513 of the functions to have void return type as long as the return
2514 value of the other is passed in a register. */
2515 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2516 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2518 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2520 if (!rtx_equal_p (a
, b
))
2523 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2525 else if (!rtx_equal_p (a
, b
))
2528 /* If this call is indirect, we'll need to be able to use a call-clobbered
2529 register for the address of the target function. Make sure that all
2530 such registers are not used for passing parameters. */
2531 if (!decl
&& !TARGET_64BIT
)
2535 /* We're looking at the CALL_EXPR, we need the type of the function. */
2536 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2537 type
= TREE_TYPE (type
); /* pointer type */
2538 type
= TREE_TYPE (type
); /* function type */
2540 if (ix86_function_regparm (type
, NULL
) >= 3)
2542 /* ??? Need to count the actual number of registers to be used,
2543 not the possible number of registers. Fix later. */
2548 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2549 /* Dllimport'd functions are also called indirectly. */
2550 if (decl
&& DECL_DLLIMPORT_P (decl
)
2551 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2555 /* If we forced aligned the stack, then sibcalling would unalign the
2556 stack, which may break the called function. */
2557 if (cfun
->machine
->force_align_arg_pointer
)
2560 /* Otherwise okay. That also includes certain types of indirect calls. */
2564 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2565 calling convention attributes;
2566 arguments as in struct attribute_spec.handler. */
2569 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2571 int flags ATTRIBUTE_UNUSED
,
2574 if (TREE_CODE (*node
) != FUNCTION_TYPE
2575 && TREE_CODE (*node
) != METHOD_TYPE
2576 && TREE_CODE (*node
) != FIELD_DECL
2577 && TREE_CODE (*node
) != TYPE_DECL
)
2579 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2580 IDENTIFIER_POINTER (name
));
2581 *no_add_attrs
= true;
2585 /* Can combine regparm with all attributes but fastcall. */
2586 if (is_attribute_p ("regparm", name
))
2590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2592 error ("fastcall and regparm attributes are not compatible");
2595 cst
= TREE_VALUE (args
);
2596 if (TREE_CODE (cst
) != INTEGER_CST
)
2598 warning (OPT_Wattributes
,
2599 "%qs attribute requires an integer constant argument",
2600 IDENTIFIER_POINTER (name
));
2601 *no_add_attrs
= true;
2603 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2605 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2606 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2607 *no_add_attrs
= true;
2611 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2612 TYPE_ATTRIBUTES (*node
))
2613 && compare_tree_int (cst
, REGPARM_MAX
-1))
2615 error ("%s functions limited to %d register parameters",
2616 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2624 warning (OPT_Wattributes
, "%qs attribute ignored",
2625 IDENTIFIER_POINTER (name
));
2626 *no_add_attrs
= true;
2630 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2631 if (is_attribute_p ("fastcall", name
))
2633 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2635 error ("fastcall and cdecl attributes are not compatible");
2637 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2639 error ("fastcall and stdcall attributes are not compatible");
2641 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2643 error ("fastcall and regparm attributes are not compatible");
2647 /* Can combine stdcall with fastcall (redundant), regparm and
2649 else if (is_attribute_p ("stdcall", name
))
2651 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2653 error ("stdcall and cdecl attributes are not compatible");
2655 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2657 error ("stdcall and fastcall attributes are not compatible");
2661 /* Can combine cdecl with regparm and sseregparm. */
2662 else if (is_attribute_p ("cdecl", name
))
2664 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2666 error ("stdcall and cdecl attributes are not compatible");
2668 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2670 error ("fastcall and cdecl attributes are not compatible");
2674 /* Can combine sseregparm with all attributes. */
2679 /* Return 0 if the attributes for two types are incompatible, 1 if they
2680 are compatible, and 2 if they are nearly compatible (which causes a
2681 warning to be generated). */
2684 ix86_comp_type_attributes (tree type1
, tree type2
)
2686 /* Check for mismatch of non-default calling convention. */
2687 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2689 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2692 /* Check for mismatched fastcall/regparm types. */
2693 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2694 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2695 || (ix86_function_regparm (type1
, NULL
)
2696 != ix86_function_regparm (type2
, NULL
)))
2699 /* Check for mismatched sseregparm types. */
2700 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2701 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2704 /* Check for mismatched return types (cdecl vs stdcall). */
2705 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2706 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2712 /* Return the regparm value for a function with the indicated TYPE and DECL.
2713 DECL may be NULL when calling function indirectly
2714 or considering a libcall. */
2717 ix86_function_regparm (tree type
, tree decl
)
2720 int regparm
= ix86_regparm
;
2721 bool user_convention
= false;
2725 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2728 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2729 user_convention
= true;
2732 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2735 user_convention
= true;
2738 /* Use register calling convention for local functions when possible. */
2739 if (!TARGET_64BIT
&& !user_convention
&& decl
2740 && flag_unit_at_a_time
&& !profile_flag
)
2742 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2745 int local_regparm
, globals
= 0, regno
;
2747 /* Make sure no regparm register is taken by a global register
2749 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2750 if (global_regs
[local_regparm
])
2752 /* We can't use regparm(3) for nested functions as these use
2753 static chain pointer in third argument. */
2754 if (local_regparm
== 3
2755 && decl_function_context (decl
)
2756 && !DECL_NO_STATIC_CHAIN (decl
))
2758 /* If the function realigns its stackpointer, the
2759 prologue will clobber %ecx. If we've already
2760 generated code for the callee, the callee
2761 DECL_STRUCT_FUNCTION is gone, so we fall back to
2762 scanning the attributes for the self-realigning
2764 if ((DECL_STRUCT_FUNCTION (decl
)
2765 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2766 || (!DECL_STRUCT_FUNCTION (decl
)
2767 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2768 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2770 /* Each global register variable increases register preassure,
2771 so the more global reg vars there are, the smaller regparm
2772 optimization use, unless requested by the user explicitly. */
2773 for (regno
= 0; regno
< 6; regno
++)
2774 if (global_regs
[regno
])
2777 = globals
< local_regparm
? local_regparm
- globals
: 0;
2779 if (local_regparm
> regparm
)
2780 regparm
= local_regparm
;
2787 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2788 DFmode (2) arguments in SSE registers for a function with the
2789 indicated TYPE and DECL. DECL may be NULL when calling function
2790 indirectly or considering a libcall. Otherwise return 0. */
2793 ix86_function_sseregparm (tree type
, tree decl
)
2795 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2796 by the sseregparm attribute. */
2797 if (TARGET_SSEREGPARM
2799 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2804 error ("Calling %qD with attribute sseregparm without "
2805 "SSE/SSE2 enabled", decl
);
2807 error ("Calling %qT with attribute sseregparm without "
2808 "SSE/SSE2 enabled", type
);
2815 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2816 (and DFmode for SSE2) arguments in SSE registers,
2817 even for 32-bit targets. */
2818 if (!TARGET_64BIT
&& decl
2819 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2821 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2823 return TARGET_SSE2
? 2 : 1;
2829 /* Return true if EAX is live at the start of the function. Used by
2830 ix86_expand_prologue to determine if we need special help before
2831 calling allocate_stack_worker. */
2834 ix86_eax_live_at_start_p (void)
2836 /* Cheat. Don't bother working forward from ix86_function_regparm
2837 to the function type to whether an actual argument is located in
2838 eax. Instead just look at cfg info, which is still close enough
2839 to correct at this point. This gives false positives for broken
2840 functions that might use uninitialized data that happens to be
2841 allocated in eax, but who cares? */
2842 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2845 /* Value is the number of bytes of arguments automatically
2846 popped when returning from a subroutine call.
2847 FUNDECL is the declaration node of the function (as a tree),
2848 FUNTYPE is the data type of the function (as a tree),
2849 or for a library call it is an identifier node for the subroutine name.
2850 SIZE is the number of bytes of arguments passed on the stack.
2852 On the 80386, the RTD insn may be used to pop them if the number
2853 of args is fixed, but if the number is variable then the caller
2854 must pop them all. RTD can't be used for library calls now
2855 because the library is compiled with the Unix compiler.
2856 Use of RTD is a selectable option, since it is incompatible with
2857 standard Unix calling sequences. If the option is not selected,
2858 the caller must always pop the args.
2860 The attribute stdcall is equivalent to RTD on a per module basis. */
2863 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2865 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2867 /* Cdecl functions override -mrtd, and never pop the stack. */
2868 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
2870 /* Stdcall and fastcall functions will pop the stack if not
2872 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2873 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
2877 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
2878 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
2879 == void_type_node
)))
2883 /* Lose any fake structure return argument if it is passed on the stack. */
2884 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2886 && !KEEP_AGGREGATE_RETURN_POINTER
)
2888 int nregs
= ix86_function_regparm (funtype
, fundecl
);
2891 return GET_MODE_SIZE (Pmode
);
2897 /* Argument support functions. */
2899 /* Return true when register may be used to pass function parameters. */
2901 ix86_function_arg_regno_p (int regno
)
2905 return (regno
< REGPARM_MAX
2906 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2907 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2908 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2909 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2911 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2912 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2914 /* RAX is used as hidden argument to va_arg functions. */
2917 for (i
= 0; i
< REGPARM_MAX
; i
++)
2918 if (regno
== x86_64_int_parameter_registers
[i
])
2923 /* Return if we do not know how to pass TYPE solely in registers. */
2926 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
2928 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2931 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2932 The layout_type routine is crafty and tries to trick us into passing
2933 currently unsupported vector types on the stack by using TImode. */
2934 return (!TARGET_64BIT
&& mode
== TImode
2935 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2938 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2939 for a call to a function whose data type is FNTYPE.
2940 For a library call, FNTYPE is 0. */
2943 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2944 tree fntype
, /* tree ptr for function decl */
2945 rtx libname
, /* SYMBOL_REF of library name or 0 */
2948 static CUMULATIVE_ARGS zero_cum
;
2949 tree param
, next_param
;
2951 if (TARGET_DEBUG_ARG
)
2953 fprintf (stderr
, "\ninit_cumulative_args (");
2955 fprintf (stderr
, "fntype code = %s, ret code = %s",
2956 tree_code_name
[(int) TREE_CODE (fntype
)],
2957 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2959 fprintf (stderr
, "no fntype");
2962 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2967 /* Set up the number of registers to use for passing arguments. */
2968 cum
->nregs
= ix86_regparm
;
2970 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2972 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2973 cum
->warn_sse
= true;
2974 cum
->warn_mmx
= true;
2975 cum
->maybe_vaarg
= false;
2977 /* Use ecx and edx registers if function has fastcall attribute,
2978 else look for regparm information. */
2979 if (fntype
&& !TARGET_64BIT
)
2981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2987 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2990 /* Set up the number of SSE registers used for passing SFmode
2991 and DFmode arguments. Warn for mismatching ABI. */
2992 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
2994 /* Determine if this function has variable arguments. This is
2995 indicated by the last argument being 'void_type_mode' if there
2996 are no variable arguments. If there are variable arguments, then
2997 we won't pass anything in registers in 32-bit mode. */
2999 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3001 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3002 param
!= 0; param
= next_param
)
3004 next_param
= TREE_CHAIN (param
);
3005 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3015 cum
->float_in_sse
= 0;
3017 cum
->maybe_vaarg
= true;
3021 if ((!fntype
&& !libname
)
3022 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3023 cum
->maybe_vaarg
= true;
3025 if (TARGET_DEBUG_ARG
)
3026 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3031 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3032 But in the case of vector types, it is some vector mode.
3034 When we have only some of our vector isa extensions enabled, then there
3035 are some modes for which vector_mode_supported_p is false. For these
3036 modes, the generic vector support in gcc will choose some non-vector mode
3037 in order to implement the type. By computing the natural mode, we'll
3038 select the proper ABI location for the operand and not depend on whatever
3039 the middle-end decides to do with these vector types. */
3041 static enum machine_mode
3042 type_natural_mode (tree type
)
3044 enum machine_mode mode
= TYPE_MODE (type
);
3046 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3048 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3049 if ((size
== 8 || size
== 16)
3050 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3051 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3053 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3055 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3056 mode
= MIN_MODE_VECTOR_FLOAT
;
3058 mode
= MIN_MODE_VECTOR_INT
;
3060 /* Get the mode which has this inner mode and number of units. */
3061 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3062 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3063 && GET_MODE_INNER (mode
) == innermode
)
3073 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3074 this may not agree with the mode that the type system has chosen for the
3075 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3076 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3079 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3084 if (orig_mode
!= BLKmode
)
3085 tmp
= gen_rtx_REG (orig_mode
, regno
);
3088 tmp
= gen_rtx_REG (mode
, regno
);
3089 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3090 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3096 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3097 of this code is to classify each 8bytes of incoming argument by the register
3098 class and assign registers accordingly. */
3100 /* Return the union class of CLASS1 and CLASS2.
3101 See the x86-64 PS ABI for details. */
3103 static enum x86_64_reg_class
3104 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3106 /* Rule #1: If both classes are equal, this is the resulting class. */
3107 if (class1
== class2
)
3110 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3112 if (class1
== X86_64_NO_CLASS
)
3114 if (class2
== X86_64_NO_CLASS
)
3117 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3118 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3119 return X86_64_MEMORY_CLASS
;
3121 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3122 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3123 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3124 return X86_64_INTEGERSI_CLASS
;
3125 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3126 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3127 return X86_64_INTEGER_CLASS
;
3129 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3131 if (class1
== X86_64_X87_CLASS
3132 || class1
== X86_64_X87UP_CLASS
3133 || class1
== X86_64_COMPLEX_X87_CLASS
3134 || class2
== X86_64_X87_CLASS
3135 || class2
== X86_64_X87UP_CLASS
3136 || class2
== X86_64_COMPLEX_X87_CLASS
)
3137 return X86_64_MEMORY_CLASS
;
3139 /* Rule #6: Otherwise class SSE is used. */
3140 return X86_64_SSE_CLASS
;
3143 /* Classify the argument of type TYPE and mode MODE.
3144 CLASSES will be filled by the register class used to pass each word
3145 of the operand. The number of words is returned. In case the parameter
3146 should be passed in memory, 0 is returned. As a special case for zero
3147 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3149 BIT_OFFSET is used internally for handling records and specifies offset
3150 of the offset in bits modulo 256 to avoid overflow cases.
3152 See the x86-64 PS ABI for details.
3156 classify_argument (enum machine_mode mode
, tree type
,
3157 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3159 HOST_WIDE_INT bytes
=
3160 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3161 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3163 /* Variable sized entities are always passed/returned in memory. */
3167 if (mode
!= VOIDmode
3168 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3171 if (type
&& AGGREGATE_TYPE_P (type
))
3175 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3177 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3181 for (i
= 0; i
< words
; i
++)
3182 classes
[i
] = X86_64_NO_CLASS
;
3184 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3185 signalize memory class, so handle it as special case. */
3188 classes
[0] = X86_64_NO_CLASS
;
3192 /* Classify each field of record and merge classes. */
3193 switch (TREE_CODE (type
))
3196 /* And now merge the fields of structure. */
3197 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3199 if (TREE_CODE (field
) == FIELD_DECL
)
3203 if (TREE_TYPE (field
) == error_mark_node
)
3206 /* Bitfields are always classified as integer. Handle them
3207 early, since later code would consider them to be
3208 misaligned integers. */
3209 if (DECL_BIT_FIELD (field
))
3211 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3212 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3213 + tree_low_cst (DECL_SIZE (field
), 0)
3216 merge_classes (X86_64_INTEGER_CLASS
,
3221 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3222 TREE_TYPE (field
), subclasses
,
3223 (int_bit_position (field
)
3224 + bit_offset
) % 256);
3227 for (i
= 0; i
< num
; i
++)
3230 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3232 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3240 /* Arrays are handled as small records. */
3243 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3244 TREE_TYPE (type
), subclasses
, bit_offset
);
3248 /* The partial classes are now full classes. */
3249 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3250 subclasses
[0] = X86_64_SSE_CLASS
;
3251 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3252 subclasses
[0] = X86_64_INTEGER_CLASS
;
3254 for (i
= 0; i
< words
; i
++)
3255 classes
[i
] = subclasses
[i
% num
];
3260 case QUAL_UNION_TYPE
:
3261 /* Unions are similar to RECORD_TYPE but offset is always 0.
3263 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3265 if (TREE_CODE (field
) == FIELD_DECL
)
3269 if (TREE_TYPE (field
) == error_mark_node
)
3272 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3273 TREE_TYPE (field
), subclasses
,
3277 for (i
= 0; i
< num
; i
++)
3278 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3287 /* Final merger cleanup. */
3288 for (i
= 0; i
< words
; i
++)
3290 /* If one class is MEMORY, everything should be passed in
3292 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3295 /* The X86_64_SSEUP_CLASS should be always preceded by
3296 X86_64_SSE_CLASS. */
3297 if (classes
[i
] == X86_64_SSEUP_CLASS
3298 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3299 classes
[i
] = X86_64_SSE_CLASS
;
3301 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3302 if (classes
[i
] == X86_64_X87UP_CLASS
3303 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3304 classes
[i
] = X86_64_SSE_CLASS
;
3309 /* Compute alignment needed. We align all types to natural boundaries with
3310 exception of XFmode that is aligned to 64bits. */
3311 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3313 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3316 mode_alignment
= 128;
3317 else if (mode
== XCmode
)
3318 mode_alignment
= 256;
3319 if (COMPLEX_MODE_P (mode
))
3320 mode_alignment
/= 2;
3321 /* Misaligned fields are always returned in memory. */
3322 if (bit_offset
% mode_alignment
)
3326 /* for V1xx modes, just use the base mode */
3327 if (VECTOR_MODE_P (mode
)
3328 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3329 mode
= GET_MODE_INNER (mode
);
3331 /* Classification of atomic types. */
3336 classes
[0] = X86_64_SSE_CLASS
;
3339 classes
[0] = X86_64_SSE_CLASS
;
3340 classes
[1] = X86_64_SSEUP_CLASS
;
3349 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3350 classes
[0] = X86_64_INTEGERSI_CLASS
;
3352 classes
[0] = X86_64_INTEGER_CLASS
;
3356 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3361 if (!(bit_offset
% 64))
3362 classes
[0] = X86_64_SSESF_CLASS
;
3364 classes
[0] = X86_64_SSE_CLASS
;
3367 classes
[0] = X86_64_SSEDF_CLASS
;
3370 classes
[0] = X86_64_X87_CLASS
;
3371 classes
[1] = X86_64_X87UP_CLASS
;
3374 classes
[0] = X86_64_SSE_CLASS
;
3375 classes
[1] = X86_64_SSEUP_CLASS
;
3378 classes
[0] = X86_64_SSE_CLASS
;
3381 classes
[0] = X86_64_SSEDF_CLASS
;
3382 classes
[1] = X86_64_SSEDF_CLASS
;
3385 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3388 /* This modes is larger than 16 bytes. */
3396 classes
[0] = X86_64_SSE_CLASS
;
3397 classes
[1] = X86_64_SSEUP_CLASS
;
3403 classes
[0] = X86_64_SSE_CLASS
;
3409 gcc_assert (VECTOR_MODE_P (mode
));
3414 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3416 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3417 classes
[0] = X86_64_INTEGERSI_CLASS
;
3419 classes
[0] = X86_64_INTEGER_CLASS
;
3420 classes
[1] = X86_64_INTEGER_CLASS
;
3421 return 1 + (bytes
> 8);
3425 /* Examine the argument and return set number of register required in each
3426 class. Return 0 iff parameter should be passed in memory. */
3428 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3429 int *int_nregs
, int *sse_nregs
)
3431 enum x86_64_reg_class
class[MAX_CLASSES
];
3432 int n
= classify_argument (mode
, type
, class, 0);
3438 for (n
--; n
>= 0; n
--)
3441 case X86_64_INTEGER_CLASS
:
3442 case X86_64_INTEGERSI_CLASS
:
3445 case X86_64_SSE_CLASS
:
3446 case X86_64_SSESF_CLASS
:
3447 case X86_64_SSEDF_CLASS
:
3450 case X86_64_NO_CLASS
:
3451 case X86_64_SSEUP_CLASS
:
3453 case X86_64_X87_CLASS
:
3454 case X86_64_X87UP_CLASS
:
3458 case X86_64_COMPLEX_X87_CLASS
:
3459 return in_return
? 2 : 0;
3460 case X86_64_MEMORY_CLASS
:
3466 /* Construct container for the argument used by GCC interface. See
3467 FUNCTION_ARG for the detailed description. */
3470 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3471 tree type
, int in_return
, int nintregs
, int nsseregs
,
3472 const int *intreg
, int sse_regno
)
3474 /* The following variables hold the static issued_error state. */
3475 static bool issued_sse_arg_error
;
3476 static bool issued_sse_ret_error
;
3477 static bool issued_x87_ret_error
;
3479 enum machine_mode tmpmode
;
3481 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3482 enum x86_64_reg_class
class[MAX_CLASSES
];
3486 int needed_sseregs
, needed_intregs
;
3487 rtx exp
[MAX_CLASSES
];
3490 n
= classify_argument (mode
, type
, class, 0);
3491 if (TARGET_DEBUG_ARG
)
3494 fprintf (stderr
, "Memory class\n");
3497 fprintf (stderr
, "Classes:");
3498 for (i
= 0; i
< n
; i
++)
3500 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3502 fprintf (stderr
, "\n");
3507 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3510 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3513 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3514 some less clueful developer tries to use floating-point anyway. */
3515 if (needed_sseregs
&& !TARGET_SSE
)
3519 if (!issued_sse_ret_error
)
3521 error ("SSE register return with SSE disabled");
3522 issued_sse_ret_error
= true;
3525 else if (!issued_sse_arg_error
)
3527 error ("SSE register argument with SSE disabled");
3528 issued_sse_arg_error
= true;
3533 /* Likewise, error if the ABI requires us to return values in the
3534 x87 registers and the user specified -mno-80387. */
3535 if (!TARGET_80387
&& in_return
)
3536 for (i
= 0; i
< n
; i
++)
3537 if (class[i
] == X86_64_X87_CLASS
3538 || class[i
] == X86_64_X87UP_CLASS
3539 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3541 if (!issued_x87_ret_error
)
3543 error ("x87 register return with x87 disabled");
3544 issued_x87_ret_error
= true;
3549 /* First construct simple cases. Avoid SCmode, since we want to use
3550 single register to pass this type. */
3551 if (n
== 1 && mode
!= SCmode
)
3554 case X86_64_INTEGER_CLASS
:
3555 case X86_64_INTEGERSI_CLASS
:
3556 return gen_rtx_REG (mode
, intreg
[0]);
3557 case X86_64_SSE_CLASS
:
3558 case X86_64_SSESF_CLASS
:
3559 case X86_64_SSEDF_CLASS
:
3560 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3561 case X86_64_X87_CLASS
:
3562 case X86_64_COMPLEX_X87_CLASS
:
3563 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3564 case X86_64_NO_CLASS
:
3565 /* Zero sized array, struct or class. */
3570 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3572 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3574 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3575 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3576 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3577 && class[1] == X86_64_INTEGER_CLASS
3578 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3579 && intreg
[0] + 1 == intreg
[1])
3580 return gen_rtx_REG (mode
, intreg
[0]);
3582 /* Otherwise figure out the entries of the PARALLEL. */
3583 for (i
= 0; i
< n
; i
++)
3587 case X86_64_NO_CLASS
:
3589 case X86_64_INTEGER_CLASS
:
3590 case X86_64_INTEGERSI_CLASS
:
3591 /* Merge TImodes on aligned occasions here too. */
3592 if (i
* 8 + 8 > bytes
)
3593 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3594 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3598 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3599 if (tmpmode
== BLKmode
)
3601 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3602 gen_rtx_REG (tmpmode
, *intreg
),
3606 case X86_64_SSESF_CLASS
:
3607 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3608 gen_rtx_REG (SFmode
,
3609 SSE_REGNO (sse_regno
)),
3613 case X86_64_SSEDF_CLASS
:
3614 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3615 gen_rtx_REG (DFmode
,
3616 SSE_REGNO (sse_regno
)),
3620 case X86_64_SSE_CLASS
:
3621 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3625 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3626 gen_rtx_REG (tmpmode
,
3627 SSE_REGNO (sse_regno
)),
3629 if (tmpmode
== TImode
)
3638 /* Empty aligned struct, union or class. */
3642 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3643 for (i
= 0; i
< nexps
; i
++)
3644 XVECEXP (ret
, 0, i
) = exp
[i
];
3648 /* Update the data in CUM to advance over an argument
3649 of mode MODE and data type TYPE.
3650 (TYPE is null for libcalls where that information may not be available.) */
3653 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3654 tree type
, int named
)
3657 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3658 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3661 mode
= type_natural_mode (type
);
3663 if (TARGET_DEBUG_ARG
)
3664 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3665 "mode=%s, named=%d)\n\n",
3666 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3667 GET_MODE_NAME (mode
), named
);
3671 int int_nregs
, sse_nregs
;
3672 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3673 cum
->words
+= words
;
3674 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3676 cum
->nregs
-= int_nregs
;
3677 cum
->sse_nregs
-= sse_nregs
;
3678 cum
->regno
+= int_nregs
;
3679 cum
->sse_regno
+= sse_nregs
;
3682 cum
->words
+= words
;
3700 cum
->words
+= words
;
3701 cum
->nregs
-= words
;
3702 cum
->regno
+= words
;
3704 if (cum
->nregs
<= 0)
3712 if (cum
->float_in_sse
< 2)
3715 if (cum
->float_in_sse
< 1)
3726 if (!type
|| !AGGREGATE_TYPE_P (type
))
3728 cum
->sse_words
+= words
;
3729 cum
->sse_nregs
-= 1;
3730 cum
->sse_regno
+= 1;
3731 if (cum
->sse_nregs
<= 0)
3743 if (!type
|| !AGGREGATE_TYPE_P (type
))
3745 cum
->mmx_words
+= words
;
3746 cum
->mmx_nregs
-= 1;
3747 cum
->mmx_regno
+= 1;
3748 if (cum
->mmx_nregs
<= 0)
3759 /* Define where to put the arguments to a function.
3760 Value is zero to push the argument on the stack,
3761 or a hard register in which to store the argument.
3763 MODE is the argument's machine mode.
3764 TYPE is the data type of the argument (as a tree).
3765 This is null for libcalls where that information may
3767 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3768 the preceding args and about the function being called.
3769 NAMED is nonzero if this argument is a named parameter
3770 (otherwise it is an extra parameter matching an ellipsis). */
3773 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3774 tree type
, int named
)
3776 enum machine_mode mode
= orig_mode
;
3779 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3780 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3781 static bool warnedsse
, warnedmmx
;
3783 /* To simplify the code below, represent vector types with a vector mode
3784 even if MMX/SSE are not active. */
3785 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3786 mode
= type_natural_mode (type
);
3788 /* Handle a hidden AL argument containing number of registers for varargs
3789 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3791 if (mode
== VOIDmode
)
3794 return GEN_INT (cum
->maybe_vaarg
3795 ? (cum
->sse_nregs
< 0
3803 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3805 &x86_64_int_parameter_registers
[cum
->regno
],
3810 /* For now, pass fp/complex values on the stack. */
3822 if (words
<= cum
->nregs
)
3824 int regno
= cum
->regno
;
3826 /* Fastcall allocates the first two DWORD (SImode) or
3827 smaller arguments to ECX and EDX. */
3830 if (mode
== BLKmode
|| mode
== DImode
)
3833 /* ECX not EAX is the first allocated register. */
3837 ret
= gen_rtx_REG (mode
, regno
);
3841 if (cum
->float_in_sse
< 2)
3844 if (cum
->float_in_sse
< 1)
3854 if (!type
|| !AGGREGATE_TYPE_P (type
))
3856 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3859 warning (0, "SSE vector argument without SSE enabled "
3863 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3864 cum
->sse_regno
+ FIRST_SSE_REG
);
3871 if (!type
|| !AGGREGATE_TYPE_P (type
))
3873 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3876 warning (0, "MMX vector argument without MMX enabled "
3880 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3881 cum
->mmx_regno
+ FIRST_MMX_REG
);
3886 if (TARGET_DEBUG_ARG
)
3889 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3890 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
3893 print_simple_rtl (stderr
, ret
);
3895 fprintf (stderr
, ", stack");
3897 fprintf (stderr
, " )\n");
3903 /* A C expression that indicates when an argument must be passed by
3904 reference. If nonzero for an argument, a copy of that argument is
3905 made in memory and a pointer to the argument is passed instead of
3906 the argument itself. The pointer is passed in whatever way is
3907 appropriate for passing a pointer to that type. */
3910 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3911 enum machine_mode mode ATTRIBUTE_UNUSED
,
3912 tree type
, bool named ATTRIBUTE_UNUSED
)
3917 if (type
&& int_size_in_bytes (type
) == -1)
3919 if (TARGET_DEBUG_ARG
)
3920 fprintf (stderr
, "function_arg_pass_by_reference\n");
3927 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3928 ABI. Only called if TARGET_SSE. */
3930 contains_128bit_aligned_vector_p (tree type
)
3932 enum machine_mode mode
= TYPE_MODE (type
);
3933 if (SSE_REG_MODE_P (mode
)
3934 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3936 if (TYPE_ALIGN (type
) < 128)
3939 if (AGGREGATE_TYPE_P (type
))
3941 /* Walk the aggregates recursively. */
3942 switch (TREE_CODE (type
))
3946 case QUAL_UNION_TYPE
:
3950 /* Walk all the structure fields. */
3951 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3953 if (TREE_CODE (field
) == FIELD_DECL
3954 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
3961 /* Just for use if some languages passes arrays by value. */
3962 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
3973 /* Gives the alignment boundary, in bits, of an argument with the
3974 specified mode and type. */
3977 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
3981 align
= TYPE_ALIGN (type
);
3983 align
= GET_MODE_ALIGNMENT (mode
);
3984 if (align
< PARM_BOUNDARY
)
3985 align
= PARM_BOUNDARY
;
3988 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3989 make an exception for SSE modes since these require 128bit
3992 The handling here differs from field_alignment. ICC aligns MMX
3993 arguments to 4 byte boundaries, while structure fields are aligned
3994 to 8 byte boundaries. */
3996 align
= PARM_BOUNDARY
;
3999 if (!SSE_REG_MODE_P (mode
))
4000 align
= PARM_BOUNDARY
;
4004 if (!contains_128bit_aligned_vector_p (type
))
4005 align
= PARM_BOUNDARY
;
4013 /* Return true if N is a possible register number of function value. */
4015 ix86_function_value_regno_p (int regno
)
4018 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4019 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4023 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4029 /* Define how to find the value returned by a function.
4030 VALTYPE is the data type of the value (as a tree).
4031 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4032 otherwise, FUNC is 0. */
4034 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4035 bool outgoing ATTRIBUTE_UNUSED
)
4037 enum machine_mode natmode
= type_natural_mode (valtype
);
4041 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4042 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4043 x86_64_int_return_registers
, 0);
4044 /* For zero sized structures, construct_container return NULL, but we
4045 need to keep rest of compiler happy by returning meaningful value. */
4047 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4052 tree fn
= NULL_TREE
, fntype
;
4054 && DECL_P (fntype_or_decl
))
4055 fn
= fntype_or_decl
;
4056 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4057 return gen_rtx_REG (TYPE_MODE (valtype
),
4058 ix86_value_regno (natmode
, fn
, fntype
));
4062 /* Return true iff type is returned in memory. */
4064 ix86_return_in_memory (tree type
)
4066 int needed_intregs
, needed_sseregs
, size
;
4067 enum machine_mode mode
= type_natural_mode (type
);
4070 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4072 if (mode
== BLKmode
)
4075 size
= int_size_in_bytes (type
);
4077 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4080 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4082 /* User-created vectors small enough to fit in EAX. */
4086 /* MMX/3dNow values are returned in MM0,
4087 except when it doesn't exits. */
4089 return (TARGET_MMX
? 0 : 1);
4091 /* SSE values are returned in XMM0, except when it doesn't exist. */
4093 return (TARGET_SSE
? 0 : 1);
4107 /* When returning SSE vector types, we have a choice of either
4108 (1) being abi incompatible with a -march switch, or
4109 (2) generating an error.
4110 Given no good solution, I think the safest thing is one warning.
4111 The user won't be able to use -Werror, but....
4113 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4114 called in response to actually generating a caller or callee that
4115 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4116 via aggregate_value_p for general type probing from tree-ssa. */
4119 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4121 static bool warnedsse
, warnedmmx
;
4125 /* Look at the return type of the function, not the function type. */
4126 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4128 if (!TARGET_SSE
&& !warnedsse
)
4131 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4134 warning (0, "SSE vector return without SSE enabled "
4139 if (!TARGET_MMX
&& !warnedmmx
)
4141 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4144 warning (0, "MMX vector return without MMX enabled "
4153 /* Define how to find the value returned by a library function
4154 assuming the value has mode MODE. */
4156 ix86_libcall_value (enum machine_mode mode
)
4170 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4173 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4177 return gen_rtx_REG (mode
, 0);
4181 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4184 /* Given a mode, return the register to use for a return value. */
4187 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4189 gcc_assert (!TARGET_64BIT
);
4191 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4192 we normally prevent this case when mmx is not available. However
4193 some ABIs may require the result to be returned like DImode. */
4194 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4195 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4197 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4198 we prevent this case when sse is not available. However some ABIs
4199 may require the result to be returned like integer TImode. */
4200 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4201 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4203 /* Decimal floating point values can go in %eax, unlike other float modes. */
4204 if (DECIMAL_FLOAT_MODE_P (mode
))
4207 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4208 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4211 /* Floating point return values in %st(0), except for local functions when
4212 SSE math is enabled or for functions with sseregparm attribute. */
4213 if ((func
|| fntype
)
4214 && (mode
== SFmode
|| mode
== DFmode
))
4216 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4217 if ((sse_level
>= 1 && mode
== SFmode
)
4218 || (sse_level
== 2 && mode
== DFmode
))
4219 return FIRST_SSE_REG
;
4222 return FIRST_FLOAT_REG
;
4225 /* Create the va_list data type. */
4228 ix86_build_builtin_va_list (void)
4230 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4232 /* For i386 we use plain pointer to argument area. */
4234 return build_pointer_type (char_type_node
);
4236 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4237 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4239 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4240 unsigned_type_node
);
4241 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4242 unsigned_type_node
);
4243 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4245 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4248 va_list_gpr_counter_field
= f_gpr
;
4249 va_list_fpr_counter_field
= f_fpr
;
4251 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4252 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4253 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4254 DECL_FIELD_CONTEXT (f_sav
) = record
;
4256 TREE_CHAIN (record
) = type_decl
;
4257 TYPE_NAME (record
) = type_decl
;
4258 TYPE_FIELDS (record
) = f_gpr
;
4259 TREE_CHAIN (f_gpr
) = f_fpr
;
4260 TREE_CHAIN (f_fpr
) = f_ovf
;
4261 TREE_CHAIN (f_ovf
) = f_sav
;
4263 layout_type (record
);
4265 /* The correct type is an array type of one element. */
4266 return build_array_type (record
, build_index_type (size_zero_node
));
4269 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4272 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4273 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4276 CUMULATIVE_ARGS next_cum
;
4277 rtx save_area
= NULL_RTX
, mem
;
4290 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4293 /* Indicate to allocate space on the stack for varargs save area. */
4294 ix86_save_varrargs_registers
= 1;
4296 cfun
->stack_alignment_needed
= 128;
4298 fntype
= TREE_TYPE (current_function_decl
);
4299 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4300 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4301 != void_type_node
));
4303 /* For varargs, we do not want to skip the dummy va_dcl argument.
4304 For stdargs, we do want to skip the last named argument. */
4307 function_arg_advance (&next_cum
, mode
, type
, 1);
4310 save_area
= frame_pointer_rtx
;
4312 set
= get_varargs_alias_set ();
4314 for (i
= next_cum
.regno
;
4316 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4319 mem
= gen_rtx_MEM (Pmode
,
4320 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4321 MEM_NOTRAP_P (mem
) = 1;
4322 set_mem_alias_set (mem
, set
);
4323 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4324 x86_64_int_parameter_registers
[i
]));
4327 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4329 /* Now emit code to save SSE registers. The AX parameter contains number
4330 of SSE parameter registers used to call this function. We use
4331 sse_prologue_save insn template that produces computed jump across
4332 SSE saves. We need some preparation work to get this working. */
4334 label
= gen_label_rtx ();
4335 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4337 /* Compute address to jump to :
4338 label - 5*eax + nnamed_sse_arguments*5 */
4339 tmp_reg
= gen_reg_rtx (Pmode
);
4340 nsse_reg
= gen_reg_rtx (Pmode
);
4341 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4342 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4343 gen_rtx_MULT (Pmode
, nsse_reg
,
4345 if (next_cum
.sse_regno
)
4348 gen_rtx_CONST (DImode
,
4349 gen_rtx_PLUS (DImode
,
4351 GEN_INT (next_cum
.sse_regno
* 4))));
4353 emit_move_insn (nsse_reg
, label_ref
);
4354 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4356 /* Compute address of memory block we save into. We always use pointer
4357 pointing 127 bytes after first byte to store - this is needed to keep
4358 instruction size limited by 4 bytes. */
4359 tmp_reg
= gen_reg_rtx (Pmode
);
4360 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4361 plus_constant (save_area
,
4362 8 * REGPARM_MAX
+ 127)));
4363 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4364 MEM_NOTRAP_P (mem
) = 1;
4365 set_mem_alias_set (mem
, set
);
4366 set_mem_align (mem
, BITS_PER_WORD
);
4368 /* And finally do the dirty job! */
4369 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4370 GEN_INT (next_cum
.sse_regno
), label
));
4375 /* Implement va_start. */
4378 ix86_va_start (tree valist
, rtx nextarg
)
4380 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4381 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4382 tree gpr
, fpr
, ovf
, sav
, t
;
4385 /* Only 64bit target needs something special. */
4388 std_expand_builtin_va_start (valist
, nextarg
);
4392 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4393 f_fpr
= TREE_CHAIN (f_gpr
);
4394 f_ovf
= TREE_CHAIN (f_fpr
);
4395 f_sav
= TREE_CHAIN (f_ovf
);
4397 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4398 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4399 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4400 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4401 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4403 /* Count number of gp and fp argument registers used. */
4404 words
= current_function_args_info
.words
;
4405 n_gpr
= current_function_args_info
.regno
;
4406 n_fpr
= current_function_args_info
.sse_regno
;
4408 if (TARGET_DEBUG_ARG
)
4409 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4410 (int) words
, (int) n_gpr
, (int) n_fpr
);
4412 if (cfun
->va_list_gpr_size
)
4414 type
= TREE_TYPE (gpr
);
4415 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4416 build_int_cst (type
, n_gpr
* 8));
4417 TREE_SIDE_EFFECTS (t
) = 1;
4418 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4421 if (cfun
->va_list_fpr_size
)
4423 type
= TREE_TYPE (fpr
);
4424 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4425 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4426 TREE_SIDE_EFFECTS (t
) = 1;
4427 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4430 /* Find the overflow area. */
4431 type
= TREE_TYPE (ovf
);
4432 t
= make_tree (type
, virtual_incoming_args_rtx
);
4434 t
= build2 (PLUS_EXPR
, type
, t
,
4435 build_int_cst (type
, words
* UNITS_PER_WORD
));
4436 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4437 TREE_SIDE_EFFECTS (t
) = 1;
4438 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4440 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4442 /* Find the register save area.
4443 Prologue of the function save it right above stack frame. */
4444 type
= TREE_TYPE (sav
);
4445 t
= make_tree (type
, frame_pointer_rtx
);
4446 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4447 TREE_SIDE_EFFECTS (t
) = 1;
4448 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4452 /* Implement va_arg. */
4455 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4457 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4458 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4459 tree gpr
, fpr
, ovf
, sav
, t
;
4461 tree lab_false
, lab_over
= NULL_TREE
;
4466 enum machine_mode nat_mode
;
4468 /* Only 64bit target needs something special. */
4470 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4472 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4473 f_fpr
= TREE_CHAIN (f_gpr
);
4474 f_ovf
= TREE_CHAIN (f_fpr
);
4475 f_sav
= TREE_CHAIN (f_ovf
);
4477 valist
= build_va_arg_indirect_ref (valist
);
4478 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4479 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4480 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4481 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4483 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4485 type
= build_pointer_type (type
);
4486 size
= int_size_in_bytes (type
);
4487 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4489 nat_mode
= type_natural_mode (type
);
4490 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4491 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4493 /* Pull the value out of the saved registers. */
4495 addr
= create_tmp_var (ptr_type_node
, "addr");
4496 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4500 int needed_intregs
, needed_sseregs
;
4502 tree int_addr
, sse_addr
;
4504 lab_false
= create_artificial_label ();
4505 lab_over
= create_artificial_label ();
4507 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4509 need_temp
= (!REG_P (container
)
4510 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4511 || TYPE_ALIGN (type
) > 128));
4513 /* In case we are passing structure, verify that it is consecutive block
4514 on the register save area. If not we need to do moves. */
4515 if (!need_temp
&& !REG_P (container
))
4517 /* Verify that all registers are strictly consecutive */
4518 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4522 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4524 rtx slot
= XVECEXP (container
, 0, i
);
4525 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4526 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4534 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4536 rtx slot
= XVECEXP (container
, 0, i
);
4537 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4538 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4550 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4551 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4552 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4553 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4556 /* First ensure that we fit completely in registers. */
4559 t
= build_int_cst (TREE_TYPE (gpr
),
4560 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4561 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4562 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4563 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4564 gimplify_and_add (t
, pre_p
);
4568 t
= build_int_cst (TREE_TYPE (fpr
),
4569 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4571 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4572 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4573 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4574 gimplify_and_add (t
, pre_p
);
4577 /* Compute index to start of area used for integer regs. */
4580 /* int_addr = gpr + sav; */
4581 t
= fold_convert (ptr_type_node
, gpr
);
4582 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4583 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4584 gimplify_and_add (t
, pre_p
);
4588 /* sse_addr = fpr + sav; */
4589 t
= fold_convert (ptr_type_node
, fpr
);
4590 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4591 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4592 gimplify_and_add (t
, pre_p
);
4597 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4600 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4601 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4602 gimplify_and_add (t
, pre_p
);
4604 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4606 rtx slot
= XVECEXP (container
, 0, i
);
4607 rtx reg
= XEXP (slot
, 0);
4608 enum machine_mode mode
= GET_MODE (reg
);
4609 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4610 tree addr_type
= build_pointer_type (piece_type
);
4613 tree dest_addr
, dest
;
4615 if (SSE_REGNO_P (REGNO (reg
)))
4617 src_addr
= sse_addr
;
4618 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4622 src_addr
= int_addr
;
4623 src_offset
= REGNO (reg
) * 8;
4625 src_addr
= fold_convert (addr_type
, src_addr
);
4626 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4627 size_int (src_offset
)));
4628 src
= build_va_arg_indirect_ref (src_addr
);
4630 dest_addr
= fold_convert (addr_type
, addr
);
4631 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4632 size_int (INTVAL (XEXP (slot
, 1)))));
4633 dest
= build_va_arg_indirect_ref (dest_addr
);
4635 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4636 gimplify_and_add (t
, pre_p
);
4642 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4643 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4644 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4645 gimplify_and_add (t
, pre_p
);
4649 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4650 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4651 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4652 gimplify_and_add (t
, pre_p
);
4655 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4656 gimplify_and_add (t
, pre_p
);
4658 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4659 append_to_statement_list (t
, pre_p
);
4662 /* ... otherwise out of the overflow area. */
4664 /* Care for on-stack alignment if needed. */
4665 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4666 || integer_zerop (TYPE_SIZE (type
)))
4670 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4671 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4672 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4673 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4674 build_int_cst (TREE_TYPE (t
), -align
));
4676 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4678 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4679 gimplify_and_add (t2
, pre_p
);
4681 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4682 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4683 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4684 gimplify_and_add (t
, pre_p
);
4688 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4689 append_to_statement_list (t
, pre_p
);
4692 ptrtype
= build_pointer_type (type
);
4693 addr
= fold_convert (ptrtype
, addr
);
4696 addr
= build_va_arg_indirect_ref (addr
);
4697 return build_va_arg_indirect_ref (addr
);
4700 /* Return nonzero if OPNUM's MEM should be matched
4701 in movabs* patterns. */
4704 ix86_check_movabs (rtx insn
, int opnum
)
4708 set
= PATTERN (insn
);
4709 if (GET_CODE (set
) == PARALLEL
)
4710 set
= XVECEXP (set
, 0, 0);
4711 gcc_assert (GET_CODE (set
) == SET
);
4712 mem
= XEXP (set
, opnum
);
4713 while (GET_CODE (mem
) == SUBREG
)
4714 mem
= SUBREG_REG (mem
);
4715 gcc_assert (MEM_P (mem
));
4716 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4719 /* Initialize the table of extra 80387 mathematical constants. */
4722 init_ext_80387_constants (void)
4724 static const char * cst
[5] =
4726 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4727 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4728 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4729 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4730 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4734 for (i
= 0; i
< 5; i
++)
4736 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4737 /* Ensure each constant is rounded to XFmode precision. */
4738 real_convert (&ext_80387_constants_table
[i
],
4739 XFmode
, &ext_80387_constants_table
[i
]);
4742 ext_80387_constants_init
= 1;
4745 /* Return true if the constant is something that can be loaded with
4746 a special instruction. */
4749 standard_80387_constant_p (rtx x
)
4753 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4756 if (x
== CONST0_RTX (GET_MODE (x
)))
4758 if (x
== CONST1_RTX (GET_MODE (x
)))
4761 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4763 /* For XFmode constants, try to find a special 80387 instruction when
4764 optimizing for size or on those CPUs that benefit from them. */
4765 if (GET_MODE (x
) == XFmode
4766 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4770 if (! ext_80387_constants_init
)
4771 init_ext_80387_constants ();
4773 for (i
= 0; i
< 5; i
++)
4774 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4778 /* Load of the constant -0.0 or -1.0 will be split as
4779 fldz;fchs or fld1;fchs sequence. */
4780 if (real_isnegzero (&r
))
4782 if (real_identical (&r
, &dconstm1
))
4788 /* Return the opcode of the special instruction to be used to load
4792 standard_80387_constant_opcode (rtx x
)
4794 switch (standard_80387_constant_p (x
))
4818 /* Return the CONST_DOUBLE representing the 80387 constant that is
4819 loaded by the specified special instruction. The argument IDX
4820 matches the return value from standard_80387_constant_p. */
4823 standard_80387_constant_rtx (int idx
)
4827 if (! ext_80387_constants_init
)
4828 init_ext_80387_constants ();
4844 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4848 /* Return 1 if mode is a valid mode for sse. */
4850 standard_sse_mode_p (enum machine_mode mode
)
4867 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4870 standard_sse_constant_p (rtx x
)
4872 enum machine_mode mode
= GET_MODE (x
);
4874 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
4876 if (vector_all_ones_operand (x
, mode
)
4877 && standard_sse_mode_p (mode
))
4878 return TARGET_SSE2
? 2 : -1;
4883 /* Return the opcode of the special instruction to be used to load
4887 standard_sse_constant_opcode (rtx insn
, rtx x
)
4889 switch (standard_sse_constant_p (x
))
4892 if (get_attr_mode (insn
) == MODE_V4SF
)
4893 return "xorps\t%0, %0";
4894 else if (get_attr_mode (insn
) == MODE_V2DF
)
4895 return "xorpd\t%0, %0";
4897 return "pxor\t%0, %0";
4899 return "pcmpeqd\t%0, %0";
4904 /* Returns 1 if OP contains a symbol reference */
4907 symbolic_reference_mentioned_p (rtx op
)
4912 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4915 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4916 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4922 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4923 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4927 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4934 /* Return 1 if it is appropriate to emit `ret' instructions in the
4935 body of a function. Do this only if the epilogue is simple, needing a
4936 couple of insns. Prior to reloading, we can't tell how many registers
4937 must be saved, so return 0 then. Return 0 if there is no frame
4938 marker to de-allocate. */
4941 ix86_can_use_return_insn_p (void)
4943 struct ix86_frame frame
;
4945 if (! reload_completed
|| frame_pointer_needed
)
4948 /* Don't allow more than 32 pop, since that's all we can do
4949 with one instruction. */
4950 if (current_function_pops_args
4951 && current_function_args_size
>= 32768)
4954 ix86_compute_frame_layout (&frame
);
4955 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4958 /* Value should be nonzero if functions must have frame pointers.
4959 Zero means the frame pointer need not be set up (and parms may
4960 be accessed via the stack pointer) in functions that seem suitable. */
4963 ix86_frame_pointer_required (void)
4965 /* If we accessed previous frames, then the generated code expects
4966 to be able to access the saved ebp value in our frame. */
4967 if (cfun
->machine
->accesses_prev_frame
)
4970 /* Several x86 os'es need a frame pointer for other reasons,
4971 usually pertaining to setjmp. */
4972 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4975 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4976 the frame pointer by default. Turn it back on now if we've not
4977 got a leaf function. */
4978 if (TARGET_OMIT_LEAF_FRAME_POINTER
4979 && (!current_function_is_leaf
4980 || ix86_current_function_calls_tls_descriptor
))
4983 if (current_function_profile
)
4989 /* Record that the current function accesses previous call frames. */
4992 ix86_setup_frame_addresses (void)
4994 cfun
->machine
->accesses_prev_frame
= 1;
4997 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4998 # define USE_HIDDEN_LINKONCE 1
5000 # define USE_HIDDEN_LINKONCE 0
5003 static int pic_labels_used
;
5005 /* Fills in the label name that should be used for a pc thunk for
5006 the given register. */
5009 get_pc_thunk_name (char name
[32], unsigned int regno
)
5011 gcc_assert (!TARGET_64BIT
);
5013 if (USE_HIDDEN_LINKONCE
)
5014 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5016 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5020 /* This function generates code for -fpic that loads %ebx with
5021 the return address of the caller and then returns. */
5024 ix86_file_end (void)
5029 for (regno
= 0; regno
< 8; ++regno
)
5033 if (! ((pic_labels_used
>> regno
) & 1))
5036 get_pc_thunk_name (name
, regno
);
5041 switch_to_section (darwin_sections
[text_coal_section
]);
5042 fputs ("\t.weak_definition\t", asm_out_file
);
5043 assemble_name (asm_out_file
, name
);
5044 fputs ("\n\t.private_extern\t", asm_out_file
);
5045 assemble_name (asm_out_file
, name
);
5046 fputs ("\n", asm_out_file
);
5047 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5051 if (USE_HIDDEN_LINKONCE
)
5055 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5057 TREE_PUBLIC (decl
) = 1;
5058 TREE_STATIC (decl
) = 1;
5059 DECL_ONE_ONLY (decl
) = 1;
5061 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5062 switch_to_section (get_named_section (decl
, NULL
, 0));
5064 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5065 fputs ("\t.hidden\t", asm_out_file
);
5066 assemble_name (asm_out_file
, name
);
5067 fputc ('\n', asm_out_file
);
5068 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5072 switch_to_section (text_section
);
5073 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5076 xops
[0] = gen_rtx_REG (SImode
, regno
);
5077 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5078 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5079 output_asm_insn ("ret", xops
);
5082 if (NEED_INDICATE_EXEC_STACK
)
5083 file_end_indicate_exec_stack ();
5086 /* Emit code for the SET_GOT patterns. */
5089 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5094 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5096 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5098 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5101 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5103 output_asm_insn ("call\t%a2", xops
);
5106 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5107 is what will be referenced by the Mach-O PIC subsystem. */
5109 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5112 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5113 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5116 output_asm_insn ("pop{l}\t%0", xops
);
5121 get_pc_thunk_name (name
, REGNO (dest
));
5122 pic_labels_used
|= 1 << REGNO (dest
);
5124 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5125 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5126 output_asm_insn ("call\t%X2", xops
);
5127 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5128 is what will be referenced by the Mach-O PIC subsystem. */
5131 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5133 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5134 CODE_LABEL_NUMBER (label
));
5141 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5142 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5144 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5149 /* Generate an "push" pattern for input ARG. */
5154 return gen_rtx_SET (VOIDmode
,
5156 gen_rtx_PRE_DEC (Pmode
,
5157 stack_pointer_rtx
)),
5161 /* Return >= 0 if there is an unused call-clobbered register available
5162 for the entire function. */
5165 ix86_select_alt_pic_regnum (void)
5167 if (current_function_is_leaf
&& !current_function_profile
5168 && !ix86_current_function_calls_tls_descriptor
)
5171 for (i
= 2; i
>= 0; --i
)
5172 if (!regs_ever_live
[i
])
5176 return INVALID_REGNUM
;
5179 /* Return 1 if we need to save REGNO. */
5181 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5183 if (pic_offset_table_rtx
5184 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5185 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5186 || current_function_profile
5187 || current_function_calls_eh_return
5188 || current_function_uses_const_pool
))
5190 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5195 if (current_function_calls_eh_return
&& maybe_eh_return
)
5200 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5201 if (test
== INVALID_REGNUM
)
5208 if (cfun
->machine
->force_align_arg_pointer
5209 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5212 return (regs_ever_live
[regno
]
5213 && !call_used_regs
[regno
]
5214 && !fixed_regs
[regno
]
5215 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5218 /* Return number of registers to be saved on the stack. */
5221 ix86_nsaved_regs (void)
5226 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5227 if (ix86_save_reg (regno
, true))
5232 /* Return the offset between two registers, one to be eliminated, and the other
5233 its replacement, at the start of a routine. */
5236 ix86_initial_elimination_offset (int from
, int to
)
5238 struct ix86_frame frame
;
5239 ix86_compute_frame_layout (&frame
);
5241 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5242 return frame
.hard_frame_pointer_offset
;
5243 else if (from
== FRAME_POINTER_REGNUM
5244 && to
== HARD_FRAME_POINTER_REGNUM
)
5245 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5248 gcc_assert (to
== STACK_POINTER_REGNUM
);
5250 if (from
== ARG_POINTER_REGNUM
)
5251 return frame
.stack_pointer_offset
;
5253 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5254 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5258 /* Fill structure ix86_frame about frame of currently computed function. */
5261 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5263 HOST_WIDE_INT total_size
;
5264 unsigned int stack_alignment_needed
;
5265 HOST_WIDE_INT offset
;
5266 unsigned int preferred_alignment
;
5267 HOST_WIDE_INT size
= get_frame_size ();
5269 frame
->nregs
= ix86_nsaved_regs ();
5272 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5273 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5275 /* During reload iteration the amount of registers saved can change.
5276 Recompute the value as needed. Do not recompute when amount of registers
5277 didn't change as reload does multiple calls to the function and does not
5278 expect the decision to change within single iteration. */
5280 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5282 int count
= frame
->nregs
;
5284 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5285 /* The fast prologue uses move instead of push to save registers. This
5286 is significantly longer, but also executes faster as modern hardware
5287 can execute the moves in parallel, but can't do that for push/pop.
5289 Be careful about choosing what prologue to emit: When function takes
5290 many instructions to execute we may use slow version as well as in
5291 case function is known to be outside hot spot (this is known with
5292 feedback only). Weight the size of function by number of registers
5293 to save as it is cheap to use one or two push instructions but very
5294 slow to use many of them. */
5296 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5297 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5298 || (flag_branch_probabilities
5299 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5300 cfun
->machine
->use_fast_prologue_epilogue
= false;
5302 cfun
->machine
->use_fast_prologue_epilogue
5303 = !expensive_function_p (count
);
5305 if (TARGET_PROLOGUE_USING_MOVE
5306 && cfun
->machine
->use_fast_prologue_epilogue
)
5307 frame
->save_regs_using_mov
= true;
5309 frame
->save_regs_using_mov
= false;
5312 /* Skip return address and saved base pointer. */
5313 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5315 frame
->hard_frame_pointer_offset
= offset
;
5317 /* Do some sanity checking of stack_alignment_needed and
5318 preferred_alignment, since i386 port is the only using those features
5319 that may break easily. */
5321 gcc_assert (!size
|| stack_alignment_needed
);
5322 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5323 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5324 gcc_assert (stack_alignment_needed
5325 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5327 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5328 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5330 /* Register save area */
5331 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5334 if (ix86_save_varrargs_registers
)
5336 offset
+= X86_64_VARARGS_SIZE
;
5337 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5340 frame
->va_arg_size
= 0;
5342 /* Align start of frame for local function. */
5343 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5344 & -stack_alignment_needed
) - offset
;
5346 offset
+= frame
->padding1
;
5348 /* Frame pointer points here. */
5349 frame
->frame_pointer_offset
= offset
;
5353 /* Add outgoing arguments area. Can be skipped if we eliminated
5354 all the function calls as dead code.
5355 Skipping is however impossible when function calls alloca. Alloca
5356 expander assumes that last current_function_outgoing_args_size
5357 of stack frame are unused. */
5358 if (ACCUMULATE_OUTGOING_ARGS
5359 && (!current_function_is_leaf
|| current_function_calls_alloca
5360 || ix86_current_function_calls_tls_descriptor
))
5362 offset
+= current_function_outgoing_args_size
;
5363 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5366 frame
->outgoing_arguments_size
= 0;
5368 /* Align stack boundary. Only needed if we're calling another function
5370 if (!current_function_is_leaf
|| current_function_calls_alloca
5371 || ix86_current_function_calls_tls_descriptor
)
5372 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5373 & -preferred_alignment
) - offset
;
5375 frame
->padding2
= 0;
5377 offset
+= frame
->padding2
;
5379 /* We've reached end of stack frame. */
5380 frame
->stack_pointer_offset
= offset
;
5382 /* Size prologue needs to allocate. */
5383 frame
->to_allocate
=
5384 (size
+ frame
->padding1
+ frame
->padding2
5385 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5387 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5388 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5389 frame
->save_regs_using_mov
= false;
5391 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5392 && current_function_is_leaf
5393 && !ix86_current_function_calls_tls_descriptor
)
5395 frame
->red_zone_size
= frame
->to_allocate
;
5396 if (frame
->save_regs_using_mov
)
5397 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5398 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5399 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5402 frame
->red_zone_size
= 0;
5403 frame
->to_allocate
-= frame
->red_zone_size
;
5404 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5406 fprintf (stderr
, "\n");
5407 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5408 fprintf (stderr
, "size: %ld\n", (long)size
);
5409 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5410 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5411 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5412 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5413 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5414 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5415 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5416 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5417 (long)frame
->hard_frame_pointer_offset
);
5418 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5419 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5420 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5421 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5425 /* Emit code to save registers in the prologue. */
5428 ix86_emit_save_regs (void)
5433 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5434 if (ix86_save_reg (regno
, true))
5436 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5437 RTX_FRAME_RELATED_P (insn
) = 1;
5441 /* Emit code to save registers using MOV insns. First register
5442 is restored from POINTER + OFFSET. */
5444 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5449 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5450 if (ix86_save_reg (regno
, true))
5452 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5454 gen_rtx_REG (Pmode
, regno
));
5455 RTX_FRAME_RELATED_P (insn
) = 1;
5456 offset
+= UNITS_PER_WORD
;
5460 /* Expand prologue or epilogue stack adjustment.
5461 The pattern exist to put a dependency on all ebp-based memory accesses.
5462 STYLE should be negative if instructions should be marked as frame related,
5463 zero if %r11 register is live and cannot be freely used and positive
5467 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5472 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5473 else if (x86_64_immediate_operand (offset
, DImode
))
5474 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5478 /* r11 is used by indirect sibcall return as well, set before the
5479 epilogue and used after the epilogue. ATM indirect sibcall
5480 shouldn't be used together with huge frame sizes in one
5481 function because of the frame_size check in sibcall.c. */
5483 r11
= gen_rtx_REG (DImode
, R11_REG
);
5484 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5486 RTX_FRAME_RELATED_P (insn
) = 1;
5487 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5491 RTX_FRAME_RELATED_P (insn
) = 1;
5494 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5497 ix86_internal_arg_pointer (void)
5499 bool has_force_align_arg_pointer
=
5500 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5501 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5502 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5503 && DECL_NAME (current_function_decl
)
5504 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5505 && DECL_FILE_SCOPE_P (current_function_decl
))
5506 || ix86_force_align_arg_pointer
5507 || has_force_align_arg_pointer
)
5509 /* Nested functions can't realign the stack due to a register
5511 if (DECL_CONTEXT (current_function_decl
)
5512 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5514 if (ix86_force_align_arg_pointer
)
5515 warning (0, "-mstackrealign ignored for nested functions");
5516 if (has_force_align_arg_pointer
)
5517 error ("%s not supported for nested functions",
5518 ix86_force_align_arg_pointer_string
);
5519 return virtual_incoming_args_rtx
;
5521 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5522 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5525 return virtual_incoming_args_rtx
;
5528 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5529 This is called from dwarf2out.c to emit call frame instructions
5530 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5532 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5534 rtx unspec
= SET_SRC (pattern
);
5535 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5539 case UNSPEC_REG_SAVE
:
5540 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5541 SET_DEST (pattern
));
5543 case UNSPEC_DEF_CFA
:
5544 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5545 INTVAL (XVECEXP (unspec
, 0, 0)));
5552 /* Expand the prologue into a bunch of separate insns. */
5555 ix86_expand_prologue (void)
5559 struct ix86_frame frame
;
5560 HOST_WIDE_INT allocate
;
5562 ix86_compute_frame_layout (&frame
);
5564 if (cfun
->machine
->force_align_arg_pointer
)
5568 /* Grab the argument pointer. */
5569 x
= plus_constant (stack_pointer_rtx
, 4);
5570 y
= cfun
->machine
->force_align_arg_pointer
;
5571 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5572 RTX_FRAME_RELATED_P (insn
) = 1;
5574 /* The unwind info consists of two parts: install the fafp as the cfa,
5575 and record the fafp as the "save register" of the stack pointer.
5576 The later is there in order that the unwinder can see where it
5577 should restore the stack pointer across the and insn. */
5578 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5579 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5580 RTX_FRAME_RELATED_P (x
) = 1;
5581 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5583 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5584 RTX_FRAME_RELATED_P (y
) = 1;
5585 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5586 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5587 REG_NOTES (insn
) = x
;
5589 /* Align the stack. */
5590 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5593 /* And here we cheat like madmen with the unwind info. We force the
5594 cfa register back to sp+4, which is exactly what it was at the
5595 start of the function. Re-pushing the return address results in
5596 the return at the same spot relative to the cfa, and thus is
5597 correct wrt the unwind info. */
5598 x
= cfun
->machine
->force_align_arg_pointer
;
5599 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5600 insn
= emit_insn (gen_push (x
));
5601 RTX_FRAME_RELATED_P (insn
) = 1;
5604 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5605 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5606 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5607 REG_NOTES (insn
) = x
;
5610 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5611 slower on all targets. Also sdb doesn't like it. */
5613 if (frame_pointer_needed
)
5615 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5616 RTX_FRAME_RELATED_P (insn
) = 1;
5618 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5619 RTX_FRAME_RELATED_P (insn
) = 1;
5622 allocate
= frame
.to_allocate
;
5624 if (!frame
.save_regs_using_mov
)
5625 ix86_emit_save_regs ();
5627 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5629 /* When using red zone we may start register saving before allocating
5630 the stack frame saving one cycle of the prologue. */
5631 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5632 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5633 : stack_pointer_rtx
,
5634 -frame
.nregs
* UNITS_PER_WORD
);
5638 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5639 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5640 GEN_INT (-allocate
), -1);
5643 /* Only valid for Win32. */
5644 rtx eax
= gen_rtx_REG (SImode
, 0);
5645 bool eax_live
= ix86_eax_live_at_start_p ();
5648 gcc_assert (!TARGET_64BIT
);
5652 emit_insn (gen_push (eax
));
5656 emit_move_insn (eax
, GEN_INT (allocate
));
5658 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5659 RTX_FRAME_RELATED_P (insn
) = 1;
5660 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5661 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5662 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5663 t
, REG_NOTES (insn
));
5667 if (frame_pointer_needed
)
5668 t
= plus_constant (hard_frame_pointer_rtx
,
5671 - frame
.nregs
* UNITS_PER_WORD
);
5673 t
= plus_constant (stack_pointer_rtx
, allocate
);
5674 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5678 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5680 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5681 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5683 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5684 -frame
.nregs
* UNITS_PER_WORD
);
5687 pic_reg_used
= false;
5688 if (pic_offset_table_rtx
5689 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5690 || current_function_profile
))
5692 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5694 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5695 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5697 pic_reg_used
= true;
5703 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5705 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5707 /* Even with accurate pre-reload life analysis, we can wind up
5708 deleting all references to the pic register after reload.
5709 Consider if cross-jumping unifies two sides of a branch
5710 controlled by a comparison vs the only read from a global.
5711 In which case, allow the set_got to be deleted, though we're
5712 too late to do anything about the ebx save in the prologue. */
5713 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5716 /* Prevent function calls from be scheduled before the call to mcount.
5717 In the pic_reg_used case, make sure that the got load isn't deleted. */
5718 if (current_function_profile
)
5719 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5722 /* Emit code to restore saved registers using MOV insns. First register
5723 is restored from POINTER + OFFSET. */
5725 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5726 int maybe_eh_return
)
5729 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5731 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5732 if (ix86_save_reg (regno
, maybe_eh_return
))
5734 /* Ensure that adjust_address won't be forced to produce pointer
5735 out of range allowed by x86-64 instruction set. */
5736 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5740 r11
= gen_rtx_REG (DImode
, R11_REG
);
5741 emit_move_insn (r11
, GEN_INT (offset
));
5742 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5743 base_address
= gen_rtx_MEM (Pmode
, r11
);
5746 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5747 adjust_address (base_address
, Pmode
, offset
));
5748 offset
+= UNITS_PER_WORD
;
5752 /* Restore function stack, frame, and registers. */
5755 ix86_expand_epilogue (int style
)
5758 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5759 struct ix86_frame frame
;
5760 HOST_WIDE_INT offset
;
5762 ix86_compute_frame_layout (&frame
);
5764 /* Calculate start of saved registers relative to ebp. Special care
5765 must be taken for the normal return case of a function using
5766 eh_return: the eax and edx registers are marked as saved, but not
5767 restored along this path. */
5768 offset
= frame
.nregs
;
5769 if (current_function_calls_eh_return
&& style
!= 2)
5771 offset
*= -UNITS_PER_WORD
;
5773 /* If we're only restoring one register and sp is not valid then
5774 using a move instruction to restore the register since it's
5775 less work than reloading sp and popping the register.
5777 The default code result in stack adjustment using add/lea instruction,
5778 while this code results in LEAVE instruction (or discrete equivalent),
5779 so it is profitable in some other cases as well. Especially when there
5780 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5781 and there is exactly one register to pop. This heuristic may need some
5782 tuning in future. */
5783 if ((!sp_valid
&& frame
.nregs
<= 1)
5784 || (TARGET_EPILOGUE_USING_MOVE
5785 && cfun
->machine
->use_fast_prologue_epilogue
5786 && (frame
.nregs
> 1 || frame
.to_allocate
))
5787 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5788 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5789 && cfun
->machine
->use_fast_prologue_epilogue
5790 && frame
.nregs
== 1)
5791 || current_function_calls_eh_return
)
5793 /* Restore registers. We can use ebp or esp to address the memory
5794 locations. If both are available, default to ebp, since offsets
5795 are known to be small. Only exception is esp pointing directly to the
5796 end of block of saved registers, where we may simplify addressing
5799 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5800 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5801 frame
.to_allocate
, style
== 2);
5803 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5804 offset
, style
== 2);
5806 /* eh_return epilogues need %ecx added to the stack pointer. */
5809 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5811 if (frame_pointer_needed
)
5813 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5814 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5815 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5817 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5818 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5820 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5825 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5826 tmp
= plus_constant (tmp
, (frame
.to_allocate
5827 + frame
.nregs
* UNITS_PER_WORD
));
5828 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5831 else if (!frame_pointer_needed
)
5832 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5833 GEN_INT (frame
.to_allocate
5834 + frame
.nregs
* UNITS_PER_WORD
),
5836 /* If not an i386, mov & pop is faster than "leave". */
5837 else if (TARGET_USE_LEAVE
|| optimize_size
5838 || !cfun
->machine
->use_fast_prologue_epilogue
)
5839 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5842 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5843 hard_frame_pointer_rtx
,
5846 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5848 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5853 /* First step is to deallocate the stack frame so that we can
5854 pop the registers. */
5857 gcc_assert (frame_pointer_needed
);
5858 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5859 hard_frame_pointer_rtx
,
5860 GEN_INT (offset
), style
);
5862 else if (frame
.to_allocate
)
5863 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5864 GEN_INT (frame
.to_allocate
), style
);
5866 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5867 if (ix86_save_reg (regno
, false))
5870 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5872 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5874 if (frame_pointer_needed
)
5876 /* Leave results in shorter dependency chains on CPUs that are
5877 able to grok it fast. */
5878 if (TARGET_USE_LEAVE
)
5879 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5880 else if (TARGET_64BIT
)
5881 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5883 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5887 if (cfun
->machine
->force_align_arg_pointer
)
5889 emit_insn (gen_addsi3 (stack_pointer_rtx
,
5890 cfun
->machine
->force_align_arg_pointer
,
5894 /* Sibcall epilogues don't want a return instruction. */
5898 if (current_function_pops_args
&& current_function_args_size
)
5900 rtx popc
= GEN_INT (current_function_pops_args
);
5902 /* i386 can only pop 64K bytes. If asked to pop more, pop
5903 return address, do explicit add, and jump indirectly to the
5906 if (current_function_pops_args
>= 65536)
5908 rtx ecx
= gen_rtx_REG (SImode
, 2);
5910 /* There is no "pascal" calling convention in 64bit ABI. */
5911 gcc_assert (!TARGET_64BIT
);
5913 emit_insn (gen_popsi1 (ecx
));
5914 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5915 emit_jump_insn (gen_return_indirect_internal (ecx
));
5918 emit_jump_insn (gen_return_pop_internal (popc
));
5921 emit_jump_insn (gen_return_internal ());
5924 /* Reset from the function's potential modifications. */
5927 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5928 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5930 if (pic_offset_table_rtx
)
5931 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5933 /* Mach-O doesn't support labels at the end of objects, so if
5934 it looks like we might want one, insert a NOP. */
5936 rtx insn
= get_last_insn ();
5939 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
5940 insn
= PREV_INSN (insn
);
5944 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
5945 fputs ("\tnop\n", file
);
5951 /* Extract the parts of an RTL expression that is a valid memory address
5952 for an instruction. Return 0 if the structure of the address is
5953 grossly off. Return -1 if the address contains ASHIFT, so it is not
5954 strictly valid, but still used for computing length of lea instruction. */
5957 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5959 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
5960 rtx base_reg
, index_reg
;
5961 HOST_WIDE_INT scale
= 1;
5962 rtx scale_rtx
= NULL_RTX
;
5964 enum ix86_address_seg seg
= SEG_DEFAULT
;
5966 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
5968 else if (GET_CODE (addr
) == PLUS
)
5978 addends
[n
++] = XEXP (op
, 1);
5981 while (GET_CODE (op
) == PLUS
);
5986 for (i
= n
; i
>= 0; --i
)
5989 switch (GET_CODE (op
))
5994 index
= XEXP (op
, 0);
5995 scale_rtx
= XEXP (op
, 1);
5999 if (XINT (op
, 1) == UNSPEC_TP
6000 && TARGET_TLS_DIRECT_SEG_REFS
6001 && seg
== SEG_DEFAULT
)
6002 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6031 else if (GET_CODE (addr
) == MULT
)
6033 index
= XEXP (addr
, 0); /* index*scale */
6034 scale_rtx
= XEXP (addr
, 1);
6036 else if (GET_CODE (addr
) == ASHIFT
)
6040 /* We're called for lea too, which implements ashift on occasion. */
6041 index
= XEXP (addr
, 0);
6042 tmp
= XEXP (addr
, 1);
6043 if (!CONST_INT_P (tmp
))
6045 scale
= INTVAL (tmp
);
6046 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6052 disp
= addr
; /* displacement */
6054 /* Extract the integral value of scale. */
6057 if (!CONST_INT_P (scale_rtx
))
6059 scale
= INTVAL (scale_rtx
);
6062 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6063 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6065 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6066 if (base_reg
&& index_reg
&& scale
== 1
6067 && (index_reg
== arg_pointer_rtx
6068 || index_reg
== frame_pointer_rtx
6069 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6072 tmp
= base
, base
= index
, index
= tmp
;
6073 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6076 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6077 if ((base_reg
== hard_frame_pointer_rtx
6078 || base_reg
== frame_pointer_rtx
6079 || base_reg
== arg_pointer_rtx
) && !disp
)
6082 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6083 Avoid this by transforming to [%esi+0]. */
6084 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6085 && base_reg
&& !index_reg
&& !disp
6087 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6090 /* Special case: encode reg+reg instead of reg*2. */
6091 if (!base
&& index
&& scale
&& scale
== 2)
6092 base
= index
, base_reg
= index_reg
, scale
= 1;
6094 /* Special case: scaling cannot be encoded without base or displacement. */
6095 if (!base
&& !disp
&& index
&& scale
!= 1)
6107 /* Return cost of the memory address x.
6108 For i386, it is better to use a complex address than let gcc copy
6109 the address into a reg and make a new pseudo. But not if the address
6110 requires to two regs - that would mean more pseudos with longer
6113 ix86_address_cost (rtx x
)
6115 struct ix86_address parts
;
6117 int ok
= ix86_decompose_address (x
, &parts
);
6121 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6122 parts
.base
= SUBREG_REG (parts
.base
);
6123 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6124 parts
.index
= SUBREG_REG (parts
.index
);
6126 /* More complex memory references are better. */
6127 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6129 if (parts
.seg
!= SEG_DEFAULT
)
6132 /* Attempt to minimize number of registers in the address. */
6134 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6136 && (!REG_P (parts
.index
)
6137 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6141 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6143 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6144 && parts
.base
!= parts
.index
)
6147 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6148 since it's predecode logic can't detect the length of instructions
6149 and it degenerates to vector decoded. Increase cost of such
6150 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6151 to split such addresses or even refuse such addresses at all.
6153 Following addressing modes are affected:
6158 The first and last case may be avoidable by explicitly coding the zero in
6159 memory address, but I don't have AMD-K6 machine handy to check this
6163 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6164 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6165 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6171 /* If X is a machine specific address (i.e. a symbol or label being
6172 referenced as a displacement from the GOT implemented using an
6173 UNSPEC), then return the base term. Otherwise return X. */
6176 ix86_find_base_term (rtx x
)
6182 if (GET_CODE (x
) != CONST
)
6185 if (GET_CODE (term
) == PLUS
6186 && (CONST_INT_P (XEXP (term
, 1))
6187 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6188 term
= XEXP (term
, 0);
6189 if (GET_CODE (term
) != UNSPEC
6190 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6193 term
= XVECEXP (term
, 0, 0);
6195 if (GET_CODE (term
) != SYMBOL_REF
6196 && GET_CODE (term
) != LABEL_REF
)
6202 term
= ix86_delegitimize_address (x
);
6204 if (GET_CODE (term
) != SYMBOL_REF
6205 && GET_CODE (term
) != LABEL_REF
)
6211 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6212 this is used for to form addresses to local data when -fPIC is in
6216 darwin_local_data_pic (rtx disp
)
6218 if (GET_CODE (disp
) == MINUS
)
6220 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6221 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6222 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6224 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6225 if (! strcmp (sym_name
, "<pic base>"))
6233 /* Determine if a given RTX is a valid constant. We already know this
6234 satisfies CONSTANT_P. */
6237 legitimate_constant_p (rtx x
)
6239 switch (GET_CODE (x
))
6244 if (GET_CODE (x
) == PLUS
)
6246 if (!CONST_INT_P (XEXP (x
, 1)))
6251 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6254 /* Only some unspecs are valid as "constants". */
6255 if (GET_CODE (x
) == UNSPEC
)
6256 switch (XINT (x
, 1))
6259 return TARGET_64BIT
;
6262 x
= XVECEXP (x
, 0, 0);
6263 return (GET_CODE (x
) == SYMBOL_REF
6264 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6266 x
= XVECEXP (x
, 0, 0);
6267 return (GET_CODE (x
) == SYMBOL_REF
6268 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6273 /* We must have drilled down to a symbol. */
6274 if (GET_CODE (x
) == LABEL_REF
)
6276 if (GET_CODE (x
) != SYMBOL_REF
)
6281 /* TLS symbols are never valid. */
6282 if (SYMBOL_REF_TLS_MODEL (x
))
6287 if (GET_MODE (x
) == TImode
6288 && x
!= CONST0_RTX (TImode
)
6294 if (x
== CONST0_RTX (GET_MODE (x
)))
6302 /* Otherwise we handle everything else in the move patterns. */
6306 /* Determine if it's legal to put X into the constant pool. This
6307 is not possible for the address of thread-local symbols, which
6308 is checked above. */
6311 ix86_cannot_force_const_mem (rtx x
)
6313 /* We can always put integral constants and vectors in memory. */
6314 switch (GET_CODE (x
))
6324 return !legitimate_constant_p (x
);
6327 /* Determine if a given RTX is a valid constant address. */
6330 constant_address_p (rtx x
)
6332 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6335 /* Nonzero if the constant value X is a legitimate general operand
6336 when generating PIC code. It is given that flag_pic is on and
6337 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6340 legitimate_pic_operand_p (rtx x
)
6344 switch (GET_CODE (x
))
6347 inner
= XEXP (x
, 0);
6348 if (GET_CODE (inner
) == PLUS
6349 && CONST_INT_P (XEXP (inner
, 1)))
6350 inner
= XEXP (inner
, 0);
6352 /* Only some unspecs are valid as "constants". */
6353 if (GET_CODE (inner
) == UNSPEC
)
6354 switch (XINT (inner
, 1))
6357 return TARGET_64BIT
;
6359 x
= XVECEXP (inner
, 0, 0);
6360 return (GET_CODE (x
) == SYMBOL_REF
6361 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6369 return legitimate_pic_address_disp_p (x
);
6376 /* Determine if a given CONST RTX is a valid memory displacement
6380 legitimate_pic_address_disp_p (rtx disp
)
6384 /* In 64bit mode we can allow direct addresses of symbols and labels
6385 when they are not dynamic symbols. */
6388 rtx op0
= disp
, op1
;
6390 switch (GET_CODE (disp
))
6396 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6398 op0
= XEXP (XEXP (disp
, 0), 0);
6399 op1
= XEXP (XEXP (disp
, 0), 1);
6400 if (!CONST_INT_P (op1
)
6401 || INTVAL (op1
) >= 16*1024*1024
6402 || INTVAL (op1
) < -16*1024*1024)
6404 if (GET_CODE (op0
) == LABEL_REF
)
6406 if (GET_CODE (op0
) != SYMBOL_REF
)
6411 /* TLS references should always be enclosed in UNSPEC. */
6412 if (SYMBOL_REF_TLS_MODEL (op0
))
6414 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6422 if (GET_CODE (disp
) != CONST
)
6424 disp
= XEXP (disp
, 0);
6428 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6429 of GOT tables. We should not need these anyway. */
6430 if (GET_CODE (disp
) != UNSPEC
6431 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6432 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6435 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6436 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6442 if (GET_CODE (disp
) == PLUS
)
6444 if (!CONST_INT_P (XEXP (disp
, 1)))
6446 disp
= XEXP (disp
, 0);
6450 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6453 if (GET_CODE (disp
) != UNSPEC
)
6456 switch (XINT (disp
, 1))
6461 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6463 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6464 While ABI specify also 32bit relocation but we don't produce it in
6465 small PIC model at all. */
6466 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6467 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6469 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6471 case UNSPEC_GOTTPOFF
:
6472 case UNSPEC_GOTNTPOFF
:
6473 case UNSPEC_INDNTPOFF
:
6476 disp
= XVECEXP (disp
, 0, 0);
6477 return (GET_CODE (disp
) == SYMBOL_REF
6478 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6480 disp
= XVECEXP (disp
, 0, 0);
6481 return (GET_CODE (disp
) == SYMBOL_REF
6482 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6484 disp
= XVECEXP (disp
, 0, 0);
6485 return (GET_CODE (disp
) == SYMBOL_REF
6486 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6492 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6493 memory address for an instruction. The MODE argument is the machine mode
6494 for the MEM expression that wants to use this address.
6496 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6497 convert common non-canonical forms to canonical form so that they will
6501 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6503 struct ix86_address parts
;
6504 rtx base
, index
, disp
;
6505 HOST_WIDE_INT scale
;
6506 const char *reason
= NULL
;
6507 rtx reason_rtx
= NULL_RTX
;
6509 if (TARGET_DEBUG_ADDR
)
6512 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6513 GET_MODE_NAME (mode
), strict
);
6517 if (ix86_decompose_address (addr
, &parts
) <= 0)
6519 reason
= "decomposition failed";
6524 index
= parts
.index
;
6526 scale
= parts
.scale
;
6528 /* Validate base register.
6530 Don't allow SUBREG's that span more than a word here. It can lead to spill
6531 failures when the base is one word out of a two word structure, which is
6532 represented internally as a DImode int. */
6541 else if (GET_CODE (base
) == SUBREG
6542 && REG_P (SUBREG_REG (base
))
6543 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6545 reg
= SUBREG_REG (base
);
6548 reason
= "base is not a register";
6552 if (GET_MODE (base
) != Pmode
)
6554 reason
= "base is not in Pmode";
6558 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6559 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6561 reason
= "base is not valid";
6566 /* Validate index register.
6568 Don't allow SUBREG's that span more than a word here -- same as above. */
6577 else if (GET_CODE (index
) == SUBREG
6578 && REG_P (SUBREG_REG (index
))
6579 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6581 reg
= SUBREG_REG (index
);
6584 reason
= "index is not a register";
6588 if (GET_MODE (index
) != Pmode
)
6590 reason
= "index is not in Pmode";
6594 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6595 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6597 reason
= "index is not valid";
6602 /* Validate scale factor. */
6605 reason_rtx
= GEN_INT (scale
);
6608 reason
= "scale without index";
6612 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6614 reason
= "scale is not a valid multiplier";
6619 /* Validate displacement. */
6624 if (GET_CODE (disp
) == CONST
6625 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6626 switch (XINT (XEXP (disp
, 0), 1))
6628 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6629 used. While ABI specify also 32bit relocations, we don't produce
6630 them at all and use IP relative instead. */
6633 gcc_assert (flag_pic
);
6635 goto is_legitimate_pic
;
6636 reason
= "64bit address unspec";
6639 case UNSPEC_GOTPCREL
:
6640 gcc_assert (flag_pic
);
6641 goto is_legitimate_pic
;
6643 case UNSPEC_GOTTPOFF
:
6644 case UNSPEC_GOTNTPOFF
:
6645 case UNSPEC_INDNTPOFF
:
6651 reason
= "invalid address unspec";
6655 else if (SYMBOLIC_CONST (disp
)
6659 && MACHOPIC_INDIRECT
6660 && !machopic_operand_p (disp
)
6666 if (TARGET_64BIT
&& (index
|| base
))
6668 /* foo@dtpoff(%rX) is ok. */
6669 if (GET_CODE (disp
) != CONST
6670 || GET_CODE (XEXP (disp
, 0)) != PLUS
6671 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6672 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6673 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6674 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6676 reason
= "non-constant pic memory reference";
6680 else if (! legitimate_pic_address_disp_p (disp
))
6682 reason
= "displacement is an invalid pic construct";
6686 /* This code used to verify that a symbolic pic displacement
6687 includes the pic_offset_table_rtx register.
6689 While this is good idea, unfortunately these constructs may
6690 be created by "adds using lea" optimization for incorrect
6699 This code is nonsensical, but results in addressing
6700 GOT table with pic_offset_table_rtx base. We can't
6701 just refuse it easily, since it gets matched by
6702 "addsi3" pattern, that later gets split to lea in the
6703 case output register differs from input. While this
6704 can be handled by separate addsi pattern for this case
6705 that never results in lea, this seems to be easier and
6706 correct fix for crash to disable this test. */
6708 else if (GET_CODE (disp
) != LABEL_REF
6709 && !CONST_INT_P (disp
)
6710 && (GET_CODE (disp
) != CONST
6711 || !legitimate_constant_p (disp
))
6712 && (GET_CODE (disp
) != SYMBOL_REF
6713 || !legitimate_constant_p (disp
)))
6715 reason
= "displacement is not constant";
6718 else if (TARGET_64BIT
6719 && !x86_64_immediate_operand (disp
, VOIDmode
))
6721 reason
= "displacement is out of range";
6726 /* Everything looks valid. */
6727 if (TARGET_DEBUG_ADDR
)
6728 fprintf (stderr
, "Success.\n");
6732 if (TARGET_DEBUG_ADDR
)
6734 fprintf (stderr
, "Error: %s\n", reason
);
6735 debug_rtx (reason_rtx
);
6740 /* Return a unique alias set for the GOT. */
6742 static HOST_WIDE_INT
6743 ix86_GOT_alias_set (void)
6745 static HOST_WIDE_INT set
= -1;
6747 set
= new_alias_set ();
6751 /* Return a legitimate reference for ORIG (an address) using the
6752 register REG. If REG is 0, a new pseudo is generated.
6754 There are two types of references that must be handled:
6756 1. Global data references must load the address from the GOT, via
6757 the PIC reg. An insn is emitted to do this load, and the reg is
6760 2. Static data references, constant pool addresses, and code labels
6761 compute the address as an offset from the GOT, whose base is in
6762 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6763 differentiate them from global data objects. The returned
6764 address is the PIC reg + an unspec constant.
6766 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6767 reg also appears in the address. */
6770 legitimize_pic_address (rtx orig
, rtx reg
)
6777 if (TARGET_MACHO
&& !TARGET_64BIT
)
6780 reg
= gen_reg_rtx (Pmode
);
6781 /* Use the generic Mach-O PIC machinery. */
6782 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6786 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6788 else if (TARGET_64BIT
6789 && ix86_cmodel
!= CM_SMALL_PIC
6790 && local_symbolic_operand (addr
, Pmode
))
6793 /* This symbol may be referenced via a displacement from the PIC
6794 base address (@GOTOFF). */
6796 if (reload_in_progress
)
6797 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6798 if (GET_CODE (addr
) == CONST
)
6799 addr
= XEXP (addr
, 0);
6800 if (GET_CODE (addr
) == PLUS
)
6802 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6803 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6806 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6807 new = gen_rtx_CONST (Pmode
, new);
6809 tmpreg
= gen_reg_rtx (Pmode
);
6812 emit_move_insn (tmpreg
, new);
6816 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6817 tmpreg
, 1, OPTAB_DIRECT
);
6820 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6822 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6824 /* This symbol may be referenced via a displacement from the PIC
6825 base address (@GOTOFF). */
6827 if (reload_in_progress
)
6828 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6829 if (GET_CODE (addr
) == CONST
)
6830 addr
= XEXP (addr
, 0);
6831 if (GET_CODE (addr
) == PLUS
)
6833 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6834 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6837 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6838 new = gen_rtx_CONST (Pmode
, new);
6839 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6843 emit_move_insn (reg
, new);
6847 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
6851 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6852 new = gen_rtx_CONST (Pmode
, new);
6853 new = gen_const_mem (Pmode
, new);
6854 set_mem_alias_set (new, ix86_GOT_alias_set ());
6857 reg
= gen_reg_rtx (Pmode
);
6858 /* Use directly gen_movsi, otherwise the address is loaded
6859 into register for CSE. We don't want to CSE this addresses,
6860 instead we CSE addresses from the GOT table, so skip this. */
6861 emit_insn (gen_movsi (reg
, new));
6866 /* This symbol must be referenced via a load from the
6867 Global Offset Table (@GOT). */
6869 if (reload_in_progress
)
6870 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6871 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6872 new = gen_rtx_CONST (Pmode
, new);
6873 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6874 new = gen_const_mem (Pmode
, new);
6875 set_mem_alias_set (new, ix86_GOT_alias_set ());
6878 reg
= gen_reg_rtx (Pmode
);
6879 emit_move_insn (reg
, new);
6885 if (CONST_INT_P (addr
)
6886 && !x86_64_immediate_operand (addr
, VOIDmode
))
6890 emit_move_insn (reg
, addr
);
6894 new = force_reg (Pmode
, addr
);
6896 else if (GET_CODE (addr
) == CONST
)
6898 addr
= XEXP (addr
, 0);
6900 /* We must match stuff we generate before. Assume the only
6901 unspecs that can get here are ours. Not that we could do
6902 anything with them anyway.... */
6903 if (GET_CODE (addr
) == UNSPEC
6904 || (GET_CODE (addr
) == PLUS
6905 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6907 gcc_assert (GET_CODE (addr
) == PLUS
);
6909 if (GET_CODE (addr
) == PLUS
)
6911 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6913 /* Check first to see if this is a constant offset from a @GOTOFF
6914 symbol reference. */
6915 if (local_symbolic_operand (op0
, Pmode
)
6916 && CONST_INT_P (op1
))
6920 if (reload_in_progress
)
6921 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6922 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6924 new = gen_rtx_PLUS (Pmode
, new, op1
);
6925 new = gen_rtx_CONST (Pmode
, new);
6926 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6930 emit_move_insn (reg
, new);
6936 if (INTVAL (op1
) < -16*1024*1024
6937 || INTVAL (op1
) >= 16*1024*1024)
6939 if (!x86_64_immediate_operand (op1
, Pmode
))
6940 op1
= force_reg (Pmode
, op1
);
6941 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6947 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6948 new = legitimize_pic_address (XEXP (addr
, 1),
6949 base
== reg
? NULL_RTX
: reg
);
6951 if (CONST_INT_P (new))
6952 new = plus_constant (base
, INTVAL (new));
6955 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6957 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6958 new = XEXP (new, 1);
6960 new = gen_rtx_PLUS (Pmode
, base
, new);
6968 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6971 get_thread_pointer (int to_reg
)
6975 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6979 reg
= gen_reg_rtx (Pmode
);
6980 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6981 insn
= emit_insn (insn
);
6986 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6987 false if we expect this to be used for a memory address and true if
6988 we expect to load the address into a register. */
6991 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6993 rtx dest
, base
, off
, pic
, tp
;
6998 case TLS_MODEL_GLOBAL_DYNAMIC
:
6999 dest
= gen_reg_rtx (Pmode
);
7000 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7002 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7004 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7007 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7008 insns
= get_insns ();
7011 emit_libcall_block (insns
, dest
, rax
, x
);
7013 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7014 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7016 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7018 if (TARGET_GNU2_TLS
)
7020 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7022 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7026 case TLS_MODEL_LOCAL_DYNAMIC
:
7027 base
= gen_reg_rtx (Pmode
);
7028 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7030 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7032 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7035 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7036 insns
= get_insns ();
7039 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7040 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7041 emit_libcall_block (insns
, base
, rax
, note
);
7043 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7044 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7046 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7048 if (TARGET_GNU2_TLS
)
7050 rtx x
= ix86_tls_module_base ();
7052 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7053 gen_rtx_MINUS (Pmode
, x
, tp
));
7056 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7057 off
= gen_rtx_CONST (Pmode
, off
);
7059 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7061 if (TARGET_GNU2_TLS
)
7063 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7065 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7070 case TLS_MODEL_INITIAL_EXEC
:
7074 type
= UNSPEC_GOTNTPOFF
;
7078 if (reload_in_progress
)
7079 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7080 pic
= pic_offset_table_rtx
;
7081 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7083 else if (!TARGET_ANY_GNU_TLS
)
7085 pic
= gen_reg_rtx (Pmode
);
7086 emit_insn (gen_set_got (pic
));
7087 type
= UNSPEC_GOTTPOFF
;
7092 type
= UNSPEC_INDNTPOFF
;
7095 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7096 off
= gen_rtx_CONST (Pmode
, off
);
7098 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7099 off
= gen_const_mem (Pmode
, off
);
7100 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7102 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7104 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7105 off
= force_reg (Pmode
, off
);
7106 return gen_rtx_PLUS (Pmode
, base
, off
);
7110 base
= get_thread_pointer (true);
7111 dest
= gen_reg_rtx (Pmode
);
7112 emit_insn (gen_subsi3 (dest
, base
, off
));
7116 case TLS_MODEL_LOCAL_EXEC
:
7117 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7118 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7119 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7120 off
= gen_rtx_CONST (Pmode
, off
);
7122 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7124 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7125 return gen_rtx_PLUS (Pmode
, base
, off
);
7129 base
= get_thread_pointer (true);
7130 dest
= gen_reg_rtx (Pmode
);
7131 emit_insn (gen_subsi3 (dest
, base
, off
));
7142 /* Try machine-dependent ways of modifying an illegitimate address
7143 to be legitimate. If we find one, return the new, valid address.
7144 This macro is used in only one place: `memory_address' in explow.c.
7146 OLDX is the address as it was before break_out_memory_refs was called.
7147 In some cases it is useful to look at this to decide what needs to be done.
7149 MODE and WIN are passed so that this macro can use
7150 GO_IF_LEGITIMATE_ADDRESS.
7152 It is always safe for this macro to do nothing. It exists to recognize
7153 opportunities to optimize the output.
7155 For the 80386, we handle X+REG by loading X into a register R and
7156 using R+REG. R will go in a general reg and indexing will be used.
7157 However, if REG is a broken-out memory address or multiplication,
7158 nothing needs to be done because REG can certainly go in a general reg.
7160 When -fpic is used, special handling is needed for symbolic references.
7161 See comments by legitimize_pic_address in i386.c for details. */
7164 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7169 if (TARGET_DEBUG_ADDR
)
7171 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7172 GET_MODE_NAME (mode
));
7176 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7178 return legitimize_tls_address (x
, log
, false);
7179 if (GET_CODE (x
) == CONST
7180 && GET_CODE (XEXP (x
, 0)) == PLUS
7181 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7182 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7184 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7185 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7188 if (flag_pic
&& SYMBOLIC_CONST (x
))
7189 return legitimize_pic_address (x
, 0);
7191 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7192 if (GET_CODE (x
) == ASHIFT
7193 && CONST_INT_P (XEXP (x
, 1))
7194 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7197 log
= INTVAL (XEXP (x
, 1));
7198 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7199 GEN_INT (1 << log
));
7202 if (GET_CODE (x
) == PLUS
)
7204 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7206 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7207 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7208 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7211 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7212 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7213 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7214 GEN_INT (1 << log
));
7217 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7218 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7219 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7222 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7223 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7224 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7225 GEN_INT (1 << log
));
7228 /* Put multiply first if it isn't already. */
7229 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7231 rtx tmp
= XEXP (x
, 0);
7232 XEXP (x
, 0) = XEXP (x
, 1);
7237 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7238 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7239 created by virtual register instantiation, register elimination, and
7240 similar optimizations. */
7241 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7244 x
= gen_rtx_PLUS (Pmode
,
7245 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7246 XEXP (XEXP (x
, 1), 0)),
7247 XEXP (XEXP (x
, 1), 1));
7251 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7252 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7253 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7254 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7255 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7256 && CONSTANT_P (XEXP (x
, 1)))
7259 rtx other
= NULL_RTX
;
7261 if (CONST_INT_P (XEXP (x
, 1)))
7263 constant
= XEXP (x
, 1);
7264 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7266 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7268 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7269 other
= XEXP (x
, 1);
7277 x
= gen_rtx_PLUS (Pmode
,
7278 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7279 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7280 plus_constant (other
, INTVAL (constant
)));
7284 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7287 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7290 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7293 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7296 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7300 && REG_P (XEXP (x
, 1))
7301 && REG_P (XEXP (x
, 0)))
7304 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7307 x
= legitimize_pic_address (x
, 0);
7310 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7313 if (REG_P (XEXP (x
, 0)))
7315 rtx temp
= gen_reg_rtx (Pmode
);
7316 rtx val
= force_operand (XEXP (x
, 1), temp
);
7318 emit_move_insn (temp
, val
);
7324 else if (REG_P (XEXP (x
, 1)))
7326 rtx temp
= gen_reg_rtx (Pmode
);
7327 rtx val
= force_operand (XEXP (x
, 0), temp
);
7329 emit_move_insn (temp
, val
);
7339 /* Print an integer constant expression in assembler syntax. Addition
7340 and subtraction are the only arithmetic that may appear in these
7341 expressions. FILE is the stdio stream to write to, X is the rtx, and
7342 CODE is the operand print code from the output string. */
7345 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7349 switch (GET_CODE (x
))
7352 gcc_assert (flag_pic
);
7357 output_addr_const (file
, x
);
7358 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7359 fputs ("@PLT", file
);
7366 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7367 assemble_name (asm_out_file
, buf
);
7371 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7375 /* This used to output parentheses around the expression,
7376 but that does not work on the 386 (either ATT or BSD assembler). */
7377 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7381 if (GET_MODE (x
) == VOIDmode
)
7383 /* We can use %d if the number is <32 bits and positive. */
7384 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7385 fprintf (file
, "0x%lx%08lx",
7386 (unsigned long) CONST_DOUBLE_HIGH (x
),
7387 (unsigned long) CONST_DOUBLE_LOW (x
));
7389 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7392 /* We can't handle floating point constants;
7393 PRINT_OPERAND must handle them. */
7394 output_operand_lossage ("floating constant misused");
7398 /* Some assemblers need integer constants to appear first. */
7399 if (CONST_INT_P (XEXP (x
, 0)))
7401 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7403 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7407 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7408 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7410 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7416 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7417 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7419 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7421 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7425 gcc_assert (XVECLEN (x
, 0) == 1);
7426 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7427 switch (XINT (x
, 1))
7430 fputs ("@GOT", file
);
7433 fputs ("@GOTOFF", file
);
7435 case UNSPEC_GOTPCREL
:
7436 fputs ("@GOTPCREL(%rip)", file
);
7438 case UNSPEC_GOTTPOFF
:
7439 /* FIXME: This might be @TPOFF in Sun ld too. */
7440 fputs ("@GOTTPOFF", file
);
7443 fputs ("@TPOFF", file
);
7447 fputs ("@TPOFF", file
);
7449 fputs ("@NTPOFF", file
);
7452 fputs ("@DTPOFF", file
);
7454 case UNSPEC_GOTNTPOFF
:
7456 fputs ("@GOTTPOFF(%rip)", file
);
7458 fputs ("@GOTNTPOFF", file
);
7460 case UNSPEC_INDNTPOFF
:
7461 fputs ("@INDNTPOFF", file
);
7464 output_operand_lossage ("invalid UNSPEC as operand");
7470 output_operand_lossage ("invalid expression as operand");
7474 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7475 We need to emit DTP-relative relocations. */
7478 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7480 fputs (ASM_LONG
, file
);
7481 output_addr_const (file
, x
);
7482 fputs ("@DTPOFF", file
);
7488 fputs (", 0", file
);
7495 /* In the name of slightly smaller debug output, and to cater to
7496 general assembler lossage, recognize PIC+GOTOFF and turn it back
7497 into a direct symbol reference.
7499 On Darwin, this is necessary to avoid a crash, because Darwin
7500 has a different PIC label for each routine but the DWARF debugging
7501 information is not associated with any particular routine, so it's
7502 necessary to remove references to the PIC label from RTL stored by
7503 the DWARF output code. */
7506 ix86_delegitimize_address (rtx orig_x
)
7509 /* reg_addend is NULL or a multiple of some register. */
7510 rtx reg_addend
= NULL_RTX
;
7511 /* const_addend is NULL or a const_int. */
7512 rtx const_addend
= NULL_RTX
;
7513 /* This is the result, or NULL. */
7514 rtx result
= NULL_RTX
;
7521 if (GET_CODE (x
) != CONST
7522 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7523 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7526 return XVECEXP (XEXP (x
, 0), 0, 0);
7529 if (GET_CODE (x
) != PLUS
7530 || GET_CODE (XEXP (x
, 1)) != CONST
)
7533 if (REG_P (XEXP (x
, 0))
7534 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7535 /* %ebx + GOT/GOTOFF */
7537 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7539 /* %ebx + %reg * scale + GOT/GOTOFF */
7540 reg_addend
= XEXP (x
, 0);
7541 if (REG_P (XEXP (reg_addend
, 0))
7542 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7543 reg_addend
= XEXP (reg_addend
, 1);
7544 else if (REG_P (XEXP (reg_addend
, 1))
7545 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7546 reg_addend
= XEXP (reg_addend
, 0);
7549 if (!REG_P (reg_addend
)
7550 && GET_CODE (reg_addend
) != MULT
7551 && GET_CODE (reg_addend
) != ASHIFT
)
7557 x
= XEXP (XEXP (x
, 1), 0);
7558 if (GET_CODE (x
) == PLUS
7559 && CONST_INT_P (XEXP (x
, 1)))
7561 const_addend
= XEXP (x
, 1);
7565 if (GET_CODE (x
) == UNSPEC
7566 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7567 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7568 result
= XVECEXP (x
, 0, 0);
7570 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7572 result
= XEXP (x
, 0);
7578 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7580 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7585 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7590 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7592 enum rtx_code second_code
, bypass_code
;
7593 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7594 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7595 code
= ix86_fp_compare_code_to_integer (code
);
7599 code
= reverse_condition (code
);
7610 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7614 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7615 Those same assemblers have the same but opposite lossage on cmov. */
7616 gcc_assert (mode
== CCmode
);
7617 suffix
= fp
? "nbe" : "a";
7637 gcc_assert (mode
== CCmode
);
7659 gcc_assert (mode
== CCmode
);
7660 suffix
= fp
? "nb" : "ae";
7663 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7667 gcc_assert (mode
== CCmode
);
7671 suffix
= fp
? "u" : "p";
7674 suffix
= fp
? "nu" : "np";
7679 fputs (suffix
, file
);
7682 /* Print the name of register X to FILE based on its machine mode and number.
7683 If CODE is 'w', pretend the mode is HImode.
7684 If CODE is 'b', pretend the mode is QImode.
7685 If CODE is 'k', pretend the mode is SImode.
7686 If CODE is 'q', pretend the mode is DImode.
7687 If CODE is 'h', pretend the reg is the 'high' byte register.
7688 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7691 print_reg (rtx x
, int code
, FILE *file
)
7693 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7694 && REGNO (x
) != FRAME_POINTER_REGNUM
7695 && REGNO (x
) != FLAGS_REG
7696 && REGNO (x
) != FPSR_REG
7697 && REGNO (x
) != FPCR_REG
);
7699 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7702 if (code
== 'w' || MMX_REG_P (x
))
7704 else if (code
== 'b')
7706 else if (code
== 'k')
7708 else if (code
== 'q')
7710 else if (code
== 'y')
7712 else if (code
== 'h')
7715 code
= GET_MODE_SIZE (GET_MODE (x
));
7717 /* Irritatingly, AMD extended registers use different naming convention
7718 from the normal registers. */
7719 if (REX_INT_REG_P (x
))
7721 gcc_assert (TARGET_64BIT
);
7725 error ("extended registers have no high halves");
7728 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7731 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7734 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7737 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7740 error ("unsupported operand size for extended register");
7748 if (STACK_TOP_P (x
))
7750 fputs ("st(0)", file
);
7757 if (! ANY_FP_REG_P (x
))
7758 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7763 fputs (hi_reg_name
[REGNO (x
)], file
);
7766 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7768 fputs (qi_reg_name
[REGNO (x
)], file
);
7771 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7773 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7780 /* Locate some local-dynamic symbol still in use by this function
7781 so that we can print its name in some tls_local_dynamic_base
7785 get_some_local_dynamic_name (void)
7789 if (cfun
->machine
->some_ld_name
)
7790 return cfun
->machine
->some_ld_name
;
7792 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7794 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7795 return cfun
->machine
->some_ld_name
;
7801 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7805 if (GET_CODE (x
) == SYMBOL_REF
7806 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
7808 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7816 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7817 C -- print opcode suffix for set/cmov insn.
7818 c -- like C, but print reversed condition
7819 F,f -- likewise, but for floating-point.
7820 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7822 R -- print the prefix for register names.
7823 z -- print the opcode suffix for the size of the current operand.
7824 * -- print a star (in certain assembler syntax)
7825 A -- print an absolute memory reference.
7826 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7827 s -- print a shift double count, followed by the assemblers argument
7829 b -- print the QImode name of the register for the indicated operand.
7830 %b0 would print %al if operands[0] is reg 0.
7831 w -- likewise, print the HImode name of the register.
7832 k -- likewise, print the SImode name of the register.
7833 q -- likewise, print the DImode name of the register.
7834 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7835 y -- print "st(0)" instead of "st" as a register.
7836 D -- print condition for SSE cmp instruction.
7837 P -- if PIC, print an @PLT suffix.
7838 X -- don't print any sort of PIC '@' suffix for a symbol.
7839 & -- print some in-use local-dynamic symbol name.
7840 H -- print a memory address offset by 8; used for sse high-parts
7844 print_operand (FILE *file
, rtx x
, int code
)
7851 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7856 assemble_name (file
, get_some_local_dynamic_name ());
7860 switch (ASSEMBLER_DIALECT
)
7867 /* Intel syntax. For absolute addresses, registers should not
7868 be surrounded by braces. */
7872 PRINT_OPERAND (file
, x
, 0);
7882 PRINT_OPERAND (file
, x
, 0);
7887 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7892 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7897 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7902 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7907 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7912 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7917 /* 387 opcodes don't get size suffixes if the operands are
7919 if (STACK_REG_P (x
))
7922 /* Likewise if using Intel opcodes. */
7923 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7926 /* This is the size of op from size of operand. */
7927 switch (GET_MODE_SIZE (GET_MODE (x
)))
7934 #ifdef HAVE_GAS_FILDS_FISTS
7940 if (GET_MODE (x
) == SFmode
)
7955 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7957 #ifdef GAS_MNEMONICS
7983 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
7985 PRINT_OPERAND (file
, x
, 0);
7991 /* Little bit of braindamage here. The SSE compare instructions
7992 does use completely different names for the comparisons that the
7993 fp conditional moves. */
7994 switch (GET_CODE (x
))
8009 fputs ("unord", file
);
8013 fputs ("neq", file
);
8017 fputs ("nlt", file
);
8021 fputs ("nle", file
);
8024 fputs ("ord", file
);
8031 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8032 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8034 switch (GET_MODE (x
))
8036 case HImode
: putc ('w', file
); break;
8038 case SFmode
: putc ('l', file
); break;
8040 case DFmode
: putc ('q', file
); break;
8041 default: gcc_unreachable ();
8048 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8051 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8052 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8055 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8058 /* Like above, but reverse condition */
8060 /* Check to see if argument to %c is really a constant
8061 and not a condition code which needs to be reversed. */
8062 if (!COMPARISON_P (x
))
8064 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8067 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8070 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8071 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8074 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8078 /* It doesn't actually matter what mode we use here, as we're
8079 only going to use this for printing. */
8080 x
= adjust_address_nv (x
, DImode
, 8);
8087 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8090 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8093 int pred_val
= INTVAL (XEXP (x
, 0));
8095 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8096 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8098 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8099 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8101 /* Emit hints only in the case default branch prediction
8102 heuristics would fail. */
8103 if (taken
!= cputaken
)
8105 /* We use 3e (DS) prefix for taken branches and
8106 2e (CS) prefix for not taken branches. */
8108 fputs ("ds ; ", file
);
8110 fputs ("cs ; ", file
);
8117 output_operand_lossage ("invalid operand code '%c'", code
);
8122 print_reg (x
, code
, file
);
8126 /* No `byte ptr' prefix for call instructions. */
8127 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8130 switch (GET_MODE_SIZE (GET_MODE (x
)))
8132 case 1: size
= "BYTE"; break;
8133 case 2: size
= "WORD"; break;
8134 case 4: size
= "DWORD"; break;
8135 case 8: size
= "QWORD"; break;
8136 case 12: size
= "XWORD"; break;
8137 case 16: size
= "XMMWORD"; break;
8142 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8145 else if (code
== 'w')
8147 else if (code
== 'k')
8151 fputs (" PTR ", file
);
8155 /* Avoid (%rip) for call operands. */
8156 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8157 && !CONST_INT_P (x
))
8158 output_addr_const (file
, x
);
8159 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8160 output_operand_lossage ("invalid constraints for operand");
8165 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8170 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8171 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8173 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8175 fprintf (file
, "0x%08lx", l
);
8178 /* These float cases don't actually occur as immediate operands. */
8179 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8183 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8184 fprintf (file
, "%s", dstr
);
8187 else if (GET_CODE (x
) == CONST_DOUBLE
8188 && GET_MODE (x
) == XFmode
)
8192 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8193 fprintf (file
, "%s", dstr
);
8198 /* We have patterns that allow zero sets of memory, for instance.
8199 In 64-bit mode, we should probably support all 8-byte vectors,
8200 since we can in fact encode that into an immediate. */
8201 if (GET_CODE (x
) == CONST_VECTOR
)
8203 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8209 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8211 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8214 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8215 || GET_CODE (x
) == LABEL_REF
)
8217 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8220 fputs ("OFFSET FLAT:", file
);
8223 if (CONST_INT_P (x
))
8224 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8226 output_pic_addr_const (file
, x
, code
);
8228 output_addr_const (file
, x
);
8232 /* Print a memory operand whose address is ADDR. */
8235 print_operand_address (FILE *file
, rtx addr
)
8237 struct ix86_address parts
;
8238 rtx base
, index
, disp
;
8240 int ok
= ix86_decompose_address (addr
, &parts
);
8245 index
= parts
.index
;
8247 scale
= parts
.scale
;
8255 if (USER_LABEL_PREFIX
[0] == 0)
8257 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8263 if (!base
&& !index
)
8265 /* Displacement only requires special attention. */
8267 if (CONST_INT_P (disp
))
8269 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8271 if (USER_LABEL_PREFIX
[0] == 0)
8273 fputs ("ds:", file
);
8275 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8278 output_pic_addr_const (file
, disp
, 0);
8280 output_addr_const (file
, disp
);
8282 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8285 if (GET_CODE (disp
) == CONST
8286 && GET_CODE (XEXP (disp
, 0)) == PLUS
8287 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8288 disp
= XEXP (XEXP (disp
, 0), 0);
8289 if (GET_CODE (disp
) == LABEL_REF
8290 || (GET_CODE (disp
) == SYMBOL_REF
8291 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8292 fputs ("(%rip)", file
);
8297 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8302 output_pic_addr_const (file
, disp
, 0);
8303 else if (GET_CODE (disp
) == LABEL_REF
)
8304 output_asm_label (disp
);
8306 output_addr_const (file
, disp
);
8311 print_reg (base
, 0, file
);
8315 print_reg (index
, 0, file
);
8317 fprintf (file
, ",%d", scale
);
8323 rtx offset
= NULL_RTX
;
8327 /* Pull out the offset of a symbol; print any symbol itself. */
8328 if (GET_CODE (disp
) == CONST
8329 && GET_CODE (XEXP (disp
, 0)) == PLUS
8330 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8332 offset
= XEXP (XEXP (disp
, 0), 1);
8333 disp
= gen_rtx_CONST (VOIDmode
,
8334 XEXP (XEXP (disp
, 0), 0));
8338 output_pic_addr_const (file
, disp
, 0);
8339 else if (GET_CODE (disp
) == LABEL_REF
)
8340 output_asm_label (disp
);
8341 else if (CONST_INT_P (disp
))
8344 output_addr_const (file
, disp
);
8350 print_reg (base
, 0, file
);
8353 if (INTVAL (offset
) >= 0)
8355 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8359 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8366 print_reg (index
, 0, file
);
8368 fprintf (file
, "*%d", scale
);
8376 output_addr_const_extra (FILE *file
, rtx x
)
8380 if (GET_CODE (x
) != UNSPEC
)
8383 op
= XVECEXP (x
, 0, 0);
8384 switch (XINT (x
, 1))
8386 case UNSPEC_GOTTPOFF
:
8387 output_addr_const (file
, op
);
8388 /* FIXME: This might be @TPOFF in Sun ld. */
8389 fputs ("@GOTTPOFF", file
);
8392 output_addr_const (file
, op
);
8393 fputs ("@TPOFF", file
);
8396 output_addr_const (file
, op
);
8398 fputs ("@TPOFF", file
);
8400 fputs ("@NTPOFF", file
);
8403 output_addr_const (file
, op
);
8404 fputs ("@DTPOFF", file
);
8406 case UNSPEC_GOTNTPOFF
:
8407 output_addr_const (file
, op
);
8409 fputs ("@GOTTPOFF(%rip)", file
);
8411 fputs ("@GOTNTPOFF", file
);
8413 case UNSPEC_INDNTPOFF
:
8414 output_addr_const (file
, op
);
8415 fputs ("@INDNTPOFF", file
);
8425 /* Split one or more DImode RTL references into pairs of SImode
8426 references. The RTL can be REG, offsettable MEM, integer constant, or
8427 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8428 split and "num" is its length. lo_half and hi_half are output arrays
8429 that parallel "operands". */
8432 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8436 rtx op
= operands
[num
];
8438 /* simplify_subreg refuse to split volatile memory addresses,
8439 but we still have to handle it. */
8442 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8443 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8447 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8448 GET_MODE (op
) == VOIDmode
8449 ? DImode
: GET_MODE (op
), 0);
8450 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8451 GET_MODE (op
) == VOIDmode
8452 ? DImode
: GET_MODE (op
), 4);
8456 /* Split one or more TImode RTL references into pairs of DImode
8457 references. The RTL can be REG, offsettable MEM, integer constant, or
8458 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8459 split and "num" is its length. lo_half and hi_half are output arrays
8460 that parallel "operands". */
8463 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8467 rtx op
= operands
[num
];
8469 /* simplify_subreg refuse to split volatile memory addresses, but we
8470 still have to handle it. */
8473 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8474 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8478 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8479 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8484 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8485 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8486 is the expression of the binary operation. The output may either be
8487 emitted here, or returned to the caller, like all output_* functions.
8489 There is no guarantee that the operands are the same mode, as they
8490 might be within FLOAT or FLOAT_EXTEND expressions. */
8492 #ifndef SYSV386_COMPAT
8493 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8494 wants to fix the assemblers because that causes incompatibility
8495 with gcc. No-one wants to fix gcc because that causes
8496 incompatibility with assemblers... You can use the option of
8497 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8498 #define SYSV386_COMPAT 1
8502 output_387_binary_op (rtx insn
, rtx
*operands
)
8504 static char buf
[30];
8507 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8509 #ifdef ENABLE_CHECKING
8510 /* Even if we do not want to check the inputs, this documents input
8511 constraints. Which helps in understanding the following code. */
8512 if (STACK_REG_P (operands
[0])
8513 && ((REG_P (operands
[1])
8514 && REGNO (operands
[0]) == REGNO (operands
[1])
8515 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8516 || (REG_P (operands
[2])
8517 && REGNO (operands
[0]) == REGNO (operands
[2])
8518 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8519 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8522 gcc_assert (is_sse
);
8525 switch (GET_CODE (operands
[3]))
8528 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8529 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8537 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8538 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8546 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8547 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8555 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8556 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8570 if (GET_MODE (operands
[0]) == SFmode
)
8571 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8573 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8578 switch (GET_CODE (operands
[3]))
8582 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8584 rtx temp
= operands
[2];
8585 operands
[2] = operands
[1];
8589 /* know operands[0] == operands[1]. */
8591 if (MEM_P (operands
[2]))
8597 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8599 if (STACK_TOP_P (operands
[0]))
8600 /* How is it that we are storing to a dead operand[2]?
8601 Well, presumably operands[1] is dead too. We can't
8602 store the result to st(0) as st(0) gets popped on this
8603 instruction. Instead store to operands[2] (which I
8604 think has to be st(1)). st(1) will be popped later.
8605 gcc <= 2.8.1 didn't have this check and generated
8606 assembly code that the Unixware assembler rejected. */
8607 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8609 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8613 if (STACK_TOP_P (operands
[0]))
8614 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8616 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8621 if (MEM_P (operands
[1]))
8627 if (MEM_P (operands
[2]))
8633 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8636 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8637 derived assemblers, confusingly reverse the direction of
8638 the operation for fsub{r} and fdiv{r} when the
8639 destination register is not st(0). The Intel assembler
8640 doesn't have this brain damage. Read !SYSV386_COMPAT to
8641 figure out what the hardware really does. */
8642 if (STACK_TOP_P (operands
[0]))
8643 p
= "{p\t%0, %2|rp\t%2, %0}";
8645 p
= "{rp\t%2, %0|p\t%0, %2}";
8647 if (STACK_TOP_P (operands
[0]))
8648 /* As above for fmul/fadd, we can't store to st(0). */
8649 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8651 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8656 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8659 if (STACK_TOP_P (operands
[0]))
8660 p
= "{rp\t%0, %1|p\t%1, %0}";
8662 p
= "{p\t%1, %0|rp\t%0, %1}";
8664 if (STACK_TOP_P (operands
[0]))
8665 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8667 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8672 if (STACK_TOP_P (operands
[0]))
8674 if (STACK_TOP_P (operands
[1]))
8675 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8677 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8680 else if (STACK_TOP_P (operands
[1]))
8683 p
= "{\t%1, %0|r\t%0, %1}";
8685 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8691 p
= "{r\t%2, %0|\t%0, %2}";
8693 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8706 /* Return needed mode for entity in optimize_mode_switching pass. */
8709 ix86_mode_needed (int entity
, rtx insn
)
8711 enum attr_i387_cw mode
;
8713 /* The mode UNINITIALIZED is used to store control word after a
8714 function call or ASM pattern. The mode ANY specify that function
8715 has no requirements on the control word and make no changes in the
8716 bits we are interested in. */
8719 || (NONJUMP_INSN_P (insn
)
8720 && (asm_noperands (PATTERN (insn
)) >= 0
8721 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8722 return I387_CW_UNINITIALIZED
;
8724 if (recog_memoized (insn
) < 0)
8727 mode
= get_attr_i387_cw (insn
);
8732 if (mode
== I387_CW_TRUNC
)
8737 if (mode
== I387_CW_FLOOR
)
8742 if (mode
== I387_CW_CEIL
)
8747 if (mode
== I387_CW_MASK_PM
)
8758 /* Output code to initialize control word copies used by trunc?f?i and
8759 rounding patterns. CURRENT_MODE is set to current control word,
8760 while NEW_MODE is set to new control word. */
8763 emit_i387_cw_initialization (int mode
)
8765 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8770 rtx reg
= gen_reg_rtx (HImode
);
8772 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8773 emit_move_insn (reg
, copy_rtx (stored_mode
));
8775 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
8780 /* round toward zero (truncate) */
8781 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
8782 slot
= SLOT_CW_TRUNC
;
8786 /* round down toward -oo */
8787 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8788 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
8789 slot
= SLOT_CW_FLOOR
;
8793 /* round up toward +oo */
8794 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8795 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
8796 slot
= SLOT_CW_CEIL
;
8799 case I387_CW_MASK_PM
:
8800 /* mask precision exception for nearbyint() */
8801 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8802 slot
= SLOT_CW_MASK_PM
;
8814 /* round toward zero (truncate) */
8815 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8816 slot
= SLOT_CW_TRUNC
;
8820 /* round down toward -oo */
8821 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
8822 slot
= SLOT_CW_FLOOR
;
8826 /* round up toward +oo */
8827 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
8828 slot
= SLOT_CW_CEIL
;
8831 case I387_CW_MASK_PM
:
8832 /* mask precision exception for nearbyint() */
8833 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8834 slot
= SLOT_CW_MASK_PM
;
8842 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
8844 new_mode
= assign_386_stack_local (HImode
, slot
);
8845 emit_move_insn (new_mode
, reg
);
8848 /* Output code for INSN to convert a float to a signed int. OPERANDS
8849 are the insn operands. The output may be [HSD]Imode and the input
8850 operand may be [SDX]Fmode. */
8853 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
8855 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8856 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8857 int round_mode
= get_attr_i387_cw (insn
);
8859 /* Jump through a hoop or two for DImode, since the hardware has no
8860 non-popping instruction. We used to do this a different way, but
8861 that was somewhat fragile and broke with post-reload splitters. */
8862 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
8863 output_asm_insn ("fld\t%y1", operands
);
8865 gcc_assert (STACK_TOP_P (operands
[1]));
8866 gcc_assert (MEM_P (operands
[0]));
8869 output_asm_insn ("fisttp%z0\t%0", operands
);
8872 if (round_mode
!= I387_CW_ANY
)
8873 output_asm_insn ("fldcw\t%3", operands
);
8874 if (stack_top_dies
|| dimode_p
)
8875 output_asm_insn ("fistp%z0\t%0", operands
);
8877 output_asm_insn ("fist%z0\t%0", operands
);
8878 if (round_mode
!= I387_CW_ANY
)
8879 output_asm_insn ("fldcw\t%2", operands
);
8885 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8886 have the values zero or one, indicates the ffreep insn's operand
8887 from the OPERANDS array. */
8890 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
8892 if (TARGET_USE_FFREEP
)
8893 #if HAVE_AS_IX86_FFREEP
8894 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
8897 static char retval
[] = ".word\t0xc_df";
8898 int regno
= REGNO (operands
[opno
]);
8900 gcc_assert (FP_REGNO_P (regno
));
8902 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
8907 return opno
? "fstp\t%y1" : "fstp\t%y0";
8911 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8912 should be used. UNORDERED_P is true when fucom should be used. */
8915 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8918 rtx cmp_op0
, cmp_op1
;
8919 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
8923 cmp_op0
= operands
[0];
8924 cmp_op1
= operands
[1];
8928 cmp_op0
= operands
[1];
8929 cmp_op1
= operands
[2];
8934 if (GET_MODE (operands
[0]) == SFmode
)
8936 return "ucomiss\t{%1, %0|%0, %1}";
8938 return "comiss\t{%1, %0|%0, %1}";
8941 return "ucomisd\t{%1, %0|%0, %1}";
8943 return "comisd\t{%1, %0|%0, %1}";
8946 gcc_assert (STACK_TOP_P (cmp_op0
));
8948 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8950 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
8954 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
8955 return output_387_ffreep (operands
, 1);
8958 return "ftst\n\tfnstsw\t%0";
8961 if (STACK_REG_P (cmp_op1
)
8963 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8964 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8966 /* If both the top of the 387 stack dies, and the other operand
8967 is also a stack register that dies, then this must be a
8968 `fcompp' float compare */
8972 /* There is no double popping fcomi variant. Fortunately,
8973 eflags is immune from the fstp's cc clobbering. */
8975 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8977 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8978 return output_387_ffreep (operands
, 0);
8983 return "fucompp\n\tfnstsw\t%0";
8985 return "fcompp\n\tfnstsw\t%0";
8990 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8992 static const char * const alt
[16] =
8994 "fcom%z2\t%y2\n\tfnstsw\t%0",
8995 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8996 "fucom%z2\t%y2\n\tfnstsw\t%0",
8997 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8999 "ficom%z2\t%y2\n\tfnstsw\t%0",
9000 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9004 "fcomi\t{%y1, %0|%0, %y1}",
9005 "fcomip\t{%y1, %0|%0, %y1}",
9006 "fucomi\t{%y1, %0|%0, %y1}",
9007 "fucomip\t{%y1, %0|%0, %y1}",
9018 mask
= eflags_p
<< 3;
9019 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9020 mask
|= unordered_p
<< 1;
9021 mask
|= stack_top_dies
;
9023 gcc_assert (mask
< 16);
9032 ix86_output_addr_vec_elt (FILE *file
, int value
)
9034 const char *directive
= ASM_LONG
;
9038 directive
= ASM_QUAD
;
9040 gcc_assert (!TARGET_64BIT
);
9043 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9047 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9050 fprintf (file
, "%s%s%d-%s%d\n",
9051 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9052 else if (HAVE_AS_GOTOFF_IN_DATA
)
9053 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9055 else if (TARGET_MACHO
)
9057 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9058 machopic_output_function_base_name (file
);
9059 fprintf(file
, "\n");
9063 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9064 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9067 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9071 ix86_expand_clear (rtx dest
)
9075 /* We play register width games, which are only valid after reload. */
9076 gcc_assert (reload_completed
);
9078 /* Avoid HImode and its attendant prefix byte. */
9079 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9080 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9082 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9084 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9085 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9087 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9088 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9094 /* X is an unchanging MEM. If it is a constant pool reference, return
9095 the constant pool rtx, else NULL. */
9098 maybe_get_pool_constant (rtx x
)
9100 x
= ix86_delegitimize_address (XEXP (x
, 0));
9102 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9103 return get_pool_constant (x
);
9109 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9111 int strict
= (reload_in_progress
|| reload_completed
);
9113 enum tls_model model
;
9118 if (GET_CODE (op1
) == SYMBOL_REF
)
9120 model
= SYMBOL_REF_TLS_MODEL (op1
);
9123 op1
= legitimize_tls_address (op1
, model
, true);
9124 op1
= force_operand (op1
, op0
);
9129 else if (GET_CODE (op1
) == CONST
9130 && GET_CODE (XEXP (op1
, 0)) == PLUS
9131 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9133 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9136 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9137 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9138 op1
= force_operand (op1
, NULL
);
9139 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9140 op0
, 1, OPTAB_DIRECT
);
9146 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9148 if (TARGET_MACHO
&& !TARGET_64BIT
)
9153 rtx temp
= ((reload_in_progress
9154 || ((op0
&& REG_P (op0
))
9156 ? op0
: gen_reg_rtx (Pmode
));
9157 op1
= machopic_indirect_data_reference (op1
, temp
);
9158 op1
= machopic_legitimize_pic_address (op1
, mode
,
9159 temp
== op1
? 0 : temp
);
9161 else if (MACHOPIC_INDIRECT
)
9162 op1
= machopic_indirect_data_reference (op1
, 0);
9170 op1
= force_reg (Pmode
, op1
);
9172 op1
= legitimize_address (op1
, op1
, Pmode
);
9178 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9179 || !push_operand (op0
, mode
))
9181 op1
= force_reg (mode
, op1
);
9183 if (push_operand (op0
, mode
)
9184 && ! general_no_elim_operand (op1
, mode
))
9185 op1
= copy_to_mode_reg (mode
, op1
);
9187 /* Force large constants in 64bit compilation into register
9188 to get them CSEed. */
9189 if (TARGET_64BIT
&& mode
== DImode
9190 && immediate_operand (op1
, mode
)
9191 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9192 && !register_operand (op0
, mode
)
9193 && optimize
&& !reload_completed
&& !reload_in_progress
)
9194 op1
= copy_to_mode_reg (mode
, op1
);
9196 if (FLOAT_MODE_P (mode
))
9198 /* If we are loading a floating point constant to a register,
9199 force the value to memory now, since we'll get better code
9200 out the back end. */
9204 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9206 op1
= validize_mem (force_const_mem (mode
, op1
));
9207 if (!register_operand (op0
, mode
))
9209 rtx temp
= gen_reg_rtx (mode
);
9210 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9211 emit_move_insn (op0
, temp
);
9218 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9222 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9224 rtx op0
= operands
[0], op1
= operands
[1];
9226 /* Force constants other than zero into memory. We do not know how
9227 the instructions used to build constants modify the upper 64 bits
9228 of the register, once we have that information we may be able
9229 to handle some of them more efficiently. */
9230 if ((reload_in_progress
| reload_completed
) == 0
9231 && register_operand (op0
, mode
)
9233 && standard_sse_constant_p (op1
) <= 0)
9234 op1
= validize_mem (force_const_mem (mode
, op1
));
9236 /* Make operand1 a register if it isn't already. */
9238 && !register_operand (op0
, mode
)
9239 && !register_operand (op1
, mode
))
9241 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9245 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9248 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9249 straight to ix86_expand_vector_move. */
9252 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9261 /* If we're optimizing for size, movups is the smallest. */
9264 op0
= gen_lowpart (V4SFmode
, op0
);
9265 op1
= gen_lowpart (V4SFmode
, op1
);
9266 emit_insn (gen_sse_movups (op0
, op1
));
9270 /* ??? If we have typed data, then it would appear that using
9271 movdqu is the only way to get unaligned data loaded with
9273 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9275 op0
= gen_lowpart (V16QImode
, op0
);
9276 op1
= gen_lowpart (V16QImode
, op1
);
9277 emit_insn (gen_sse2_movdqu (op0
, op1
));
9281 if (TARGET_SSE2
&& mode
== V2DFmode
)
9285 /* When SSE registers are split into halves, we can avoid
9286 writing to the top half twice. */
9287 if (TARGET_SSE_SPLIT_REGS
)
9289 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9294 /* ??? Not sure about the best option for the Intel chips.
9295 The following would seem to satisfy; the register is
9296 entirely cleared, breaking the dependency chain. We
9297 then store to the upper half, with a dependency depth
9298 of one. A rumor has it that Intel recommends two movsd
9299 followed by an unpacklpd, but this is unconfirmed. And
9300 given that the dependency depth of the unpacklpd would
9301 still be one, I'm not sure why this would be better. */
9302 zero
= CONST0_RTX (V2DFmode
);
9305 m
= adjust_address (op1
, DFmode
, 0);
9306 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9307 m
= adjust_address (op1
, DFmode
, 8);
9308 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9312 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9313 emit_move_insn (op0
, CONST0_RTX (mode
));
9315 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9317 if (mode
!= V4SFmode
)
9318 op0
= gen_lowpart (V4SFmode
, op0
);
9319 m
= adjust_address (op1
, V2SFmode
, 0);
9320 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9321 m
= adjust_address (op1
, V2SFmode
, 8);
9322 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9325 else if (MEM_P (op0
))
9327 /* If we're optimizing for size, movups is the smallest. */
9330 op0
= gen_lowpart (V4SFmode
, op0
);
9331 op1
= gen_lowpart (V4SFmode
, op1
);
9332 emit_insn (gen_sse_movups (op0
, op1
));
9336 /* ??? Similar to above, only less clear because of quote
9337 typeless stores unquote. */
9338 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9339 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9341 op0
= gen_lowpart (V16QImode
, op0
);
9342 op1
= gen_lowpart (V16QImode
, op1
);
9343 emit_insn (gen_sse2_movdqu (op0
, op1
));
9347 if (TARGET_SSE2
&& mode
== V2DFmode
)
9349 m
= adjust_address (op0
, DFmode
, 0);
9350 emit_insn (gen_sse2_storelpd (m
, op1
));
9351 m
= adjust_address (op0
, DFmode
, 8);
9352 emit_insn (gen_sse2_storehpd (m
, op1
));
9356 if (mode
!= V4SFmode
)
9357 op1
= gen_lowpart (V4SFmode
, op1
);
9358 m
= adjust_address (op0
, V2SFmode
, 0);
9359 emit_insn (gen_sse_storelps (m
, op1
));
9360 m
= adjust_address (op0
, V2SFmode
, 8);
9361 emit_insn (gen_sse_storehps (m
, op1
));
9368 /* Expand a push in MODE. This is some mode for which we do not support
9369 proper push instructions, at least from the registers that we expect
9370 the value to live in. */
9373 ix86_expand_push (enum machine_mode mode
, rtx x
)
9377 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9378 GEN_INT (-GET_MODE_SIZE (mode
)),
9379 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9380 if (tmp
!= stack_pointer_rtx
)
9381 emit_move_insn (stack_pointer_rtx
, tmp
);
9383 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9384 emit_move_insn (tmp
, x
);
9387 /* Helper function of ix86_fixup_binary_operands to canonicalize
9388 operand order. Returns true if the operands should be swapped. */
9391 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9394 rtx dst
= operands
[0];
9395 rtx src1
= operands
[1];
9396 rtx src2
= operands
[2];
9398 /* If the operation is not commutative, we can't do anything. */
9399 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9402 /* Highest priority is that src1 should match dst. */
9403 if (rtx_equal_p (dst
, src1
))
9405 if (rtx_equal_p (dst
, src2
))
9408 /* Next highest priority is that immediate constants come second. */
9409 if (immediate_operand (src2
, mode
))
9411 if (immediate_operand (src1
, mode
))
9414 /* Lowest priority is that memory references should come second. */
9424 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9425 destination to use for the operation. If different from the true
9426 destination in operands[0], a copy operation will be required. */
9429 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9432 rtx dst
= operands
[0];
9433 rtx src1
= operands
[1];
9434 rtx src2
= operands
[2];
9436 /* Canonicalize operand order. */
9437 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9444 /* Both source operands cannot be in memory. */
9445 if (MEM_P (src1
) && MEM_P (src2
))
9447 /* Optimization: Only read from memory once. */
9448 if (rtx_equal_p (src1
, src2
))
9450 src2
= force_reg (mode
, src2
);
9454 src2
= force_reg (mode
, src2
);
9457 /* If the destination is memory, and we do not have matching source
9458 operands, do things in registers. */
9459 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9460 dst
= gen_reg_rtx (mode
);
9462 /* Source 1 cannot be a constant. */
9463 if (CONSTANT_P (src1
))
9464 src1
= force_reg (mode
, src1
);
9466 /* Source 1 cannot be a non-matching memory. */
9467 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9468 src1
= force_reg (mode
, src1
);
9475 /* Similarly, but assume that the destination has already been
9479 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9480 enum machine_mode mode
, rtx operands
[])
9482 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9483 gcc_assert (dst
== operands
[0]);
9486 /* Attempt to expand a binary operator. Make the expansion closer to the
9487 actual machine, then just general_operand, which will allow 3 separate
9488 memory references (one output, two input) in a single insn. */
9491 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9494 rtx src1
, src2
, dst
, op
, clob
;
9496 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9500 /* Emit the instruction. */
9502 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9503 if (reload_in_progress
)
9505 /* Reload doesn't know about the flags register, and doesn't know that
9506 it doesn't want to clobber it. We can only do this with PLUS. */
9507 gcc_assert (code
== PLUS
);
9512 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9513 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9516 /* Fix up the destination if needed. */
9517 if (dst
!= operands
[0])
9518 emit_move_insn (operands
[0], dst
);
9521 /* Return TRUE or FALSE depending on whether the binary operator meets the
9522 appropriate constraints. */
9525 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9528 rtx dst
= operands
[0];
9529 rtx src1
= operands
[1];
9530 rtx src2
= operands
[2];
9532 /* Both source operands cannot be in memory. */
9533 if (MEM_P (src1
) && MEM_P (src2
))
9536 /* Canonicalize operand order for commutative operators. */
9537 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9544 /* If the destination is memory, we must have a matching source operand. */
9545 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9548 /* Source 1 cannot be a constant. */
9549 if (CONSTANT_P (src1
))
9552 /* Source 1 cannot be a non-matching memory. */
9553 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9559 /* Attempt to expand a unary operator. Make the expansion closer to the
9560 actual machine, then just general_operand, which will allow 2 separate
9561 memory references (one output, one input) in a single insn. */
9564 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9567 int matching_memory
;
9568 rtx src
, dst
, op
, clob
;
9573 /* If the destination is memory, and we do not have matching source
9574 operands, do things in registers. */
9575 matching_memory
= 0;
9578 if (rtx_equal_p (dst
, src
))
9579 matching_memory
= 1;
9581 dst
= gen_reg_rtx (mode
);
9584 /* When source operand is memory, destination must match. */
9585 if (MEM_P (src
) && !matching_memory
)
9586 src
= force_reg (mode
, src
);
9588 /* Emit the instruction. */
9590 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9591 if (reload_in_progress
|| code
== NOT
)
9593 /* Reload doesn't know about the flags register, and doesn't know that
9594 it doesn't want to clobber it. */
9595 gcc_assert (code
== NOT
);
9600 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9601 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9604 /* Fix up the destination if needed. */
9605 if (dst
!= operands
[0])
9606 emit_move_insn (operands
[0], dst
);
9609 /* Return TRUE or FALSE depending on whether the unary operator meets the
9610 appropriate constraints. */
9613 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9614 enum machine_mode mode ATTRIBUTE_UNUSED
,
9615 rtx operands
[2] ATTRIBUTE_UNUSED
)
9617 /* If one of operands is memory, source and destination must match. */
9618 if ((MEM_P (operands
[0])
9619 || MEM_P (operands
[1]))
9620 && ! rtx_equal_p (operands
[0], operands
[1]))
9625 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9626 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9627 true, then replicate the mask for all elements of the vector register.
9628 If INVERT is true, then create a mask excluding the sign bit. */
9631 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9633 enum machine_mode vec_mode
;
9634 HOST_WIDE_INT hi
, lo
;
9639 /* Find the sign bit, sign extended to 2*HWI. */
9641 lo
= 0x80000000, hi
= lo
< 0;
9642 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9643 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9645 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9650 /* Force this value into the low part of a fp vector constant. */
9651 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9652 mask
= gen_lowpart (mode
, mask
);
9657 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9659 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9660 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9661 vec_mode
= V4SFmode
;
9666 v
= gen_rtvec (2, mask
, mask
);
9668 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9669 vec_mode
= V2DFmode
;
9672 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
9675 /* Generate code for floating point ABS or NEG. */
9678 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9681 rtx mask
, set
, use
, clob
, dst
, src
;
9682 bool matching_memory
;
9683 bool use_sse
= false;
9684 bool vector_mode
= VECTOR_MODE_P (mode
);
9685 enum machine_mode elt_mode
= mode
;
9689 elt_mode
= GET_MODE_INNER (mode
);
9692 else if (TARGET_SSE_MATH
)
9693 use_sse
= SSE_FLOAT_MODE_P (mode
);
9695 /* NEG and ABS performed with SSE use bitwise mask operations.
9696 Create the appropriate mask now. */
9698 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9705 /* If the destination is memory, and we don't have matching source
9706 operands or we're using the x87, do things in registers. */
9707 matching_memory
= false;
9710 if (use_sse
&& rtx_equal_p (dst
, src
))
9711 matching_memory
= true;
9713 dst
= gen_reg_rtx (mode
);
9715 if (MEM_P (src
) && !matching_memory
)
9716 src
= force_reg (mode
, src
);
9720 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9721 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9726 set
= gen_rtx_fmt_e (code
, mode
, src
);
9727 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9730 use
= gen_rtx_USE (VOIDmode
, mask
);
9731 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9732 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9733 gen_rtvec (3, set
, use
, clob
)));
9739 if (dst
!= operands
[0])
9740 emit_move_insn (operands
[0], dst
);
9743 /* Expand a copysign operation. Special case operand 0 being a constant. */
9746 ix86_expand_copysign (rtx operands
[])
9748 enum machine_mode mode
, vmode
;
9749 rtx dest
, op0
, op1
, mask
, nmask
;
9755 mode
= GET_MODE (dest
);
9756 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9758 if (GET_CODE (op0
) == CONST_DOUBLE
)
9762 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
9763 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
9765 if (op0
== CONST0_RTX (mode
))
9766 op0
= CONST0_RTX (vmode
);
9770 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
9771 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9773 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
9774 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
9777 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9780 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
9782 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
9786 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
9787 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9790 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9792 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9796 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9797 be a constant, and so has already been expanded into a vector constant. */
9800 ix86_split_copysign_const (rtx operands
[])
9802 enum machine_mode mode
, vmode
;
9803 rtx dest
, op0
, op1
, mask
, x
;
9810 mode
= GET_MODE (dest
);
9811 vmode
= GET_MODE (mask
);
9813 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
9814 x
= gen_rtx_AND (vmode
, dest
, mask
);
9815 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9817 if (op0
!= CONST0_RTX (vmode
))
9819 x
= gen_rtx_IOR (vmode
, dest
, op0
);
9820 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9824 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9825 so we have to do two masks. */
9828 ix86_split_copysign_var (rtx operands
[])
9830 enum machine_mode mode
, vmode
;
9831 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
9834 scratch
= operands
[1];
9837 nmask
= operands
[4];
9840 mode
= GET_MODE (dest
);
9841 vmode
= GET_MODE (mask
);
9843 if (rtx_equal_p (op0
, op1
))
9845 /* Shouldn't happen often (it's useless, obviously), but when it does
9846 we'd generate incorrect code if we continue below. */
9847 emit_move_insn (dest
, op0
);
9851 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
9853 gcc_assert (REGNO (op1
) == REGNO (scratch
));
9855 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9856 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9859 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9860 x
= gen_rtx_NOT (vmode
, dest
);
9861 x
= gen_rtx_AND (vmode
, x
, op0
);
9862 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9866 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
9868 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9870 else /* alternative 2,4 */
9872 gcc_assert (REGNO (mask
) == REGNO (scratch
));
9873 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
9874 x
= gen_rtx_AND (vmode
, scratch
, op1
);
9876 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9878 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
9880 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9881 x
= gen_rtx_AND (vmode
, dest
, nmask
);
9883 else /* alternative 3,4 */
9885 gcc_assert (REGNO (nmask
) == REGNO (dest
));
9887 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9888 x
= gen_rtx_AND (vmode
, dest
, op0
);
9890 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9893 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
9894 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9897 /* Return TRUE or FALSE depending on whether the first SET in INSN
9898 has source and destination with matching CC modes, and that the
9899 CC mode is at least as constrained as REQ_MODE. */
9902 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
9905 enum machine_mode set_mode
;
9907 set
= PATTERN (insn
);
9908 if (GET_CODE (set
) == PARALLEL
)
9909 set
= XVECEXP (set
, 0, 0);
9910 gcc_assert (GET_CODE (set
) == SET
);
9911 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
9913 set_mode
= GET_MODE (SET_DEST (set
));
9917 if (req_mode
!= CCNOmode
9918 && (req_mode
!= CCmode
9919 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
9923 if (req_mode
== CCGCmode
)
9927 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
9931 if (req_mode
== CCZmode
)
9941 return (GET_MODE (SET_SRC (set
)) == set_mode
);
9944 /* Generate insn patterns to do an integer compare of OPERANDS. */
9947 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
9949 enum machine_mode cmpmode
;
9952 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
9953 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
9955 /* This is very simple, but making the interface the same as in the
9956 FP case makes the rest of the code easier. */
9957 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
9958 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
9960 /* Return the test that should be put into the flags user, i.e.
9961 the bcc, scc, or cmov instruction. */
9962 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
9965 /* Figure out whether to use ordered or unordered fp comparisons.
9966 Return the appropriate mode to use. */
9969 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
9971 /* ??? In order to make all comparisons reversible, we do all comparisons
9972 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9973 all forms trapping and nontrapping comparisons, we can make inequality
9974 comparisons trapping again, since it results in better code when using
9975 FCOM based compares. */
9976 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
9980 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
9982 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
9983 return ix86_fp_compare_mode (code
);
9986 /* Only zero flag is needed. */
9988 case NE
: /* ZF!=0 */
9990 /* Codes needing carry flag. */
9991 case GEU
: /* CF=0 */
9992 case GTU
: /* CF=0 & ZF=0 */
9993 case LTU
: /* CF=1 */
9994 case LEU
: /* CF=1 | ZF=1 */
9996 /* Codes possibly doable only with sign flag when
9997 comparing against zero. */
9998 case GE
: /* SF=OF or SF=0 */
9999 case LT
: /* SF<>OF or SF=1 */
10000 if (op1
== const0_rtx
)
10003 /* For other cases Carry flag is not required. */
10005 /* Codes doable only with sign flag when comparing
10006 against zero, but we miss jump instruction for it
10007 so we need to use relational tests against overflow
10008 that thus needs to be zero. */
10009 case GT
: /* ZF=0 & SF=OF */
10010 case LE
: /* ZF=1 | SF<>OF */
10011 if (op1
== const0_rtx
)
10015 /* strcmp pattern do (use flags) and combine may ask us for proper
10020 gcc_unreachable ();
10024 /* Return the fixed registers used for condition codes. */
10027 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10034 /* If two condition code modes are compatible, return a condition code
10035 mode which is compatible with both. Otherwise, return
10038 static enum machine_mode
10039 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10044 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10047 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10048 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10054 gcc_unreachable ();
10076 /* These are only compatible with themselves, which we already
10082 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10085 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10087 enum rtx_code swapped_code
= swap_condition (code
);
10088 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10089 || (ix86_fp_comparison_cost (swapped_code
)
10090 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10093 /* Swap, force into registers, or otherwise massage the two operands
10094 to a fp comparison. The operands are updated in place; the new
10095 comparison code is returned. */
10097 static enum rtx_code
10098 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10100 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10101 rtx op0
= *pop0
, op1
= *pop1
;
10102 enum machine_mode op_mode
= GET_MODE (op0
);
10103 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10105 /* All of the unordered compare instructions only work on registers.
10106 The same is true of the fcomi compare instructions. The XFmode
10107 compare instructions require registers except when comparing
10108 against zero or when converting operand 1 from fixed point to
10112 && (fpcmp_mode
== CCFPUmode
10113 || (op_mode
== XFmode
10114 && ! (standard_80387_constant_p (op0
) == 1
10115 || standard_80387_constant_p (op1
) == 1)
10116 && GET_CODE (op1
) != FLOAT
)
10117 || ix86_use_fcomi_compare (code
)))
10119 op0
= force_reg (op_mode
, op0
);
10120 op1
= force_reg (op_mode
, op1
);
10124 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10125 things around if they appear profitable, otherwise force op0
10126 into a register. */
10128 if (standard_80387_constant_p (op0
) == 0
10130 && ! (standard_80387_constant_p (op1
) == 0
10134 tmp
= op0
, op0
= op1
, op1
= tmp
;
10135 code
= swap_condition (code
);
10139 op0
= force_reg (op_mode
, op0
);
10141 if (CONSTANT_P (op1
))
10143 int tmp
= standard_80387_constant_p (op1
);
10145 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10149 op1
= force_reg (op_mode
, op1
);
10152 op1
= force_reg (op_mode
, op1
);
10156 /* Try to rearrange the comparison to make it cheaper. */
10157 if (ix86_fp_comparison_cost (code
)
10158 > ix86_fp_comparison_cost (swap_condition (code
))
10159 && (REG_P (op1
) || !no_new_pseudos
))
10162 tmp
= op0
, op0
= op1
, op1
= tmp
;
10163 code
= swap_condition (code
);
10165 op0
= force_reg (op_mode
, op0
);
10173 /* Convert comparison codes we use to represent FP comparison to integer
10174 code that will result in proper branch. Return UNKNOWN if no such code
10178 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10207 /* Split comparison code CODE into comparisons we can do using branch
10208 instructions. BYPASS_CODE is comparison code for branch that will
10209 branch around FIRST_CODE and SECOND_CODE. If some of branches
10210 is not required, set value to UNKNOWN.
10211 We never require more than two branches. */
10214 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10215 enum rtx_code
*first_code
,
10216 enum rtx_code
*second_code
)
10218 *first_code
= code
;
10219 *bypass_code
= UNKNOWN
;
10220 *second_code
= UNKNOWN
;
10222 /* The fcomi comparison sets flags as follows:
10232 case GT
: /* GTU - CF=0 & ZF=0 */
10233 case GE
: /* GEU - CF=0 */
10234 case ORDERED
: /* PF=0 */
10235 case UNORDERED
: /* PF=1 */
10236 case UNEQ
: /* EQ - ZF=1 */
10237 case UNLT
: /* LTU - CF=1 */
10238 case UNLE
: /* LEU - CF=1 | ZF=1 */
10239 case LTGT
: /* EQ - ZF=0 */
10241 case LT
: /* LTU - CF=1 - fails on unordered */
10242 *first_code
= UNLT
;
10243 *bypass_code
= UNORDERED
;
10245 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10246 *first_code
= UNLE
;
10247 *bypass_code
= UNORDERED
;
10249 case EQ
: /* EQ - ZF=1 - fails on unordered */
10250 *first_code
= UNEQ
;
10251 *bypass_code
= UNORDERED
;
10253 case NE
: /* NE - ZF=0 - fails on unordered */
10254 *first_code
= LTGT
;
10255 *second_code
= UNORDERED
;
10257 case UNGE
: /* GEU - CF=0 - fails on unordered */
10259 *second_code
= UNORDERED
;
10261 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10263 *second_code
= UNORDERED
;
10266 gcc_unreachable ();
10268 if (!TARGET_IEEE_FP
)
10270 *second_code
= UNKNOWN
;
10271 *bypass_code
= UNKNOWN
;
10275 /* Return cost of comparison done fcom + arithmetics operations on AX.
10276 All following functions do use number of instructions as a cost metrics.
10277 In future this should be tweaked to compute bytes for optimize_size and
10278 take into account performance of various instructions on various CPUs. */
10280 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10282 if (!TARGET_IEEE_FP
)
10284 /* The cost of code output by ix86_expand_fp_compare. */
10308 gcc_unreachable ();
10312 /* Return cost of comparison done using fcomi operation.
10313 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10315 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10317 enum rtx_code bypass_code
, first_code
, second_code
;
10318 /* Return arbitrarily high cost when instruction is not supported - this
10319 prevents gcc from using it. */
10322 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10323 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10326 /* Return cost of comparison done using sahf operation.
10327 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10329 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10331 enum rtx_code bypass_code
, first_code
, second_code
;
10332 /* Return arbitrarily high cost when instruction is not preferred - this
10333 avoids gcc from using it. */
10334 if (!TARGET_USE_SAHF
&& !optimize_size
)
10336 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10337 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10340 /* Compute cost of the comparison done using any method.
10341 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10343 ix86_fp_comparison_cost (enum rtx_code code
)
10345 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10348 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10349 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10351 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10352 if (min
> sahf_cost
)
10354 if (min
> fcomi_cost
)
10359 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10362 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10363 rtx
*second_test
, rtx
*bypass_test
)
10365 enum machine_mode fpcmp_mode
, intcmp_mode
;
10367 int cost
= ix86_fp_comparison_cost (code
);
10368 enum rtx_code bypass_code
, first_code
, second_code
;
10370 fpcmp_mode
= ix86_fp_compare_mode (code
);
10371 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10374 *second_test
= NULL_RTX
;
10376 *bypass_test
= NULL_RTX
;
10378 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10380 /* Do fcomi/sahf based test when profitable. */
10381 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10382 && (second_code
== UNKNOWN
|| second_test
)
10383 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10387 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10388 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10394 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10395 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10397 scratch
= gen_reg_rtx (HImode
);
10398 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10399 emit_insn (gen_x86_sahf_1 (scratch
));
10402 /* The FP codes work out to act like unsigned. */
10403 intcmp_mode
= fpcmp_mode
;
10405 if (bypass_code
!= UNKNOWN
)
10406 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10407 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10409 if (second_code
!= UNKNOWN
)
10410 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10411 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10416 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10417 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10418 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10420 scratch
= gen_reg_rtx (HImode
);
10421 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10423 /* In the unordered case, we have to check C2 for NaN's, which
10424 doesn't happen to work out to anything nice combination-wise.
10425 So do some bit twiddling on the value we've got in AH to come
10426 up with an appropriate set of condition codes. */
10428 intcmp_mode
= CCNOmode
;
10433 if (code
== GT
|| !TARGET_IEEE_FP
)
10435 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10440 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10441 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10442 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10443 intcmp_mode
= CCmode
;
10449 if (code
== LT
&& TARGET_IEEE_FP
)
10451 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10452 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10453 intcmp_mode
= CCmode
;
10458 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10464 if (code
== GE
|| !TARGET_IEEE_FP
)
10466 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10471 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10472 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10479 if (code
== LE
&& TARGET_IEEE_FP
)
10481 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10482 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10483 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10484 intcmp_mode
= CCmode
;
10489 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10495 if (code
== EQ
&& TARGET_IEEE_FP
)
10497 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10498 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10499 intcmp_mode
= CCmode
;
10504 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10511 if (code
== NE
&& TARGET_IEEE_FP
)
10513 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10514 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10520 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10526 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10530 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10535 gcc_unreachable ();
10539 /* Return the test that should be put into the flags user, i.e.
10540 the bcc, scc, or cmov instruction. */
10541 return gen_rtx_fmt_ee (code
, VOIDmode
,
10542 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10547 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10550 op0
= ix86_compare_op0
;
10551 op1
= ix86_compare_op1
;
10554 *second_test
= NULL_RTX
;
10556 *bypass_test
= NULL_RTX
;
10558 if (ix86_compare_emitted
)
10560 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10561 ix86_compare_emitted
= NULL_RTX
;
10563 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10564 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10565 second_test
, bypass_test
);
10567 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10572 /* Return true if the CODE will result in nontrivial jump sequence. */
10574 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10576 enum rtx_code bypass_code
, first_code
, second_code
;
10579 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10580 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10584 ix86_expand_branch (enum rtx_code code
, rtx label
)
10588 /* If we have emitted a compare insn, go straight to simple.
10589 ix86_expand_compare won't emit anything if ix86_compare_emitted
10591 if (ix86_compare_emitted
)
10594 switch (GET_MODE (ix86_compare_op0
))
10600 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10601 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10602 gen_rtx_LABEL_REF (VOIDmode
, label
),
10604 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10613 enum rtx_code bypass_code
, first_code
, second_code
;
10615 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10616 &ix86_compare_op1
);
10618 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10620 /* Check whether we will use the natural sequence with one jump. If
10621 so, we can expand jump early. Otherwise delay expansion by
10622 creating compound insn to not confuse optimizers. */
10623 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10626 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10627 gen_rtx_LABEL_REF (VOIDmode
, label
),
10628 pc_rtx
, NULL_RTX
, NULL_RTX
);
10632 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10633 ix86_compare_op0
, ix86_compare_op1
);
10634 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10635 gen_rtx_LABEL_REF (VOIDmode
, label
),
10637 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10639 use_fcomi
= ix86_use_fcomi_compare (code
);
10640 vec
= rtvec_alloc (3 + !use_fcomi
);
10641 RTVEC_ELT (vec
, 0) = tmp
;
10643 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10645 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10648 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10650 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10659 /* Expand DImode branch into multiple compare+branch. */
10661 rtx lo
[2], hi
[2], label2
;
10662 enum rtx_code code1
, code2
, code3
;
10663 enum machine_mode submode
;
10665 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10667 tmp
= ix86_compare_op0
;
10668 ix86_compare_op0
= ix86_compare_op1
;
10669 ix86_compare_op1
= tmp
;
10670 code
= swap_condition (code
);
10672 if (GET_MODE (ix86_compare_op0
) == DImode
)
10674 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10675 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10680 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10681 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10685 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10686 avoid two branches. This costs one extra insn, so disable when
10687 optimizing for size. */
10689 if ((code
== EQ
|| code
== NE
)
10691 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10696 if (hi
[1] != const0_rtx
)
10697 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10698 NULL_RTX
, 0, OPTAB_WIDEN
);
10701 if (lo
[1] != const0_rtx
)
10702 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10703 NULL_RTX
, 0, OPTAB_WIDEN
);
10705 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10706 NULL_RTX
, 0, OPTAB_WIDEN
);
10708 ix86_compare_op0
= tmp
;
10709 ix86_compare_op1
= const0_rtx
;
10710 ix86_expand_branch (code
, label
);
10714 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10715 op1 is a constant and the low word is zero, then we can just
10716 examine the high word. */
10718 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
10721 case LT
: case LTU
: case GE
: case GEU
:
10722 ix86_compare_op0
= hi
[0];
10723 ix86_compare_op1
= hi
[1];
10724 ix86_expand_branch (code
, label
);
10730 /* Otherwise, we need two or three jumps. */
10732 label2
= gen_label_rtx ();
10735 code2
= swap_condition (code
);
10736 code3
= unsigned_condition (code
);
10740 case LT
: case GT
: case LTU
: case GTU
:
10743 case LE
: code1
= LT
; code2
= GT
; break;
10744 case GE
: code1
= GT
; code2
= LT
; break;
10745 case LEU
: code1
= LTU
; code2
= GTU
; break;
10746 case GEU
: code1
= GTU
; code2
= LTU
; break;
10748 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10749 case NE
: code2
= UNKNOWN
; break;
10752 gcc_unreachable ();
10757 * if (hi(a) < hi(b)) goto true;
10758 * if (hi(a) > hi(b)) goto false;
10759 * if (lo(a) < lo(b)) goto true;
10763 ix86_compare_op0
= hi
[0];
10764 ix86_compare_op1
= hi
[1];
10766 if (code1
!= UNKNOWN
)
10767 ix86_expand_branch (code1
, label
);
10768 if (code2
!= UNKNOWN
)
10769 ix86_expand_branch (code2
, label2
);
10771 ix86_compare_op0
= lo
[0];
10772 ix86_compare_op1
= lo
[1];
10773 ix86_expand_branch (code3
, label
);
10775 if (code2
!= UNKNOWN
)
10776 emit_label (label2
);
10781 gcc_unreachable ();
10785 /* Split branch based on floating point condition. */
10787 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
10788 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
10790 rtx second
, bypass
;
10791 rtx label
= NULL_RTX
;
10793 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
10796 if (target2
!= pc_rtx
)
10799 code
= reverse_condition_maybe_unordered (code
);
10804 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
10805 tmp
, &second
, &bypass
);
10807 /* Remove pushed operand from stack. */
10809 ix86_free_from_memory (GET_MODE (pushed
));
10811 if (split_branch_probability
>= 0)
10813 /* Distribute the probabilities across the jumps.
10814 Assume the BYPASS and SECOND to be always test
10816 probability
= split_branch_probability
;
10818 /* Value of 1 is low enough to make no need for probability
10819 to be updated. Later we may run some experiments and see
10820 if unordered values are more frequent in practice. */
10822 bypass_probability
= 1;
10824 second_probability
= 1;
10826 if (bypass
!= NULL_RTX
)
10828 label
= gen_label_rtx ();
10829 i
= emit_jump_insn (gen_rtx_SET
10831 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10833 gen_rtx_LABEL_REF (VOIDmode
,
10836 if (bypass_probability
>= 0)
10838 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10839 GEN_INT (bypass_probability
),
10842 i
= emit_jump_insn (gen_rtx_SET
10844 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10845 condition
, target1
, target2
)));
10846 if (probability
>= 0)
10848 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10849 GEN_INT (probability
),
10851 if (second
!= NULL_RTX
)
10853 i
= emit_jump_insn (gen_rtx_SET
10855 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
10857 if (second_probability
>= 0)
10859 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10860 GEN_INT (second_probability
),
10863 if (label
!= NULL_RTX
)
10864 emit_label (label
);
10868 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
10870 rtx ret
, tmp
, tmpreg
, equiv
;
10871 rtx second_test
, bypass_test
;
10873 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
10874 return 0; /* FAIL */
10876 gcc_assert (GET_MODE (dest
) == QImode
);
10878 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10879 PUT_MODE (ret
, QImode
);
10884 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
10885 if (bypass_test
|| second_test
)
10887 rtx test
= second_test
;
10889 rtx tmp2
= gen_reg_rtx (QImode
);
10892 gcc_assert (!second_test
);
10893 test
= bypass_test
;
10895 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
10897 PUT_MODE (test
, QImode
);
10898 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
10901 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
10903 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
10906 /* Attach a REG_EQUAL note describing the comparison result. */
10907 if (ix86_compare_op0
&& ix86_compare_op1
)
10909 equiv
= simplify_gen_relational (code
, QImode
,
10910 GET_MODE (ix86_compare_op0
),
10911 ix86_compare_op0
, ix86_compare_op1
);
10912 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
10915 return 1; /* DONE */
10918 /* Expand comparison setting or clearing carry flag. Return true when
10919 successful and set pop for the operation. */
10921 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
10923 enum machine_mode mode
=
10924 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
10926 /* Do not handle DImode compares that go through special path. Also we can't
10927 deal with FP compares yet. This is possible to add. */
10928 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
10930 if (FLOAT_MODE_P (mode
))
10932 rtx second_test
= NULL
, bypass_test
= NULL
;
10933 rtx compare_op
, compare_seq
;
10935 /* Shortcut: following common codes never translate into carry flag compares. */
10936 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
10937 || code
== ORDERED
|| code
== UNORDERED
)
10940 /* These comparisons require zero flag; swap operands so they won't. */
10941 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
10942 && !TARGET_IEEE_FP
)
10947 code
= swap_condition (code
);
10950 /* Try to expand the comparison and verify that we end up with carry flag
10951 based comparison. This is fails to be true only when we decide to expand
10952 comparison using arithmetic that is not too common scenario. */
10954 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10955 &second_test
, &bypass_test
);
10956 compare_seq
= get_insns ();
10959 if (second_test
|| bypass_test
)
10961 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10962 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10963 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
10965 code
= GET_CODE (compare_op
);
10966 if (code
!= LTU
&& code
!= GEU
)
10968 emit_insn (compare_seq
);
10972 if (!INTEGRAL_MODE_P (mode
))
10980 /* Convert a==0 into (unsigned)a<1. */
10983 if (op1
!= const0_rtx
)
10986 code
= (code
== EQ
? LTU
: GEU
);
10989 /* Convert a>b into b<a or a>=b-1. */
10992 if (CONST_INT_P (op1
))
10994 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
10995 /* Bail out on overflow. We still can swap operands but that
10996 would force loading of the constant into register. */
10997 if (op1
== const0_rtx
10998 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11000 code
= (code
== GTU
? GEU
: LTU
);
11007 code
= (code
== GTU
? LTU
: GEU
);
11011 /* Convert a>=0 into (unsigned)a<0x80000000. */
11014 if (mode
== DImode
|| op1
!= const0_rtx
)
11016 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11017 code
= (code
== LT
? GEU
: LTU
);
11021 if (mode
== DImode
|| op1
!= constm1_rtx
)
11023 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11024 code
= (code
== LE
? GEU
: LTU
);
11030 /* Swapping operands may cause constant to appear as first operand. */
11031 if (!nonimmediate_operand (op0
, VOIDmode
))
11033 if (no_new_pseudos
)
11035 op0
= force_reg (mode
, op0
);
11037 ix86_compare_op0
= op0
;
11038 ix86_compare_op1
= op1
;
11039 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11040 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11045 ix86_expand_int_movcc (rtx operands
[])
11047 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11048 rtx compare_seq
, compare_op
;
11049 rtx second_test
, bypass_test
;
11050 enum machine_mode mode
= GET_MODE (operands
[0]);
11051 bool sign_bit_compare_p
= false;;
11054 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11055 compare_seq
= get_insns ();
11058 compare_code
= GET_CODE (compare_op
);
11060 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11061 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11062 sign_bit_compare_p
= true;
11064 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11065 HImode insns, we'd be swallowed in word prefix ops. */
11067 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11068 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11069 && CONST_INT_P (operands
[2])
11070 && CONST_INT_P (operands
[3]))
11072 rtx out
= operands
[0];
11073 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11074 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11075 HOST_WIDE_INT diff
;
11078 /* Sign bit compares are better done using shifts than we do by using
11080 if (sign_bit_compare_p
11081 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11082 ix86_compare_op1
, &compare_op
))
11084 /* Detect overlap between destination and compare sources. */
11087 if (!sign_bit_compare_p
)
11089 bool fpcmp
= false;
11091 compare_code
= GET_CODE (compare_op
);
11093 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11094 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11097 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11100 /* To simplify rest of code, restrict to the GEU case. */
11101 if (compare_code
== LTU
)
11103 HOST_WIDE_INT tmp
= ct
;
11106 compare_code
= reverse_condition (compare_code
);
11107 code
= reverse_condition (code
);
11112 PUT_CODE (compare_op
,
11113 reverse_condition_maybe_unordered
11114 (GET_CODE (compare_op
)));
11116 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11120 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11121 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11122 tmp
= gen_reg_rtx (mode
);
11124 if (mode
== DImode
)
11125 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11127 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11131 if (code
== GT
|| code
== GE
)
11132 code
= reverse_condition (code
);
11135 HOST_WIDE_INT tmp
= ct
;
11140 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11141 ix86_compare_op1
, VOIDmode
, 0, -1);
11154 tmp
= expand_simple_binop (mode
, PLUS
,
11156 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11167 tmp
= expand_simple_binop (mode
, IOR
,
11169 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11171 else if (diff
== -1 && ct
)
11181 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11183 tmp
= expand_simple_binop (mode
, PLUS
,
11184 copy_rtx (tmp
), GEN_INT (cf
),
11185 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11193 * andl cf - ct, dest
11203 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11206 tmp
= expand_simple_binop (mode
, AND
,
11208 gen_int_mode (cf
- ct
, mode
),
11209 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11211 tmp
= expand_simple_binop (mode
, PLUS
,
11212 copy_rtx (tmp
), GEN_INT (ct
),
11213 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11216 if (!rtx_equal_p (tmp
, out
))
11217 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11219 return 1; /* DONE */
11225 tmp
= ct
, ct
= cf
, cf
= tmp
;
11227 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11229 /* We may be reversing unordered compare to normal compare, that
11230 is not valid in general (we may convert non-trapping condition
11231 to trapping one), however on i386 we currently emit all
11232 comparisons unordered. */
11233 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11234 code
= reverse_condition_maybe_unordered (code
);
11238 compare_code
= reverse_condition (compare_code
);
11239 code
= reverse_condition (code
);
11243 compare_code
= UNKNOWN
;
11244 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11245 && CONST_INT_P (ix86_compare_op1
))
11247 if (ix86_compare_op1
== const0_rtx
11248 && (code
== LT
|| code
== GE
))
11249 compare_code
= code
;
11250 else if (ix86_compare_op1
== constm1_rtx
)
11254 else if (code
== GT
)
11259 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11260 if (compare_code
!= UNKNOWN
11261 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11262 && (cf
== -1 || ct
== -1))
11264 /* If lea code below could be used, only optimize
11265 if it results in a 2 insn sequence. */
11267 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11268 || diff
== 3 || diff
== 5 || diff
== 9)
11269 || (compare_code
== LT
&& ct
== -1)
11270 || (compare_code
== GE
&& cf
== -1))
11273 * notl op1 (if necessary)
11281 code
= reverse_condition (code
);
11284 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11285 ix86_compare_op1
, VOIDmode
, 0, -1);
11287 out
= expand_simple_binop (mode
, IOR
,
11289 out
, 1, OPTAB_DIRECT
);
11290 if (out
!= operands
[0])
11291 emit_move_insn (operands
[0], out
);
11293 return 1; /* DONE */
11298 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11299 || diff
== 3 || diff
== 5 || diff
== 9)
11300 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11302 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11308 * lea cf(dest*(ct-cf)),dest
11312 * This also catches the degenerate setcc-only case.
11318 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11319 ix86_compare_op1
, VOIDmode
, 0, 1);
11322 /* On x86_64 the lea instruction operates on Pmode, so we need
11323 to get arithmetics done in proper mode to match. */
11325 tmp
= copy_rtx (out
);
11329 out1
= copy_rtx (out
);
11330 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11334 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11340 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11343 if (!rtx_equal_p (tmp
, out
))
11346 out
= force_operand (tmp
, copy_rtx (out
));
11348 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11350 if (!rtx_equal_p (out
, operands
[0]))
11351 emit_move_insn (operands
[0], copy_rtx (out
));
11353 return 1; /* DONE */
11357 * General case: Jumpful:
11358 * xorl dest,dest cmpl op1, op2
11359 * cmpl op1, op2 movl ct, dest
11360 * setcc dest jcc 1f
11361 * decl dest movl cf, dest
11362 * andl (cf-ct),dest 1:
11365 * Size 20. Size 14.
11367 * This is reasonably steep, but branch mispredict costs are
11368 * high on modern cpus, so consider failing only if optimizing
11372 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11373 && BRANCH_COST
>= 2)
11379 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11380 /* We may be reversing unordered compare to normal compare,
11381 that is not valid in general (we may convert non-trapping
11382 condition to trapping one), however on i386 we currently
11383 emit all comparisons unordered. */
11384 code
= reverse_condition_maybe_unordered (code
);
11387 code
= reverse_condition (code
);
11388 if (compare_code
!= UNKNOWN
)
11389 compare_code
= reverse_condition (compare_code
);
11393 if (compare_code
!= UNKNOWN
)
11395 /* notl op1 (if needed)
11400 For x < 0 (resp. x <= -1) there will be no notl,
11401 so if possible swap the constants to get rid of the
11403 True/false will be -1/0 while code below (store flag
11404 followed by decrement) is 0/-1, so the constants need
11405 to be exchanged once more. */
11407 if (compare_code
== GE
|| !cf
)
11409 code
= reverse_condition (code
);
11414 HOST_WIDE_INT tmp
= cf
;
11419 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11420 ix86_compare_op1
, VOIDmode
, 0, -1);
11424 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11425 ix86_compare_op1
, VOIDmode
, 0, 1);
11427 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11428 copy_rtx (out
), 1, OPTAB_DIRECT
);
11431 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11432 gen_int_mode (cf
- ct
, mode
),
11433 copy_rtx (out
), 1, OPTAB_DIRECT
);
11435 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11436 copy_rtx (out
), 1, OPTAB_DIRECT
);
11437 if (!rtx_equal_p (out
, operands
[0]))
11438 emit_move_insn (operands
[0], copy_rtx (out
));
11440 return 1; /* DONE */
11444 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11446 /* Try a few things more with specific constants and a variable. */
11449 rtx var
, orig_out
, out
, tmp
;
11451 if (BRANCH_COST
<= 2)
11452 return 0; /* FAIL */
11454 /* If one of the two operands is an interesting constant, load a
11455 constant with the above and mask it in with a logical operation. */
11457 if (CONST_INT_P (operands
[2]))
11460 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11461 operands
[3] = constm1_rtx
, op
= and_optab
;
11462 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11463 operands
[3] = const0_rtx
, op
= ior_optab
;
11465 return 0; /* FAIL */
11467 else if (CONST_INT_P (operands
[3]))
11470 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11471 operands
[2] = constm1_rtx
, op
= and_optab
;
11472 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11473 operands
[2] = const0_rtx
, op
= ior_optab
;
11475 return 0; /* FAIL */
11478 return 0; /* FAIL */
11480 orig_out
= operands
[0];
11481 tmp
= gen_reg_rtx (mode
);
11484 /* Recurse to get the constant loaded. */
11485 if (ix86_expand_int_movcc (operands
) == 0)
11486 return 0; /* FAIL */
11488 /* Mask in the interesting variable. */
11489 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11491 if (!rtx_equal_p (out
, orig_out
))
11492 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11494 return 1; /* DONE */
11498 * For comparison with above,
11508 if (! nonimmediate_operand (operands
[2], mode
))
11509 operands
[2] = force_reg (mode
, operands
[2]);
11510 if (! nonimmediate_operand (operands
[3], mode
))
11511 operands
[3] = force_reg (mode
, operands
[3]);
11513 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11515 rtx tmp
= gen_reg_rtx (mode
);
11516 emit_move_insn (tmp
, operands
[3]);
11519 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11521 rtx tmp
= gen_reg_rtx (mode
);
11522 emit_move_insn (tmp
, operands
[2]);
11526 if (! register_operand (operands
[2], VOIDmode
)
11528 || ! register_operand (operands
[3], VOIDmode
)))
11529 operands
[2] = force_reg (mode
, operands
[2]);
11532 && ! register_operand (operands
[3], VOIDmode
))
11533 operands
[3] = force_reg (mode
, operands
[3]);
11535 emit_insn (compare_seq
);
11536 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11537 gen_rtx_IF_THEN_ELSE (mode
,
11538 compare_op
, operands
[2],
11541 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11542 gen_rtx_IF_THEN_ELSE (mode
,
11544 copy_rtx (operands
[3]),
11545 copy_rtx (operands
[0]))));
11547 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11548 gen_rtx_IF_THEN_ELSE (mode
,
11550 copy_rtx (operands
[2]),
11551 copy_rtx (operands
[0]))));
11553 return 1; /* DONE */
11556 /* Swap, force into registers, or otherwise massage the two operands
11557 to an sse comparison with a mask result. Thus we differ a bit from
11558 ix86_prepare_fp_compare_args which expects to produce a flags result.
11560 The DEST operand exists to help determine whether to commute commutative
11561 operators. The POP0/POP1 operands are updated in place. The new
11562 comparison code is returned, or UNKNOWN if not implementable. */
11564 static enum rtx_code
11565 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11566 rtx
*pop0
, rtx
*pop1
)
11574 /* We have no LTGT as an operator. We could implement it with
11575 NE & ORDERED, but this requires an extra temporary. It's
11576 not clear that it's worth it. */
11583 /* These are supported directly. */
11590 /* For commutative operators, try to canonicalize the destination
11591 operand to be first in the comparison - this helps reload to
11592 avoid extra moves. */
11593 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11601 /* These are not supported directly. Swap the comparison operands
11602 to transform into something that is supported. */
11606 code
= swap_condition (code
);
11610 gcc_unreachable ();
11616 /* Detect conditional moves that exactly match min/max operational
11617 semantics. Note that this is IEEE safe, as long as we don't
11618 interchange the operands.
11620 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11621 and TRUE if the operation is successful and instructions are emitted. */
11624 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
11625 rtx cmp_op1
, rtx if_true
, rtx if_false
)
11627 enum machine_mode mode
;
11633 else if (code
== UNGE
)
11636 if_true
= if_false
;
11642 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
11644 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
11649 mode
= GET_MODE (dest
);
11651 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11652 but MODE may be a vector mode and thus not appropriate. */
11653 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
11655 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
11658 if_true
= force_reg (mode
, if_true
);
11659 v
= gen_rtvec (2, if_true
, if_false
);
11660 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
11664 code
= is_min
? SMIN
: SMAX
;
11665 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
11668 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
11672 /* Expand an sse vector comparison. Return the register with the result. */
11675 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
11676 rtx op_true
, rtx op_false
)
11678 enum machine_mode mode
= GET_MODE (dest
);
11681 cmp_op0
= force_reg (mode
, cmp_op0
);
11682 if (!nonimmediate_operand (cmp_op1
, mode
))
11683 cmp_op1
= force_reg (mode
, cmp_op1
);
11686 || reg_overlap_mentioned_p (dest
, op_true
)
11687 || reg_overlap_mentioned_p (dest
, op_false
))
11688 dest
= gen_reg_rtx (mode
);
11690 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
11691 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11696 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11697 operations. This is used for both scalar and vector conditional moves. */
11700 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
11702 enum machine_mode mode
= GET_MODE (dest
);
11705 if (op_false
== CONST0_RTX (mode
))
11707 op_true
= force_reg (mode
, op_true
);
11708 x
= gen_rtx_AND (mode
, cmp
, op_true
);
11709 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11711 else if (op_true
== CONST0_RTX (mode
))
11713 op_false
= force_reg (mode
, op_false
);
11714 x
= gen_rtx_NOT (mode
, cmp
);
11715 x
= gen_rtx_AND (mode
, x
, op_false
);
11716 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11720 op_true
= force_reg (mode
, op_true
);
11721 op_false
= force_reg (mode
, op_false
);
11723 t2
= gen_reg_rtx (mode
);
11725 t3
= gen_reg_rtx (mode
);
11729 x
= gen_rtx_AND (mode
, op_true
, cmp
);
11730 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
11732 x
= gen_rtx_NOT (mode
, cmp
);
11733 x
= gen_rtx_AND (mode
, x
, op_false
);
11734 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
11736 x
= gen_rtx_IOR (mode
, t3
, t2
);
11737 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11741 /* Expand a floating-point conditional move. Return true if successful. */
11744 ix86_expand_fp_movcc (rtx operands
[])
11746 enum machine_mode mode
= GET_MODE (operands
[0]);
11747 enum rtx_code code
= GET_CODE (operands
[1]);
11748 rtx tmp
, compare_op
, second_test
, bypass_test
;
11750 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11752 enum machine_mode cmode
;
11754 /* Since we've no cmove for sse registers, don't force bad register
11755 allocation just to gain access to it. Deny movcc when the
11756 comparison mode doesn't match the move mode. */
11757 cmode
= GET_MODE (ix86_compare_op0
);
11758 if (cmode
== VOIDmode
)
11759 cmode
= GET_MODE (ix86_compare_op1
);
11763 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11765 &ix86_compare_op1
);
11766 if (code
== UNKNOWN
)
11769 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
11770 ix86_compare_op1
, operands
[2],
11774 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
11775 ix86_compare_op1
, operands
[2], operands
[3]);
11776 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
11780 /* The floating point conditional move instructions don't directly
11781 support conditions resulting from a signed integer comparison. */
11783 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11785 /* The floating point conditional move instructions don't directly
11786 support signed integer comparisons. */
11788 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
11790 gcc_assert (!second_test
&& !bypass_test
);
11791 tmp
= gen_reg_rtx (QImode
);
11792 ix86_expand_setcc (code
, tmp
);
11794 ix86_compare_op0
= tmp
;
11795 ix86_compare_op1
= const0_rtx
;
11796 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11798 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11800 tmp
= gen_reg_rtx (mode
);
11801 emit_move_insn (tmp
, operands
[3]);
11804 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11806 tmp
= gen_reg_rtx (mode
);
11807 emit_move_insn (tmp
, operands
[2]);
11811 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11812 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
11813 operands
[2], operands
[3])));
11815 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11816 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
11817 operands
[3], operands
[0])));
11819 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11820 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
11821 operands
[2], operands
[0])));
11826 /* Expand a floating-point vector conditional move; a vcond operation
11827 rather than a movcc operation. */
11830 ix86_expand_fp_vcond (rtx operands
[])
11832 enum rtx_code code
= GET_CODE (operands
[3]);
11835 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11836 &operands
[4], &operands
[5]);
11837 if (code
== UNKNOWN
)
11840 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
11841 operands
[5], operands
[1], operands
[2]))
11844 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
11845 operands
[1], operands
[2]);
11846 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
11850 /* Expand a signed integral vector conditional move. */
11853 ix86_expand_int_vcond (rtx operands
[])
11855 enum machine_mode mode
= GET_MODE (operands
[0]);
11856 enum rtx_code code
= GET_CODE (operands
[3]);
11857 bool negate
= false;
11860 cop0
= operands
[4];
11861 cop1
= operands
[5];
11863 /* Canonicalize the comparison to EQ, GT, GTU. */
11874 code
= reverse_condition (code
);
11880 code
= reverse_condition (code
);
11886 code
= swap_condition (code
);
11887 x
= cop0
, cop0
= cop1
, cop1
= x
;
11891 gcc_unreachable ();
11894 /* Unsigned parallel compare is not supported by the hardware. Play some
11895 tricks to turn this into a signed comparison against 0. */
11898 cop0
= force_reg (mode
, cop0
);
11906 /* Perform a parallel modulo subtraction. */
11907 t1
= gen_reg_rtx (mode
);
11908 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
11910 /* Extract the original sign bit of op0. */
11911 mask
= GEN_INT (-0x80000000);
11912 mask
= gen_rtx_CONST_VECTOR (mode
,
11913 gen_rtvec (4, mask
, mask
, mask
, mask
));
11914 mask
= force_reg (mode
, mask
);
11915 t2
= gen_reg_rtx (mode
);
11916 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
11918 /* XOR it back into the result of the subtraction. This results
11919 in the sign bit set iff we saw unsigned underflow. */
11920 x
= gen_reg_rtx (mode
);
11921 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
11929 /* Perform a parallel unsigned saturating subtraction. */
11930 x
= gen_reg_rtx (mode
);
11931 emit_insn (gen_rtx_SET (VOIDmode
, x
,
11932 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
11939 gcc_unreachable ();
11943 cop1
= CONST0_RTX (mode
);
11946 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
11947 operands
[1+negate
], operands
[2-negate
]);
11949 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
11950 operands
[2-negate
]);
11954 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11955 true if we should do zero extension, else sign extension. HIGH_P is
11956 true if we want the N/2 high elements, else the low elements. */
11959 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
11961 enum machine_mode imode
= GET_MODE (operands
[1]);
11962 rtx (*unpack
)(rtx
, rtx
, rtx
);
11969 unpack
= gen_vec_interleave_highv16qi
;
11971 unpack
= gen_vec_interleave_lowv16qi
;
11975 unpack
= gen_vec_interleave_highv8hi
;
11977 unpack
= gen_vec_interleave_lowv8hi
;
11981 unpack
= gen_vec_interleave_highv4si
;
11983 unpack
= gen_vec_interleave_lowv4si
;
11986 gcc_unreachable ();
11989 dest
= gen_lowpart (imode
, operands
[0]);
11992 se
= force_reg (imode
, CONST0_RTX (imode
));
11994 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
11995 operands
[1], pc_rtx
, pc_rtx
);
11997 emit_insn (unpack (dest
, operands
[1], se
));
12000 /* Expand conditional increment or decrement using adb/sbb instructions.
12001 The default case using setcc followed by the conditional move can be
12002 done by generic code. */
12004 ix86_expand_int_addcc (rtx operands
[])
12006 enum rtx_code code
= GET_CODE (operands
[1]);
12008 rtx val
= const0_rtx
;
12009 bool fpcmp
= false;
12010 enum machine_mode mode
= GET_MODE (operands
[0]);
12012 if (operands
[3] != const1_rtx
12013 && operands
[3] != constm1_rtx
)
12015 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12016 ix86_compare_op1
, &compare_op
))
12018 code
= GET_CODE (compare_op
);
12020 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12021 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12024 code
= ix86_fp_compare_code_to_integer (code
);
12031 PUT_CODE (compare_op
,
12032 reverse_condition_maybe_unordered
12033 (GET_CODE (compare_op
)));
12035 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12037 PUT_MODE (compare_op
, mode
);
12039 /* Construct either adc or sbb insn. */
12040 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12042 switch (GET_MODE (operands
[0]))
12045 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12048 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12051 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12054 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12057 gcc_unreachable ();
12062 switch (GET_MODE (operands
[0]))
12065 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12068 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12071 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12074 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12077 gcc_unreachable ();
12080 return 1; /* DONE */
12084 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12085 works for floating pointer parameters and nonoffsetable memories.
12086 For pushes, it returns just stack offsets; the values will be saved
12087 in the right order. Maximally three parts are generated. */
12090 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12095 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12097 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12099 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12100 gcc_assert (size
>= 2 && size
<= 3);
12102 /* Optimize constant pool reference to immediates. This is used by fp
12103 moves, that force all constants to memory to allow combining. */
12104 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12106 rtx tmp
= maybe_get_pool_constant (operand
);
12111 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12113 /* The only non-offsetable memories we handle are pushes. */
12114 int ok
= push_operand (operand
, VOIDmode
);
12118 operand
= copy_rtx (operand
);
12119 PUT_MODE (operand
, Pmode
);
12120 parts
[0] = parts
[1] = parts
[2] = operand
;
12124 if (GET_CODE (operand
) == CONST_VECTOR
)
12126 enum machine_mode imode
= int_mode_for_mode (mode
);
12127 /* Caution: if we looked through a constant pool memory above,
12128 the operand may actually have a different mode now. That's
12129 ok, since we want to pun this all the way back to an integer. */
12130 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12131 gcc_assert (operand
!= NULL
);
12137 if (mode
== DImode
)
12138 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12141 if (REG_P (operand
))
12143 gcc_assert (reload_completed
);
12144 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12145 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12147 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12149 else if (offsettable_memref_p (operand
))
12151 operand
= adjust_address (operand
, SImode
, 0);
12152 parts
[0] = operand
;
12153 parts
[1] = adjust_address (operand
, SImode
, 4);
12155 parts
[2] = adjust_address (operand
, SImode
, 8);
12157 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12162 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12166 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12167 parts
[2] = gen_int_mode (l
[2], SImode
);
12170 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12173 gcc_unreachable ();
12175 parts
[1] = gen_int_mode (l
[1], SImode
);
12176 parts
[0] = gen_int_mode (l
[0], SImode
);
12179 gcc_unreachable ();
12184 if (mode
== TImode
)
12185 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12186 if (mode
== XFmode
|| mode
== TFmode
)
12188 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12189 if (REG_P (operand
))
12191 gcc_assert (reload_completed
);
12192 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12193 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12195 else if (offsettable_memref_p (operand
))
12197 operand
= adjust_address (operand
, DImode
, 0);
12198 parts
[0] = operand
;
12199 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12201 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12206 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12207 real_to_target (l
, &r
, mode
);
12209 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12210 if (HOST_BITS_PER_WIDE_INT
>= 64)
12213 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12214 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12217 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12219 if (upper_mode
== SImode
)
12220 parts
[1] = gen_int_mode (l
[2], SImode
);
12221 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12224 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12225 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12228 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12231 gcc_unreachable ();
12238 /* Emit insns to perform a move or push of DI, DF, and XF values.
12239 Return false when normal moves are needed; true when all required
12240 insns have been emitted. Operands 2-4 contain the input values
12241 int the correct order; operands 5-7 contain the output values. */
12244 ix86_split_long_move (rtx operands
[])
12249 int collisions
= 0;
12250 enum machine_mode mode
= GET_MODE (operands
[0]);
12252 /* The DFmode expanders may ask us to move double.
12253 For 64bit target this is single move. By hiding the fact
12254 here we simplify i386.md splitters. */
12255 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12257 /* Optimize constant pool reference to immediates. This is used by
12258 fp moves, that force all constants to memory to allow combining. */
12260 if (MEM_P (operands
[1])
12261 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12262 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12263 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12264 if (push_operand (operands
[0], VOIDmode
))
12266 operands
[0] = copy_rtx (operands
[0]);
12267 PUT_MODE (operands
[0], Pmode
);
12270 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12271 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12272 emit_move_insn (operands
[0], operands
[1]);
12276 /* The only non-offsettable memory we handle is push. */
12277 if (push_operand (operands
[0], VOIDmode
))
12280 gcc_assert (!MEM_P (operands
[0])
12281 || offsettable_memref_p (operands
[0]));
12283 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12284 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12286 /* When emitting push, take care for source operands on the stack. */
12287 if (push
&& MEM_P (operands
[1])
12288 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12291 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12292 XEXP (part
[1][2], 0));
12293 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12294 XEXP (part
[1][1], 0));
12297 /* We need to do copy in the right order in case an address register
12298 of the source overlaps the destination. */
12299 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12301 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12303 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12306 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12309 /* Collision in the middle part can be handled by reordering. */
12310 if (collisions
== 1 && nparts
== 3
12311 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12314 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12315 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12318 /* If there are more collisions, we can't handle it by reordering.
12319 Do an lea to the last part and use only one colliding move. */
12320 else if (collisions
> 1)
12326 base
= part
[0][nparts
- 1];
12328 /* Handle the case when the last part isn't valid for lea.
12329 Happens in 64-bit mode storing the 12-byte XFmode. */
12330 if (GET_MODE (base
) != Pmode
)
12331 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12333 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12334 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12335 part
[1][1] = replace_equiv_address (part
[1][1],
12336 plus_constant (base
, UNITS_PER_WORD
));
12338 part
[1][2] = replace_equiv_address (part
[1][2],
12339 plus_constant (base
, 8));
12349 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12350 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12351 emit_move_insn (part
[0][2], part
[1][2]);
12356 /* In 64bit mode we don't have 32bit push available. In case this is
12357 register, it is OK - we will just use larger counterpart. We also
12358 retype memory - these comes from attempt to avoid REX prefix on
12359 moving of second half of TFmode value. */
12360 if (GET_MODE (part
[1][1]) == SImode
)
12362 switch (GET_CODE (part
[1][1]))
12365 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12369 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12373 gcc_unreachable ();
12376 if (GET_MODE (part
[1][0]) == SImode
)
12377 part
[1][0] = part
[1][1];
12380 emit_move_insn (part
[0][1], part
[1][1]);
12381 emit_move_insn (part
[0][0], part
[1][0]);
12385 /* Choose correct order to not overwrite the source before it is copied. */
12386 if ((REG_P (part
[0][0])
12387 && REG_P (part
[1][1])
12388 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12390 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12392 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12396 operands
[2] = part
[0][2];
12397 operands
[3] = part
[0][1];
12398 operands
[4] = part
[0][0];
12399 operands
[5] = part
[1][2];
12400 operands
[6] = part
[1][1];
12401 operands
[7] = part
[1][0];
12405 operands
[2] = part
[0][1];
12406 operands
[3] = part
[0][0];
12407 operands
[5] = part
[1][1];
12408 operands
[6] = part
[1][0];
12415 operands
[2] = part
[0][0];
12416 operands
[3] = part
[0][1];
12417 operands
[4] = part
[0][2];
12418 operands
[5] = part
[1][0];
12419 operands
[6] = part
[1][1];
12420 operands
[7] = part
[1][2];
12424 operands
[2] = part
[0][0];
12425 operands
[3] = part
[0][1];
12426 operands
[5] = part
[1][0];
12427 operands
[6] = part
[1][1];
12431 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12434 if (CONST_INT_P (operands
[5])
12435 && operands
[5] != const0_rtx
12436 && REG_P (operands
[2]))
12438 if (CONST_INT_P (operands
[6])
12439 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12440 operands
[6] = operands
[2];
12443 && CONST_INT_P (operands
[7])
12444 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12445 operands
[7] = operands
[2];
12449 && CONST_INT_P (operands
[6])
12450 && operands
[6] != const0_rtx
12451 && REG_P (operands
[3])
12452 && CONST_INT_P (operands
[7])
12453 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12454 operands
[7] = operands
[3];
12457 emit_move_insn (operands
[2], operands
[5]);
12458 emit_move_insn (operands
[3], operands
[6]);
12460 emit_move_insn (operands
[4], operands
[7]);
12465 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12466 left shift by a constant, either using a single shift or
12467 a sequence of add instructions. */
12470 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12474 emit_insn ((mode
== DImode
12476 : gen_adddi3
) (operand
, operand
, operand
));
12478 else if (!optimize_size
12479 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12482 for (i
=0; i
<count
; i
++)
12484 emit_insn ((mode
== DImode
12486 : gen_adddi3
) (operand
, operand
, operand
));
12490 emit_insn ((mode
== DImode
12492 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12496 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12498 rtx low
[2], high
[2];
12500 const int single_width
= mode
== DImode
? 32 : 64;
12502 if (CONST_INT_P (operands
[2]))
12504 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12505 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12507 if (count
>= single_width
)
12509 emit_move_insn (high
[0], low
[1]);
12510 emit_move_insn (low
[0], const0_rtx
);
12512 if (count
> single_width
)
12513 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12517 if (!rtx_equal_p (operands
[0], operands
[1]))
12518 emit_move_insn (operands
[0], operands
[1]);
12519 emit_insn ((mode
== DImode
12521 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12522 ix86_expand_ashl_const (low
[0], count
, mode
);
12527 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12529 if (operands
[1] == const1_rtx
)
12531 /* Assuming we've chosen a QImode capable registers, then 1 << N
12532 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12533 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12535 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12537 ix86_expand_clear (low
[0]);
12538 ix86_expand_clear (high
[0]);
12539 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12541 d
= gen_lowpart (QImode
, low
[0]);
12542 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12543 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12544 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12546 d
= gen_lowpart (QImode
, high
[0]);
12547 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12548 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12549 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12552 /* Otherwise, we can get the same results by manually performing
12553 a bit extract operation on bit 5/6, and then performing the two
12554 shifts. The two methods of getting 0/1 into low/high are exactly
12555 the same size. Avoiding the shift in the bit extract case helps
12556 pentium4 a bit; no one else seems to care much either way. */
12561 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12562 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12564 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12565 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12567 emit_insn ((mode
== DImode
12569 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12570 emit_insn ((mode
== DImode
12572 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12573 emit_move_insn (low
[0], high
[0]);
12574 emit_insn ((mode
== DImode
12576 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12579 emit_insn ((mode
== DImode
12581 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12582 emit_insn ((mode
== DImode
12584 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12588 if (operands
[1] == constm1_rtx
)
12590 /* For -1 << N, we can avoid the shld instruction, because we
12591 know that we're shifting 0...31/63 ones into a -1. */
12592 emit_move_insn (low
[0], constm1_rtx
);
12594 emit_move_insn (high
[0], low
[0]);
12596 emit_move_insn (high
[0], constm1_rtx
);
12600 if (!rtx_equal_p (operands
[0], operands
[1]))
12601 emit_move_insn (operands
[0], operands
[1]);
12603 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12604 emit_insn ((mode
== DImode
12606 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12609 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12611 if (TARGET_CMOVE
&& scratch
)
12613 ix86_expand_clear (scratch
);
12614 emit_insn ((mode
== DImode
12615 ? gen_x86_shift_adj_1
12616 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12619 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12623 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12625 rtx low
[2], high
[2];
12627 const int single_width
= mode
== DImode
? 32 : 64;
12629 if (CONST_INT_P (operands
[2]))
12631 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12632 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12634 if (count
== single_width
* 2 - 1)
12636 emit_move_insn (high
[0], high
[1]);
12637 emit_insn ((mode
== DImode
12639 : gen_ashrdi3
) (high
[0], high
[0],
12640 GEN_INT (single_width
- 1)));
12641 emit_move_insn (low
[0], high
[0]);
12644 else if (count
>= single_width
)
12646 emit_move_insn (low
[0], high
[1]);
12647 emit_move_insn (high
[0], low
[0]);
12648 emit_insn ((mode
== DImode
12650 : gen_ashrdi3
) (high
[0], high
[0],
12651 GEN_INT (single_width
- 1)));
12652 if (count
> single_width
)
12653 emit_insn ((mode
== DImode
12655 : gen_ashrdi3
) (low
[0], low
[0],
12656 GEN_INT (count
- single_width
)));
12660 if (!rtx_equal_p (operands
[0], operands
[1]))
12661 emit_move_insn (operands
[0], operands
[1]);
12662 emit_insn ((mode
== DImode
12664 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12665 emit_insn ((mode
== DImode
12667 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12672 if (!rtx_equal_p (operands
[0], operands
[1]))
12673 emit_move_insn (operands
[0], operands
[1]);
12675 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12677 emit_insn ((mode
== DImode
12679 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12680 emit_insn ((mode
== DImode
12682 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
12684 if (TARGET_CMOVE
&& scratch
)
12686 emit_move_insn (scratch
, high
[0]);
12687 emit_insn ((mode
== DImode
12689 : gen_ashrdi3
) (scratch
, scratch
,
12690 GEN_INT (single_width
- 1)));
12691 emit_insn ((mode
== DImode
12692 ? gen_x86_shift_adj_1
12693 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12697 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12702 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12704 rtx low
[2], high
[2];
12706 const int single_width
= mode
== DImode
? 32 : 64;
12708 if (CONST_INT_P (operands
[2]))
12710 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12711 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12713 if (count
>= single_width
)
12715 emit_move_insn (low
[0], high
[1]);
12716 ix86_expand_clear (high
[0]);
12718 if (count
> single_width
)
12719 emit_insn ((mode
== DImode
12721 : gen_lshrdi3
) (low
[0], low
[0],
12722 GEN_INT (count
- single_width
)));
12726 if (!rtx_equal_p (operands
[0], operands
[1]))
12727 emit_move_insn (operands
[0], operands
[1]);
12728 emit_insn ((mode
== DImode
12730 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12731 emit_insn ((mode
== DImode
12733 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12738 if (!rtx_equal_p (operands
[0], operands
[1]))
12739 emit_move_insn (operands
[0], operands
[1]);
12741 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12743 emit_insn ((mode
== DImode
12745 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12746 emit_insn ((mode
== DImode
12748 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12750 /* Heh. By reversing the arguments, we can reuse this pattern. */
12751 if (TARGET_CMOVE
&& scratch
)
12753 ix86_expand_clear (scratch
);
12754 emit_insn ((mode
== DImode
12755 ? gen_x86_shift_adj_1
12756 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12760 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
12764 /* Predict just emitted jump instruction to be taken with probability PROB. */
12766 predict_jump (int prob
)
12768 rtx insn
= get_last_insn ();
12769 gcc_assert (JUMP_P (insn
));
12771 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12776 /* Helper function for the string operations below. Dest VARIABLE whether
12777 it is aligned to VALUE bytes. If true, jump to the label. */
12779 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
12781 rtx label
= gen_label_rtx ();
12782 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
12783 if (GET_MODE (variable
) == DImode
)
12784 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
12786 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
12787 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
12790 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
12792 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
12796 /* Adjust COUNTER by the VALUE. */
12798 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
12800 if (GET_MODE (countreg
) == DImode
)
12801 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
12803 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
12806 /* Zero extend possibly SImode EXP to Pmode register. */
12808 ix86_zero_extend_to_Pmode (rtx exp
)
12811 if (GET_MODE (exp
) == VOIDmode
)
12812 return force_reg (Pmode
, exp
);
12813 if (GET_MODE (exp
) == Pmode
)
12814 return copy_to_mode_reg (Pmode
, exp
);
12815 r
= gen_reg_rtx (Pmode
);
12816 emit_insn (gen_zero_extendsidi2 (r
, exp
));
12820 /* Divide COUNTREG by SCALE. */
12822 scale_counter (rtx countreg
, int scale
)
12825 rtx piece_size_mask
;
12829 if (CONST_INT_P (countreg
))
12830 return GEN_INT (INTVAL (countreg
) / scale
);
12831 gcc_assert (REG_P (countreg
));
12833 piece_size_mask
= GEN_INT (scale
- 1);
12834 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
12835 GEN_INT (exact_log2 (scale
)),
12836 NULL
, 1, OPTAB_DIRECT
);
12840 /* When SRCPTR is non-NULL, output simple loop to move memory
12841 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
12842 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
12843 equivalent loop to set memory by VALUE (supposed to be in MODE).
12845 The size is rounded down to whole number of chunk size moved at once.
12846 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
12850 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
12851 rtx destptr
, rtx srcptr
, rtx value
,
12852 rtx count
, enum machine_mode mode
, int unroll
,
12855 rtx out_label
, top_label
, iter
, tmp
;
12856 enum machine_mode iter_mode
;
12857 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
12858 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
12864 iter_mode
= GET_MODE (count
);
12865 if (iter_mode
== VOIDmode
)
12866 iter_mode
= word_mode
;
12868 top_label
= gen_label_rtx ();
12869 out_label
= gen_label_rtx ();
12870 iter
= gen_reg_rtx (iter_mode
);
12872 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
12873 NULL
, 1, OPTAB_DIRECT
);
12874 /* Those two should combine. */
12875 if (piece_size
== const1_rtx
)
12877 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
12879 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
12881 emit_move_insn (iter
, const0_rtx
);
12883 emit_label (top_label
);
12885 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
12886 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
12887 destmem
= change_address (destmem
, mode
, x_addr
);
12891 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
12892 srcmem
= change_address (srcmem
, mode
, y_addr
);
12894 /* When unrolling for chips that reorder memory reads and writes,
12895 we can save registers by using single temporary.
12896 Also using 4 temporaries is overkill in 32bit mode. */
12897 if (!TARGET_64BIT
&& 0)
12899 for (i
= 0; i
< unroll
; i
++)
12904 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
12906 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
12908 emit_move_insn (destmem
, srcmem
);
12914 gcc_assert (unroll
<= 4);
12915 for (i
= 0; i
< unroll
; i
++)
12917 tmpreg
[i
] = gen_reg_rtx (mode
);
12921 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
12923 emit_move_insn (tmpreg
[i
], srcmem
);
12925 for (i
= 0; i
< unroll
; i
++)
12930 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
12932 emit_move_insn (destmem
, tmpreg
[i
]);
12937 for (i
= 0; i
< unroll
; i
++)
12941 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
12942 emit_move_insn (destmem
, value
);
12945 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
12946 true, OPTAB_LIB_WIDEN
);
12948 emit_move_insn (iter
, tmp
);
12950 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
12952 if (expected_size
!= -1)
12954 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
12955 if (expected_size
== 0)
12957 else if (expected_size
> REG_BR_PROB_BASE
)
12958 predict_jump (REG_BR_PROB_BASE
- 1);
12960 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
12963 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
12964 iter
= ix86_zero_extend_to_Pmode (iter
);
12965 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
12966 true, OPTAB_LIB_WIDEN
);
12967 if (tmp
!= destptr
)
12968 emit_move_insn (destptr
, tmp
);
12971 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
12972 true, OPTAB_LIB_WIDEN
);
12974 emit_move_insn (srcptr
, tmp
);
12976 emit_label (out_label
);
12979 /* Output "rep; mov" instruction.
12980 Arguments have same meaning as for previous function */
12982 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
12983 rtx destptr
, rtx srcptr
,
12985 enum machine_mode mode
)
12991 /* If the size is known, it is shorter to use rep movs. */
12992 if (mode
== QImode
&& CONST_INT_P (count
)
12993 && !(INTVAL (count
) & 3))
12996 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
12997 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
12998 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
12999 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13000 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13001 if (mode
!= QImode
)
13003 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13004 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13005 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13006 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13007 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13008 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13012 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13013 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13015 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13019 /* Output "rep; stos" instruction.
13020 Arguments have same meaning as for previous function */
13022 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13024 enum machine_mode mode
)
13029 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13030 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13031 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13032 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13033 if (mode
!= QImode
)
13035 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13036 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13037 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13040 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13041 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13045 emit_strmov (rtx destmem
, rtx srcmem
,
13046 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13048 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13049 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13050 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13053 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13055 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13056 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13059 if (CONST_INT_P (count
))
13061 HOST_WIDE_INT countval
= INTVAL (count
);
13064 if ((countval
& 0x16) && max_size
> 16)
13068 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13069 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13072 gcc_unreachable ();
13075 if ((countval
& 0x08) && max_size
> 8)
13078 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13081 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13082 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 4);
13086 if ((countval
& 0x04) && max_size
> 4)
13088 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13091 if ((countval
& 0x02) && max_size
> 2)
13093 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13096 if ((countval
& 0x01) && max_size
> 1)
13098 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13105 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13106 count
, 1, OPTAB_DIRECT
);
13107 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13108 count
, QImode
, 1, 4);
13112 /* When there are stringops, we can cheaply increase dest and src pointers.
13113 Otherwise we save code size by maintaining offset (zero is readily
13114 available from preceding rep operation) and using x86 addressing modes.
13116 if (TARGET_SINGLE_STRINGOP
)
13120 rtx label
= ix86_expand_aligntest (count
, 4, true);
13121 src
= change_address (srcmem
, SImode
, srcptr
);
13122 dest
= change_address (destmem
, SImode
, destptr
);
13123 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13124 emit_label (label
);
13125 LABEL_NUSES (label
) = 1;
13129 rtx label
= ix86_expand_aligntest (count
, 2, true);
13130 src
= change_address (srcmem
, HImode
, srcptr
);
13131 dest
= change_address (destmem
, HImode
, destptr
);
13132 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13133 emit_label (label
);
13134 LABEL_NUSES (label
) = 1;
13138 rtx label
= ix86_expand_aligntest (count
, 1, true);
13139 src
= change_address (srcmem
, QImode
, srcptr
);
13140 dest
= change_address (destmem
, QImode
, destptr
);
13141 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13142 emit_label (label
);
13143 LABEL_NUSES (label
) = 1;
13148 rtx offset
= force_reg (Pmode
, const0_rtx
);
13153 rtx label
= ix86_expand_aligntest (count
, 4, true);
13154 src
= change_address (srcmem
, SImode
, srcptr
);
13155 dest
= change_address (destmem
, SImode
, destptr
);
13156 emit_move_insn (dest
, src
);
13157 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13158 true, OPTAB_LIB_WIDEN
);
13160 emit_move_insn (offset
, tmp
);
13161 emit_label (label
);
13162 LABEL_NUSES (label
) = 1;
13166 rtx label
= ix86_expand_aligntest (count
, 2, true);
13167 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13168 src
= change_address (srcmem
, HImode
, tmp
);
13169 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13170 dest
= change_address (destmem
, HImode
, tmp
);
13171 emit_move_insn (dest
, src
);
13172 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13173 true, OPTAB_LIB_WIDEN
);
13175 emit_move_insn (offset
, tmp
);
13176 emit_label (label
);
13177 LABEL_NUSES (label
) = 1;
13181 rtx label
= ix86_expand_aligntest (count
, 1, true);
13182 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13183 src
= change_address (srcmem
, QImode
, tmp
);
13184 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13185 dest
= change_address (destmem
, QImode
, tmp
);
13186 emit_move_insn (dest
, src
);
13187 emit_label (label
);
13188 LABEL_NUSES (label
) = 1;
13193 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13195 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13196 rtx count
, int max_size
)
13199 expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13200 count
, 1, OPTAB_DIRECT
);
13201 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13202 gen_lowpart (QImode
, value
), count
, QImode
,
13206 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13208 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13212 if (CONST_INT_P (count
))
13214 HOST_WIDE_INT countval
= INTVAL (count
);
13217 if ((countval
& 0x16) && max_size
> 16)
13221 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13222 emit_insn (gen_strset (destptr
, dest
, value
));
13223 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13224 emit_insn (gen_strset (destptr
, dest
, value
));
13227 gcc_unreachable ();
13230 if ((countval
& 0x08) && max_size
> 8)
13234 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13235 emit_insn (gen_strset (destptr
, dest
, value
));
13239 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13240 emit_insn (gen_strset (destptr
, dest
, value
));
13241 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13242 emit_insn (gen_strset (destptr
, dest
, value
));
13246 if ((countval
& 0x04) && max_size
> 4)
13248 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13249 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13252 if ((countval
& 0x02) && max_size
> 2)
13254 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13255 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13258 if ((countval
& 0x01) && max_size
> 1)
13260 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13261 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13268 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13273 rtx label
= ix86_expand_aligntest (count
, 16, true);
13276 dest
= change_address (destmem
, DImode
, destptr
);
13277 emit_insn (gen_strset (destptr
, dest
, value
));
13278 emit_insn (gen_strset (destptr
, dest
, value
));
13282 dest
= change_address (destmem
, SImode
, destptr
);
13283 emit_insn (gen_strset (destptr
, dest
, value
));
13284 emit_insn (gen_strset (destptr
, dest
, value
));
13285 emit_insn (gen_strset (destptr
, dest
, value
));
13286 emit_insn (gen_strset (destptr
, dest
, value
));
13288 emit_label (label
);
13289 LABEL_NUSES (label
) = 1;
13293 rtx label
= ix86_expand_aligntest (count
, 8, true);
13296 dest
= change_address (destmem
, DImode
, destptr
);
13297 emit_insn (gen_strset (destptr
, dest
, value
));
13301 dest
= change_address (destmem
, SImode
, destptr
);
13302 emit_insn (gen_strset (destptr
, dest
, value
));
13303 emit_insn (gen_strset (destptr
, dest
, value
));
13305 emit_label (label
);
13306 LABEL_NUSES (label
) = 1;
13310 rtx label
= ix86_expand_aligntest (count
, 4, true);
13311 dest
= change_address (destmem
, SImode
, destptr
);
13312 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13313 emit_label (label
);
13314 LABEL_NUSES (label
) = 1;
13318 rtx label
= ix86_expand_aligntest (count
, 2, true);
13319 dest
= change_address (destmem
, HImode
, destptr
);
13320 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13321 emit_label (label
);
13322 LABEL_NUSES (label
) = 1;
13326 rtx label
= ix86_expand_aligntest (count
, 1, true);
13327 dest
= change_address (destmem
, QImode
, destptr
);
13328 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13329 emit_label (label
);
13330 LABEL_NUSES (label
) = 1;
13334 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13335 DESIRED_ALIGNMENT. */
13337 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13338 rtx destptr
, rtx srcptr
, rtx count
,
13339 int align
, int desired_alignment
)
13341 if (align
<= 1 && desired_alignment
> 1)
13343 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13344 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13345 destmem
= change_address (destmem
, QImode
, destptr
);
13346 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13347 ix86_adjust_counter (count
, 1);
13348 emit_label (label
);
13349 LABEL_NUSES (label
) = 1;
13351 if (align
<= 2 && desired_alignment
> 2)
13353 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13354 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13355 destmem
= change_address (destmem
, HImode
, destptr
);
13356 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13357 ix86_adjust_counter (count
, 2);
13358 emit_label (label
);
13359 LABEL_NUSES (label
) = 1;
13361 if (align
<= 4 && desired_alignment
> 4)
13363 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13364 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13365 destmem
= change_address (destmem
, SImode
, destptr
);
13366 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13367 ix86_adjust_counter (count
, 4);
13368 emit_label (label
);
13369 LABEL_NUSES (label
) = 1;
13371 gcc_assert (desired_alignment
<= 8);
13374 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13375 DESIRED_ALIGNMENT. */
13377 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13378 int align
, int desired_alignment
)
13380 if (align
<= 1 && desired_alignment
> 1)
13382 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13383 destmem
= change_address (destmem
, QImode
, destptr
);
13384 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13385 ix86_adjust_counter (count
, 1);
13386 emit_label (label
);
13387 LABEL_NUSES (label
) = 1;
13389 if (align
<= 2 && desired_alignment
> 2)
13391 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13392 destmem
= change_address (destmem
, HImode
, destptr
);
13393 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13394 ix86_adjust_counter (count
, 2);
13395 emit_label (label
);
13396 LABEL_NUSES (label
) = 1;
13398 if (align
<= 4 && desired_alignment
> 4)
13400 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13401 destmem
= change_address (destmem
, SImode
, destptr
);
13402 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13403 ix86_adjust_counter (count
, 4);
13404 emit_label (label
);
13405 LABEL_NUSES (label
) = 1;
13407 gcc_assert (desired_alignment
<= 8);
13410 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13411 static enum stringop_alg
13412 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
13413 int *dynamic_check
)
13415 const struct stringop_algs
* algs
;
13417 *dynamic_check
= -1;
13419 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
13421 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
13422 if (stringop_alg
!= no_stringop
)
13423 return stringop_alg
;
13424 /* rep; movq or rep; movl is the smallest variant. */
13425 else if (optimize_size
)
13427 if (!count
|| (count
& 3))
13428 return rep_prefix_1_byte
;
13430 return rep_prefix_4_byte
;
13432 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13434 else if (expected_size
!= -1 && expected_size
< 4)
13435 return loop_1_byte
;
13436 else if (expected_size
!= -1)
13439 enum stringop_alg alg
= libcall
;
13440 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13442 gcc_assert (algs
->size
[i
].max
);
13443 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
13445 if (algs
->size
[i
].alg
!= libcall
)
13446 alg
= algs
->size
[i
].alg
;
13447 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13448 last non-libcall inline algorithm. */
13449 if (TARGET_INLINE_ALL_STRINGOPS
)
13451 /* When the current size is best to be copied by a libcall,
13452 but we are still forced to inline, run the heuristic bellow
13453 that will pick code for medium sized blocks. */
13454 if (alg
!= libcall
)
13459 return algs
->size
[i
].alg
;
13462 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
13464 /* When asked to inline the call anyway, try to pick meaningful choice.
13465 We look for maximal size of block that is faster to copy by hand and
13466 take blocks of at most of that size guessing that average size will
13467 be roughly half of the block.
13469 If this turns out to be bad, we might simply specify the preferred
13470 choice in ix86_costs. */
13471 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13472 && algs
->unknown_size
== libcall
)
13475 enum stringop_alg alg
;
13478 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13479 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
13480 max
= algs
->size
[i
].max
;
13483 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
13484 gcc_assert (*dynamic_check
== -1);
13485 gcc_assert (alg
!= libcall
);
13486 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13487 *dynamic_check
= max
;
13490 return algs
->unknown_size
;
13493 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13494 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13496 decide_alignment (int align
,
13497 enum stringop_alg alg
,
13500 int desired_align
= 0;
13504 gcc_unreachable ();
13506 case unrolled_loop
:
13507 desired_align
= GET_MODE_SIZE (Pmode
);
13509 case rep_prefix_8_byte
:
13512 case rep_prefix_4_byte
:
13513 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13514 copying whole cacheline at once. */
13515 if (TARGET_PENTIUMPRO
)
13520 case rep_prefix_1_byte
:
13521 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13522 copying whole cacheline at once. */
13523 if (TARGET_PENTIUMPRO
)
13537 if (desired_align
< align
)
13538 desired_align
= align
;
13539 if (expected_size
!= -1 && expected_size
< 4)
13540 desired_align
= align
;
13541 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  E.g. 0 -> 1, 1 -> 2,
   7 -> 8, 8 -> 16.  VAL is assumed small enough that the result does not
   overflow int.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
13554 /* Expand string move (memcpy) operation. Use i386 string operations when
13555 profitable. expand_clrmem contains similar code. The code depends upon
13556 architecture, block size and alignment, but always has the same
13559 1) Prologue guard: Conditional that jumps up to epilogues for small
13560 blocks that can be handled by epilogue alone. This is faster but
13561 also needed for correctness, since prologue assume the block is larger
13562 than the desired alignment.
13564 Optional dynamic check for size and libcall for large
13565 blocks is emitted here too, with -minline-stringops-dynamically.
13567 2) Prologue: copy first few bytes in order to get destination aligned
13568 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
13569 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
13570 We emit either a jump tree on power of two sized blocks, or a byte loop.
13572 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
13573 with specified algorithm.
13575 4) Epilogue: code copying tail of the block that is too small to be
13576 handled by main body (or up to size guarded by prologue guard). */
13579 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
13580 rtx expected_align_exp
, rtx expected_size_exp
)
13586 rtx jump_around_label
= NULL
;
13587 HOST_WIDE_INT align
= 1;
13588 unsigned HOST_WIDE_INT count
= 0;
13589 HOST_WIDE_INT expected_size
= -1;
13590 int size_needed
= 0, epilogue_size_needed
;
13591 int desired_align
= 0;
13592 enum stringop_alg alg
;
13595 if (CONST_INT_P (align_exp
))
13596 align
= INTVAL (align_exp
);
13597 /* i386 can do misaligned access on reasonably increased cost. */
13598 if (CONST_INT_P (expected_align_exp
)
13599 && INTVAL (expected_align_exp
) > align
)
13600 align
= INTVAL (expected_align_exp
);
13601 if (CONST_INT_P (count_exp
))
13602 count
= expected_size
= INTVAL (count_exp
);
13603 if (CONST_INT_P (expected_size_exp
) && count
== 0)
13604 expected_size
= INTVAL (expected_size_exp
);
13606 /* Step 0: Decide on preferred algorithm, desired alignment and
13607 size of chunks to be copied by main loop. */
13609 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
13610 desired_align
= decide_alignment (align
, alg
, expected_size
);
13612 if (!TARGET_ALIGN_STRINGOPS
)
13613 align
= desired_align
;
13615 if (alg
== libcall
)
13617 gcc_assert (alg
!= no_stringop
);
13619 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
13620 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13621 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
13626 gcc_unreachable ();
13628 size_needed
= GET_MODE_SIZE (Pmode
);
13630 case unrolled_loop
:
13631 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
13633 case rep_prefix_8_byte
:
13636 case rep_prefix_4_byte
:
13639 case rep_prefix_1_byte
:
13645 epilogue_size_needed
= size_needed
;
13647 /* Step 1: Prologue guard. */
13649 /* Alignment code needs count to be in register. */
13650 if (CONST_INT_P (count_exp
) && desired_align
> align
)
13652 enum machine_mode mode
= SImode
;
13653 if (TARGET_64BIT
&& (count
& ~0xffffffff))
13655 count_exp
= force_reg (mode
, count_exp
);
13657 gcc_assert (desired_align
>= 1 && align
>= 1);
13659 /* Ensure that alignment prologue won't copy past end of block. */
13660 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
13663 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
13665 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
13666 Make sure it is power of 2. */
13667 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
13669 label
= gen_label_rtx ();
13670 emit_cmp_and_jump_insns (count_exp
,
13671 GEN_INT (epilogue_size_needed
),
13672 LTU
, 0, GET_MODE (count_exp
), 1, label
);
13673 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
13674 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
13676 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
13678 /* Emit code to decide on runtime whether library call or inline should be
13680 if (dynamic_check
!= -1)
13682 rtx hot_label
= gen_label_rtx ();
13683 jump_around_label
= gen_label_rtx ();
13684 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
13685 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
13686 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13687 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
13688 emit_jump (jump_around_label
);
13689 emit_label (hot_label
);
13692 /* Step 2: Alignment prologue. */
13694 if (desired_align
> align
)
13696 /* Except for the first move in epilogue, we no longer know
13697 constant offset in aliasing info. It don't seems to worth
13698 the pain to maintain it for the first move, so throw away
13700 src
= change_address (src
, BLKmode
, srcreg
);
13701 dst
= change_address (dst
, BLKmode
, destreg
);
13702 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
13705 if (label
&& size_needed
== 1)
13707 emit_label (label
);
13708 LABEL_NUSES (label
) = 1;
13712 /* Step 3: Main loop. */
13718 gcc_unreachable ();
13720 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13721 count_exp
, QImode
, 1, expected_size
);
13724 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13725 count_exp
, Pmode
, 1, expected_size
);
13727 case unrolled_loop
:
13728 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
13729 registers for 4 temporaries anyway. */
13730 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13731 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
13734 case rep_prefix_8_byte
:
13735 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13738 case rep_prefix_4_byte
:
13739 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13742 case rep_prefix_1_byte
:
13743 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13747 /* Adjust properly the offset of src and dest memory for aliasing. */
13748 if (CONST_INT_P (count_exp
))
13750 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
13751 (count
/ size_needed
) * size_needed
);
13752 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
13753 (count
/ size_needed
) * size_needed
);
13757 src
= change_address (src
, BLKmode
, srcreg
);
13758 dst
= change_address (dst
, BLKmode
, destreg
);
13761 /* Step 4: Epilogue to copy the remaining bytes. */
13765 /* When the main loop is done, COUNT_EXP might hold original count,
13766 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
13767 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
13768 bytes. Compensate if needed. */
13770 if (size_needed
< epilogue_size_needed
)
13773 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
13774 GEN_INT (size_needed
- 1), count_exp
, 1,
13776 if (tmp
!= count_exp
)
13777 emit_move_insn (count_exp
, tmp
);
13779 emit_label (label
);
13780 LABEL_NUSES (label
) = 1;
13783 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
13784 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
13785 epilogue_size_needed
);
13786 if (jump_around_label
)
13787 emit_label (jump_around_label
);
13791 /* Helper function for memcpy. For QImode value 0xXY produce
13792 0xXYXYXYXY of wide specified by MODE. This is essentially
13793 a * 0x10101010, but we can do slightly better than
13794 synth_mult by unwinding the sequence by hand on CPUs with
13797 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
13799 enum machine_mode valmode
= GET_MODE (val
);
13801 int nops
= mode
== DImode
? 3 : 2;
13803 gcc_assert (mode
== SImode
|| mode
== DImode
);
13804 if (val
== const0_rtx
)
13805 return copy_to_mode_reg (mode
, const0_rtx
);
13806 if (CONST_INT_P (val
))
13808 HOST_WIDE_INT v
= INTVAL (val
) & 255;
13812 if (mode
== DImode
)
13813 v
|= (v
<< 16) << 16;
13814 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
13817 if (valmode
== VOIDmode
)
13819 if (valmode
!= QImode
)
13820 val
= gen_lowpart (QImode
, val
);
13821 if (mode
== QImode
)
13823 if (!TARGET_PARTIAL_REG_STALL
)
13825 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
13826 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
13827 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
13828 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
13830 rtx reg
= convert_modes (mode
, QImode
, val
, true);
13831 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
13832 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
13837 rtx reg
= convert_modes (mode
, QImode
, val
, true);
13839 if (!TARGET_PARTIAL_REG_STALL
)
13840 if (mode
== SImode
)
13841 emit_insn (gen_movsi_insv_1 (reg
, reg
));
13843 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
13846 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
13847 NULL
, 1, OPTAB_DIRECT
);
13849 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
13851 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
13852 NULL
, 1, OPTAB_DIRECT
);
13853 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
13854 if (mode
== SImode
)
13856 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
13857 NULL
, 1, OPTAB_DIRECT
);
13858 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
13863 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
13864 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
13865 alignment from ALIGN to DESIRED_ALIGN. */
13867 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
13872 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
13873 promoted_val
= promote_duplicated_reg (DImode
, val
);
13874 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
13875 promoted_val
= promote_duplicated_reg (SImode
, val
);
13876 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
13877 promoted_val
= promote_duplicated_reg (HImode
, val
);
13879 promoted_val
= val
;
13881 return promoted_val
;
13884 /* Expand string clear operation (bzero). Use i386 string operations when
13885 profitable. See expand_movmem comment for explanation of individual
13886 steps performed. */
13888 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
13889 rtx expected_align_exp
, rtx expected_size_exp
)
13894 rtx jump_around_label
= NULL
;
13895 HOST_WIDE_INT align
= 1;
13896 unsigned HOST_WIDE_INT count
= 0;
13897 HOST_WIDE_INT expected_size
= -1;
13898 int size_needed
= 0, epilogue_size_needed
;
13899 int desired_align
= 0;
13900 enum stringop_alg alg
;
13901 rtx promoted_val
= NULL
;
13902 bool force_loopy_epilogue
= false;
13905 if (CONST_INT_P (align_exp
))
13906 align
= INTVAL (align_exp
);
13907 /* i386 can do misaligned access on reasonably increased cost. */
13908 if (CONST_INT_P (expected_align_exp
)
13909 && INTVAL (expected_align_exp
) > align
)
13910 align
= INTVAL (expected_align_exp
);
13911 if (CONST_INT_P (count_exp
))
13912 count
= expected_size
= INTVAL (count_exp
);
13913 if (CONST_INT_P (expected_size_exp
) && count
== 0)
13914 expected_size
= INTVAL (expected_size_exp
);
13916 /* Step 0: Decide on preferred algorithm, desired alignment and
13917 size of chunks to be copied by main loop. */
13919 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
13920 desired_align
= decide_alignment (align
, alg
, expected_size
);
13922 if (!TARGET_ALIGN_STRINGOPS
)
13923 align
= desired_align
;
13925 if (alg
== libcall
)
13927 gcc_assert (alg
!= no_stringop
);
13929 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
13930 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13935 gcc_unreachable ();
13937 size_needed
= GET_MODE_SIZE (Pmode
);
13939 case unrolled_loop
:
13940 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
13942 case rep_prefix_8_byte
:
13945 case rep_prefix_4_byte
:
13948 case rep_prefix_1_byte
:
13953 epilogue_size_needed
= size_needed
;
13955 /* Step 1: Prologue guard. */
13957 /* Alignment code needs count to be in register. */
13958 if (CONST_INT_P (count_exp
) && desired_align
> align
)
13960 enum machine_mode mode
= SImode
;
13961 if (TARGET_64BIT
&& (count
& ~0xffffffff))
13963 count_exp
= force_reg (mode
, count_exp
);
13965 /* Do the cheap promotion to allow better CSE across the
13966 main loop and epilogue (ie one load of the big constant in the
13967 front of all code. */
13968 if (CONST_INT_P (val_exp
))
13969 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
13970 desired_align
, align
);
13971 /* Ensure that alignment prologue won't copy past end of block. */
13972 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
13975 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
13977 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
13978 Make sure it is power of 2. */
13979 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
13981 /* To improve performance of small blocks, we jump around the VAL
13982 promoting mode. This mean that if the promoted VAL is not constant,
13983 we might not use it in the epilogue and have to use byte
13985 if (epilogue_size_needed
> 2 && !promoted_val
)
13986 force_loopy_epilogue
= true;
13987 label
= gen_label_rtx ();
13988 emit_cmp_and_jump_insns (count_exp
,
13989 GEN_INT (epilogue_size_needed
),
13990 LTU
, 0, GET_MODE (count_exp
), 1, label
);
13991 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
13992 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
13994 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
13996 if (dynamic_check
!= -1)
13998 rtx hot_label
= gen_label_rtx ();
13999 jump_around_label
= gen_label_rtx ();
14000 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14001 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14002 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14003 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14004 emit_jump (jump_around_label
);
14005 emit_label (hot_label
);
14008 /* Step 2: Alignment prologue. */
14010 /* Do the expensive promotion once we branched off the small blocks. */
14012 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14013 desired_align
, align
);
14014 gcc_assert (desired_align
>= 1 && align
>= 1);
14016 if (desired_align
> align
)
14018 /* Except for the first move in epilogue, we no longer know
14019 constant offset in aliasing info. It don't seems to worth
14020 the pain to maintain it for the first move, so throw away
14022 dst
= change_address (dst
, BLKmode
, destreg
);
14023 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14026 if (label
&& size_needed
== 1)
14028 emit_label (label
);
14029 LABEL_NUSES (label
) = 1;
14033 /* Step 3: Main loop. */
14039 gcc_unreachable ();
14041 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14042 count_exp
, QImode
, 1, expected_size
);
14045 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14046 count_exp
, Pmode
, 1, expected_size
);
14048 case unrolled_loop
:
14049 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14050 count_exp
, Pmode
, 4, expected_size
);
14052 case rep_prefix_8_byte
:
14053 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14056 case rep_prefix_4_byte
:
14057 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14060 case rep_prefix_1_byte
:
14061 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14065 /* Adjust properly the offset of src and dest memory for aliasing. */
14066 if (CONST_INT_P (count_exp
))
14067 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14068 (count
/ size_needed
) * size_needed
);
14070 dst
= change_address (dst
, BLKmode
, destreg
);
14072 /* Step 4: Epilogue to copy the remaining bytes. */
14076 /* When the main loop is done, COUNT_EXP might hold original count,
14077 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14078 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14079 bytes. Compensate if needed. */
14081 if (size_needed
< desired_align
- align
)
14084 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14085 GEN_INT (size_needed
- 1), count_exp
, 1,
14087 size_needed
= desired_align
- align
+ 1;
14088 if (tmp
!= count_exp
)
14089 emit_move_insn (count_exp
, tmp
);
14091 emit_label (label
);
14092 LABEL_NUSES (label
) = 1;
14094 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14096 if (force_loopy_epilogue
)
14097 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14100 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14103 if (jump_around_label
)
14104 emit_label (jump_around_label
);
14108 /* Expand strlen. */
14110 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14112 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14114 /* The generic case of strlen expander is long. Avoid it's
14115 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14117 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14118 && !TARGET_INLINE_ALL_STRINGOPS
14120 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14123 addr
= force_reg (Pmode
, XEXP (src
, 0));
14124 scratch1
= gen_reg_rtx (Pmode
);
14126 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14129 /* Well it seems that some optimizer does not combine a call like
14130 foo(strlen(bar), strlen(bar));
14131 when the move and the subtraction is done here. It does calculate
14132 the length just once when these instructions are done inside of
14133 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14134 often used and I use one fewer register for the lifetime of
14135 output_strlen_unroll() this is better. */
14137 emit_move_insn (out
, addr
);
14139 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14141 /* strlensi_unroll_1 returns the address of the zero at the end of
14142 the string, like memchr(), so compute the length by subtracting
14143 the start address. */
14145 emit_insn (gen_subdi3 (out
, out
, addr
));
14147 emit_insn (gen_subsi3 (out
, out
, addr
));
14152 scratch2
= gen_reg_rtx (Pmode
);
14153 scratch3
= gen_reg_rtx (Pmode
);
14154 scratch4
= force_reg (Pmode
, constm1_rtx
);
14156 emit_move_insn (scratch3
, addr
);
14157 eoschar
= force_reg (QImode
, eoschar
);
14159 src
= replace_equiv_address_nv (src
, scratch3
);
14161 /* If .md starts supporting :P, this can be done in .md. */
14162 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14163 scratch4
), UNSPEC_SCAS
);
14164 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14167 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14168 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14172 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14173 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14179 /* Expand the appropriate insns for doing strlen if not just doing
14182 out = result, initialized with the start address
14183 align_rtx = alignment of the address.
14184 scratch = scratch register, initialized with the startaddress when
14185 not aligned, otherwise undefined
14187 This is just the body. It needs the initializations mentioned above and
14188 some address computing at the end. These things are done in i386.md. */
14191 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14195 rtx align_2_label
= NULL_RTX
;
14196 rtx align_3_label
= NULL_RTX
;
14197 rtx align_4_label
= gen_label_rtx ();
14198 rtx end_0_label
= gen_label_rtx ();
14200 rtx tmpreg
= gen_reg_rtx (SImode
);
14201 rtx scratch
= gen_reg_rtx (SImode
);
14205 if (CONST_INT_P (align_rtx
))
14206 align
= INTVAL (align_rtx
);
14208 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14210 /* Is there a known alignment and is it less than 4? */
14213 rtx scratch1
= gen_reg_rtx (Pmode
);
14214 emit_move_insn (scratch1
, out
);
14215 /* Is there a known alignment and is it not 2? */
14218 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14219 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14221 /* Leave just the 3 lower bits. */
14222 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14223 NULL_RTX
, 0, OPTAB_WIDEN
);
14225 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14226 Pmode
, 1, align_4_label
);
14227 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14228 Pmode
, 1, align_2_label
);
14229 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14230 Pmode
, 1, align_3_label
);
14234 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14235 check if is aligned to 4 - byte. */
14237 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14238 NULL_RTX
, 0, OPTAB_WIDEN
);
14240 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14241 Pmode
, 1, align_4_label
);
14244 mem
= change_address (src
, QImode
, out
);
14246 /* Now compare the bytes. */
14248 /* Compare the first n unaligned byte on a byte per byte basis. */
14249 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14250 QImode
, 1, end_0_label
);
14252 /* Increment the address. */
14254 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14256 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14258 /* Not needed with an alignment of 2 */
14261 emit_label (align_2_label
);
14263 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14267 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14269 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14271 emit_label (align_3_label
);
14274 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14278 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14280 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14283 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14284 align this loop. It gives only huge programs, but does not help to
14286 emit_label (align_4_label
);
14288 mem
= change_address (src
, SImode
, out
);
14289 emit_move_insn (scratch
, mem
);
14291 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14293 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14295 /* This formula yields a nonzero result iff one of the bytes is zero.
14296 This saves three branches inside loop and many cycles. */
14298 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14299 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14300 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14301 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14302 gen_int_mode (0x80808080, SImode
)));
14303 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14308 rtx reg
= gen_reg_rtx (SImode
);
14309 rtx reg2
= gen_reg_rtx (Pmode
);
14310 emit_move_insn (reg
, tmpreg
);
14311 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14313 /* If zero is not in the first two bytes, move two bytes forward. */
14314 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14315 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14316 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14317 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14318 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14321 /* Emit lea manually to avoid clobbering of flags. */
14322 emit_insn (gen_rtx_SET (SImode
, reg2
,
14323 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14325 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14326 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14327 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14328 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14335 rtx end_2_label
= gen_label_rtx ();
14336 /* Is zero in the first two bytes? */
14338 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14339 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14340 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14341 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14342 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14344 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14345 JUMP_LABEL (tmp
) = end_2_label
;
14347 /* Not in the first two. Move two bytes forward. */
14348 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14350 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14352 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14354 emit_label (end_2_label
);
14358 /* Avoid branch in fixing the byte. */
14359 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14360 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
14361 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14363 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14365 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14367 emit_label (end_0_label
);
14371 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14372 rtx callarg2 ATTRIBUTE_UNUSED
,
14373 rtx pop
, int sibcall
)
14375 rtx use
= NULL
, call
;
14377 if (pop
== const0_rtx
)
14379 gcc_assert (!TARGET_64BIT
|| !pop
);
14381 if (TARGET_MACHO
&& !TARGET_64BIT
)
14384 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14385 fnaddr
= machopic_indirect_call_target (fnaddr
);
14390 /* Static functions and indirect calls don't need the pic register. */
14391 if (! TARGET_64BIT
&& flag_pic
14392 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14393 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14394 use_reg (&use
, pic_offset_table_rtx
);
14397 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14399 rtx al
= gen_rtx_REG (QImode
, 0);
14400 emit_move_insn (al
, callarg2
);
14401 use_reg (&use
, al
);
14404 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14406 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14407 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14409 if (sibcall
&& TARGET_64BIT
14410 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
14413 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14414 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
14415 emit_move_insn (fnaddr
, addr
);
14416 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14419 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
14421 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
14424 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
14425 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
14426 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
14429 call
= emit_call_insn (call
);
14431 CALL_INSN_FUNCTION_USAGE (call
) = use
;
14435 /* Clear stack slot assignments remembered from previous functions.
14436 This is called from INIT_EXPANDERS once before RTL is emitted for each
14439 static struct machine_function
*
14440 ix86_init_machine_status (void)
14442 struct machine_function
*f
;
14444 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
14445 f
->use_fast_prologue_epilogue_nregs
= -1;
14446 f
->tls_descriptor_call_expanded_p
= 0;
14451 /* Return a MEM corresponding to a stack slot with mode MODE.
14452 Allocate a new slot if necessary.
14454 The RTL for a function can have several slots available: N is
14455 which slot to use. */
14458 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
14460 struct stack_local_entry
*s
;
14462 gcc_assert (n
< MAX_386_STACK_LOCALS
);
14464 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
14465 if (s
->mode
== mode
&& s
->n
== n
)
14466 return copy_rtx (s
->rtl
);
14468 s
= (struct stack_local_entry
*)
14469 ggc_alloc (sizeof (struct stack_local_entry
));
14472 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
14474 s
->next
= ix86_stack_locals
;
14475 ix86_stack_locals
= s
;
14479 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14481 static GTY(()) rtx ix86_tls_symbol
;
14483 ix86_tls_get_addr (void)
14486 if (!ix86_tls_symbol
)
14488 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14489 (TARGET_ANY_GNU_TLS
14491 ? "___tls_get_addr"
14492 : "__tls_get_addr");
14495 return ix86_tls_symbol
;
14498 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14500 static GTY(()) rtx ix86_tls_module_base_symbol
;
14502 ix86_tls_module_base (void)
14505 if (!ix86_tls_module_base_symbol
)
14507 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14508 "_TLS_MODULE_BASE_");
14509 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
14510 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
14513 return ix86_tls_module_base_symbol
;
14516 /* Calculate the length of the memory address in the instruction
14517 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14520 memory_address_length (rtx addr
)
14522 struct ix86_address parts
;
14523 rtx base
, index
, disp
;
14527 if (GET_CODE (addr
) == PRE_DEC
14528 || GET_CODE (addr
) == POST_INC
14529 || GET_CODE (addr
) == PRE_MODIFY
14530 || GET_CODE (addr
) == POST_MODIFY
)
14533 ok
= ix86_decompose_address (addr
, &parts
);
14536 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14537 parts
.base
= SUBREG_REG (parts
.base
);
14538 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14539 parts
.index
= SUBREG_REG (parts
.index
);
14542 index
= parts
.index
;
14547 - esp as the base always wants an index,
14548 - ebp as the base always wants a displacement. */
14550 /* Register Indirect. */
14551 if (base
&& !index
&& !disp
)
14553 /* esp (for its index) and ebp (for its displacement) need
14554 the two-byte modrm form. */
14555 if (addr
== stack_pointer_rtx
14556 || addr
== arg_pointer_rtx
14557 || addr
== frame_pointer_rtx
14558 || addr
== hard_frame_pointer_rtx
)
14562 /* Direct Addressing. */
14563 else if (disp
&& !base
&& !index
)
14568 /* Find the length of the displacement constant. */
14571 if (base
&& satisfies_constraint_K (disp
))
14576 /* ebp always wants a displacement. */
14577 else if (base
== hard_frame_pointer_rtx
)
14580 /* An index requires the two-byte modrm form.... */
14582 /* ...like esp, which always wants an index. */
14583 || base
== stack_pointer_rtx
14584 || base
== arg_pointer_rtx
14585 || base
== frame_pointer_rtx
)
14592 /* Compute default value for "length_immediate" attribute. When SHORTFORM
14593 is set, expect that insn have 8bit immediate alternative. */
14595 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
14599 extract_insn_cached (insn
);
14600 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14601 if (CONSTANT_P (recog_data
.operand
[i
]))
14604 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
14608 switch (get_attr_mode (insn
))
14619 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
14624 fatal_insn ("unknown insn mode", insn
);
14630 /* Compute default value for "length_address" attribute. */
14632 ix86_attr_length_address_default (rtx insn
)
14636 if (get_attr_type (insn
) == TYPE_LEA
)
14638 rtx set
= PATTERN (insn
);
14640 if (GET_CODE (set
) == PARALLEL
)
14641 set
= XVECEXP (set
, 0, 0);
14643 gcc_assert (GET_CODE (set
) == SET
);
14645 return memory_address_length (SET_SRC (set
));
14648 extract_insn_cached (insn
);
14649 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14650 if (MEM_P (recog_data
.operand
[i
]))
14652 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
14658 /* Return the maximum number of instructions a cpu can issue. */
14661 ix86_issue_rate (void)
14665 case PROCESSOR_PENTIUM
:
14669 case PROCESSOR_PENTIUMPRO
:
14670 case PROCESSOR_PENTIUM4
:
14671 case PROCESSOR_ATHLON
:
14673 case PROCESSOR_NOCONA
:
14674 case PROCESSOR_GENERIC32
:
14675 case PROCESSOR_GENERIC64
:
14678 case PROCESSOR_CORE2
:
14686 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14687 by DEP_INSN and nothing set by DEP_INSN. */
14690 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
14694 /* Simplify the test for uninteresting insns. */
14695 if (insn_type
!= TYPE_SETCC
14696 && insn_type
!= TYPE_ICMOV
14697 && insn_type
!= TYPE_FCMOV
14698 && insn_type
!= TYPE_IBR
)
14701 if ((set
= single_set (dep_insn
)) != 0)
14703 set
= SET_DEST (set
);
14706 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
14707 && XVECLEN (PATTERN (dep_insn
), 0) == 2
14708 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
14709 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
14711 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
14712 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
14717 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
14720 /* This test is true if the dependent insn reads the flags but
14721 not any other potentially set register. */
14722 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
14725 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
14731 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14732 address with operands set by DEP_INSN. */
14735 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
14739 if (insn_type
== TYPE_LEA
14742 addr
= PATTERN (insn
);
14744 if (GET_CODE (addr
) == PARALLEL
)
14745 addr
= XVECEXP (addr
, 0, 0);
14747 gcc_assert (GET_CODE (addr
) == SET
);
14749 addr
= SET_SRC (addr
);
14754 extract_insn_cached (insn
);
14755 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14756 if (MEM_P (recog_data
.operand
[i
]))
14758 addr
= XEXP (recog_data
.operand
[i
], 0);
14765 return modified_in_p (addr
, dep_insn
);
14769 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
14771 enum attr_type insn_type
, dep_insn_type
;
14772 enum attr_memory memory
;
14774 int dep_insn_code_number
;
14776 /* Anti and output dependencies have zero cost on all CPUs. */
14777 if (REG_NOTE_KIND (link
) != 0)
14780 dep_insn_code_number
= recog_memoized (dep_insn
);
14782 /* If we can't recognize the insns, we can't really do anything. */
14783 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
14786 insn_type
= get_attr_type (insn
);
14787 dep_insn_type
= get_attr_type (dep_insn
);
14791 case PROCESSOR_PENTIUM
:
14792 /* Address Generation Interlock adds a cycle of latency. */
14793 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14796 /* ??? Compares pair with jump/setcc. */
14797 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
14800 /* Floating point stores require value to be ready one cycle earlier. */
14801 if (insn_type
== TYPE_FMOV
14802 && get_attr_memory (insn
) == MEMORY_STORE
14803 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14807 case PROCESSOR_PENTIUMPRO
:
14808 memory
= get_attr_memory (insn
);
14810 /* INT->FP conversion is expensive. */
14811 if (get_attr_fp_int_src (dep_insn
))
14814 /* There is one cycle extra latency between an FP op and a store. */
14815 if (insn_type
== TYPE_FMOV
14816 && (set
= single_set (dep_insn
)) != NULL_RTX
14817 && (set2
= single_set (insn
)) != NULL_RTX
14818 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
14819 && MEM_P (SET_DEST (set2
)))
14822 /* Show ability of reorder buffer to hide latency of load by executing
14823 in parallel with previous instruction in case
14824 previous instruction is not needed to compute the address. */
14825 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
14826 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14828 /* Claim moves to take one cycle, as core can issue one load
14829 at time and the next load can start cycle later. */
14830 if (dep_insn_type
== TYPE_IMOV
14831 || dep_insn_type
== TYPE_FMOV
)
14839 memory
= get_attr_memory (insn
);
14841 /* The esp dependency is resolved before the instruction is really
14843 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
14844 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
14847 /* INT->FP conversion is expensive. */
14848 if (get_attr_fp_int_src (dep_insn
))
14851 /* Show ability of reorder buffer to hide latency of load by executing
14852 in parallel with previous instruction in case
14853 previous instruction is not needed to compute the address. */
14854 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
14855 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14857 /* Claim moves to take one cycle, as core can issue one load
14858 at time and the next load can start cycle later. */
14859 if (dep_insn_type
== TYPE_IMOV
14860 || dep_insn_type
== TYPE_FMOV
)
14869 case PROCESSOR_ATHLON
:
14871 case PROCESSOR_GENERIC32
:
14872 case PROCESSOR_GENERIC64
:
14873 memory
= get_attr_memory (insn
);
14875 /* Show ability of reorder buffer to hide latency of load by executing
14876 in parallel with previous instruction in case
14877 previous instruction is not needed to compute the address. */
14878 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
14879 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14881 enum attr_unit unit
= get_attr_unit (insn
);
14884 /* Because of the difference between the length of integer and
14885 floating unit pipeline preparation stages, the memory operands
14886 for floating point are cheaper.
14888 ??? For Athlon it the difference is most probably 2. */
14889 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
14892 loadcost
= TARGET_ATHLON
? 2 : 0;
14894 if (cost
>= loadcost
)
14907 /* How many alternative schedules to try. This should be as wide as the
14908 scheduling freedom in the DFA, but no wider. Making this value too
14909 large results extra work for the scheduler. */
14912 ia32_multipass_dfa_lookahead (void)
14914 if (ix86_tune
== PROCESSOR_PENTIUM
)
14917 if (ix86_tune
== PROCESSOR_PENTIUMPRO
14918 || ix86_tune
== PROCESSOR_K6
)
14926 /* Compute the alignment given to a constant that is being placed in memory.
14927 EXP is the constant and ALIGN is the alignment that the object would
14929 The value of this function is used instead of that alignment to align
14933 ix86_constant_alignment (tree exp
, int align
)
14935 if (TREE_CODE (exp
) == REAL_CST
)
14937 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
14939 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
14942 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
14943 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
14944 return BITS_PER_WORD
;
14949 /* Compute the alignment for a static variable.
14950 TYPE is the data type, and ALIGN is the alignment that
14951 the object would ordinarily have. The value of this function is used
14952 instead of that alignment to align the object. */
14955 ix86_data_alignment (tree type
, int align
)
14957 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
14959 if (AGGREGATE_TYPE_P (type
)
14960 && TYPE_SIZE (type
)
14961 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14962 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
14963 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
14964 && align
< max_align
)
14967 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14968 to 16byte boundary. */
14971 if (AGGREGATE_TYPE_P (type
)
14972 && TYPE_SIZE (type
)
14973 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14974 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
14975 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14979 if (TREE_CODE (type
) == ARRAY_TYPE
)
14981 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14983 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14986 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14989 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14991 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14994 else if ((TREE_CODE (type
) == RECORD_TYPE
14995 || TREE_CODE (type
) == UNION_TYPE
14996 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14997 && TYPE_FIELDS (type
))
14999 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15001 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15004 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15005 || TREE_CODE (type
) == INTEGER_TYPE
)
15007 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15009 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15016 /* Compute the alignment for a local variable.
15017 TYPE is the data type, and ALIGN is the alignment that
15018 the object would ordinarily have. The value of this macro is used
15019 instead of that alignment to align the object. */
15022 ix86_local_alignment (tree type
, int align
)
15024 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15025 to 16byte boundary. */
15028 if (AGGREGATE_TYPE_P (type
)
15029 && TYPE_SIZE (type
)
15030 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15031 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15032 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15035 if (TREE_CODE (type
) == ARRAY_TYPE
)
15037 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15039 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15042 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15044 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15046 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15049 else if ((TREE_CODE (type
) == RECORD_TYPE
15050 || TREE_CODE (type
) == UNION_TYPE
15051 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15052 && TYPE_FIELDS (type
))
15054 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15056 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15059 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15060 || TREE_CODE (type
) == INTEGER_TYPE
)
15063 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15065 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15071 /* Emit RTL insns to initialize the variable parts of a trampoline.
15072 FNADDR is an RTX for the address of the function's pure code.
15073 CXT is an RTX for the static chain value for the function. */
15075 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15079 /* Compute offset from the end of the jmp to the target function. */
15080 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15081 plus_constant (tramp
, 10),
15082 NULL_RTX
, 1, OPTAB_DIRECT
);
15083 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15084 gen_int_mode (0xb9, QImode
));
15085 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15086 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15087 gen_int_mode (0xe9, QImode
));
15088 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15093 /* Try to load address using shorter movl instead of movabs.
15094 We may want to support movq for kernel mode, but kernel does not use
15095 trampolines at the moment. */
15096 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15098 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15099 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15100 gen_int_mode (0xbb41, HImode
));
15101 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15102 gen_lowpart (SImode
, fnaddr
));
15107 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15108 gen_int_mode (0xbb49, HImode
));
15109 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15113 /* Load static chain using movabs to r10. */
15114 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15115 gen_int_mode (0xba49, HImode
));
15116 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15119 /* Jump to the r11 */
15120 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15121 gen_int_mode (0xff49, HImode
));
15122 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15123 gen_int_mode (0xe3, QImode
));
15125 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15128 #ifdef ENABLE_EXECUTE_STACK
15129 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15130 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15134 /* Codes for all the SSE/MMX builtins. */
15137 IX86_BUILTIN_ADDPS
,
15138 IX86_BUILTIN_ADDSS
,
15139 IX86_BUILTIN_DIVPS
,
15140 IX86_BUILTIN_DIVSS
,
15141 IX86_BUILTIN_MULPS
,
15142 IX86_BUILTIN_MULSS
,
15143 IX86_BUILTIN_SUBPS
,
15144 IX86_BUILTIN_SUBSS
,
15146 IX86_BUILTIN_CMPEQPS
,
15147 IX86_BUILTIN_CMPLTPS
,
15148 IX86_BUILTIN_CMPLEPS
,
15149 IX86_BUILTIN_CMPGTPS
,
15150 IX86_BUILTIN_CMPGEPS
,
15151 IX86_BUILTIN_CMPNEQPS
,
15152 IX86_BUILTIN_CMPNLTPS
,
15153 IX86_BUILTIN_CMPNLEPS
,
15154 IX86_BUILTIN_CMPNGTPS
,
15155 IX86_BUILTIN_CMPNGEPS
,
15156 IX86_BUILTIN_CMPORDPS
,
15157 IX86_BUILTIN_CMPUNORDPS
,
15158 IX86_BUILTIN_CMPEQSS
,
15159 IX86_BUILTIN_CMPLTSS
,
15160 IX86_BUILTIN_CMPLESS
,
15161 IX86_BUILTIN_CMPNEQSS
,
15162 IX86_BUILTIN_CMPNLTSS
,
15163 IX86_BUILTIN_CMPNLESS
,
15164 IX86_BUILTIN_CMPNGTSS
,
15165 IX86_BUILTIN_CMPNGESS
,
15166 IX86_BUILTIN_CMPORDSS
,
15167 IX86_BUILTIN_CMPUNORDSS
,
15169 IX86_BUILTIN_COMIEQSS
,
15170 IX86_BUILTIN_COMILTSS
,
15171 IX86_BUILTIN_COMILESS
,
15172 IX86_BUILTIN_COMIGTSS
,
15173 IX86_BUILTIN_COMIGESS
,
15174 IX86_BUILTIN_COMINEQSS
,
15175 IX86_BUILTIN_UCOMIEQSS
,
15176 IX86_BUILTIN_UCOMILTSS
,
15177 IX86_BUILTIN_UCOMILESS
,
15178 IX86_BUILTIN_UCOMIGTSS
,
15179 IX86_BUILTIN_UCOMIGESS
,
15180 IX86_BUILTIN_UCOMINEQSS
,
15182 IX86_BUILTIN_CVTPI2PS
,
15183 IX86_BUILTIN_CVTPS2PI
,
15184 IX86_BUILTIN_CVTSI2SS
,
15185 IX86_BUILTIN_CVTSI642SS
,
15186 IX86_BUILTIN_CVTSS2SI
,
15187 IX86_BUILTIN_CVTSS2SI64
,
15188 IX86_BUILTIN_CVTTPS2PI
,
15189 IX86_BUILTIN_CVTTSS2SI
,
15190 IX86_BUILTIN_CVTTSS2SI64
,
15192 IX86_BUILTIN_MAXPS
,
15193 IX86_BUILTIN_MAXSS
,
15194 IX86_BUILTIN_MINPS
,
15195 IX86_BUILTIN_MINSS
,
15197 IX86_BUILTIN_LOADUPS
,
15198 IX86_BUILTIN_STOREUPS
,
15199 IX86_BUILTIN_MOVSS
,
15201 IX86_BUILTIN_MOVHLPS
,
15202 IX86_BUILTIN_MOVLHPS
,
15203 IX86_BUILTIN_LOADHPS
,
15204 IX86_BUILTIN_LOADLPS
,
15205 IX86_BUILTIN_STOREHPS
,
15206 IX86_BUILTIN_STORELPS
,
15208 IX86_BUILTIN_MASKMOVQ
,
15209 IX86_BUILTIN_MOVMSKPS
,
15210 IX86_BUILTIN_PMOVMSKB
,
15212 IX86_BUILTIN_MOVNTPS
,
15213 IX86_BUILTIN_MOVNTQ
,
15215 IX86_BUILTIN_LOADDQU
,
15216 IX86_BUILTIN_STOREDQU
,
15218 IX86_BUILTIN_PACKSSWB
,
15219 IX86_BUILTIN_PACKSSDW
,
15220 IX86_BUILTIN_PACKUSWB
,
15222 IX86_BUILTIN_PADDB
,
15223 IX86_BUILTIN_PADDW
,
15224 IX86_BUILTIN_PADDD
,
15225 IX86_BUILTIN_PADDQ
,
15226 IX86_BUILTIN_PADDSB
,
15227 IX86_BUILTIN_PADDSW
,
15228 IX86_BUILTIN_PADDUSB
,
15229 IX86_BUILTIN_PADDUSW
,
15230 IX86_BUILTIN_PSUBB
,
15231 IX86_BUILTIN_PSUBW
,
15232 IX86_BUILTIN_PSUBD
,
15233 IX86_BUILTIN_PSUBQ
,
15234 IX86_BUILTIN_PSUBSB
,
15235 IX86_BUILTIN_PSUBSW
,
15236 IX86_BUILTIN_PSUBUSB
,
15237 IX86_BUILTIN_PSUBUSW
,
15240 IX86_BUILTIN_PANDN
,
15244 IX86_BUILTIN_PAVGB
,
15245 IX86_BUILTIN_PAVGW
,
15247 IX86_BUILTIN_PCMPEQB
,
15248 IX86_BUILTIN_PCMPEQW
,
15249 IX86_BUILTIN_PCMPEQD
,
15250 IX86_BUILTIN_PCMPGTB
,
15251 IX86_BUILTIN_PCMPGTW
,
15252 IX86_BUILTIN_PCMPGTD
,
15254 IX86_BUILTIN_PMADDWD
,
15256 IX86_BUILTIN_PMAXSW
,
15257 IX86_BUILTIN_PMAXUB
,
15258 IX86_BUILTIN_PMINSW
,
15259 IX86_BUILTIN_PMINUB
,
15261 IX86_BUILTIN_PMULHUW
,
15262 IX86_BUILTIN_PMULHW
,
15263 IX86_BUILTIN_PMULLW
,
15265 IX86_BUILTIN_PSADBW
,
15266 IX86_BUILTIN_PSHUFW
,
15268 IX86_BUILTIN_PSLLW
,
15269 IX86_BUILTIN_PSLLD
,
15270 IX86_BUILTIN_PSLLQ
,
15271 IX86_BUILTIN_PSRAW
,
15272 IX86_BUILTIN_PSRAD
,
15273 IX86_BUILTIN_PSRLW
,
15274 IX86_BUILTIN_PSRLD
,
15275 IX86_BUILTIN_PSRLQ
,
15276 IX86_BUILTIN_PSLLWI
,
15277 IX86_BUILTIN_PSLLDI
,
15278 IX86_BUILTIN_PSLLQI
,
15279 IX86_BUILTIN_PSRAWI
,
15280 IX86_BUILTIN_PSRADI
,
15281 IX86_BUILTIN_PSRLWI
,
15282 IX86_BUILTIN_PSRLDI
,
15283 IX86_BUILTIN_PSRLQI
,
15285 IX86_BUILTIN_PUNPCKHBW
,
15286 IX86_BUILTIN_PUNPCKHWD
,
15287 IX86_BUILTIN_PUNPCKHDQ
,
15288 IX86_BUILTIN_PUNPCKLBW
,
15289 IX86_BUILTIN_PUNPCKLWD
,
15290 IX86_BUILTIN_PUNPCKLDQ
,
15292 IX86_BUILTIN_SHUFPS
,
15294 IX86_BUILTIN_RCPPS
,
15295 IX86_BUILTIN_RCPSS
,
15296 IX86_BUILTIN_RSQRTPS
,
15297 IX86_BUILTIN_RSQRTSS
,
15298 IX86_BUILTIN_SQRTPS
,
15299 IX86_BUILTIN_SQRTSS
,
15301 IX86_BUILTIN_UNPCKHPS
,
15302 IX86_BUILTIN_UNPCKLPS
,
15304 IX86_BUILTIN_ANDPS
,
15305 IX86_BUILTIN_ANDNPS
,
15307 IX86_BUILTIN_XORPS
,
15310 IX86_BUILTIN_LDMXCSR
,
15311 IX86_BUILTIN_STMXCSR
,
15312 IX86_BUILTIN_SFENCE
,
15314 /* 3DNow! Original */
15315 IX86_BUILTIN_FEMMS
,
15316 IX86_BUILTIN_PAVGUSB
,
15317 IX86_BUILTIN_PF2ID
,
15318 IX86_BUILTIN_PFACC
,
15319 IX86_BUILTIN_PFADD
,
15320 IX86_BUILTIN_PFCMPEQ
,
15321 IX86_BUILTIN_PFCMPGE
,
15322 IX86_BUILTIN_PFCMPGT
,
15323 IX86_BUILTIN_PFMAX
,
15324 IX86_BUILTIN_PFMIN
,
15325 IX86_BUILTIN_PFMUL
,
15326 IX86_BUILTIN_PFRCP
,
15327 IX86_BUILTIN_PFRCPIT1
,
15328 IX86_BUILTIN_PFRCPIT2
,
15329 IX86_BUILTIN_PFRSQIT1
,
15330 IX86_BUILTIN_PFRSQRT
,
15331 IX86_BUILTIN_PFSUB
,
15332 IX86_BUILTIN_PFSUBR
,
15333 IX86_BUILTIN_PI2FD
,
15334 IX86_BUILTIN_PMULHRW
,
15336 /* 3DNow! Athlon Extensions */
15337 IX86_BUILTIN_PF2IW
,
15338 IX86_BUILTIN_PFNACC
,
15339 IX86_BUILTIN_PFPNACC
,
15340 IX86_BUILTIN_PI2FW
,
15341 IX86_BUILTIN_PSWAPDSI
,
15342 IX86_BUILTIN_PSWAPDSF
,
15345 IX86_BUILTIN_ADDPD
,
15346 IX86_BUILTIN_ADDSD
,
15347 IX86_BUILTIN_DIVPD
,
15348 IX86_BUILTIN_DIVSD
,
15349 IX86_BUILTIN_MULPD
,
15350 IX86_BUILTIN_MULSD
,
15351 IX86_BUILTIN_SUBPD
,
15352 IX86_BUILTIN_SUBSD
,
15354 IX86_BUILTIN_CMPEQPD
,
15355 IX86_BUILTIN_CMPLTPD
,
15356 IX86_BUILTIN_CMPLEPD
,
15357 IX86_BUILTIN_CMPGTPD
,
15358 IX86_BUILTIN_CMPGEPD
,
15359 IX86_BUILTIN_CMPNEQPD
,
15360 IX86_BUILTIN_CMPNLTPD
,
15361 IX86_BUILTIN_CMPNLEPD
,
15362 IX86_BUILTIN_CMPNGTPD
,
15363 IX86_BUILTIN_CMPNGEPD
,
15364 IX86_BUILTIN_CMPORDPD
,
15365 IX86_BUILTIN_CMPUNORDPD
,
15366 IX86_BUILTIN_CMPNEPD
,
15367 IX86_BUILTIN_CMPEQSD
,
15368 IX86_BUILTIN_CMPLTSD
,
15369 IX86_BUILTIN_CMPLESD
,
15370 IX86_BUILTIN_CMPNEQSD
,
15371 IX86_BUILTIN_CMPNLTSD
,
15372 IX86_BUILTIN_CMPNLESD
,
15373 IX86_BUILTIN_CMPORDSD
,
15374 IX86_BUILTIN_CMPUNORDSD
,
15375 IX86_BUILTIN_CMPNESD
,
15377 IX86_BUILTIN_COMIEQSD
,
15378 IX86_BUILTIN_COMILTSD
,
15379 IX86_BUILTIN_COMILESD
,
15380 IX86_BUILTIN_COMIGTSD
,
15381 IX86_BUILTIN_COMIGESD
,
15382 IX86_BUILTIN_COMINEQSD
,
15383 IX86_BUILTIN_UCOMIEQSD
,
15384 IX86_BUILTIN_UCOMILTSD
,
15385 IX86_BUILTIN_UCOMILESD
,
15386 IX86_BUILTIN_UCOMIGTSD
,
15387 IX86_BUILTIN_UCOMIGESD
,
15388 IX86_BUILTIN_UCOMINEQSD
,
15390 IX86_BUILTIN_MAXPD
,
15391 IX86_BUILTIN_MAXSD
,
15392 IX86_BUILTIN_MINPD
,
15393 IX86_BUILTIN_MINSD
,
15395 IX86_BUILTIN_ANDPD
,
15396 IX86_BUILTIN_ANDNPD
,
15398 IX86_BUILTIN_XORPD
,
15400 IX86_BUILTIN_SQRTPD
,
15401 IX86_BUILTIN_SQRTSD
,
15403 IX86_BUILTIN_UNPCKHPD
,
15404 IX86_BUILTIN_UNPCKLPD
,
15406 IX86_BUILTIN_SHUFPD
,
15408 IX86_BUILTIN_LOADUPD
,
15409 IX86_BUILTIN_STOREUPD
,
15410 IX86_BUILTIN_MOVSD
,
15412 IX86_BUILTIN_LOADHPD
,
15413 IX86_BUILTIN_LOADLPD
,
15415 IX86_BUILTIN_CVTDQ2PD
,
15416 IX86_BUILTIN_CVTDQ2PS
,
15418 IX86_BUILTIN_CVTPD2DQ
,
15419 IX86_BUILTIN_CVTPD2PI
,
15420 IX86_BUILTIN_CVTPD2PS
,
15421 IX86_BUILTIN_CVTTPD2DQ
,
15422 IX86_BUILTIN_CVTTPD2PI
,
15424 IX86_BUILTIN_CVTPI2PD
,
15425 IX86_BUILTIN_CVTSI2SD
,
15426 IX86_BUILTIN_CVTSI642SD
,
15428 IX86_BUILTIN_CVTSD2SI
,
15429 IX86_BUILTIN_CVTSD2SI64
,
15430 IX86_BUILTIN_CVTSD2SS
,
15431 IX86_BUILTIN_CVTSS2SD
,
15432 IX86_BUILTIN_CVTTSD2SI
,
15433 IX86_BUILTIN_CVTTSD2SI64
,
15435 IX86_BUILTIN_CVTPS2DQ
,
15436 IX86_BUILTIN_CVTPS2PD
,
15437 IX86_BUILTIN_CVTTPS2DQ
,
15439 IX86_BUILTIN_MOVNTI
,
15440 IX86_BUILTIN_MOVNTPD
,
15441 IX86_BUILTIN_MOVNTDQ
,
15444 IX86_BUILTIN_MASKMOVDQU
,
15445 IX86_BUILTIN_MOVMSKPD
,
15446 IX86_BUILTIN_PMOVMSKB128
,
15448 IX86_BUILTIN_PACKSSWB128
,
15449 IX86_BUILTIN_PACKSSDW128
,
15450 IX86_BUILTIN_PACKUSWB128
,
15452 IX86_BUILTIN_PADDB128
,
15453 IX86_BUILTIN_PADDW128
,
15454 IX86_BUILTIN_PADDD128
,
15455 IX86_BUILTIN_PADDQ128
,
15456 IX86_BUILTIN_PADDSB128
,
15457 IX86_BUILTIN_PADDSW128
,
15458 IX86_BUILTIN_PADDUSB128
,
15459 IX86_BUILTIN_PADDUSW128
,
15460 IX86_BUILTIN_PSUBB128
,
15461 IX86_BUILTIN_PSUBW128
,
15462 IX86_BUILTIN_PSUBD128
,
15463 IX86_BUILTIN_PSUBQ128
,
15464 IX86_BUILTIN_PSUBSB128
,
15465 IX86_BUILTIN_PSUBSW128
,
15466 IX86_BUILTIN_PSUBUSB128
,
15467 IX86_BUILTIN_PSUBUSW128
,
15469 IX86_BUILTIN_PAND128
,
15470 IX86_BUILTIN_PANDN128
,
15471 IX86_BUILTIN_POR128
,
15472 IX86_BUILTIN_PXOR128
,
15474 IX86_BUILTIN_PAVGB128
,
15475 IX86_BUILTIN_PAVGW128
,
15477 IX86_BUILTIN_PCMPEQB128
,
15478 IX86_BUILTIN_PCMPEQW128
,
15479 IX86_BUILTIN_PCMPEQD128
,
15480 IX86_BUILTIN_PCMPGTB128
,
15481 IX86_BUILTIN_PCMPGTW128
,
15482 IX86_BUILTIN_PCMPGTD128
,
15484 IX86_BUILTIN_PMADDWD128
,
15486 IX86_BUILTIN_PMAXSW128
,
15487 IX86_BUILTIN_PMAXUB128
,
15488 IX86_BUILTIN_PMINSW128
,
15489 IX86_BUILTIN_PMINUB128
,
15491 IX86_BUILTIN_PMULUDQ
,
15492 IX86_BUILTIN_PMULUDQ128
,
15493 IX86_BUILTIN_PMULHUW128
,
15494 IX86_BUILTIN_PMULHW128
,
15495 IX86_BUILTIN_PMULLW128
,
15497 IX86_BUILTIN_PSADBW128
,
15498 IX86_BUILTIN_PSHUFHW
,
15499 IX86_BUILTIN_PSHUFLW
,
15500 IX86_BUILTIN_PSHUFD
,
15502 IX86_BUILTIN_PSLLW128
,
15503 IX86_BUILTIN_PSLLD128
,
15504 IX86_BUILTIN_PSLLQ128
,
15505 IX86_BUILTIN_PSRAW128
,
15506 IX86_BUILTIN_PSRAD128
,
15507 IX86_BUILTIN_PSRLW128
,
15508 IX86_BUILTIN_PSRLD128
,
15509 IX86_BUILTIN_PSRLQ128
,
15510 IX86_BUILTIN_PSLLDQI128
,
15511 IX86_BUILTIN_PSLLWI128
,
15512 IX86_BUILTIN_PSLLDI128
,
15513 IX86_BUILTIN_PSLLQI128
,
15514 IX86_BUILTIN_PSRAWI128
,
15515 IX86_BUILTIN_PSRADI128
,
15516 IX86_BUILTIN_PSRLDQI128
,
15517 IX86_BUILTIN_PSRLWI128
,
15518 IX86_BUILTIN_PSRLDI128
,
15519 IX86_BUILTIN_PSRLQI128
,
15521 IX86_BUILTIN_PUNPCKHBW128
,
15522 IX86_BUILTIN_PUNPCKHWD128
,
15523 IX86_BUILTIN_PUNPCKHDQ128
,
15524 IX86_BUILTIN_PUNPCKHQDQ128
,
15525 IX86_BUILTIN_PUNPCKLBW128
,
15526 IX86_BUILTIN_PUNPCKLWD128
,
15527 IX86_BUILTIN_PUNPCKLDQ128
,
15528 IX86_BUILTIN_PUNPCKLQDQ128
,
15530 IX86_BUILTIN_CLFLUSH
,
15531 IX86_BUILTIN_MFENCE
,
15532 IX86_BUILTIN_LFENCE
,
15534 /* Prescott New Instructions. */
15535 IX86_BUILTIN_ADDSUBPS
,
15536 IX86_BUILTIN_HADDPS
,
15537 IX86_BUILTIN_HSUBPS
,
15538 IX86_BUILTIN_MOVSHDUP
,
15539 IX86_BUILTIN_MOVSLDUP
,
15540 IX86_BUILTIN_ADDSUBPD
,
15541 IX86_BUILTIN_HADDPD
,
15542 IX86_BUILTIN_HSUBPD
,
15543 IX86_BUILTIN_LDDQU
,
15545 IX86_BUILTIN_MONITOR
,
15546 IX86_BUILTIN_MWAIT
,
15549 IX86_BUILTIN_PHADDW
,
15550 IX86_BUILTIN_PHADDD
,
15551 IX86_BUILTIN_PHADDSW
,
15552 IX86_BUILTIN_PHSUBW
,
15553 IX86_BUILTIN_PHSUBD
,
15554 IX86_BUILTIN_PHSUBSW
,
15555 IX86_BUILTIN_PMADDUBSW
,
15556 IX86_BUILTIN_PMULHRSW
,
15557 IX86_BUILTIN_PSHUFB
,
15558 IX86_BUILTIN_PSIGNB
,
15559 IX86_BUILTIN_PSIGNW
,
15560 IX86_BUILTIN_PSIGND
,
15561 IX86_BUILTIN_PALIGNR
,
15562 IX86_BUILTIN_PABSB
,
15563 IX86_BUILTIN_PABSW
,
15564 IX86_BUILTIN_PABSD
,
15566 IX86_BUILTIN_PHADDW128
,
15567 IX86_BUILTIN_PHADDD128
,
15568 IX86_BUILTIN_PHADDSW128
,
15569 IX86_BUILTIN_PHSUBW128
,
15570 IX86_BUILTIN_PHSUBD128
,
15571 IX86_BUILTIN_PHSUBSW128
,
15572 IX86_BUILTIN_PMADDUBSW128
,
15573 IX86_BUILTIN_PMULHRSW128
,
15574 IX86_BUILTIN_PSHUFB128
,
15575 IX86_BUILTIN_PSIGNB128
,
15576 IX86_BUILTIN_PSIGNW128
,
15577 IX86_BUILTIN_PSIGND128
,
15578 IX86_BUILTIN_PALIGNR128
,
15579 IX86_BUILTIN_PABSB128
,
15580 IX86_BUILTIN_PABSW128
,
15581 IX86_BUILTIN_PABSD128
,
15583 IX86_BUILTIN_VEC_INIT_V2SI
,
15584 IX86_BUILTIN_VEC_INIT_V4HI
,
15585 IX86_BUILTIN_VEC_INIT_V8QI
,
15586 IX86_BUILTIN_VEC_EXT_V2DF
,
15587 IX86_BUILTIN_VEC_EXT_V2DI
,
15588 IX86_BUILTIN_VEC_EXT_V4SF
,
15589 IX86_BUILTIN_VEC_EXT_V4SI
,
15590 IX86_BUILTIN_VEC_EXT_V8HI
,
15591 IX86_BUILTIN_VEC_EXT_V2SI
,
15592 IX86_BUILTIN_VEC_EXT_V4HI
,
15593 IX86_BUILTIN_VEC_SET_V8HI
,
15594 IX86_BUILTIN_VEC_SET_V4HI
,
15599 /* Table for the ix86 builtin decls. */
15600 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
15602 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
15603 * if the target_flags include one of MASK. Stores the function decl
15604 * in the ix86_builtins array.
15605 * Returns the function decl or NULL_TREE, if the builtin was not added. */
15608 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
15610 tree decl
= NULL_TREE
;
15612 if (mask
& target_flags
15613 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
15615 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
15617 ix86_builtins
[(int) code
] = decl
;
15623 /* Like def_builtin, but also marks the function decl "const". */
15626 def_builtin_const (int mask
, const char *name
, tree type
,
15627 enum ix86_builtins code
)
15629 tree decl
= def_builtin (mask
, name
, type
, code
);
15631 TREE_READONLY (decl
) = 1;
15635 /* Bits for builtin_description.flag. */
15637 /* Set when we don't support the comparison natively, and should
15638 swap_comparison in order to support it. */
15639 #define BUILTIN_DESC_SWAP_OPERANDS 1
15641 struct builtin_description
15643 const unsigned int mask
;
15644 const enum insn_code icode
;
15645 const char *const name
;
15646 const enum ix86_builtins code
;
15647 const enum rtx_code comparison
;
15648 const unsigned int flag
;
15651 static const struct builtin_description bdesc_comi
[] =
15653 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
15654 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
15655 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
15656 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
15657 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
15658 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
15659 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
15660 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
15661 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
15662 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
15663 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
15664 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
15665 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
15666 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
15667 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
15668 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
15669 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
15670 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
15671 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
15672 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
15673 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
15674 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
15675 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
15676 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
15679 static const struct builtin_description bdesc_2arg
[] =
15682 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
15683 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
15684 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
15685 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
15686 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
15687 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
15688 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
15689 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
15691 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
15692 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
15693 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
15694 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
15695 BUILTIN_DESC_SWAP_OPERANDS
},
15696 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
15697 BUILTIN_DESC_SWAP_OPERANDS
},
15698 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
15699 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
15700 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
15701 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
15702 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
15703 BUILTIN_DESC_SWAP_OPERANDS
},
15704 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
15705 BUILTIN_DESC_SWAP_OPERANDS
},
15706 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
15707 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
15708 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
15709 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
15710 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
15711 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
15712 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
15713 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
15714 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
15715 BUILTIN_DESC_SWAP_OPERANDS
},
15716 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
15717 BUILTIN_DESC_SWAP_OPERANDS
},
15718 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
15720 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
15721 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
15722 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
15723 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
15725 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
15726 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
15727 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
15728 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
15730 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
15731 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
15732 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
15733 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
15734 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
15737 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
15738 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
15739 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
15740 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
15741 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
15742 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
15743 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
15744 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
15746 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
15747 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
15748 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
15749 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
15750 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
15751 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
15752 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
15753 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
15755 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
15756 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
15757 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
15759 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
15760 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
15761 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
15762 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
15764 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
15765 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
15767 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
15768 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
15769 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
15770 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
15771 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
15772 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
15774 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
15775 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
15776 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
15777 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
15779 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
15780 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
15781 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
15782 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
15783 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
15784 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
15787 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
15788 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
15789 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
15791 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
15792 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
15793 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
15795 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
15796 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
15797 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
15798 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
15799 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
15800 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
15802 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
15803 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
15804 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
15805 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
15806 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
15807 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
15809 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
15810 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
15811 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
15812 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
15814 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
15815 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
15818 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
15819 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
15820 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
15821 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
15822 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
15823 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
15824 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
15825 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
15827 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
15828 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
15829 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
15830 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
15831 BUILTIN_DESC_SWAP_OPERANDS
},
15832 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
15833 BUILTIN_DESC_SWAP_OPERANDS
},
15834 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
15835 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
15836 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
15837 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
15838 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
15839 BUILTIN_DESC_SWAP_OPERANDS
},
15840 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
15841 BUILTIN_DESC_SWAP_OPERANDS
},
15842 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
15843 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
15844 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
15845 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
15846 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
15847 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
15848 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
15849 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
15850 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
15852 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
15853 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
15854 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
15855 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
15857 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
15858 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
15859 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
15860 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
15862 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
15863 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
15864 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
15867 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
15868 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
15869 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
15870 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
15871 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
15872 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
15873 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
15874 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
15876 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
15877 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
15878 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
15879 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
15880 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
15881 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
15882 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
15883 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
15885 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
15886 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
15888 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
15889 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
15890 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
15891 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
15893 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
15894 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
15896 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
15897 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
15898 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
15899 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
15900 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
15901 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
15903 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
15904 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
15905 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
15906 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
15908 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
15909 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
15910 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
15911 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
15912 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
15913 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
15914 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
15915 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
15917 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
15918 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
15919 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
15921 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
15922 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
15924 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
15925 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
15927 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
15928 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
15929 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
15931 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
15932 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
15933 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
15935 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
15936 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
15938 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
15940 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
15941 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
15942 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
15943 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
15946 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
15947 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
15948 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
15949 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
15950 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
15951 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
15954 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
15955 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
15956 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
15957 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
15958 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
15959 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
15960 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
15961 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
15962 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
15963 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
15964 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
15965 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
15966 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
15967 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
15968 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
15969 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
15970 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
15971 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
15972 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
15973 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
15974 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
15975 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
15976 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
15977 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
15980 static const struct builtin_description bdesc_1arg
[] =
15982 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
15983 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
15985 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
15986 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
15987 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
15989 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
15990 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
15991 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
15992 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
15993 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
15994 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
15996 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
15997 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
15999 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16001 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16002 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16004 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16005 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16006 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16007 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16008 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16010 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16012 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16013 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16014 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16015 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16017 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16018 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16019 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16022 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16023 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16026 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16027 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16028 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16029 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16030 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16031 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16035 ix86_init_builtins (void)
16038 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
16045 ix86_init_mmx_sse_builtins (void)
16047 const struct builtin_description
* d
;
16050 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16051 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16052 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16053 tree V2DI_type_node
16054 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16055 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16056 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16057 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16058 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16059 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16060 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16062 tree pchar_type_node
= build_pointer_type (char_type_node
);
16063 tree pcchar_type_node
= build_pointer_type (
16064 build_type_variant (char_type_node
, 1, 0));
16065 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16066 tree pcfloat_type_node
= build_pointer_type (
16067 build_type_variant (float_type_node
, 1, 0));
16068 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16069 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16070 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16073 tree int_ftype_v4sf_v4sf
16074 = build_function_type_list (integer_type_node
,
16075 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16076 tree v4si_ftype_v4sf_v4sf
16077 = build_function_type_list (V4SI_type_node
,
16078 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16079 /* MMX/SSE/integer conversions. */
16080 tree int_ftype_v4sf
16081 = build_function_type_list (integer_type_node
,
16082 V4SF_type_node
, NULL_TREE
);
16083 tree int64_ftype_v4sf
16084 = build_function_type_list (long_long_integer_type_node
,
16085 V4SF_type_node
, NULL_TREE
);
16086 tree int_ftype_v8qi
16087 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16088 tree v4sf_ftype_v4sf_int
16089 = build_function_type_list (V4SF_type_node
,
16090 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16091 tree v4sf_ftype_v4sf_int64
16092 = build_function_type_list (V4SF_type_node
,
16093 V4SF_type_node
, long_long_integer_type_node
,
16095 tree v4sf_ftype_v4sf_v2si
16096 = build_function_type_list (V4SF_type_node
,
16097 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16099 /* Miscellaneous. */
16100 tree v8qi_ftype_v4hi_v4hi
16101 = build_function_type_list (V8QI_type_node
,
16102 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16103 tree v4hi_ftype_v2si_v2si
16104 = build_function_type_list (V4HI_type_node
,
16105 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16106 tree v4sf_ftype_v4sf_v4sf_int
16107 = build_function_type_list (V4SF_type_node
,
16108 V4SF_type_node
, V4SF_type_node
,
16109 integer_type_node
, NULL_TREE
);
16110 tree v2si_ftype_v4hi_v4hi
16111 = build_function_type_list (V2SI_type_node
,
16112 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16113 tree v4hi_ftype_v4hi_int
16114 = build_function_type_list (V4HI_type_node
,
16115 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16116 tree v4hi_ftype_v4hi_di
16117 = build_function_type_list (V4HI_type_node
,
16118 V4HI_type_node
, long_long_unsigned_type_node
,
16120 tree v2si_ftype_v2si_di
16121 = build_function_type_list (V2SI_type_node
,
16122 V2SI_type_node
, long_long_unsigned_type_node
,
16124 tree void_ftype_void
16125 = build_function_type (void_type_node
, void_list_node
);
16126 tree void_ftype_unsigned
16127 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16128 tree void_ftype_unsigned_unsigned
16129 = build_function_type_list (void_type_node
, unsigned_type_node
,
16130 unsigned_type_node
, NULL_TREE
);
16131 tree void_ftype_pcvoid_unsigned_unsigned
16132 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16133 unsigned_type_node
, unsigned_type_node
,
16135 tree unsigned_ftype_void
16136 = build_function_type (unsigned_type_node
, void_list_node
);
16137 tree v2si_ftype_v4sf
16138 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16139 /* Loads/stores. */
16140 tree void_ftype_v8qi_v8qi_pchar
16141 = build_function_type_list (void_type_node
,
16142 V8QI_type_node
, V8QI_type_node
,
16143 pchar_type_node
, NULL_TREE
);
16144 tree v4sf_ftype_pcfloat
16145 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16146 /* @@@ the type is bogus */
16147 tree v4sf_ftype_v4sf_pv2si
16148 = build_function_type_list (V4SF_type_node
,
16149 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16150 tree void_ftype_pv2si_v4sf
16151 = build_function_type_list (void_type_node
,
16152 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16153 tree void_ftype_pfloat_v4sf
16154 = build_function_type_list (void_type_node
,
16155 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16156 tree void_ftype_pdi_di
16157 = build_function_type_list (void_type_node
,
16158 pdi_type_node
, long_long_unsigned_type_node
,
16160 tree void_ftype_pv2di_v2di
16161 = build_function_type_list (void_type_node
,
16162 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16163 /* Normal vector unops. */
16164 tree v4sf_ftype_v4sf
16165 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16166 tree v16qi_ftype_v16qi
16167 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16168 tree v8hi_ftype_v8hi
16169 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16170 tree v4si_ftype_v4si
16171 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16172 tree v8qi_ftype_v8qi
16173 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16174 tree v4hi_ftype_v4hi
16175 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16177 /* Normal vector binops. */
16178 tree v4sf_ftype_v4sf_v4sf
16179 = build_function_type_list (V4SF_type_node
,
16180 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16181 tree v8qi_ftype_v8qi_v8qi
16182 = build_function_type_list (V8QI_type_node
,
16183 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16184 tree v4hi_ftype_v4hi_v4hi
16185 = build_function_type_list (V4HI_type_node
,
16186 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16187 tree v2si_ftype_v2si_v2si
16188 = build_function_type_list (V2SI_type_node
,
16189 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16190 tree di_ftype_di_di
16191 = build_function_type_list (long_long_unsigned_type_node
,
16192 long_long_unsigned_type_node
,
16193 long_long_unsigned_type_node
, NULL_TREE
);
16195 tree di_ftype_di_di_int
16196 = build_function_type_list (long_long_unsigned_type_node
,
16197 long_long_unsigned_type_node
,
16198 long_long_unsigned_type_node
,
16199 integer_type_node
, NULL_TREE
);
16201 tree v2si_ftype_v2sf
16202 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16203 tree v2sf_ftype_v2si
16204 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16205 tree v2si_ftype_v2si
16206 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16207 tree v2sf_ftype_v2sf
16208 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16209 tree v2sf_ftype_v2sf_v2sf
16210 = build_function_type_list (V2SF_type_node
,
16211 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16212 tree v2si_ftype_v2sf_v2sf
16213 = build_function_type_list (V2SI_type_node
,
16214 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16215 tree pint_type_node
= build_pointer_type (integer_type_node
);
16216 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16217 tree pcdouble_type_node
= build_pointer_type (
16218 build_type_variant (double_type_node
, 1, 0));
16219 tree int_ftype_v2df_v2df
16220 = build_function_type_list (integer_type_node
,
16221 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16223 tree void_ftype_pcvoid
16224 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16225 tree v4sf_ftype_v4si
16226 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16227 tree v4si_ftype_v4sf
16228 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16229 tree v2df_ftype_v4si
16230 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16231 tree v4si_ftype_v2df
16232 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16233 tree v2si_ftype_v2df
16234 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16235 tree v4sf_ftype_v2df
16236 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16237 tree v2df_ftype_v2si
16238 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16239 tree v2df_ftype_v4sf
16240 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16241 tree int_ftype_v2df
16242 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16243 tree int64_ftype_v2df
16244 = build_function_type_list (long_long_integer_type_node
,
16245 V2DF_type_node
, NULL_TREE
);
16246 tree v2df_ftype_v2df_int
16247 = build_function_type_list (V2DF_type_node
,
16248 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16249 tree v2df_ftype_v2df_int64
16250 = build_function_type_list (V2DF_type_node
,
16251 V2DF_type_node
, long_long_integer_type_node
,
16253 tree v4sf_ftype_v4sf_v2df
16254 = build_function_type_list (V4SF_type_node
,
16255 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16256 tree v2df_ftype_v2df_v4sf
16257 = build_function_type_list (V2DF_type_node
,
16258 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16259 tree v2df_ftype_v2df_v2df_int
16260 = build_function_type_list (V2DF_type_node
,
16261 V2DF_type_node
, V2DF_type_node
,
16264 tree v2df_ftype_v2df_pcdouble
16265 = build_function_type_list (V2DF_type_node
,
16266 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16267 tree void_ftype_pdouble_v2df
16268 = build_function_type_list (void_type_node
,
16269 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16270 tree void_ftype_pint_int
16271 = build_function_type_list (void_type_node
,
16272 pint_type_node
, integer_type_node
, NULL_TREE
);
16273 tree void_ftype_v16qi_v16qi_pchar
16274 = build_function_type_list (void_type_node
,
16275 V16QI_type_node
, V16QI_type_node
,
16276 pchar_type_node
, NULL_TREE
);
16277 tree v2df_ftype_pcdouble
16278 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16279 tree v2df_ftype_v2df_v2df
16280 = build_function_type_list (V2DF_type_node
,
16281 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16282 tree v16qi_ftype_v16qi_v16qi
16283 = build_function_type_list (V16QI_type_node
,
16284 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16285 tree v8hi_ftype_v8hi_v8hi
16286 = build_function_type_list (V8HI_type_node
,
16287 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16288 tree v4si_ftype_v4si_v4si
16289 = build_function_type_list (V4SI_type_node
,
16290 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16291 tree v2di_ftype_v2di_v2di
16292 = build_function_type_list (V2DI_type_node
,
16293 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16294 tree v2di_ftype_v2df_v2df
16295 = build_function_type_list (V2DI_type_node
,
16296 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16297 tree v2df_ftype_v2df
16298 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16299 tree v2di_ftype_v2di_int
16300 = build_function_type_list (V2DI_type_node
,
16301 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16302 tree v2di_ftype_v2di_v2di_int
16303 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16304 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16305 tree v4si_ftype_v4si_int
16306 = build_function_type_list (V4SI_type_node
,
16307 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16308 tree v8hi_ftype_v8hi_int
16309 = build_function_type_list (V8HI_type_node
,
16310 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16311 tree v8hi_ftype_v8hi_v2di
16312 = build_function_type_list (V8HI_type_node
,
16313 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
16314 tree v4si_ftype_v4si_v2di
16315 = build_function_type_list (V4SI_type_node
,
16316 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
16317 tree v4si_ftype_v8hi_v8hi
16318 = build_function_type_list (V4SI_type_node
,
16319 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16320 tree di_ftype_v8qi_v8qi
16321 = build_function_type_list (long_long_unsigned_type_node
,
16322 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16323 tree di_ftype_v2si_v2si
16324 = build_function_type_list (long_long_unsigned_type_node
,
16325 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16326 tree v2di_ftype_v16qi_v16qi
16327 = build_function_type_list (V2DI_type_node
,
16328 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16329 tree v2di_ftype_v4si_v4si
16330 = build_function_type_list (V2DI_type_node
,
16331 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16332 tree int_ftype_v16qi
16333 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16334 tree v16qi_ftype_pcchar
16335 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16336 tree void_ftype_pchar_v16qi
16337 = build_function_type_list (void_type_node
,
16338 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16341 tree float128_type
;
16344 /* The __float80 type. */
16345 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16346 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16350 /* The __float80 type. */
16351 float80_type
= make_node (REAL_TYPE
);
16352 TYPE_PRECISION (float80_type
) = 80;
16353 layout_type (float80_type
);
16354 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16359 float128_type
= make_node (REAL_TYPE
);
16360 TYPE_PRECISION (float128_type
) = 128;
16361 layout_type (float128_type
);
16362 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16365 /* Add all builtins that are more or less simple operations on two
16367 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16369 /* Use one of the operands; the target can have a different mode for
16370 mask-generating compares. */
16371 enum machine_mode mode
;
16376 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16381 type
= v16qi_ftype_v16qi_v16qi
;
16384 type
= v8hi_ftype_v8hi_v8hi
;
16387 type
= v4si_ftype_v4si_v4si
;
16390 type
= v2di_ftype_v2di_v2di
;
16393 type
= v2df_ftype_v2df_v2df
;
16396 type
= v4sf_ftype_v4sf_v4sf
;
16399 type
= v8qi_ftype_v8qi_v8qi
;
16402 type
= v4hi_ftype_v4hi_v4hi
;
16405 type
= v2si_ftype_v2si_v2si
;
16408 type
= di_ftype_di_di
;
16412 gcc_unreachable ();
16415 /* Override for comparisons. */
16416 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16417 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
16418 type
= v4si_ftype_v4sf_v4sf
;
16420 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16421 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16422 type
= v2di_ftype_v2df_v2df
;
16424 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16427 /* Add all builtins that are more or less simple operations on 1 operand. */
16428 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16430 enum machine_mode mode
;
16435 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16440 type
= v16qi_ftype_v16qi
;
16443 type
= v8hi_ftype_v8hi
;
16446 type
= v4si_ftype_v4si
;
16449 type
= v2df_ftype_v2df
;
16452 type
= v4sf_ftype_v4sf
;
16455 type
= v8qi_ftype_v8qi
;
16458 type
= v4hi_ftype_v4hi
;
16461 type
= v2si_ftype_v2si
;
16468 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16471 /* Add the remaining MMX insns with somewhat more complicated types. */
16472 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
16473 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
16474 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
16475 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
16477 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
16478 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
16479 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
16481 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
16482 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
16484 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
16485 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
16487 /* comi/ucomi insns. */
16488 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16489 if (d
->mask
== MASK_SSE2
)
16490 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
16492 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
16494 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
16495 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
16496 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
16498 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
16499 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
16500 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
16501 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
16502 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
16503 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
16504 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
16505 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
16506 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
16507 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
16508 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
16510 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
16512 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
16513 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
16515 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
16516 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
16517 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
16518 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
16520 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
16521 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
16522 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
16523 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
16525 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
16527 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
16529 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
16530 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
16531 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
16532 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
16533 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
16534 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
16536 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
16538 /* Original 3DNow! */
16539 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
16540 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
16541 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
16542 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
16543 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
16544 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
16545 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
16546 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
16547 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
16548 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
16549 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
16550 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
16551 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
16552 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
16553 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
16554 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
16555 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
16556 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
16557 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
16558 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
16560 /* 3DNow! extension as used in the Athlon CPU. */
16561 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
16562 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
16563 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
16564 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
16565 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
16566 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
16569 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
16571 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
16572 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
16574 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
16575 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
16577 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
16578 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
16579 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
16580 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
16581 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
16583 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
16584 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
16585 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
16586 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
16588 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
16589 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
16591 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
16593 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
16594 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
16596 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
16597 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
16598 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
16599 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
16600 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
16602 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
16604 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
16605 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
16606 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
16607 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
16609 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
16610 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
16611 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
16613 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
16614 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
16615 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
16616 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
16618 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
16619 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
16620 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
16622 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
16623 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
16625 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
16626 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
16628 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
16629 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
16630 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
16632 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
16633 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
16634 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
16636 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
16637 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
16639 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
16640 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
16641 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
16642 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
16644 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
16645 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
16646 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
16647 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
16649 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
16650 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
16652 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
16654 /* Prescott New Instructions. */
16655 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
16656 void_ftype_pcvoid_unsigned_unsigned
,
16657 IX86_BUILTIN_MONITOR
);
16658 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
16659 void_ftype_unsigned_unsigned
,
16660 IX86_BUILTIN_MWAIT
);
16661 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
16663 IX86_BUILTIN_MOVSHDUP
);
16664 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
16666 IX86_BUILTIN_MOVSLDUP
);
16667 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
16668 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
16671 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
16672 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
16673 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
16674 IX86_BUILTIN_PALIGNR
);
16676 /* Access to the vec_init patterns. */
16677 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
16678 integer_type_node
, NULL_TREE
);
16679 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
16680 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
16682 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
16683 short_integer_type_node
,
16684 short_integer_type_node
,
16685 short_integer_type_node
, NULL_TREE
);
16686 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
16687 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
16689 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
16690 char_type_node
, char_type_node
,
16691 char_type_node
, char_type_node
,
16692 char_type_node
, char_type_node
,
16693 char_type_node
, NULL_TREE
);
16694 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
16695 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
16697 /* Access to the vec_extract patterns. */
16698 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
16699 integer_type_node
, NULL_TREE
);
16700 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
16701 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
16703 ftype
= build_function_type_list (long_long_integer_type_node
,
16704 V2DI_type_node
, integer_type_node
,
16706 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
16707 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
16709 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
16710 integer_type_node
, NULL_TREE
);
16711 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
16712 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
16714 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
16715 integer_type_node
, NULL_TREE
);
16716 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
16717 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
16719 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
16720 integer_type_node
, NULL_TREE
);
16721 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
16722 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
16724 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
16725 integer_type_node
, NULL_TREE
);
16726 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
16727 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
16729 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
16730 integer_type_node
, NULL_TREE
);
16731 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
16732 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
16734 /* Access to the vec_set patterns. */
16735 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
16737 integer_type_node
, NULL_TREE
);
16738 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
16739 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
16741 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
16743 integer_type_node
, NULL_TREE
);
16744 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
16745 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
16748 /* Errors in the source file can cause expand_expr to return const0_rtx
16749 where we expect a vector. To avoid crashing, use one of the vector
16750 clear instructions. */
16752 safe_vector_operand (rtx x
, enum machine_mode mode
)
16754 if (x
== const0_rtx
)
16755 x
= CONST0_RTX (mode
);
16759 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16762 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
16765 tree arg0
= TREE_VALUE (arglist
);
16766 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16767 rtx op0
= expand_normal (arg0
);
16768 rtx op1
= expand_normal (arg1
);
16769 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
16770 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16771 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16773 if (VECTOR_MODE_P (mode0
))
16774 op0
= safe_vector_operand (op0
, mode0
);
16775 if (VECTOR_MODE_P (mode1
))
16776 op1
= safe_vector_operand (op1
, mode1
);
16778 if (optimize
|| !target
16779 || GET_MODE (target
) != tmode
16780 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16781 target
= gen_reg_rtx (tmode
);
16783 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
16785 rtx x
= gen_reg_rtx (V4SImode
);
16786 emit_insn (gen_sse2_loadd (x
, op1
));
16787 op1
= gen_lowpart (TImode
, x
);
16790 /* The insn must want input operands in the same modes as the
16792 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
16793 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
16795 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16796 op0
= copy_to_mode_reg (mode0
, op0
);
16797 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16798 op1
= copy_to_mode_reg (mode1
, op1
);
16800 /* ??? Using ix86_fixup_binary_operands is problematic when
16801 we've got mismatched modes. Fake it. */
16807 if (tmode
== mode0
&& tmode
== mode1
)
16809 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
16813 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
16815 op0
= force_reg (mode0
, op0
);
16816 op1
= force_reg (mode1
, op1
);
16817 target
= gen_reg_rtx (tmode
);
16820 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16827 /* Subroutine of ix86_expand_builtin to take care of stores. */
16830 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
16833 tree arg0
= TREE_VALUE (arglist
);
16834 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16835 rtx op0
= expand_normal (arg0
);
16836 rtx op1
= expand_normal (arg1
);
16837 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
16838 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
16840 if (VECTOR_MODE_P (mode1
))
16841 op1
= safe_vector_operand (op1
, mode1
);
16843 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16844 op1
= copy_to_mode_reg (mode1
, op1
);
16846 pat
= GEN_FCN (icode
) (op0
, op1
);
16852 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16855 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
16856 rtx target
, int do_load
)
16859 tree arg0
= TREE_VALUE (arglist
);
16860 rtx op0
= expand_normal (arg0
);
16861 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
16862 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16864 if (optimize
|| !target
16865 || GET_MODE (target
) != tmode
16866 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16867 target
= gen_reg_rtx (tmode
);
16869 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16872 if (VECTOR_MODE_P (mode0
))
16873 op0
= safe_vector_operand (op0
, mode0
);
16875 if ((optimize
&& !register_operand (op0
, mode0
))
16876 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16877 op0
= copy_to_mode_reg (mode0
, op0
);
16880 pat
= GEN_FCN (icode
) (target
, op0
);
16887 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16888 sqrtss, rsqrtss, rcpss. */
16891 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
16894 tree arg0
= TREE_VALUE (arglist
);
16895 rtx op1
, op0
= expand_normal (arg0
);
16896 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
16897 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16899 if (optimize
|| !target
16900 || GET_MODE (target
) != tmode
16901 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16902 target
= gen_reg_rtx (tmode
);
16904 if (VECTOR_MODE_P (mode0
))
16905 op0
= safe_vector_operand (op0
, mode0
);
16907 if ((optimize
&& !register_operand (op0
, mode0
))
16908 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16909 op0
= copy_to_mode_reg (mode0
, op0
);
16912 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
16913 op1
= copy_to_mode_reg (mode0
, op1
);
16915 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16922 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16925 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
16929 tree arg0
= TREE_VALUE (arglist
);
16930 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16931 rtx op0
= expand_normal (arg0
);
16932 rtx op1
= expand_normal (arg1
);
16934 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
16935 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
16936 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
16937 enum rtx_code comparison
= d
->comparison
;
16939 if (VECTOR_MODE_P (mode0
))
16940 op0
= safe_vector_operand (op0
, mode0
);
16941 if (VECTOR_MODE_P (mode1
))
16942 op1
= safe_vector_operand (op1
, mode1
);
16944 /* Swap operands if we have a comparison that isn't available in
16946 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
16948 rtx tmp
= gen_reg_rtx (mode1
);
16949 emit_move_insn (tmp
, op1
);
16954 if (optimize
|| !target
16955 || GET_MODE (target
) != tmode
16956 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
16957 target
= gen_reg_rtx (tmode
);
16959 if ((optimize
&& !register_operand (op0
, mode0
))
16960 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
16961 op0
= copy_to_mode_reg (mode0
, op0
);
16962 if ((optimize
&& !register_operand (op1
, mode1
))
16963 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
16964 op1
= copy_to_mode_reg (mode1
, op1
);
16966 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
16967 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
16974 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16977 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
16981 tree arg0
= TREE_VALUE (arglist
);
16982 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16983 rtx op0
= expand_normal (arg0
);
16984 rtx op1
= expand_normal (arg1
);
16986 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
16987 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
16988 enum rtx_code comparison
= d
->comparison
;
16990 if (VECTOR_MODE_P (mode0
))
16991 op0
= safe_vector_operand (op0
, mode0
);
16992 if (VECTOR_MODE_P (mode1
))
16993 op1
= safe_vector_operand (op1
, mode1
);
16995 /* Swap operands if we have a comparison that isn't available in
16997 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17004 target
= gen_reg_rtx (SImode
);
17005 emit_move_insn (target
, const0_rtx
);
17006 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17008 if ((optimize
&& !register_operand (op0
, mode0
))
17009 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17010 op0
= copy_to_mode_reg (mode0
, op0
);
17011 if ((optimize
&& !register_operand (op1
, mode1
))
17012 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17013 op1
= copy_to_mode_reg (mode1
, op1
);
17015 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17016 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17020 emit_insn (gen_rtx_SET (VOIDmode
,
17021 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17022 gen_rtx_fmt_ee (comparison
, QImode
,
17026 return SUBREG_REG (target
);
17029 /* Return the integer constant in ARG. Constrain it to be in the range
17030 of the subparts of VEC_TYPE; issue an error if not. */
17033 get_element_number (tree vec_type
, tree arg
)
17035 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17037 if (!host_integerp (arg
, 1)
17038 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17040 error ("selector must be an integer constant in the range 0..%wi", max
);
17047 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17048 ix86_expand_vector_init. We DO have language-level syntax for this, in
17049 the form of (type){ init-list }. Except that since we can't place emms
17050 instructions from inside the compiler, we can't allow the use of MMX
17051 registers unless the user explicitly asks for it. So we do *not* define
17052 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17053 we have builtins invoked by mmintrin.h that gives us license to emit
17054 these sorts of instructions. */
17057 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
17059 enum machine_mode tmode
= TYPE_MODE (type
);
17060 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17061 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17062 rtvec v
= rtvec_alloc (n_elt
);
17064 gcc_assert (VECTOR_MODE_P (tmode
));
17066 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
17068 rtx x
= expand_normal (TREE_VALUE (arglist
));
17069 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17072 gcc_assert (arglist
== NULL
);
17074 if (!target
|| !register_operand (target
, tmode
))
17075 target
= gen_reg_rtx (tmode
);
17077 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17081 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17082 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17083 had a language-level syntax for referencing vector elements. */
17086 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
17088 enum machine_mode tmode
, mode0
;
17093 arg0
= TREE_VALUE (arglist
);
17094 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17096 op0
= expand_normal (arg0
);
17097 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17099 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17100 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17101 gcc_assert (VECTOR_MODE_P (mode0
));
17103 op0
= force_reg (mode0
, op0
);
17105 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17106 target
= gen_reg_rtx (tmode
);
17108 ix86_expand_vector_extract (true, target
, op0
, elt
);
17113 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17114 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17115 a language-level syntax for referencing vector elements. */
17118 ix86_expand_vec_set_builtin (tree arglist
)
17120 enum machine_mode tmode
, mode1
;
17121 tree arg0
, arg1
, arg2
;
17125 arg0
= TREE_VALUE (arglist
);
17126 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17127 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17129 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17130 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17131 gcc_assert (VECTOR_MODE_P (tmode
));
17133 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17134 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17135 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17137 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17138 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17140 op0
= force_reg (tmode
, op0
);
17141 op1
= force_reg (mode1
, op1
);
17143 ix86_expand_vector_set (true, op0
, op1
, elt
);
17148 /* Expand an expression EXP that calls a built-in function,
17149 with result going to TARGET if that's convenient
17150 (and in mode MODE if that's convenient).
17151 SUBTARGET may be used as the target for computing one of EXP's operands.
17152 IGNORE is nonzero if the value is to be ignored. */
17155 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17156 enum machine_mode mode ATTRIBUTE_UNUSED
,
17157 int ignore ATTRIBUTE_UNUSED
)
17159 const struct builtin_description
*d
;
17161 enum insn_code icode
;
17162 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
17163 tree arglist
= TREE_OPERAND (exp
, 1);
17164 tree arg0
, arg1
, arg2
;
17165 rtx op0
, op1
, op2
, pat
;
17166 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
;
17167 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17171 case IX86_BUILTIN_EMMS
:
17172 emit_insn (gen_mmx_emms ());
17175 case IX86_BUILTIN_SFENCE
:
17176 emit_insn (gen_sse_sfence ());
17179 case IX86_BUILTIN_MASKMOVQ
:
17180 case IX86_BUILTIN_MASKMOVDQU
:
17181 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17182 ? CODE_FOR_mmx_maskmovq
17183 : CODE_FOR_sse2_maskmovdqu
);
17184 /* Note the arg order is different from the operand order. */
17185 arg1
= TREE_VALUE (arglist
);
17186 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
17187 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17188 op0
= expand_normal (arg0
);
17189 op1
= expand_normal (arg1
);
17190 op2
= expand_normal (arg2
);
17191 mode0
= insn_data
[icode
].operand
[0].mode
;
17192 mode1
= insn_data
[icode
].operand
[1].mode
;
17193 mode2
= insn_data
[icode
].operand
[2].mode
;
17195 op0
= force_reg (Pmode
, op0
);
17196 op0
= gen_rtx_MEM (mode1
, op0
);
17198 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17199 op0
= copy_to_mode_reg (mode0
, op0
);
17200 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17201 op1
= copy_to_mode_reg (mode1
, op1
);
17202 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17203 op2
= copy_to_mode_reg (mode2
, op2
);
17204 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17210 case IX86_BUILTIN_SQRTSS
:
17211 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
17212 case IX86_BUILTIN_RSQRTSS
:
17213 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
17214 case IX86_BUILTIN_RCPSS
:
17215 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
17217 case IX86_BUILTIN_LOADUPS
:
17218 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
17220 case IX86_BUILTIN_STOREUPS
:
17221 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
17223 case IX86_BUILTIN_LOADHPS
:
17224 case IX86_BUILTIN_LOADLPS
:
17225 case IX86_BUILTIN_LOADHPD
:
17226 case IX86_BUILTIN_LOADLPD
:
17227 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17228 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17229 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17230 : CODE_FOR_sse2_loadlpd
);
17231 arg0
= TREE_VALUE (arglist
);
17232 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17233 op0
= expand_normal (arg0
);
17234 op1
= expand_normal (arg1
);
17235 tmode
= insn_data
[icode
].operand
[0].mode
;
17236 mode0
= insn_data
[icode
].operand
[1].mode
;
17237 mode1
= insn_data
[icode
].operand
[2].mode
;
17239 op0
= force_reg (mode0
, op0
);
17240 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17241 if (optimize
|| target
== 0
17242 || GET_MODE (target
) != tmode
17243 || !register_operand (target
, tmode
))
17244 target
= gen_reg_rtx (tmode
);
17245 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17251 case IX86_BUILTIN_STOREHPS
:
17252 case IX86_BUILTIN_STORELPS
:
17253 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17254 : CODE_FOR_sse_storelps
);
17255 arg0
= TREE_VALUE (arglist
);
17256 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17257 op0
= expand_normal (arg0
);
17258 op1
= expand_normal (arg1
);
17259 mode0
= insn_data
[icode
].operand
[0].mode
;
17260 mode1
= insn_data
[icode
].operand
[1].mode
;
17262 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17263 op1
= force_reg (mode1
, op1
);
17265 pat
= GEN_FCN (icode
) (op0
, op1
);
17271 case IX86_BUILTIN_MOVNTPS
:
17272 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
17273 case IX86_BUILTIN_MOVNTQ
:
17274 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
17276 case IX86_BUILTIN_LDMXCSR
:
17277 op0
= expand_normal (TREE_VALUE (arglist
));
17278 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17279 emit_move_insn (target
, op0
);
17280 emit_insn (gen_sse_ldmxcsr (target
));
17283 case IX86_BUILTIN_STMXCSR
:
17284 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17285 emit_insn (gen_sse_stmxcsr (target
));
17286 return copy_to_mode_reg (SImode
, target
);
17288 case IX86_BUILTIN_SHUFPS
:
17289 case IX86_BUILTIN_SHUFPD
:
17290 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17291 ? CODE_FOR_sse_shufps
17292 : CODE_FOR_sse2_shufpd
);
17293 arg0
= TREE_VALUE (arglist
);
17294 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17295 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17296 op0
= expand_normal (arg0
);
17297 op1
= expand_normal (arg1
);
17298 op2
= expand_normal (arg2
);
17299 tmode
= insn_data
[icode
].operand
[0].mode
;
17300 mode0
= insn_data
[icode
].operand
[1].mode
;
17301 mode1
= insn_data
[icode
].operand
[2].mode
;
17302 mode2
= insn_data
[icode
].operand
[3].mode
;
17304 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17305 op0
= copy_to_mode_reg (mode0
, op0
);
17306 if ((optimize
&& !register_operand (op1
, mode1
))
17307 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17308 op1
= copy_to_mode_reg (mode1
, op1
);
17309 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17311 /* @@@ better error message */
17312 error ("mask must be an immediate");
17313 return gen_reg_rtx (tmode
);
17315 if (optimize
|| target
== 0
17316 || GET_MODE (target
) != tmode
17317 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17318 target
= gen_reg_rtx (tmode
);
17319 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17325 case IX86_BUILTIN_PSHUFW
:
17326 case IX86_BUILTIN_PSHUFD
:
17327 case IX86_BUILTIN_PSHUFHW
:
17328 case IX86_BUILTIN_PSHUFLW
:
17329 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17330 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17331 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17332 : CODE_FOR_mmx_pshufw
);
17333 arg0
= TREE_VALUE (arglist
);
17334 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17335 op0
= expand_normal (arg0
);
17336 op1
= expand_normal (arg1
);
17337 tmode
= insn_data
[icode
].operand
[0].mode
;
17338 mode1
= insn_data
[icode
].operand
[1].mode
;
17339 mode2
= insn_data
[icode
].operand
[2].mode
;
17341 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17342 op0
= copy_to_mode_reg (mode1
, op0
);
17343 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17345 /* @@@ better error message */
17346 error ("mask must be an immediate");
17350 || GET_MODE (target
) != tmode
17351 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17352 target
= gen_reg_rtx (tmode
);
17353 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17359 case IX86_BUILTIN_PSLLDQI128
:
17360 case IX86_BUILTIN_PSRLDQI128
:
17361 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17362 : CODE_FOR_sse2_lshrti3
);
17363 arg0
= TREE_VALUE (arglist
);
17364 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17365 op0
= expand_normal (arg0
);
17366 op1
= expand_normal (arg1
);
17367 tmode
= insn_data
[icode
].operand
[0].mode
;
17368 mode1
= insn_data
[icode
].operand
[1].mode
;
17369 mode2
= insn_data
[icode
].operand
[2].mode
;
17371 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17373 op0
= copy_to_reg (op0
);
17374 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17376 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17378 error ("shift must be an immediate");
17381 target
= gen_reg_rtx (V2DImode
);
17382 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
17388 case IX86_BUILTIN_FEMMS
:
17389 emit_insn (gen_mmx_femms ());
17392 case IX86_BUILTIN_PAVGUSB
:
17393 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
17395 case IX86_BUILTIN_PF2ID
:
17396 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
17398 case IX86_BUILTIN_PFACC
:
17399 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
17401 case IX86_BUILTIN_PFADD
:
17402 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
17404 case IX86_BUILTIN_PFCMPEQ
:
17405 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
17407 case IX86_BUILTIN_PFCMPGE
:
17408 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
17410 case IX86_BUILTIN_PFCMPGT
:
17411 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
17413 case IX86_BUILTIN_PFMAX
:
17414 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
17416 case IX86_BUILTIN_PFMIN
:
17417 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
17419 case IX86_BUILTIN_PFMUL
:
17420 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
17422 case IX86_BUILTIN_PFRCP
:
17423 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
17425 case IX86_BUILTIN_PFRCPIT1
:
17426 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
17428 case IX86_BUILTIN_PFRCPIT2
:
17429 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
17431 case IX86_BUILTIN_PFRSQIT1
:
17432 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
17434 case IX86_BUILTIN_PFRSQRT
:
17435 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
17437 case IX86_BUILTIN_PFSUB
:
17438 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
17440 case IX86_BUILTIN_PFSUBR
:
17441 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
17443 case IX86_BUILTIN_PI2FD
:
17444 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
17446 case IX86_BUILTIN_PMULHRW
:
17447 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
17449 case IX86_BUILTIN_PF2IW
:
17450 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
17452 case IX86_BUILTIN_PFNACC
:
17453 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
17455 case IX86_BUILTIN_PFPNACC
:
17456 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
17458 case IX86_BUILTIN_PI2FW
:
17459 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
17461 case IX86_BUILTIN_PSWAPDSI
:
17462 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
17464 case IX86_BUILTIN_PSWAPDSF
:
17465 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
17467 case IX86_BUILTIN_SQRTSD
:
17468 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
17469 case IX86_BUILTIN_LOADUPD
:
17470 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
17471 case IX86_BUILTIN_STOREUPD
:
17472 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
17474 case IX86_BUILTIN_MFENCE
:
17475 emit_insn (gen_sse2_mfence ());
17477 case IX86_BUILTIN_LFENCE
:
17478 emit_insn (gen_sse2_lfence ());
17481 case IX86_BUILTIN_CLFLUSH
:
17482 arg0
= TREE_VALUE (arglist
);
17483 op0
= expand_normal (arg0
);
17484 icode
= CODE_FOR_sse2_clflush
;
17485 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
17486 op0
= copy_to_mode_reg (Pmode
, op0
);
17488 emit_insn (gen_sse2_clflush (op0
));
17491 case IX86_BUILTIN_MOVNTPD
:
17492 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
17493 case IX86_BUILTIN_MOVNTDQ
:
17494 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
17495 case IX86_BUILTIN_MOVNTI
:
17496 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
17498 case IX86_BUILTIN_LOADDQU
:
17499 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
17500 case IX86_BUILTIN_STOREDQU
:
17501 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
17503 case IX86_BUILTIN_MONITOR
:
17504 arg0
= TREE_VALUE (arglist
);
17505 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17506 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17507 op0
= expand_normal (arg0
);
17508 op1
= expand_normal (arg1
);
17509 op2
= expand_normal (arg2
);
17511 op0
= copy_to_mode_reg (Pmode
, op0
);
17513 op1
= copy_to_mode_reg (SImode
, op1
);
17515 op2
= copy_to_mode_reg (SImode
, op2
);
17517 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
17519 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
17522 case IX86_BUILTIN_MWAIT
:
17523 arg0
= TREE_VALUE (arglist
);
17524 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17525 op0
= expand_normal (arg0
);
17526 op1
= expand_normal (arg1
);
17528 op0
= copy_to_mode_reg (SImode
, op0
);
17530 op1
= copy_to_mode_reg (SImode
, op1
);
17531 emit_insn (gen_sse3_mwait (op0
, op1
));
17534 case IX86_BUILTIN_LDDQU
:
17535 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
17538 case IX86_BUILTIN_PALIGNR
:
17539 case IX86_BUILTIN_PALIGNR128
:
17540 if (fcode
== IX86_BUILTIN_PALIGNR
)
17542 icode
= CODE_FOR_ssse3_palignrdi
;
17547 icode
= CODE_FOR_ssse3_palignrti
;
17550 arg0
= TREE_VALUE (arglist
);
17551 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17552 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17553 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
17554 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
17555 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
17556 tmode
= insn_data
[icode
].operand
[0].mode
;
17557 mode1
= insn_data
[icode
].operand
[1].mode
;
17558 mode2
= insn_data
[icode
].operand
[2].mode
;
17559 mode3
= insn_data
[icode
].operand
[3].mode
;
17561 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17563 op0
= copy_to_reg (op0
);
17564 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17566 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17568 op1
= copy_to_reg (op1
);
17569 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
17571 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
17573 error ("shift must be an immediate");
17576 target
= gen_reg_rtx (mode
);
17577 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
17584 case IX86_BUILTIN_VEC_INIT_V2SI
:
17585 case IX86_BUILTIN_VEC_INIT_V4HI
:
17586 case IX86_BUILTIN_VEC_INIT_V8QI
:
17587 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
17589 case IX86_BUILTIN_VEC_EXT_V2DF
:
17590 case IX86_BUILTIN_VEC_EXT_V2DI
:
17591 case IX86_BUILTIN_VEC_EXT_V4SF
:
17592 case IX86_BUILTIN_VEC_EXT_V4SI
:
17593 case IX86_BUILTIN_VEC_EXT_V8HI
:
17594 case IX86_BUILTIN_VEC_EXT_V2SI
:
17595 case IX86_BUILTIN_VEC_EXT_V4HI
:
17596 return ix86_expand_vec_ext_builtin (arglist
, target
);
17598 case IX86_BUILTIN_VEC_SET_V8HI
:
17599 case IX86_BUILTIN_VEC_SET_V4HI
:
17600 return ix86_expand_vec_set_builtin (arglist
);
17606 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17607 if (d
->code
== fcode
)
17609 /* Compares are treated specially. */
17610 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17611 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
17612 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17613 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17614 return ix86_expand_sse_compare (d
, arglist
, target
);
17616 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
17619 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17620 if (d
->code
== fcode
)
17621 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
17623 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17624 if (d
->code
== fcode
)
17625 return ix86_expand_sse_comi (d
, arglist
, target
);
17627 gcc_unreachable ();
17630 /* Returns a function decl for a vectorized version of the builtin function
17631 with builtin function code FN and the result vector type TYPE, or NULL_TREE
17632 if it is not available. */
17635 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type
)
17637 enum machine_mode el_mode
;
17640 if (TREE_CODE (type
) != VECTOR_TYPE
)
17643 el_mode
= TYPE_MODE (TREE_TYPE (type
));
17644 n
= TYPE_VECTOR_SUBPARTS (type
);
17648 case BUILT_IN_SQRT
:
17649 if (el_mode
== DFmode
&& n
== 2)
17650 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
17653 case BUILT_IN_SQRTF
:
17654 if (el_mode
== SFmode
&& n
== 4)
17655 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
17665 /* Store OPERAND to the memory after reload is completed. This means
17666 that we can't easily use assign_stack_local. */
17668 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
17672 gcc_assert (reload_completed
);
17673 if (TARGET_RED_ZONE
)
17675 result
= gen_rtx_MEM (mode
,
17676 gen_rtx_PLUS (Pmode
,
17678 GEN_INT (-RED_ZONE_SIZE
)));
17679 emit_move_insn (result
, operand
);
17681 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
17687 operand
= gen_lowpart (DImode
, operand
);
17691 gen_rtx_SET (VOIDmode
,
17692 gen_rtx_MEM (DImode
,
17693 gen_rtx_PRE_DEC (DImode
,
17694 stack_pointer_rtx
)),
17698 gcc_unreachable ();
17700 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
17709 split_di (&operand
, 1, operands
, operands
+ 1);
17711 gen_rtx_SET (VOIDmode
,
17712 gen_rtx_MEM (SImode
,
17713 gen_rtx_PRE_DEC (Pmode
,
17714 stack_pointer_rtx
)),
17717 gen_rtx_SET (VOIDmode
,
17718 gen_rtx_MEM (SImode
,
17719 gen_rtx_PRE_DEC (Pmode
,
17720 stack_pointer_rtx
)),
17725 /* Store HImodes as SImodes. */
17726 operand
= gen_lowpart (SImode
, operand
);
17730 gen_rtx_SET (VOIDmode
,
17731 gen_rtx_MEM (GET_MODE (operand
),
17732 gen_rtx_PRE_DEC (SImode
,
17733 stack_pointer_rtx
)),
17737 gcc_unreachable ();
17739 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
17744 /* Free operand from the memory. */
17746 ix86_free_from_memory (enum machine_mode mode
)
17748 if (!TARGET_RED_ZONE
)
17752 if (mode
== DImode
|| TARGET_64BIT
)
17756 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17757 to pop or add instruction if registers are available. */
17758 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17759 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
17764 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17765 QImode must go into class Q_REGS.
17766 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17767 movdf to do mem-to-mem moves through integer regs. */
17769 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
17771 enum machine_mode mode
= GET_MODE (x
);
17773 /* We're only allowed to return a subclass of CLASS. Many of the
17774 following checks fail for NO_REGS, so eliminate that early. */
17775 if (class == NO_REGS
)
17778 /* All classes can load zeros. */
17779 if (x
== CONST0_RTX (mode
))
17782 /* Force constants into memory if we are loading a (nonzero) constant into
17783 an MMX or SSE register. This is because there are no MMX/SSE instructions
17784 to load from a constant. */
17786 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17789 /* Prefer SSE regs only, if we can use them for math. */
17790 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
17791 return SSE_CLASS_P (class) ? class : NO_REGS
;
17793 /* Floating-point constants need more complex checks. */
17794 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
17796 /* General regs can load everything. */
17797 if (reg_class_subset_p (class, GENERAL_REGS
))
17800 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17801 zero above. We only want to wind up preferring 80387 registers if
17802 we plan on doing computation with them. */
17804 && standard_80387_constant_p (x
))
17806 /* Limit class to non-sse. */
17807 if (class == FLOAT_SSE_REGS
)
17809 if (class == FP_TOP_SSE_REGS
)
17811 if (class == FP_SECOND_SSE_REGS
)
17812 return FP_SECOND_REG
;
17813 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
17820 /* Generally when we see PLUS here, it's the function invariant
17821 (plus soft-fp const_int). Which can only be computed into general
17823 if (GET_CODE (x
) == PLUS
)
17824 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
17826 /* QImode constants are easy to load, but non-constant QImode data
17827 must go into Q_REGS. */
17828 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
17830 if (reg_class_subset_p (class, Q_REGS
))
17832 if (reg_class_subset_p (Q_REGS
, class))
17840 /* Discourage putting floating-point values in SSE registers unless
17841 SSE math is being used, and likewise for the 387 registers. */
17843 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
17845 enum machine_mode mode
= GET_MODE (x
);
17847 /* Restrict the output reload class to the register bank that we are doing
17848 math on. If we would like not to return a subset of CLASS, reject this
17849 alternative: if reload cannot do this, it will still use its choice. */
17850 mode
= GET_MODE (x
);
17851 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
17852 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
17854 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
17856 if (class == FP_TOP_SSE_REGS
)
17858 else if (class == FP_SECOND_SSE_REGS
)
17859 return FP_SECOND_REG
;
17861 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
17867 /* If we are copying between general and FP registers, we need a memory
17868 location. The same is true for SSE and MMX registers.
17870 The macro can't work reliably when one of the CLASSES is class containing
17871 registers from multiple units (SSE, MMX, integer). We avoid this by never
17872 combining those units in single alternative in the machine description.
17873 Ensure that this constraint holds to avoid unexpected surprises.
17875 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17876 enforce these sanity checks. */
17879 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
17880 enum machine_mode mode
, int strict
)
17882 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
17883 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
17884 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
17885 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
17886 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
17887 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
17889 gcc_assert (!strict
);
17893 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
17896 /* ??? This is a lie. We do have moves between mmx/general, and for
17897 mmx/sse2. But by saying we need secondary memory we discourage the
17898 register allocator from using the mmx registers unless needed. */
17899 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
17902 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
17904 /* SSE1 doesn't have any direct moves from other classes. */
17908 /* If the target says that inter-unit moves are more expensive
17909 than moving through memory, then don't generate them. */
17910 if (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
)
17913 /* Between SSE and general, we have moves no larger than word size. */
17914 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
17917 /* ??? For the cost of one register reformat penalty, we could use
17918 the same instructions to move SFmode and DFmode data, but the
17919 relevant move patterns don't support those alternatives. */
17920 if (mode
== SFmode
|| mode
== DFmode
)
17927 /* Return true if the registers in CLASS cannot represent the change from
17928 modes FROM to TO. */
17931 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
17932 enum reg_class
class)
17937 /* x87 registers can't do subreg at all, as all values are reformatted
17938 to extended precision. */
17939 if (MAYBE_FLOAT_CLASS_P (class))
17942 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17944 /* Vector registers do not support QI or HImode loads. If we don't
17945 disallow a change to these modes, reload will assume it's ok to
17946 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17947 the vec_dupv4hi pattern. */
17948 if (GET_MODE_SIZE (from
) < 4)
17951 /* Vector registers do not support subreg with nonzero offsets, which
17952 are otherwise valid for integer registers. Since we can't see
17953 whether we have a nonzero offset from here, prohibit all
17954 nonparadoxical subregs changing size. */
17955 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
17962 /* Return the cost of moving data from a register in class CLASS1 to
17963 one in class CLASS2.
17965 It is not required that the cost always equal 2 when FROM is the same as TO;
17966 on some machines it is expensive to move between registers if they are not
17967 general registers. */
17970 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
17971 enum reg_class class2
)
17973 /* In case we require secondary memory, compute cost of the store followed
17974 by load. In order to avoid bad register allocation choices, we need
17975 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17977 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
17981 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
17982 MEMORY_MOVE_COST (mode
, class1
, 1));
17983 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
17984 MEMORY_MOVE_COST (mode
, class2
, 1));
17986 /* In case of copying from general_purpose_register we may emit multiple
17987 stores followed by single load causing memory size mismatch stall.
17988 Count this as arbitrarily high cost of 20. */
17989 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
17992 /* In the case of FP/MMX moves, the registers actually overlap, and we
17993 have to switch modes in order to treat them differently. */
17994 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
17995 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18001 /* Moves between SSE/MMX and integer unit are expensive. */
18002 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18003 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18004 return ix86_cost
->mmxsse_to_integer
;
18005 if (MAYBE_FLOAT_CLASS_P (class1
))
18006 return ix86_cost
->fp_move
;
18007 if (MAYBE_SSE_CLASS_P (class1
))
18008 return ix86_cost
->sse_move
;
18009 if (MAYBE_MMX_CLASS_P (class1
))
18010 return ix86_cost
->mmx_move
;
18014 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18017 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18019 /* Flags and only flags can only hold CCmode values. */
18020 if (CC_REGNO_P (regno
))
18021 return GET_MODE_CLASS (mode
) == MODE_CC
;
18022 if (GET_MODE_CLASS (mode
) == MODE_CC
18023 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18024 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18026 if (FP_REGNO_P (regno
))
18027 return VALID_FP_MODE_P (mode
);
18028 if (SSE_REGNO_P (regno
))
18030 /* We implement the move patterns for all vector modes into and
18031 out of SSE registers, even when no operation instructions
18033 return (VALID_SSE_REG_MODE (mode
)
18034 || VALID_SSE2_REG_MODE (mode
)
18035 || VALID_MMX_REG_MODE (mode
)
18036 || VALID_MMX_REG_MODE_3DNOW (mode
));
18038 if (MMX_REGNO_P (regno
))
18040 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18041 so if the register is available at all, then we can move data of
18042 the given mode into or out of it. */
18043 return (VALID_MMX_REG_MODE (mode
)
18044 || VALID_MMX_REG_MODE_3DNOW (mode
));
18047 if (mode
== QImode
)
18049 /* Take care for QImode values - they can be in non-QI regs,
18050 but then they do cause partial register stalls. */
18051 if (regno
< 4 || TARGET_64BIT
)
18053 if (!TARGET_PARTIAL_REG_STALL
)
18055 return reload_in_progress
|| reload_completed
;
18057 /* We handle both integer and floats in the general purpose registers. */
18058 else if (VALID_INT_MODE_P (mode
))
18060 else if (VALID_FP_MODE_P (mode
))
18062 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18063 on to use that value in smaller contexts, this can easily force a
18064 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18065 supporting DImode, allow it. */
18066 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18072 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18073 tieable integer mode. */
18076 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18085 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18088 return TARGET_64BIT
;
18095 /* Return true if MODE1 is accessible in a register that can hold MODE2
18096 without copying. That is, all register classes that can hold MODE2
18097 can also hold MODE1. */
18100 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18102 if (mode1
== mode2
)
18105 if (ix86_tieable_integer_mode_p (mode1
)
18106 && ix86_tieable_integer_mode_p (mode2
))
18109 /* MODE2 being XFmode implies fp stack or general regs, which means we
18110 can tie any smaller floating point modes to it. Note that we do not
18111 tie this with TFmode. */
18112 if (mode2
== XFmode
)
18113 return mode1
== SFmode
|| mode1
== DFmode
;
18115 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18116 that we can tie it with SFmode. */
18117 if (mode2
== DFmode
)
18118 return mode1
== SFmode
;
18120 /* If MODE2 is only appropriate for an SSE register, then tie with
18121 any other mode acceptable to SSE registers. */
18122 if (GET_MODE_SIZE (mode2
) >= 8
18123 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18124 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
18126 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18127 with any other mode acceptable to MMX registers. */
18128 if (GET_MODE_SIZE (mode2
) == 8
18129 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18130 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
18135 /* Return the cost of moving data of mode M between a
18136 register and memory. A value of 2 is the default; this cost is
18137 relative to those in `REGISTER_MOVE_COST'.
18139 If moving between registers and memory is more expensive than
18140 between two registers, you should define this macro to express the
18143 Model also increased moving costs of QImode registers in non
18147 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
18149 if (FLOAT_CLASS_P (class))
18166 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
18168 if (SSE_CLASS_P (class))
18171 switch (GET_MODE_SIZE (mode
))
18185 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
18187 if (MMX_CLASS_P (class))
18190 switch (GET_MODE_SIZE (mode
))
18201 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
18203 switch (GET_MODE_SIZE (mode
))
18207 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18208 : ix86_cost
->movzbl_load
);
18210 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18211 : ix86_cost
->int_store
[0] + 4);
18214 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18216 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18217 if (mode
== TFmode
)
18219 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18220 * (((int) GET_MODE_SIZE (mode
)
18221 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18225 /* Compute a (partial) cost for rtx X. Return true if the complete
18226 cost has been computed, and false if subexpressions should be
18227 scanned. In either case, *TOTAL contains the cost result. */
18230 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
18232 enum machine_mode mode
= GET_MODE (x
);
18240 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
18242 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
18244 else if (flag_pic
&& SYMBOLIC_CONST (x
)
18246 || (!GET_CODE (x
) != LABEL_REF
18247 && (GET_CODE (x
) != SYMBOL_REF
18248 || !SYMBOL_REF_LOCAL_P (x
)))))
18255 if (mode
== VOIDmode
)
18258 switch (standard_80387_constant_p (x
))
18263 default: /* Other constants */
18268 /* Start with (MEM (SYMBOL_REF)), since that's where
18269 it'll probably end up. Add a penalty for size. */
18270 *total
= (COSTS_N_INSNS (1)
18271 + (flag_pic
!= 0 && !TARGET_64BIT
)
18272 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
18278 /* The zero extensions is often completely free on x86_64, so make
18279 it as cheap as possible. */
18280 if (TARGET_64BIT
&& mode
== DImode
18281 && GET_MODE (XEXP (x
, 0)) == SImode
)
18283 else if (TARGET_ZERO_EXTEND_WITH_AND
)
18284 *total
= ix86_cost
->add
;
18286 *total
= ix86_cost
->movzx
;
18290 *total
= ix86_cost
->movsx
;
18294 if (CONST_INT_P (XEXP (x
, 1))
18295 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
18297 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18300 *total
= ix86_cost
->add
;
18303 if ((value
== 2 || value
== 3)
18304 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
18306 *total
= ix86_cost
->lea
;
18316 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
18318 if (CONST_INT_P (XEXP (x
, 1)))
18320 if (INTVAL (XEXP (x
, 1)) > 32)
18321 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
18323 *total
= ix86_cost
->shift_const
* 2;
18327 if (GET_CODE (XEXP (x
, 1)) == AND
)
18328 *total
= ix86_cost
->shift_var
* 2;
18330 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
18335 if (CONST_INT_P (XEXP (x
, 1)))
18336 *total
= ix86_cost
->shift_const
;
18338 *total
= ix86_cost
->shift_var
;
18343 if (FLOAT_MODE_P (mode
))
18345 *total
= ix86_cost
->fmul
;
18350 rtx op0
= XEXP (x
, 0);
18351 rtx op1
= XEXP (x
, 1);
18353 if (CONST_INT_P (XEXP (x
, 1)))
18355 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18356 for (nbits
= 0; value
!= 0; value
&= value
- 1)
18360 /* This is arbitrary. */
18363 /* Compute costs correctly for widening multiplication. */
18364 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
18365 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
18366 == GET_MODE_SIZE (mode
))
18368 int is_mulwiden
= 0;
18369 enum machine_mode inner_mode
= GET_MODE (op0
);
18371 if (GET_CODE (op0
) == GET_CODE (op1
))
18372 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
18373 else if (CONST_INT_P (op1
))
18375 if (GET_CODE (op0
) == SIGN_EXTEND
)
18376 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
18379 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
18383 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
18386 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
18387 + nbits
* ix86_cost
->mult_bit
18388 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
18397 if (FLOAT_MODE_P (mode
))
18398 *total
= ix86_cost
->fdiv
;
18400 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
18404 if (FLOAT_MODE_P (mode
))
18405 *total
= ix86_cost
->fadd
;
18406 else if (GET_MODE_CLASS (mode
) == MODE_INT
18407 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
18409 if (GET_CODE (XEXP (x
, 0)) == PLUS
18410 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
18411 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
18412 && CONSTANT_P (XEXP (x
, 1)))
18414 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
18415 if (val
== 2 || val
== 4 || val
== 8)
18417 *total
= ix86_cost
->lea
;
18418 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
18419 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
18421 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18425 else if (GET_CODE (XEXP (x
, 0)) == MULT
18426 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
18428 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
18429 if (val
== 2 || val
== 4 || val
== 8)
18431 *total
= ix86_cost
->lea
;
18432 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
18433 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18437 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
18439 *total
= ix86_cost
->lea
;
18440 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
18441 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
18442 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18449 if (FLOAT_MODE_P (mode
))
18451 *total
= ix86_cost
->fadd
;
18459 if (!TARGET_64BIT
&& mode
== DImode
)
18461 *total
= (ix86_cost
->add
* 2
18462 + (rtx_cost (XEXP (x
, 0), outer_code
)
18463 << (GET_MODE (XEXP (x
, 0)) != DImode
))
18464 + (rtx_cost (XEXP (x
, 1), outer_code
)
18465 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
18471 if (FLOAT_MODE_P (mode
))
18473 *total
= ix86_cost
->fchs
;
18479 if (!TARGET_64BIT
&& mode
== DImode
)
18480 *total
= ix86_cost
->add
* 2;
18482 *total
= ix86_cost
->add
;
18486 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
18487 && XEXP (XEXP (x
, 0), 1) == const1_rtx
18488 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
18489 && XEXP (x
, 1) == const0_rtx
)
18491 /* This kind of construct is implemented using test[bwl].
18492 Treat it as if we had an AND. */
18493 *total
= (ix86_cost
->add
18494 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
18495 + rtx_cost (const1_rtx
, outer_code
));
18501 if (!TARGET_SSE_MATH
18503 || (mode
== DFmode
&& !TARGET_SSE2
))
18508 if (FLOAT_MODE_P (mode
))
18509 *total
= ix86_cost
->fabs
;
18513 if (FLOAT_MODE_P (mode
))
18514 *total
= ix86_cost
->fsqrt
;
18518 if (XINT (x
, 1) == UNSPEC_TP
)
18529 static int current_machopic_label_num
;
18531 /* Given a symbol name and its associated stub, write out the
18532 definition of the stub. */
18535 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
18537 unsigned int length
;
18538 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
18539 int label
= ++current_machopic_label_num
;
18541 /* For 64-bit we shouldn't get here. */
18542 gcc_assert (!TARGET_64BIT
);
18544 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18545 symb
= (*targetm
.strip_name_encoding
) (symb
);
18547 length
= strlen (stub
);
18548 binder_name
= alloca (length
+ 32);
18549 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
18551 length
= strlen (symb
);
18552 symbol_name
= alloca (length
+ 32);
18553 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
18555 sprintf (lazy_ptr_name
, "L%d$lz", label
);
18558 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
18560 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
18562 fprintf (file
, "%s:\n", stub
);
18563 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
18567 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
18568 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
18569 fprintf (file
, "\tjmp\t*%%edx\n");
18572 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
18574 fprintf (file
, "%s:\n", binder_name
);
18578 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
18579 fprintf (file
, "\tpushl\t%%eax\n");
18582 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
18584 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
18586 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
18587 fprintf (file
, "%s:\n", lazy_ptr_name
);
18588 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
18589 fprintf (file
, "\t.long %s\n", binder_name
);
/* Darwin wrapper for the file-end hook: run the generic Darwin epilogue
   and then the common x86 one.  */
static void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
18598 #endif /* TARGET_MACHO */
18600 /* Order the registers for register allocator. */
18603 x86_order_regs_for_local_alloc (void)
18608 /* First allocate the local general purpose registers. */
18609 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
18610 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
18611 reg_alloc_order
[pos
++] = i
;
18613 /* Global general purpose registers. */
18614 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
18615 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
18616 reg_alloc_order
[pos
++] = i
;
18618 /* x87 registers come first in case we are doing FP math
18620 if (!TARGET_SSE_MATH
)
18621 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
18622 reg_alloc_order
[pos
++] = i
;
18624 /* SSE registers. */
18625 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
18626 reg_alloc_order
[pos
++] = i
;
18627 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
18628 reg_alloc_order
[pos
++] = i
;
18630 /* x87 registers. */
18631 if (TARGET_SSE_MATH
)
18632 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
18633 reg_alloc_order
[pos
++] = i
;
18635 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
18636 reg_alloc_order
[pos
++] = i
;
18638 /* Initialize the rest of array as we do not allocate some registers
18640 while (pos
< FIRST_PSEUDO_REGISTER
)
18641 reg_alloc_order
[pos
++] = 0;
18644 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18645 struct attribute_spec.handler. */
18647 ix86_handle_struct_attribute (tree
*node
, tree name
,
18648 tree args ATTRIBUTE_UNUSED
,
18649 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
18652 if (DECL_P (*node
))
18654 if (TREE_CODE (*node
) == TYPE_DECL
)
18655 type
= &TREE_TYPE (*node
);
18660 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
18661 || TREE_CODE (*type
) == UNION_TYPE
)))
18663 warning (OPT_Wattributes
, "%qs attribute ignored",
18664 IDENTIFIER_POINTER (name
));
18665 *no_add_attrs
= true;
18668 else if ((is_attribute_p ("ms_struct", name
)
18669 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
18670 || ((is_attribute_p ("gcc_struct", name
)
18671 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
18673 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
18674 IDENTIFIER_POINTER (name
));
18675 *no_add_attrs
= true;
18682 ix86_ms_bitfield_layout_p (tree record_type
)
18684 return (TARGET_MS_BITFIELD_LAYOUT
&&
18685 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
18686 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
18689 /* Returns an expression indicating where the this parameter is
18690 located on entry to the FUNCTION. */
18693 x86_this_parameter (tree function
)
18695 tree type
= TREE_TYPE (function
);
18699 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
18700 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
18703 if (ix86_function_regparm (type
, function
) > 0)
18707 parm
= TYPE_ARG_TYPES (type
);
18708 /* Figure out whether or not the function has a variable number of
18710 for (; parm
; parm
= TREE_CHAIN (parm
))
18711 if (TREE_VALUE (parm
) == void_type_node
)
18713 /* If not, the this parameter is in the first argument. */
18717 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
18719 return gen_rtx_REG (SImode
, regno
);
18723 if (aggregate_value_p (TREE_TYPE (type
), type
))
18724 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
18726 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
18729 /* Determine whether x86_output_mi_thunk can succeed. */
18732 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
18733 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
18734 HOST_WIDE_INT vcall_offset
, tree function
)
18736 /* 64-bit can handle anything. */
18740 /* For 32-bit, everything's fine if we have one free register. */
18741 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
18744 /* Need a free register for vcall_offset. */
18748 /* Need a free register for GOT references. */
18749 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
18752 /* Otherwise ok. */
18756 /* Output the assembler code for a thunk function. THUNK_DECL is the
18757 declaration for the thunk function itself, FUNCTION is the decl for
18758 the target function. DELTA is an immediate constant offset to be
18759 added to THIS. If VCALL_OFFSET is nonzero, the word at
18760 *(*this + vcall_offset) should be added to THIS. */
18763 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
18764 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
18765 HOST_WIDE_INT vcall_offset
, tree function
)
18768 rtx
this = x86_this_parameter (function
);
18771 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18772 pull it in now and let DELTA benefit. */
18775 else if (vcall_offset
)
18777 /* Put the this parameter into %eax. */
18779 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
18780 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
18783 this_reg
= NULL_RTX
;
18785 /* Adjust the this parameter by a fixed constant. */
18788 xops
[0] = GEN_INT (delta
);
18789 xops
[1] = this_reg
? this_reg
: this;
18792 if (!x86_64_general_operand (xops
[0], DImode
))
18794 tmp
= gen_rtx_REG (DImode
, R10_REG
);
18796 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
18800 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
18803 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
18806 /* Adjust the this parameter by a value stored in the vtable. */
18810 tmp
= gen_rtx_REG (DImode
, R10_REG
);
18813 int tmp_regno
= 2 /* ECX */;
18814 if (lookup_attribute ("fastcall",
18815 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
18816 tmp_regno
= 0 /* EAX */;
18817 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
18820 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
18823 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
18825 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
18827 /* Adjust the this parameter. */
18828 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
18829 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
18831 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
18832 xops
[0] = GEN_INT (vcall_offset
);
18834 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
18835 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
18837 xops
[1] = this_reg
;
18839 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
18841 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
18844 /* If necessary, drop THIS back to its stack slot. */
18845 if (this_reg
&& this_reg
!= this)
18847 xops
[0] = this_reg
;
18849 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
18852 xops
[0] = XEXP (DECL_RTL (function
), 0);
18855 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
18856 output_asm_insn ("jmp\t%P0", xops
);
18859 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
18860 tmp
= gen_rtx_CONST (Pmode
, tmp
);
18861 tmp
= gen_rtx_MEM (QImode
, tmp
);
18863 output_asm_insn ("jmp\t%A0", xops
);
18868 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
18869 output_asm_insn ("jmp\t%P0", xops
);
18874 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
18875 tmp
= (gen_rtx_SYMBOL_REF
18877 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
18878 tmp
= gen_rtx_MEM (QImode
, tmp
);
18880 output_asm_insn ("jmp\t%0", xops
);
18883 #endif /* TARGET_MACHO */
18885 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
18886 output_set_got (tmp
, NULL_RTX
);
18889 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
18890 output_asm_insn ("jmp\t{*}%1", xops
);
18896 x86_file_start (void)
18898 default_file_start ();
18900 darwin_file_start ();
18902 if (X86_FILE_START_VERSION_DIRECTIVE
)
18903 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
18904 if (X86_FILE_START_FLTUSED
)
18905 fputs ("\t.global\t__fltused\n", asm_out_file
);
18906 if (ix86_asm_dialect
== ASM_INTEL
)
18907 fputs ("\t.intel_syntax\n", asm_out_file
);
18911 x86_field_alignment (tree field
, int computed
)
18913 enum machine_mode mode
;
18914 tree type
= TREE_TYPE (field
);
18916 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
18918 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
18919 ? get_inner_array_type (type
) : type
);
18920 if (mode
== DFmode
|| mode
== DCmode
18921 || GET_MODE_CLASS (mode
) == MODE_INT
18922 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
18923 return MIN (32, computed
);
18927 /* Output assembler code to FILE to increment profiler label # LABELNO
18928 for profiling a function entry. */
18930 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
18935 #ifndef NO_PROFILE_COUNTERS
18936 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
18938 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
18942 #ifndef NO_PROFILE_COUNTERS
18943 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
18945 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
18949 #ifndef NO_PROFILE_COUNTERS
18950 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18951 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
18953 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
18957 #ifndef NO_PROFILE_COUNTERS
18958 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
18959 PROFILE_COUNT_REGISTER
);
18961 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
18965 /* We don't have exact information about the insn sizes, but we may assume
18966 quite safely that we are informed about all 1 byte insns and memory
18967 address sizes. This is enough to eliminate unnecessary padding in
18971 min_insn_size (rtx insn
)
18975 if (!INSN_P (insn
) || !active_insn_p (insn
))
18978 /* Discard alignments we've emit and jump instructions. */
18979 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
18980 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
18983 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
18984 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
18987 /* Important case - calls are always 5 bytes.
18988 It is common to have many calls in the row. */
18990 && symbolic_reference_mentioned_p (PATTERN (insn
))
18991 && !SIBLING_CALL_P (insn
))
18993 if (get_attr_length (insn
) <= 1)
18996 /* For normal instructions we may rely on the sizes of addresses
18997 and the presence of symbol to require 4 bytes of encoding.
18998 This is not the case for jumps where references are PC relative. */
18999 if (!JUMP_P (insn
))
19001 l
= get_attr_length_address (insn
);
19002 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19011 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19015 ix86_avoid_jump_misspredicts (void)
19017 rtx insn
, start
= get_insns ();
19018 int nbytes
= 0, njumps
= 0;
19021 /* Look for all minimal intervals of instructions containing 4 jumps.
19022 The intervals are bounded by START and INSN. NBYTES is the total
19023 size of instructions in the interval including INSN and not including
19024 START. When the NBYTES is smaller than 16 bytes, it is possible
19025 that the end of START and INSN ends up in the same 16byte page.
19027 The smallest offset in the page INSN can start is the case where START
19028 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19029 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19031 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19034 nbytes
+= min_insn_size (insn
);
19036 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19037 INSN_UID (insn
), min_insn_size (insn
));
19039 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19040 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19048 start
= NEXT_INSN (start
);
19049 if ((JUMP_P (start
)
19050 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19051 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19053 njumps
--, isjump
= 1;
19056 nbytes
-= min_insn_size (start
);
19058 gcc_assert (njumps
>= 0);
19060 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19061 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19063 if (njumps
== 3 && isjump
&& nbytes
< 16)
19065 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19068 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19069 INSN_UID (insn
), padsize
);
19070 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19075 /* AMD Athlon works faster
19076 when RET is not destination of conditional jump or directly preceded
19077 by other jump instruction. We avoid the penalty by inserting NOP just
19078 before the RET instructions in such cases. */
19080 ix86_pad_returns (void)
19085 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19087 basic_block bb
= e
->src
;
19088 rtx ret
= BB_END (bb
);
19090 bool replace
= false;
19092 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19093 || !maybe_hot_bb_p (bb
))
19095 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19096 if (active_insn_p (prev
) || LABEL_P (prev
))
19098 if (prev
&& LABEL_P (prev
))
19103 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19104 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19105 && !(e
->flags
& EDGE_FALLTHRU
))
19110 prev
= prev_active_insn (ret
);
19112 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19115 /* Empty functions get branch mispredict even when the jump destination
19116 is not visible to us. */
19117 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19122 emit_insn_before (gen_return_internal_long (), ret
);
19128 /* Implement machine specific optimizations. We implement padding of returns
19129 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19133 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19134 ix86_pad_returns ();
19135 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19136 ix86_avoid_jump_misspredicts ();
19139 /* Return nonzero when QImode register that must be represented via REX prefix
19142 x86_extended_QIreg_mentioned_p (rtx insn
)
19145 extract_insn_cached (insn
);
19146 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19147 if (REG_P (recog_data
.operand
[i
])
19148 && REGNO (recog_data
.operand
[i
]) >= 4)
19153 /* Return nonzero when P points to register encoded via REX prefix.
19154 Called via for_each_rtx. */
19156 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19158 unsigned int regno
;
19161 regno
= REGNO (*p
);
19162 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19165 /* Return true when INSN mentions register that must be encoded using REX
19168 x86_extended_reg_mentioned_p (rtx insn
)
19170 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19173 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19174 optabs would emit if we didn't have TFmode patterns. */
19177 x86_emit_floatuns (rtx operands
[2])
19179 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19180 enum machine_mode mode
, inmode
;
19182 inmode
= GET_MODE (operands
[1]);
19183 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19186 in
= force_reg (inmode
, operands
[1]);
19187 mode
= GET_MODE (out
);
19188 neglab
= gen_label_rtx ();
19189 donelab
= gen_label_rtx ();
19190 i1
= gen_reg_rtx (Pmode
);
19191 f0
= gen_reg_rtx (mode
);
19193 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
19195 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
19196 emit_jump_insn (gen_jump (donelab
));
19199 emit_label (neglab
);
19201 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
19202 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
19203 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19204 expand_float (f0
, i0
, 0);
19205 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19207 emit_label (donelab
);
19210 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19211 with all elements equal to VAR. Return true if successful. */
19214 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19215 rtx target
, rtx val
)
19217 enum machine_mode smode
, wsmode
, wvmode
;
19232 val
= force_reg (GET_MODE_INNER (mode
), val
);
19233 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19234 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19240 if (TARGET_SSE
|| TARGET_3DNOW_A
)
19242 val
= gen_lowpart (SImode
, val
);
19243 x
= gen_rtx_TRUNCATE (HImode
, val
);
19244 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
19245 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19267 /* Extend HImode to SImode using a paradoxical SUBREG. */
19268 tmp1
= gen_reg_rtx (SImode
);
19269 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19270 /* Insert the SImode value as low element of V4SImode vector. */
19271 tmp2
= gen_reg_rtx (V4SImode
);
19272 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19273 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19274 CONST0_RTX (V4SImode
),
19276 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19277 /* Cast the V4SImode vector back to a V8HImode vector. */
19278 tmp1
= gen_reg_rtx (V8HImode
);
19279 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
19280 /* Duplicate the low short through the whole low SImode word. */
19281 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
19282 /* Cast the V8HImode vector back to a V4SImode vector. */
19283 tmp2
= gen_reg_rtx (V4SImode
);
19284 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19285 /* Replicate the low element of the V4SImode vector. */
19286 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19287 /* Cast the V2SImode back to V8HImode, and store in target. */
19288 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
19299 /* Extend QImode to SImode using a paradoxical SUBREG. */
19300 tmp1
= gen_reg_rtx (SImode
);
19301 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19302 /* Insert the SImode value as low element of V4SImode vector. */
19303 tmp2
= gen_reg_rtx (V4SImode
);
19304 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19305 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19306 CONST0_RTX (V4SImode
),
19308 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19309 /* Cast the V4SImode vector back to a V16QImode vector. */
19310 tmp1
= gen_reg_rtx (V16QImode
);
19311 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
19312 /* Duplicate the low byte through the whole low SImode word. */
19313 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19314 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19315 /* Cast the V16QImode vector back to a V4SImode vector. */
19316 tmp2
= gen_reg_rtx (V4SImode
);
19317 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19318 /* Replicate the low element of the V4SImode vector. */
19319 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19320 /* Cast the V2SImode back to V16QImode, and store in target. */
19321 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
19329 /* Replicate the value once into the next wider mode and recurse. */
19330 val
= convert_modes (wsmode
, smode
, val
, true);
19331 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
19332 GEN_INT (GET_MODE_BITSIZE (smode
)),
19333 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19334 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
19336 x
= gen_reg_rtx (wvmode
);
19337 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
19338 gcc_unreachable ();
19339 emit_move_insn (target
, gen_lowpart (mode
, x
));
19347 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19348 whose ONE_VAR element is VAR, and other elements are zero. Return true
19352 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
19353 rtx target
, rtx var
, int one_var
)
19355 enum machine_mode vsimode
;
19371 var
= force_reg (GET_MODE_INNER (mode
), var
);
19372 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
19373 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19378 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
19379 new_target
= gen_reg_rtx (mode
);
19381 new_target
= target
;
19382 var
= force_reg (GET_MODE_INNER (mode
), var
);
19383 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
19384 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
19385 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
19388 /* We need to shuffle the value to the correct position, so
19389 create a new pseudo to store the intermediate result. */
19391 /* With SSE2, we can use the integer shuffle insns. */
19392 if (mode
!= V4SFmode
&& TARGET_SSE2
)
19394 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
19396 GEN_INT (one_var
== 1 ? 0 : 1),
19397 GEN_INT (one_var
== 2 ? 0 : 1),
19398 GEN_INT (one_var
== 3 ? 0 : 1)));
19399 if (target
!= new_target
)
19400 emit_move_insn (target
, new_target
);
19404 /* Otherwise convert the intermediate result to V4SFmode and
19405 use the SSE1 shuffle instructions. */
19406 if (mode
!= V4SFmode
)
19408 tmp
= gen_reg_rtx (V4SFmode
);
19409 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
19414 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
19416 GEN_INT (one_var
== 1 ? 0 : 1),
19417 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
19418 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
19420 if (mode
!= V4SFmode
)
19421 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
19422 else if (tmp
!= target
)
19423 emit_move_insn (target
, tmp
);
19425 else if (target
!= new_target
)
19426 emit_move_insn (target
, new_target
);
19431 vsimode
= V4SImode
;
19437 vsimode
= V2SImode
;
19443 /* Zero extend the variable element to SImode and recurse. */
19444 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
19446 x
= gen_reg_rtx (vsimode
);
19447 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
19449 gcc_unreachable ();
19451 emit_move_insn (target
, gen_lowpart (mode
, x
));
19459 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19460 consisting of the values in VALS. It is known that all elements
19461 except ONE_VAR are constants. Return true if successful. */
19464 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
19465 rtx target
, rtx vals
, int one_var
)
19467 rtx var
= XVECEXP (vals
, 0, one_var
);
19468 enum machine_mode wmode
;
19471 const_vec
= copy_rtx (vals
);
19472 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
19473 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
19481 /* For the two element vectors, it's just as easy to use
19482 the general case. */
19498 /* There's no way to set one QImode entry easily. Combine
19499 the variable value with its adjacent constant value, and
19500 promote to an HImode set. */
19501 x
= XVECEXP (vals
, 0, one_var
^ 1);
19504 var
= convert_modes (HImode
, QImode
, var
, true);
19505 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
19506 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19507 x
= GEN_INT (INTVAL (x
) & 0xff);
19511 var
= convert_modes (HImode
, QImode
, var
, true);
19512 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
19514 if (x
!= const0_rtx
)
19515 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
19516 1, OPTAB_LIB_WIDEN
);
19518 x
= gen_reg_rtx (wmode
);
19519 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
19520 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
19522 emit_move_insn (target
, gen_lowpart (mode
, x
));
19529 emit_move_insn (target
, const_vec
);
19530 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
19534 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19535 all values variable, and none identical. */
19538 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
19539 rtx target
, rtx vals
)
19541 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
19542 rtx op0
= NULL
, op1
= NULL
;
19543 bool use_vec_concat
= false;
19549 if (!mmx_ok
&& !TARGET_SSE
)
19555 /* For the two element vectors, we always implement VEC_CONCAT. */
19556 op0
= XVECEXP (vals
, 0, 0);
19557 op1
= XVECEXP (vals
, 0, 1);
19558 use_vec_concat
= true;
19562 half_mode
= V2SFmode
;
19565 half_mode
= V2SImode
;
19571 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19572 Recurse to load the two halves. */
19574 op0
= gen_reg_rtx (half_mode
);
19575 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
19576 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
19578 op1
= gen_reg_rtx (half_mode
);
19579 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
19580 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
19582 use_vec_concat
= true;
19593 gcc_unreachable ();
19596 if (use_vec_concat
)
19598 if (!register_operand (op0
, half_mode
))
19599 op0
= force_reg (half_mode
, op0
);
19600 if (!register_operand (op1
, half_mode
))
19601 op1
= force_reg (half_mode
, op1
);
19603 emit_insn (gen_rtx_SET (VOIDmode
, target
,
19604 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
19608 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
19609 enum machine_mode inner_mode
;
19610 rtx words
[4], shift
;
19612 inner_mode
= GET_MODE_INNER (mode
);
19613 n_elts
= GET_MODE_NUNITS (mode
);
19614 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
19615 n_elt_per_word
= n_elts
/ n_words
;
19616 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
19618 for (i
= 0; i
< n_words
; ++i
)
19620 rtx word
= NULL_RTX
;
19622 for (j
= 0; j
< n_elt_per_word
; ++j
)
19624 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
19625 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
19631 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
19632 word
, 1, OPTAB_LIB_WIDEN
);
19633 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
19634 word
, 1, OPTAB_LIB_WIDEN
);
19642 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
19643 else if (n_words
== 2)
19645 rtx tmp
= gen_reg_rtx (mode
);
19646 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
19647 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
19648 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
19649 emit_move_insn (target
, tmp
);
19651 else if (n_words
== 4)
19653 rtx tmp
= gen_reg_rtx (V4SImode
);
19654 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
19655 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
19656 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
19659 gcc_unreachable ();
19663 /* Initialize vector TARGET via VALS. Suppress the use of MMX
19664 instructions unless MMX_OK is true. */
19667 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
19669 enum machine_mode mode
= GET_MODE (target
);
19670 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
19671 int n_elts
= GET_MODE_NUNITS (mode
);
19672 int n_var
= 0, one_var
= -1;
19673 bool all_same
= true, all_const_zero
= true;
19677 for (i
= 0; i
< n_elts
; ++i
)
19679 x
= XVECEXP (vals
, 0, i
);
19680 if (!CONSTANT_P (x
))
19681 n_var
++, one_var
= i
;
19682 else if (x
!= CONST0_RTX (inner_mode
))
19683 all_const_zero
= false;
19684 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
19688 /* Constants are best loaded from the constant pool. */
19691 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
19695 /* If all values are identical, broadcast the value. */
19697 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
19698 XVECEXP (vals
, 0, 0)))
19701 /* Values where only one field is non-constant are best loaded from
19702 the pool and overwritten via move later. */
19706 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
19707 XVECEXP (vals
, 0, one_var
),
19711 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
19715 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
19719 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
19721 enum machine_mode mode
= GET_MODE (target
);
19722 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
19723 bool use_vec_merge
= false;
19732 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
19733 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
19735 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
19737 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
19738 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
19748 /* For the two element vectors, we implement a VEC_CONCAT with
19749 the extraction of the other element. */
19751 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
19752 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
19755 op0
= val
, op1
= tmp
;
19757 op0
= tmp
, op1
= val
;
19759 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
19760 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
19768 use_vec_merge
= true;
19772 /* tmp = target = A B C D */
19773 tmp
= copy_to_reg (target
);
19774 /* target = A A B B */
19775 emit_insn (gen_sse_unpcklps (target
, target
, target
));
19776 /* target = X A B B */
19777 ix86_expand_vector_set (false, target
, val
, 0);
19778 /* target = A X C D */
19779 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
19780 GEN_INT (1), GEN_INT (0),
19781 GEN_INT (2+4), GEN_INT (3+4)));
19785 /* tmp = target = A B C D */
19786 tmp
= copy_to_reg (target
);
19787 /* tmp = X B C D */
19788 ix86_expand_vector_set (false, tmp
, val
, 0);
19789 /* target = A B X D */
19790 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
19791 GEN_INT (0), GEN_INT (1),
19792 GEN_INT (0+4), GEN_INT (3+4)));
19796 /* tmp = target = A B C D */
19797 tmp
= copy_to_reg (target
);
19798 /* tmp = X B C D */
19799 ix86_expand_vector_set (false, tmp
, val
, 0);
19800 /* target = A B X D */
19801 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
19802 GEN_INT (0), GEN_INT (1),
19803 GEN_INT (2+4), GEN_INT (0+4)));
19807 gcc_unreachable ();
19812 /* Element 0 handled by vec_merge below. */
19815 use_vec_merge
= true;
19821 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19822 store into element 0, then shuffle them back. */
19826 order
[0] = GEN_INT (elt
);
19827 order
[1] = const1_rtx
;
19828 order
[2] = const2_rtx
;
19829 order
[3] = GEN_INT (3);
19830 order
[elt
] = const0_rtx
;
19832 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
19833 order
[1], order
[2], order
[3]));
19835 ix86_expand_vector_set (false, target
, val
, 0);
19837 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
19838 order
[1], order
[2], order
[3]));
19842 /* For SSE1, we have to reuse the V4SF code. */
19843 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
19844 gen_lowpart (SFmode
, val
), elt
);
19849 use_vec_merge
= TARGET_SSE2
;
19852 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
19863 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19864 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
19865 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
19869 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
19871 emit_move_insn (mem
, target
);
19873 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
19874 emit_move_insn (tmp
, val
);
19876 emit_move_insn (target
, mem
);
19881 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
19883 enum machine_mode mode
= GET_MODE (vec
);
19884 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
19885 bool use_vec_extr
= false;
19898 use_vec_extr
= true;
19910 tmp
= gen_reg_rtx (mode
);
19911 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
19912 GEN_INT (elt
), GEN_INT (elt
),
19913 GEN_INT (elt
+4), GEN_INT (elt
+4)));
19917 tmp
= gen_reg_rtx (mode
);
19918 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
19922 gcc_unreachable ();
19925 use_vec_extr
= true;
19940 tmp
= gen_reg_rtx (mode
);
19941 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
19942 GEN_INT (elt
), GEN_INT (elt
),
19943 GEN_INT (elt
), GEN_INT (elt
)));
19947 tmp
= gen_reg_rtx (mode
);
19948 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
19952 gcc_unreachable ();
19955 use_vec_extr
= true;
19960 /* For SSE1, we have to reuse the V4SF code. */
19961 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
19962 gen_lowpart (V4SFmode
, vec
), elt
);
19968 use_vec_extr
= TARGET_SSE2
;
19971 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
19976 /* ??? Could extract the appropriate HImode element and shift. */
19983 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
19984 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
19986 /* Let the rtl optimizers know about the zero extension performed. */
19987 if (inner_mode
== HImode
)
19989 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
19990 target
= gen_lowpart (SImode
, target
);
19993 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
19997 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
19999 emit_move_insn (mem
, vec
);
20001 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20002 emit_move_insn (target
, tmp
);
20006 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20007 pattern to reduce; DEST is the destination; IN is the input vector. */
20010 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20012 rtx tmp1
, tmp2
, tmp3
;
20014 tmp1
= gen_reg_rtx (V4SFmode
);
20015 tmp2
= gen_reg_rtx (V4SFmode
);
20016 tmp3
= gen_reg_rtx (V4SFmode
);
20018 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20019 emit_insn (fn (tmp2
, tmp1
, in
));
20021 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20022 GEN_INT (1), GEN_INT (1),
20023 GEN_INT (1+4), GEN_INT (1+4)));
20024 emit_insn (fn (dest
, tmp2
, tmp3
));
20027 /* Target hook for scalar_mode_supported_p. */
20029 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20031 if (DECIMAL_FLOAT_MODE_P (mode
))
20034 return default_scalar_mode_supported_p (mode
);
20037 /* Implements target hook vector_mode_supported_p. */
20039 ix86_vector_mode_supported_p (enum machine_mode mode
)
20041 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20043 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20045 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20047 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20052 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20054 We do this in the new i386 backend to maintain source compatibility
20055 with the old cc0-based compiler. */
20058 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20059 tree inputs ATTRIBUTE_UNUSED
,
20062 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20064 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20069 /* Return true if this goes in small data/bss. */
20072 ix86_in_large_data_p (tree exp
)
20074 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20077 /* Functions are never large data. */
20078 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20081 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20083 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20084 if (strcmp (section
, ".ldata") == 0
20085 || strcmp (section
, ".lbss") == 0)
20091 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20093 /* If this is an incomplete type with size 0, then we can't put it
20094 in data because it might be too big when completed. */
20095 if (!size
|| size
> ix86_section_threshold
)
20102 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20104 default_encode_section_info (decl
, rtl
, first
);
20106 if (TREE_CODE (decl
) == VAR_DECL
20107 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20108 && ix86_in_large_data_p (decl
))
20109 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20112 /* Worker function for REVERSE_CONDITION. */
20115 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20117 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20118 ? reverse_condition (code
)
20119 : reverse_condition_maybe_unordered (code
));
20122 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20126 output_387_reg_move (rtx insn
, rtx
*operands
)
20128 if (REG_P (operands
[1])
20129 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20131 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20132 return output_387_ffreep (operands
, 0);
20133 return "fstp\t%y0";
20135 if (STACK_TOP_P (operands
[0]))
20136 return "fld%z1\t%y1";
20140 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20141 FP status register is set. */
20144 ix86_emit_fp_unordered_jump (rtx label
)
20146 rtx reg
= gen_reg_rtx (HImode
);
20149 emit_insn (gen_x86_fnstsw_1 (reg
));
20151 if (TARGET_USE_SAHF
)
20153 emit_insn (gen_x86_sahf_1 (reg
));
20155 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20156 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
20160 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20162 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20163 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20166 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20167 gen_rtx_LABEL_REF (VOIDmode
, label
),
20169 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20170 emit_jump_insn (temp
);
20173 /* Output code to perform a log1p XFmode calculation. */
20175 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20177 rtx label1
= gen_label_rtx ();
20178 rtx label2
= gen_label_rtx ();
20180 rtx tmp
= gen_reg_rtx (XFmode
);
20181 rtx tmp2
= gen_reg_rtx (XFmode
);
20183 emit_insn (gen_absxf2 (tmp
, op1
));
20184 emit_insn (gen_cmpxf (tmp
,
20185 CONST_DOUBLE_FROM_REAL_VALUE (
20186 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20188 emit_jump_insn (gen_bge (label1
));
20190 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20191 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20192 emit_jump (label2
);
20194 emit_label (label1
);
20195 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20196 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20197 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20198 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20200 emit_label (label2
);
20203 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20206 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20209 /* With Binutils 2.15, the "@unwind" marker must be specified on
20210 every occurrence of the ".eh_frame" section, not just the first
20213 && strcmp (name
, ".eh_frame") == 0)
20215 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20216 flags
& SECTION_WRITE
? "aw" : "a");
20219 default_elf_asm_named_section (name
, flags
, decl
);
20222 /* Return the mangling of TYPE if it is an extended fundamental type. */
20224 static const char *
20225 ix86_mangle_fundamental_type (tree type
)
20227 switch (TYPE_MODE (type
))
20230 /* __float128 is "g". */
20233 /* "long double" or __float80 is "e". */
20240 /* For 32-bit code we can save PIC register setup by using
20241 __stack_chk_fail_local hidden function instead of calling
20242 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20243 register, so it is better to call __stack_chk_fail directly. */
20246 ix86_stack_protect_fail (void)
20248 return TARGET_64BIT
20249 ? default_external_stack_protect_fail ()
20250 : default_hidden_stack_protect_fail ();
20253 /* Select a format to encode pointers in exception handling data. CODE
20254 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20255 true if the symbol may be affected by dynamic relocations.
20257 ??? All x86 object file formats are capable of representing this.
20258 After all, the relocation needed is the same as for the call insn.
20259 Whether or not a particular assembler allows us to enter such, I
20260 guess we'll have to see. */
20262 asm_preferred_eh_data_format (int code
, int global
)
20266 int type
= DW_EH_PE_sdata8
;
20268 || ix86_cmodel
== CM_SMALL_PIC
20269 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
20270 type
= DW_EH_PE_sdata4
;
20271 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
20273 if (ix86_cmodel
== CM_SMALL
20274 || (ix86_cmodel
== CM_MEDIUM
&& code
))
20275 return DW_EH_PE_udata4
;
20276 return DW_EH_PE_absptr
;
20279 /* Expand copysign from SIGN to the positive value ABS_VALUE
20280 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20283 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
20285 enum machine_mode mode
= GET_MODE (sign
);
20286 rtx sgn
= gen_reg_rtx (mode
);
20287 if (mask
== NULL_RTX
)
20289 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
20290 if (!VECTOR_MODE_P (mode
))
20292 /* We need to generate a scalar mode mask in this case. */
20293 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20294 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20295 mask
= gen_reg_rtx (mode
);
20296 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20300 mask
= gen_rtx_NOT (mode
, mask
);
20301 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
20302 gen_rtx_AND (mode
, mask
, sign
)));
20303 emit_insn (gen_rtx_SET (VOIDmode
, result
,
20304 gen_rtx_IOR (mode
, abs_value
, sgn
)));
20307 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20308 mask for masking out the sign-bit is stored in *SMASK, if that is
20311 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
20313 enum machine_mode mode
= GET_MODE (op0
);
20316 xa
= gen_reg_rtx (mode
);
20317 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
20318 if (!VECTOR_MODE_P (mode
))
20320 /* We need to generate a scalar mode mask in this case. */
20321 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20322 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20323 mask
= gen_reg_rtx (mode
);
20324 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20326 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
20327 gen_rtx_AND (mode
, op0
, mask
)));
20335 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20336 swapping the operands if SWAP_OPERANDS is true. The expanded
20337 code is a forward jump to a newly created label in case the
20338 comparison is true. The generated label rtx is returned. */
20340 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
20341 bool swap_operands
)
20352 label
= gen_label_rtx ();
20353 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
20354 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20355 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
20356 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
20357 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
20358 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
20359 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
20360 JUMP_LABEL (tmp
) = label
;
20365 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
20366 using comparison code CODE. Operands are swapped for the comparison if
20367 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
20369 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
20370 bool swap_operands
)
20372 enum machine_mode mode
= GET_MODE (op0
);
20373 rtx mask
= gen_reg_rtx (mode
);
20382 if (mode
== DFmode
)
20383 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
20384 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
20386 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
20387 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
20392 /* Generate and return a rtx of mode MODE for 2**n where n is the number
20393 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
20395 ix86_gen_TWO52 (enum machine_mode mode
)
20397 REAL_VALUE_TYPE TWO52r
;
20400 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
20401 TWO52
= const_double_from_real_value (TWO52r
, mode
);
20402 TWO52
= force_reg (mode
, TWO52
);
20407 /* Expand SSE sequence for computing lround from OP1 storing
20410 ix86_expand_lround (rtx op0
, rtx op1
)
20412 /* C code for the stuff we're doing below:
20413 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
20416 enum machine_mode mode
= GET_MODE (op1
);
20417 const struct real_format
*fmt
;
20418 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
20421 /* load nextafter (0.5, 0.0) */
20422 fmt
= REAL_MODE_FORMAT (mode
);
20423 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
20424 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
20426 /* adj = copysign (0.5, op1) */
20427 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
20428 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
20430 /* adj = op1 + adj */
20431 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
20433 /* op0 = (imode)adj */
20434 expand_fix (op0
, adj
, 0);
20437 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
20440 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
20442 /* C code for the stuff we're doing below (for do_floor):
20444 xi -= (double)xi > op1 ? 1 : 0;
20447 enum machine_mode fmode
= GET_MODE (op1
);
20448 enum machine_mode imode
= GET_MODE (op0
);
20449 rtx ireg
, freg
, label
, tmp
;
20451 /* reg = (long)op1 */
20452 ireg
= gen_reg_rtx (imode
);
20453 expand_fix (ireg
, op1
, 0);
20455 /* freg = (double)reg */
20456 freg
= gen_reg_rtx (fmode
);
20457 expand_float (freg
, ireg
, 0);
20459 /* ireg = (freg > op1) ? ireg - 1 : ireg */
20460 label
= ix86_expand_sse_compare_and_jump (UNLE
,
20461 freg
, op1
, !do_floor
);
20462 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
20463 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
20464 emit_move_insn (ireg
, tmp
);
20466 emit_label (label
);
20467 LABEL_NUSES (label
) = 1;
20469 emit_move_insn (op0
, ireg
);
20472 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
20473 result in OPERAND0. */
20475 ix86_expand_rint (rtx operand0
, rtx operand1
)
20477 /* C code for the stuff we're doing below:
20478 xa = fabs (operand1);
20479 if (!isless (xa, 2**52))
20481 xa = xa + 2**52 - 2**52;
20482 return copysign (xa, operand1);
20484 enum machine_mode mode
= GET_MODE (operand0
);
20485 rtx res
, xa
, label
, TWO52
, mask
;
20487 res
= gen_reg_rtx (mode
);
20488 emit_move_insn (res
, operand1
);
20490 /* xa = abs (operand1) */
20491 xa
= ix86_expand_sse_fabs (res
, &mask
);
20493 /* if (!isless (xa, TWO52)) goto label; */
20494 TWO52
= ix86_gen_TWO52 (mode
);
20495 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20497 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20498 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
20500 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
20502 emit_label (label
);
20503 LABEL_NUSES (label
) = 1;
20505 emit_move_insn (operand0
, res
);
20508 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20511 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
20513 /* C code for the stuff we expand below.
20514 double xa = fabs (x), x2;
20515 if (!isless (xa, TWO52))
20517 xa = xa + TWO52 - TWO52;
20518 x2 = copysign (xa, x);
20527 enum machine_mode mode
= GET_MODE (operand0
);
20528 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
20530 TWO52
= ix86_gen_TWO52 (mode
);
20532 /* Temporary for holding the result, initialized to the input
20533 operand to ease control flow. */
20534 res
= gen_reg_rtx (mode
);
20535 emit_move_insn (res
, operand1
);
20537 /* xa = abs (operand1) */
20538 xa
= ix86_expand_sse_fabs (res
, &mask
);
20540 /* if (!isless (xa, TWO52)) goto label; */
20541 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20543 /* xa = xa + TWO52 - TWO52; */
20544 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20545 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
20547 /* xa = copysign (xa, operand1) */
20548 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
20550 /* generate 1.0 or -1.0 */
20551 one
= force_reg (mode
,
20552 const_double_from_real_value (do_floor
20553 ? dconst1
: dconstm1
, mode
));
20555 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20556 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
20557 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20558 gen_rtx_AND (mode
, one
, tmp
)));
20559 /* We always need to subtract here to preserve signed zero. */
20560 tmp
= expand_simple_binop (mode
, MINUS
,
20561 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
20562 emit_move_insn (res
, tmp
);
20564 emit_label (label
);
20565 LABEL_NUSES (label
) = 1;
20567 emit_move_insn (operand0
, res
);
20570 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20573 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
20575 /* C code for the stuff we expand below.
20576 double xa = fabs (x), x2;
20577 if (!isless (xa, TWO52))
20579 x2 = (double)(long)x;
20586 if (HONOR_SIGNED_ZEROS (mode))
20587 return copysign (x2, x);
20590 enum machine_mode mode
= GET_MODE (operand0
);
20591 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
20593 TWO52
= ix86_gen_TWO52 (mode
);
20595 /* Temporary for holding the result, initialized to the input
20596 operand to ease control flow. */
20597 res
= gen_reg_rtx (mode
);
20598 emit_move_insn (res
, operand1
);
20600 /* xa = abs (operand1) */
20601 xa
= ix86_expand_sse_fabs (res
, &mask
);
20603 /* if (!isless (xa, TWO52)) goto label; */
20604 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20606 /* xa = (double)(long)x */
20607 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
20608 expand_fix (xi
, res
, 0);
20609 expand_float (xa
, xi
, 0);
20612 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
20614 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20615 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
20616 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20617 gen_rtx_AND (mode
, one
, tmp
)));
20618 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
20619 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
20620 emit_move_insn (res
, tmp
);
20622 if (HONOR_SIGNED_ZEROS (mode
))
20623 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
20625 emit_label (label
);
20626 LABEL_NUSES (label
) = 1;
20628 emit_move_insn (operand0
, res
);
20631 /* Expand SSE sequence for computing round from OPERAND1 storing
20632 into OPERAND0. Sequence that works without relying on DImode truncation
20633 via cvttsd2siq that is only available on 64bit targets. */
20635 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
20637 /* C code for the stuff we expand below.
20638 double xa = fabs (x), xa2, x2;
20639 if (!isless (xa, TWO52))
20641 Using the absolute value and copying back sign makes
20642 -0.0 -> -0.0 correct.
20643 xa2 = xa + TWO52 - TWO52;
20648 else if (dxa > 0.5)
20650 x2 = copysign (xa2, x);
20653 enum machine_mode mode
= GET_MODE (operand0
);
20654 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
20656 TWO52
= ix86_gen_TWO52 (mode
);
20658 /* Temporary for holding the result, initialized to the input
20659 operand to ease control flow. */
20660 res
= gen_reg_rtx (mode
);
20661 emit_move_insn (res
, operand1
);
20663 /* xa = abs (operand1) */
20664 xa
= ix86_expand_sse_fabs (res
, &mask
);
20666 /* if (!isless (xa, TWO52)) goto label; */
20667 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20669 /* xa2 = xa + TWO52 - TWO52; */
20670 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20671 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
20673 /* dxa = xa2 - xa; */
20674 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
20676 /* generate 0.5, 1.0 and -0.5 */
20677 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
20678 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
20679 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
20683 tmp
= gen_reg_rtx (mode
);
20684 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
20685 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
20686 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20687 gen_rtx_AND (mode
, one
, tmp
)));
20688 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
20689 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
20690 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
20691 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20692 gen_rtx_AND (mode
, one
, tmp
)));
20693 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
20695 /* res = copysign (xa2, operand1) */
20696 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
20698 emit_label (label
);
20699 LABEL_NUSES (label
) = 1;
20701 emit_move_insn (operand0
, res
);
20704 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20707 ix86_expand_trunc (rtx operand0
, rtx operand1
)
20709 /* C code for SSE variant we expand below.
20710 double xa = fabs (x), x2;
20711 if (!isless (xa, TWO52))
20713 x2 = (double)(long)x;
20714 if (HONOR_SIGNED_ZEROS (mode))
20715 return copysign (x2, x);
20718 enum machine_mode mode
= GET_MODE (operand0
);
20719 rtx xa
, xi
, TWO52
, label
, res
, mask
;
20721 TWO52
= ix86_gen_TWO52 (mode
);
20723 /* Temporary for holding the result, initialized to the input
20724 operand to ease control flow. */
20725 res
= gen_reg_rtx (mode
);
20726 emit_move_insn (res
, operand1
);
20728 /* xa = abs (operand1) */
20729 xa
= ix86_expand_sse_fabs (res
, &mask
);
20731 /* if (!isless (xa, TWO52)) goto label; */
20732 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20734 /* x = (double)(long)x */
20735 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
20736 expand_fix (xi
, res
, 0);
20737 expand_float (res
, xi
, 0);
20739 if (HONOR_SIGNED_ZEROS (mode
))
20740 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
20742 emit_label (label
);
20743 LABEL_NUSES (label
) = 1;
20745 emit_move_insn (operand0
, res
);
20748 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20751 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
20753 enum machine_mode mode
= GET_MODE (operand0
);
20754 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
20756 /* C code for SSE variant we expand below.
20757 double xa = fabs (x), x2;
20758 if (!isless (xa, TWO52))
20760 xa2 = xa + TWO52 - TWO52;
20764 x2 = copysign (xa2, x);
20768 TWO52
= ix86_gen_TWO52 (mode
);
20770 /* Temporary for holding the result, initialized to the input
20771 operand to ease control flow. */
20772 res
= gen_reg_rtx (mode
);
20773 emit_move_insn (res
, operand1
);
20775 /* xa = abs (operand1) */
20776 xa
= ix86_expand_sse_fabs (res
, &smask
);
20778 /* if (!isless (xa, TWO52)) goto label; */
20779 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20781 /* res = xa + TWO52 - TWO52; */
20782 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20783 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
20784 emit_move_insn (res
, tmp
);
20787 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
20789 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
20790 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
20791 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
20792 gen_rtx_AND (mode
, mask
, one
)));
20793 tmp
= expand_simple_binop (mode
, MINUS
,
20794 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
20795 emit_move_insn (res
, tmp
);
20797 /* res = copysign (res, operand1) */
20798 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
20800 emit_label (label
);
20801 LABEL_NUSES (label
) = 1;
20803 emit_move_insn (operand0
, res
);
20806 /* Expand SSE sequence for computing round from OPERAND1 storing
20809 ix86_expand_round (rtx operand0
, rtx operand1
)
20811 /* C code for the stuff we're doing below:
20812 double xa = fabs (x);
20813 if (!isless (xa, TWO52))
20815 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20816 return copysign (xa, x);
20818 enum machine_mode mode
= GET_MODE (operand0
);
20819 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
20820 const struct real_format
*fmt
;
20821 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
20823 /* Temporary for holding the result, initialized to the input
20824 operand to ease control flow. */
20825 res
= gen_reg_rtx (mode
);
20826 emit_move_insn (res
, operand1
);
20828 TWO52
= ix86_gen_TWO52 (mode
);
20829 xa
= ix86_expand_sse_fabs (res
, &mask
);
20830 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20832 /* load nextafter (0.5, 0.0) */
20833 fmt
= REAL_MODE_FORMAT (mode
);
20834 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
20835 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
20837 /* xa = xa + 0.5 */
20838 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
20839 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
20841 /* xa = (double)(int64_t)xa */
20842 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
20843 expand_fix (xi
, xa
, 0);
20844 expand_float (xa
, xi
, 0);
20846 /* res = copysign (xa, operand1) */
20847 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
20849 emit_label (label
);
20850 LABEL_NUSES (label
) = 1;
20852 emit_move_insn (operand0
, res
);
20855 #include "gt-i386.h"