1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit: -1 means "no limit" unless the target
   configuration overrides it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DI map to slots 0-3; every other mode shares slot 4
   ("other").  The original macro was missing the final ": 4)" arm,
   leaving a dangling line continuation.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   So a size cost of N bytes is expressed as (N)*2 to stay on the same
   scale as COSTS_N_INSNS.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop descriptor used where a 32-bit/64-bit variant of a
   cost table does not apply: always fall back to a library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Instruction cost table used when tuning for Intel Core 2.  All values
   are relative to an add instruction (see COSTS_N_INSNS).
   NOTE(review): this initializer appears to have lost interior fields in
   extraction (e.g. a MOVE_RATIO entry after the "large" insn field and a
   branch-cost entry after the prefetch fields) as well as the closing
   "};" -- verify against the upstream struct processor_costs layout.  */
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /*                               HI */
   COSTS_N_INSNS (3),           /*                               SI */
   COSTS_N_INSNS (3),           /*                               DI */
   COSTS_N_INSNS (3)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),          /*                          HI */
   COSTS_N_INSNS (22),          /*                          SI */
   COSTS_N_INSNS (22),          /*                          DI */
   COSTS_N_INSNS (22)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {6, 6, 6},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   (comment was mislabeled "loading
                                   integer registers"; this row parallels
                                   the fp-store row of the generic
                                   tables below)  */
  2,                            /* cost of moving MMX register */
  {6, 6},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {6, 6, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),           /* cost of FSQRT instruction.  */
  /* String-operation algorithm tables: {32-bit variant, 64-bit variant},
     each a list of {max byte count, algorithm} pairs terminated by -1.
     Presumably the first aggregate is for memcpy and the second for
     memset, matching the generic tables below -- verify against
     struct processor_costs.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}}
/* Generic64 should produce code tuned for Nocona and K8.  */
/* NOTE(review): this initializer appears to have lost interior fields in
   extraction (e.g. a MOVE_RATIO entry and the branch-cost value that the
   sixtrack comment below refers to) as well as the closing "};" --
   verify against the upstream struct processor_costs layout.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /*                               HI */
   COSTS_N_INSNS (3),           /*                               SI */
   COSTS_N_INSNS (4),           /*                               DI */
   COSTS_N_INSNS (2)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /*                          HI */
   COSTS_N_INSNS (42),          /*                          SI */
   COSTS_N_INSNS (74),          /*                          DI */
   COSTS_N_INSNS (74)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.
     NOTE(review): the value this comment describes is missing here.  */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  /* Stringop tables: the 32-bit slot is a dummy; only the 64-bit slot is
     meaningful for generic64.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
/* NOTE(review): this initializer appears to have lost interior fields
   (e.g. MOVE_RATIO and branch-cost entries) as well as the closing "};"
   in extraction -- verify against the upstream struct processor_costs
   layout.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /*                               HI */
   COSTS_N_INSNS (3),           /*                               SI */
   COSTS_N_INSNS (4),           /*                               DI */
   COSTS_N_INSNS (2)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /*                          HI */
   COSTS_N_INSNS (42),          /*                          SI */
   COSTS_N_INSNS (74),          /*                          DI */
   COSTS_N_INSNS (74)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  /* Stringop tables: only the 32-bit slot is meaningful for generic32;
     the 64-bit slot is a dummy.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
/* Cost table currently in effect.  Points at pentium_cost initially;
   presumably reassigned during option processing to the table matching
   the selected -mtune CPU -- verify at the assignment site.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
/* One bit per PROCESSOR_* enumerator; the compound masks below OR
   together families that share tuning characteristics.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  Indexed by X86_TUNE_*;
   each entry is a mask of CPUs for which the tuning applies.
   NOTE(review): several entries below consist of a comment with no
   following mask value, and the closing "};" is absent -- those values
   appear to have been lost in extraction.  The positional mapping onto
   the X86_TUNE_* enum is therefore broken; verify against upstream
   before relying on this table.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  /* X86_TUNE_USE_BIT_TEST */
  /* (NOTE(review): values for the two entries above are missing.)  */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */
  /* (NOTE(review): values for the two entries above are missing.)  */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC32,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */
  /* (NOTE(review): value for the entry above is missing.)  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  /* (NOTE(review): value for the entry above is missing.)  */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  /* X86_TUNE_SPLIT_LONG_MOVES */
  /* X86_TUNE_READ_MODIFY_WRITE */
  /* X86_TUNE_READ_MODIFY */
  /* (NOTE(review): values for the four entries above are missing.)  */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */
  /* (NOTE(review): values for the four entries above are missing.)  */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that thread 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units.  Experimental results
     shows that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  /* (NOTE(review): values for the two entries above are missing.)  */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  /* (NOTE(review): value for the entry above is missing.)  */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
/* Feature tests against the various architecture variations.  Indexed by
   X86_ARCH_*; each entry is a mask of CPUs providing the feature.
   NOTE(review): only the first entry has a value here; the masks for the
   remaining entries and the closing "};" were lost in extraction.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
/* CPU mask consulted when deciding whether to accumulate outgoing
   function arguments -- presumably backing -maccumulate-outgoing-args
   defaults; verify at the use site.  */
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* CPU mask for architectures where the 387 "fancy math" builtins
   (fsin/fcos etc.) are presumed always usable -- verify at use site.  */
static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;
/* User-forced string-operation algorithm; no_stringop means no override
   (presumably set from a command-line option -- verify at use site).  */
static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  (NOTE(review): tail of this comment was truncated in
   extraction; "epilogue code" restored from context.)  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.
   NOTE(review): the initializer's opening "{", its closing "};", and
   some interior rows/comments appear to have been lost in extraction --
   verify the entry count against FIRST_PSEUDO_REGISTER.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP stack registers.  */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers.  */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers.  */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* Extended (REX) integer registers.  */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* Extended SSE registers.  */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* The "default" register map used in 32bit mode.  Maps gcc register
   numbers to debugger (DBX/DWARF) register numbers; -1 = no mapping.
   NOTE(review): the initializer's "{" and "};" were lost in extraction.  */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
/* gcc register numbers of the six integer argument registers, in the
   x86-64 psABI passing order RDI, RSI, RDX, RCX, R8, R9.
   NOTE(review): the initializer's "{" and "};" were lost in extraction.  */
static int const x86_64_int_parameter_registers[6] =
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* gcc register numbers of the integer value-return registers.
   (Comment fix: gcc regno 1 is RDX, not RDI -- see the parameter
   table above and the DWARF numbering comment below.)
   NOTE(review): the initializer's "{" and "};" were lost in extraction.  */
static int const x86_64_int_return_registers[4] =
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
/* The "default" register map used in 64bit mode.  Maps gcc register
   numbers to debugger register numbers; -1 = no mapping.
   NOTE(review): the initializer's "{" and "};" were lost in extraction.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8 for %eip (no gcc equivalent)
        9 for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
   (NOTE(review): the terminator of this comment was lost in extraction
   and has been restored here.)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  /* NOTE(review): the initializer's "{" and "};" were lost in extraction.  */
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
rtx ix86_compare_op0 = NULL_RTX;    /* first operand of the pending compare */
rtx ix86_compare_op1 = NULL_RTX;    /* second operand of the pending compare */
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
/* Define the structure for the machine field in struct function.
   NOTE(review): the opening "{", some members, and the closing "};" of
   this struct were lost in extraction -- verify against upstream.  */
struct stack_local_entry GTY(())
  /* Machine mode of the stack slot.  */
  unsigned short mode;
  /* Link to the next entry (singly linked list).  */
  struct stack_local_entry *next;
/* Structure describing stack frame layout.
   Stack grows downward:
     saved frame pointer if frame_pointer_needed  <- HARD_FRAME_POINTER
     [va_arg registers] (
     > to_allocate                                <- FRAME_POINTER
   NOTE(review): the full layout diagram, the "struct ix86_frame {"
   header, some members, and the closing "};" were lost in extraction;
   the members below belong to that struct.  The comment terminator has
   been restored here.  */
HOST_WIDE_INT frame;
int outgoing_arguments_size;
HOST_WIDE_INT to_allocate;
/* The offsets relative to ARG_POINTER.  */
HOST_WIDE_INT frame_pointer_offset;
HOST_WIDE_INT hard_frame_pointer_offset;
HOST_WIDE_INT stack_pointer_offset;

/* When save_regs_using_mov is set, emit prologue using
   move instead of push instructions.  */
bool save_regs_using_mov;
/* Code model option.  */
enum cmodel ix86_cmodel;

/* Assembler dialect in use; AT&T by default.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* TLS dialect in use; GNU by default.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* true if cmpxchg16b is supported.
   NOTE(review): the variable this comment describes is missing here
   (lost in extraction).  */

/* true if sahf is supported.  Early Intel CPUs with Intel 64
   lacked LAHF and SAHF instructions supported by AMD64 until
   introduction of Pentium 4 G1 step in December 2005.
   NOTE(review): the variable this comment describes is missing here.  */

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Forward declarations of the static helpers defined later in this file.
   NOTE(review): several prototypes below are truncated mid-parameter-list
   (their continuation lines were lost in extraction) and are reproduced
   here as-is; verify against upstream before compiling.  */
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependent (rtx, rtx, enum attr_type);
static int ix86_agi_dependent (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx *);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_scalar_mode_supported_p (enum machine_mode);
static bool ix86_vector_mode_supported_p (enum machine_mode);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);
static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
static rtx ix86_expand_sse_compare (const struct builtin_description *,
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
static tree ix86_builtin_conversion (enum tree_code, tree);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
   (NOTE(review): comment terminator restored; it was lost in
   extraction.)  */
enum x86_64_reg_class
  /* NOTE(review): the enum's opening "{", several enumerators, and the
     closing "};" were lost in extraction; the name table below lists
     the full set of classes.  */
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,

/* Human-readable names, indexed by enum x86_64_reg_class.
   NOTE(review): closing "};" of this initializer lost in extraction.  */
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"

/* Presumably the maximum number of eightbyte classes a single argument
   can span -- verify at the classification code.  */
#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
/* Set once ext_80387_constants_table has been filled in.  */
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);

/* Section/encoding helpers (some only used on particular subtargets,
   hence ATTRIBUTE_UNUSED).
   NOTE(review): the final declaration below is truncated -- its
   trailing "ATTRIBUTE_UNUSED;" appears to have been lost in
   extraction.  */
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
static section *x86_64_elf_select_section (tree decl, int reloc,
                                           unsigned HOST_WIDE_INT align)
/* Initialize the GCC target structure.
   NOTE(review): several preprocessor conditionals in this section appear
   to have lost their #else/#endif lines in extraction (e.g. the
   TARGET_DLLIMPORT_DECL_ATTRIBUTES, SUBTARGET_ENCODE_SECTION_INFO and
   ASM_QUAD/TARGET_BINDS_LOCAL_P guards), and the opening line of the
   TARGET_DEFAULT_TARGET_FLAGS definition is missing -- verify against
   upstream before building.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

/* NOTE(review): the opening "(TARGET_DEFAULT" line of this macro body
   was lost in extraction.  */
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  | TARGET_64BIT_DEFAULT \
  | TARGET_SUBTARGET_DEFAULT \
  | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
1712 #define TARGET_HANDLE_OPTION ix86_handle_option
1714 #undef TARGET_RTX_COSTS
1715 #define TARGET_RTX_COSTS ix86_rtx_costs
1716 #undef TARGET_ADDRESS_COST
1717 #define TARGET_ADDRESS_COST ix86_address_cost
1719 #undef TARGET_FIXED_CONDITION_CODE_REGS
1720 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1721 #undef TARGET_CC_MODES_COMPATIBLE
1722 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1724 #undef TARGET_MACHINE_DEPENDENT_REORG
1725 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1727 #undef TARGET_BUILD_BUILTIN_VA_LIST
1728 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1730 #undef TARGET_MD_ASM_CLOBBERS
1731 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1733 #undef TARGET_PROMOTE_PROTOTYPES
1734 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1735 #undef TARGET_STRUCT_VALUE_RTX
1736 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1737 #undef TARGET_SETUP_INCOMING_VARARGS
1738 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1739 #undef TARGET_MUST_PASS_IN_STACK
1740 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1741 #undef TARGET_PASS_BY_REFERENCE
1742 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1743 #undef TARGET_INTERNAL_ARG_POINTER
1744 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1745 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1746 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1748 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1749 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1751 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1752 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1754 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1755 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1758 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1759 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1762 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1763 #undef TARGET_INSERT_ATTRIBUTES
1764 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1767 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1768 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1770 #undef TARGET_STACK_PROTECT_FAIL
1771 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1773 #undef TARGET_FUNCTION_VALUE
1774 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The single global target-hook vector.  TARGET_INITIALIZER expands to
   an aggregate initializer built from the TARGET_* macros #defined
   above, so every hook redefined in this file lands here.  */
1776 struct gcc_target targetm
= TARGET_INITIALIZER
;
1779 /* The svr4 ABI for the i386 says that records and unions are returned
1781 #ifndef DEFAULT_PCC_STRUCT_RETURN
1782 #define DEFAULT_PCC_STRUCT_RETURN 1
1785 /* Implement TARGET_HANDLE_OPTION. */
1788 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1795 target_flags
&= ~MASK_3DNOW_A
;
1796 target_flags_explicit
|= MASK_3DNOW_A
;
1803 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1804 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1811 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1812 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1819 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1820 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1827 target_flags
&= ~MASK_SSE4A
;
1828 target_flags_explicit
|= MASK_SSE4A
;
1837 /* Sometimes certain combinations of command options do not make
1838 sense on a particular target machine. You can define a macro
1839 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1840 defined, is executed once just after all the command options have
1843 Don't use this macro to turn on various extra optimizations for
1844 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1847 override_options (void)
1850 int ix86_tune_defaulted
= 0;
1851 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1853 /* Comes from final.c -- no real reason to change it. */
1854 #define MAX_CODE_ALIGN 16
1858 const struct processor_costs
*cost
; /* Processor costs */
1859 const int target_enable
; /* Target flags to enable. */
1860 const int target_disable
; /* Target flags to disable. */
1861 const int align_loop
; /* Default alignments. */
1862 const int align_loop_max_skip
;
1863 const int align_jump
;
1864 const int align_jump_max_skip
;
1865 const int align_func
;
1867 const processor_target_table
[PROCESSOR_max
] =
1869 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1870 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1871 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1872 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1873 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1874 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1875 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1876 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1877 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1878 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1879 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1880 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1881 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1882 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1885 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1888 const char *const name
; /* processor name or nickname. */
1889 const enum processor_type processor
;
1890 const enum pta_flags
1896 PTA_PREFETCH_SSE
= 1 << 4,
1898 PTA_3DNOW_A
= 1 << 6,
1902 PTA_POPCNT
= 1 << 10,
1904 PTA_SSE4A
= 1 << 12,
1905 PTA_NO_SAHF
= 1 << 13
1908 const processor_alias_table
[] =
1910 {"i386", PROCESSOR_I386
, 0},
1911 {"i486", PROCESSOR_I486
, 0},
1912 {"i586", PROCESSOR_PENTIUM
, 0},
1913 {"pentium", PROCESSOR_PENTIUM
, 0},
1914 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1915 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1916 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1917 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1918 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1919 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1920 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1921 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1922 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1923 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1924 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1925 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1926 | PTA_MMX
| PTA_PREFETCH_SSE
},
1927 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1928 | PTA_MMX
| PTA_PREFETCH_SSE
},
1929 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1930 | PTA_MMX
| PTA_PREFETCH_SSE
},
1931 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1932 | PTA_MMX
| PTA_PREFETCH_SSE
1933 | PTA_CX16
| PTA_NO_SAHF
},
1934 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1935 | PTA_64BIT
| PTA_MMX
1936 | PTA_PREFETCH_SSE
| PTA_CX16
},
1937 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1939 {"k6", PROCESSOR_K6
, PTA_MMX
},
1940 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1941 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1942 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1944 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1945 | PTA_3DNOW
| PTA_3DNOW_A
},
1946 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1947 | PTA_3DNOW_A
| PTA_SSE
},
1948 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1949 | PTA_3DNOW_A
| PTA_SSE
},
1950 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1951 | PTA_3DNOW_A
| PTA_SSE
},
1952 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1953 | PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
1954 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1955 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1956 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1957 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1958 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1959 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1960 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1961 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1962 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1963 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1964 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1965 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1966 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1967 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1970 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1973 SUBTARGET_OVERRIDE_OPTIONS
;
1976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1977 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1980 /* -fPIC is the default for x86_64. */
1981 if (TARGET_MACHO
&& TARGET_64BIT
)
1984 /* Set the default values for switches whose default depends on TARGET_64BIT
1985 in case they weren't overwritten by command line options. */
1988 /* Mach-O doesn't support omitting the frame pointer for now. */
1989 if (flag_omit_frame_pointer
== 2)
1990 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1991 if (flag_asynchronous_unwind_tables
== 2)
1992 flag_asynchronous_unwind_tables
= 1;
1993 if (flag_pcc_struct_return
== 2)
1994 flag_pcc_struct_return
= 0;
1998 if (flag_omit_frame_pointer
== 2)
1999 flag_omit_frame_pointer
= 0;
2000 if (flag_asynchronous_unwind_tables
== 2)
2001 flag_asynchronous_unwind_tables
= 0;
2002 if (flag_pcc_struct_return
== 2)
2003 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2006 /* Need to check -mtune=generic first. */
2007 if (ix86_tune_string
)
2009 if (!strcmp (ix86_tune_string
, "generic")
2010 || !strcmp (ix86_tune_string
, "i686")
2011 /* As special support for cross compilers we read -mtune=native
2012 as -mtune=generic. With native compilers we won't see the
2013 -mtune=native, as it was changed by the driver. */
2014 || !strcmp (ix86_tune_string
, "native"))
2017 ix86_tune_string
= "generic64";
2019 ix86_tune_string
= "generic32";
2021 else if (!strncmp (ix86_tune_string
, "generic", 7))
2022 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2026 if (ix86_arch_string
)
2027 ix86_tune_string
= ix86_arch_string
;
2028 if (!ix86_tune_string
)
2030 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2031 ix86_tune_defaulted
= 1;
2034 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2035 need to use a sensible tune option. */
2036 if (!strcmp (ix86_tune_string
, "generic")
2037 || !strcmp (ix86_tune_string
, "x86-64")
2038 || !strcmp (ix86_tune_string
, "i686"))
2041 ix86_tune_string
= "generic64";
2043 ix86_tune_string
= "generic32";
2046 if (ix86_stringop_string
)
2048 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2049 stringop_alg
= rep_prefix_1_byte
;
2050 else if (!strcmp (ix86_stringop_string
, "libcall"))
2051 stringop_alg
= libcall
;
2052 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2053 stringop_alg
= rep_prefix_4_byte
;
2054 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2055 stringop_alg
= rep_prefix_8_byte
;
2056 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2057 stringop_alg
= loop_1_byte
;
2058 else if (!strcmp (ix86_stringop_string
, "loop"))
2059 stringop_alg
= loop
;
2060 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2061 stringop_alg
= unrolled_loop
;
2063 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2065 if (!strcmp (ix86_tune_string
, "x86-64"))
2066 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2067 "-mtune=generic instead as appropriate.");
2069 if (!ix86_arch_string
)
2070 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2071 if (!strcmp (ix86_arch_string
, "generic"))
2072 error ("generic CPU can be used only for -mtune= switch");
2073 if (!strncmp (ix86_arch_string
, "generic", 7))
2074 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2076 if (ix86_cmodel_string
!= 0)
2078 if (!strcmp (ix86_cmodel_string
, "small"))
2079 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2080 else if (!strcmp (ix86_cmodel_string
, "medium"))
2081 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2082 else if (!strcmp (ix86_cmodel_string
, "large"))
2083 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2085 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2086 else if (!strcmp (ix86_cmodel_string
, "32"))
2087 ix86_cmodel
= CM_32
;
2088 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2089 ix86_cmodel
= CM_KERNEL
;
2091 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2095 ix86_cmodel
= CM_32
;
2097 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2099 if (ix86_asm_string
!= 0)
2102 && !strcmp (ix86_asm_string
, "intel"))
2103 ix86_asm_dialect
= ASM_INTEL
;
2104 else if (!strcmp (ix86_asm_string
, "att"))
2105 ix86_asm_dialect
= ASM_ATT
;
2107 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2109 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2110 error ("code model %qs not supported in the %s bit mode",
2111 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2112 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2113 sorry ("%i-bit mode not compiled in",
2114 (target_flags
& MASK_64BIT
) ? 64 : 32);
2116 for (i
= 0; i
< pta_size
; i
++)
2117 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2119 ix86_arch
= processor_alias_table
[i
].processor
;
2120 /* Default cpu tuning to the architecture. */
2121 ix86_tune
= ix86_arch
;
2122 if (processor_alias_table
[i
].flags
& PTA_MMX
2123 && !(target_flags_explicit
& MASK_MMX
))
2124 target_flags
|= MASK_MMX
;
2125 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2126 && !(target_flags_explicit
& MASK_3DNOW
))
2127 target_flags
|= MASK_3DNOW
;
2128 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2129 && !(target_flags_explicit
& MASK_3DNOW_A
))
2130 target_flags
|= MASK_3DNOW_A
;
2131 if (processor_alias_table
[i
].flags
& PTA_SSE
2132 && !(target_flags_explicit
& MASK_SSE
))
2133 target_flags
|= MASK_SSE
;
2134 if (processor_alias_table
[i
].flags
& PTA_SSE2
2135 && !(target_flags_explicit
& MASK_SSE2
))
2136 target_flags
|= MASK_SSE2
;
2137 if (processor_alias_table
[i
].flags
& PTA_SSE3
2138 && !(target_flags_explicit
& MASK_SSE3
))
2139 target_flags
|= MASK_SSE3
;
2140 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2141 && !(target_flags_explicit
& MASK_SSSE3
))
2142 target_flags
|= MASK_SSSE3
;
2143 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2144 x86_prefetch_sse
= true;
2145 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2146 x86_cmpxchg16b
= true;
2147 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2148 && !(target_flags_explicit
& MASK_POPCNT
))
2149 target_flags
|= MASK_POPCNT
;
2150 if (processor_alias_table
[i
].flags
& PTA_ABM
2151 && !(target_flags_explicit
& MASK_ABM
))
2152 target_flags
|= MASK_ABM
;
2153 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2154 && !(target_flags_explicit
& MASK_SSE4A
))
2155 target_flags
|= MASK_SSE4A
;
2156 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2158 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2159 error ("CPU you selected does not support x86-64 "
2165 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2167 ix86_arch_mask
= 1u << ix86_arch
;
2168 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2169 ix86_arch_features
[i
] &= ix86_arch_mask
;
2171 for (i
= 0; i
< pta_size
; i
++)
2172 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2174 ix86_tune
= processor_alias_table
[i
].processor
;
2175 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2177 if (ix86_tune_defaulted
)
2179 ix86_tune_string
= "x86-64";
2180 for (i
= 0; i
< pta_size
; i
++)
2181 if (! strcmp (ix86_tune_string
,
2182 processor_alias_table
[i
].name
))
2184 ix86_tune
= processor_alias_table
[i
].processor
;
2187 error ("CPU you selected does not support x86-64 "
2190 /* Intel CPUs have always interpreted SSE prefetch instructions as
2191 NOPs; so, we can enable SSE prefetch instructions even when
2192 -mtune (rather than -march) points us to a processor that has them.
2193 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2194 higher processors. */
2195 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2196 x86_prefetch_sse
= true;
2200 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2202 ix86_tune_mask
= 1u << ix86_tune
;
2203 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2204 ix86_tune_features
[i
] &= ix86_tune_mask
;
2207 ix86_cost
= &size_cost
;
2209 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2210 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2211 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2213 /* Arrange to set up i386_stack_locals for all functions. */
2214 init_machine_status
= ix86_init_machine_status
;
2216 /* Validate -mregparm= value. */
2217 if (ix86_regparm_string
)
2219 i
= atoi (ix86_regparm_string
);
2220 if (i
< 0 || i
> REGPARM_MAX
)
2221 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2227 ix86_regparm
= REGPARM_MAX
;
2229 /* If the user has provided any of the -malign-* options,
2230 warn and use that value only if -falign-* is not set.
2231 Remove this code in GCC 3.2 or later. */
2232 if (ix86_align_loops_string
)
2234 warning (0, "-malign-loops is obsolete, use -falign-loops");
2235 if (align_loops
== 0)
2237 i
= atoi (ix86_align_loops_string
);
2238 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2239 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2241 align_loops
= 1 << i
;
2245 if (ix86_align_jumps_string
)
2247 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2248 if (align_jumps
== 0)
2250 i
= atoi (ix86_align_jumps_string
);
2251 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2252 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2254 align_jumps
= 1 << i
;
2258 if (ix86_align_funcs_string
)
2260 warning (0, "-malign-functions is obsolete, use -falign-functions");
2261 if (align_functions
== 0)
2263 i
= atoi (ix86_align_funcs_string
);
2264 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2265 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2267 align_functions
= 1 << i
;
2271 /* Default align_* from the processor table. */
2272 if (align_loops
== 0)
2274 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2275 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2277 if (align_jumps
== 0)
2279 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2280 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2282 if (align_functions
== 0)
2284 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2287 /* Validate -mbranch-cost= value, or provide default. */
2288 ix86_branch_cost
= ix86_cost
->branch_cost
;
2289 if (ix86_branch_cost_string
)
2291 i
= atoi (ix86_branch_cost_string
);
2293 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2295 ix86_branch_cost
= i
;
2297 if (ix86_section_threshold_string
)
2299 i
= atoi (ix86_section_threshold_string
);
2301 error ("-mlarge-data-threshold=%d is negative", i
);
2303 ix86_section_threshold
= i
;
2306 if (ix86_tls_dialect_string
)
2308 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2309 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2310 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2311 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2312 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2313 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2315 error ("bad value (%s) for -mtls-dialect= switch",
2316 ix86_tls_dialect_string
);
2319 /* Keep nonleaf frame pointers. */
2320 if (flag_omit_frame_pointer
)
2321 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2322 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2323 flag_omit_frame_pointer
= 1;
2325 /* If we're doing fast math, we don't care about comparison order
2326 wrt NaNs. This lets us use a shorter comparison sequence. */
2327 if (flag_finite_math_only
)
2328 target_flags
&= ~MASK_IEEE_FP
;
2330 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2331 since the insns won't need emulation. */
2332 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2333 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2335 /* Likewise, if the target doesn't have a 387, or we've specified
2336 software floating point, don't use 387 inline intrinsics. */
2338 target_flags
|= MASK_NO_FANCY_MATH_387
;
2340 /* Turn on SSE3 builtins for -mssse3. */
2342 target_flags
|= MASK_SSE3
;
2344 /* Turn on SSE3 builtins for -msse4a. */
2346 target_flags
|= MASK_SSE3
;
2348 /* Turn on SSE2 builtins for -msse3. */
2350 target_flags
|= MASK_SSE2
;
2352 /* Turn on SSE builtins for -msse2. */
2354 target_flags
|= MASK_SSE
;
2356 /* Turn on MMX builtins for -msse. */
2359 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2360 x86_prefetch_sse
= true;
2363 /* Turn on MMX builtins for 3Dnow. */
2365 target_flags
|= MASK_MMX
;
2367 /* Turn on POPCNT builtins for -mabm. */
2369 target_flags
|= MASK_POPCNT
;
2373 if (TARGET_ALIGN_DOUBLE
)
2374 error ("-malign-double makes no sense in the 64bit mode");
2376 error ("-mrtd calling convention not supported in the 64bit mode");
2378 /* Enable by default the SSE and MMX builtins. Do allow the user to
2379 explicitly disable any of these. In particular, disabling SSE and
2380 MMX for kernel code is extremely useful. */
2382 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2383 & ~target_flags_explicit
);
2387 /* i386 ABI does not specify red zone. It still makes sense to use it
2388 when programmer takes care to stack from being destroyed. */
2389 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2390 target_flags
|= MASK_NO_RED_ZONE
;
2393 /* Validate -mpreferred-stack-boundary= value, or provide default.
2394 The default of 128 bits is for Pentium III's SSE __m128. We can't
2395 change it because of optimize_size. Otherwise, we can't mix object
2396 files compiled with -Os and -On. */
2397 ix86_preferred_stack_boundary
= 128;
2398 if (ix86_preferred_stack_boundary_string
)
2400 i
= atoi (ix86_preferred_stack_boundary_string
);
2401 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2402 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2403 TARGET_64BIT
? 4 : 2);
2405 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2408 /* Accept -msseregparm only if at least SSE support is enabled. */
2409 if (TARGET_SSEREGPARM
2411 error ("-msseregparm used without SSE enabled");
2413 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2414 if (ix86_fpmath_string
!= 0)
2416 if (! strcmp (ix86_fpmath_string
, "387"))
2417 ix86_fpmath
= FPMATH_387
;
2418 else if (! strcmp (ix86_fpmath_string
, "sse"))
2422 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2423 ix86_fpmath
= FPMATH_387
;
2426 ix86_fpmath
= FPMATH_SSE
;
2428 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2429 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2433 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2434 ix86_fpmath
= FPMATH_387
;
2436 else if (!TARGET_80387
)
2438 warning (0, "387 instruction set disabled, using SSE arithmetics");
2439 ix86_fpmath
= FPMATH_SSE
;
2442 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2445 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2448 /* If the i387 is disabled, then do not return values in it. */
2450 target_flags
&= ~MASK_FLOAT_RETURNS
;
2452 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2453 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2455 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2457 /* ??? Unwind info is not correct around the CFG unless either a frame
2458 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2459 unwind info generation to be aware of the CFG and propagating states
2461 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2462 || flag_exceptions
|| flag_non_call_exceptions
)
2463 && flag_omit_frame_pointer
2464 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2466 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2467 warning (0, "unwind tables currently require either a frame pointer "
2468 "or -maccumulate-outgoing-args for correctness");
2469 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2472 /* For sane SSE instruction set generation we need fcomi instruction.
2473 It is safe to enable all CMOVE instructions. */
2477 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2480 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2481 p
= strchr (internal_label_prefix
, 'X');
2482 internal_label_prefix_len
= p
- internal_label_prefix
;
2486 /* When scheduling description is not available, disable scheduler pass
2487 so it won't slow down the compilation and make x87 code slower. */
2488 if (!TARGET_SCHEDULE
)
2489 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2491 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2492 set_param_value ("simultaneous-prefetches",
2493 ix86_cost
->simultaneous_prefetches
);
2494 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2495 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2498 /* switch to the appropriate section for output of DECL.
2499 DECL is either a `VAR_DECL' node or a constant of some sort.
2500 RELOC indicates whether forming the initial value of DECL requires
2501 link-time relocations. */
2504 x86_64_elf_select_section (tree decl
, int reloc
,
2505 unsigned HOST_WIDE_INT align
)
2507 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2508 && ix86_in_large_data_p (decl
))
2510 const char *sname
= NULL
;
2511 unsigned int flags
= SECTION_WRITE
;
2512 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2517 case SECCAT_DATA_REL
:
2518 sname
= ".ldata.rel";
2520 case SECCAT_DATA_REL_LOCAL
:
2521 sname
= ".ldata.rel.local";
2523 case SECCAT_DATA_REL_RO
:
2524 sname
= ".ldata.rel.ro";
2526 case SECCAT_DATA_REL_RO_LOCAL
:
2527 sname
= ".ldata.rel.ro.local";
2531 flags
|= SECTION_BSS
;
2534 case SECCAT_RODATA_MERGE_STR
:
2535 case SECCAT_RODATA_MERGE_STR_INIT
:
2536 case SECCAT_RODATA_MERGE_CONST
:
2540 case SECCAT_SRODATA
:
2547 /* We don't split these for medium model. Place them into
2548 default sections and hope for best. */
2553 /* We might get called with string constants, but get_named_section
2554 doesn't like them as they are not DECLs. Also, we need to set
2555 flags in that case. */
2557 return get_section (sname
, flags
, NULL
);
2558 return get_named_section (decl
, sname
, reloc
);
2561 return default_elf_select_section (decl
, reloc
, align
);
2564 /* Build up a unique section name, expressed as a
2565 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2566 RELOC indicates whether the initial value of EXP requires
2567 link-time relocations. */
2570 x86_64_elf_unique_section (tree decl
, int reloc
)
2572 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2573 && ix86_in_large_data_p (decl
))
2575 const char *prefix
= NULL
;
2576 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2577 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2579 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2582 case SECCAT_DATA_REL
:
2583 case SECCAT_DATA_REL_LOCAL
:
2584 case SECCAT_DATA_REL_RO
:
2585 case SECCAT_DATA_REL_RO_LOCAL
:
2586 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2589 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2592 case SECCAT_RODATA_MERGE_STR
:
2593 case SECCAT_RODATA_MERGE_STR_INIT
:
2594 case SECCAT_RODATA_MERGE_CONST
:
2595 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2597 case SECCAT_SRODATA
:
2604 /* We don't split these for medium model. Place them into
2605 default sections and hope for best. */
2613 plen
= strlen (prefix
);
2615 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2616 name
= targetm
.strip_name_encoding (name
);
2617 nlen
= strlen (name
);
2619 string
= alloca (nlen
+ plen
+ 1);
2620 memcpy (string
, prefix
, plen
);
2621 memcpy (string
+ plen
, name
, nlen
+ 1);
2623 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2627 default_unique_section (decl
, reloc
);
2630 #ifdef COMMON_ASM_OP
2631 /* This says how to output assembler code to declare an
2632 uninitialized external linkage data object.
2634 For medium model x86-64 we need to use .largecomm opcode for
2637 x86_elf_aligned_common (FILE *file
,
2638 const char *name
, unsigned HOST_WIDE_INT size
,
2641 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2642 && size
> (unsigned int)ix86_section_threshold
)
2643 fprintf (file
, ".largecomm\t");
2645 fprintf (file
, "%s", COMMON_ASM_OP
);
2646 assemble_name (file
, name
);
2647 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2648 size
, align
/ BITS_PER_UNIT
);
2651 /* Utility function for targets to use in implementing
2652 ASM_OUTPUT_ALIGNED_BSS. */
2655 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2656 const char *name
, unsigned HOST_WIDE_INT size
,
2659 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2660 && size
> (unsigned int)ix86_section_threshold
)
2661 switch_to_section (get_named_section (decl
, ".lbss", 0));
2663 switch_to_section (bss_section
);
2664 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2665 #ifdef ASM_DECLARE_OBJECT_NAME
2666 last_assemble_variable_decl
= decl
;
2667 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2669 /* Standard thing is just output label for the object. */
2670 ASM_OUTPUT_LABEL (file
, name
);
2671 #endif /* ASM_DECLARE_OBJECT_NAME */
2672 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2676 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2678 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2679 make the problem with not enough registers even worse. */
2680 #ifdef INSN_SCHEDULING
2682 flag_schedule_insns
= 0;
2686 /* The Darwin libraries never set errno, so we might as well
2687 avoid calling them when that's the only reason we would. */
2688 flag_errno_math
= 0;
2690 /* The default values of these switches depend on the TARGET_64BIT
2691 that is not known at this moment. Mark these values with 2 and
2692 let user the to override these. In case there is no command line option
2693 specifying them, we will set the defaults in override_options. */
2695 flag_omit_frame_pointer
= 2;
2696 flag_pcc_struct_return
= 2;
2697 flag_asynchronous_unwind_tables
= 2;
2698 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2699 SUBTARGET_OPTIMIZATION_OPTIONS
;
2703 /* Table of valid machine attributes. */
2704 const struct attribute_spec ix86_attribute_table
[] =
2706 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2707 /* Stdcall attribute says callee is responsible for popping arguments
2708 if they are not variable. */
2709 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2710 /* Fastcall attribute says callee is responsible for popping arguments
2711 if they are not variable. */
2712 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2713 /* Cdecl attribute says the callee is a normal C declaration */
2714 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2715 /* Regparm attribute specifies how many integer arguments are to be
2716 passed in registers. */
2717 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2718 /* Sseregparm attribute says we are using x86_64 calling conventions
2719 for FP arguments. */
2720 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2721 /* force_align_arg_pointer says this function realigns the stack at entry. */
2722 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2723 false, true, true, ix86_handle_cconv_attribute
},
2724 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2725 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2726 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2727 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2729 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2730 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2731 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2732 SUBTARGET_ATTRIBUTE_TABLE
,
2734 { NULL
, 0, 0, false, false, false, NULL
}
2737 /* Decide whether we can make a sibling call to a function. DECL is the
2738 declaration of the function being targeted by the call and EXP is the
2739 CALL_EXPR representing the call. */
2742 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2747 /* If we are generating position-independent code, we cannot sibcall
2748 optimize any indirect call, or a direct call to a global function,
2749 as the PLT requires %ebx be live. */
2750 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2757 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2758 if (POINTER_TYPE_P (func
))
2759 func
= TREE_TYPE (func
);
2762 /* Check that the return value locations are the same. Like
2763 if we are returning floats on the 80387 register stack, we cannot
2764 make a sibcall from a function that doesn't return a float to a
2765 function that does or, conversely, from a function that does return
2766 a float to a function that doesn't; the necessary stack adjustment
2767 would not be executed. This is also the place we notice
2768 differences in the return value ABI. Note that it is ok for one
2769 of the functions to have void return type as long as the return
2770 value of the other is passed in a register. */
2771 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2772 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2774 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2776 if (!rtx_equal_p (a
, b
))
2779 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2781 else if (!rtx_equal_p (a
, b
))
2784 /* If this call is indirect, we'll need to be able to use a call-clobbered
2785 register for the address of the target function. Make sure that all
2786 such registers are not used for passing parameters. */
2787 if (!decl
&& !TARGET_64BIT
)
2791 /* We're looking at the CALL_EXPR, we need the type of the function. */
2792 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2793 type
= TREE_TYPE (type
); /* pointer type */
2794 type
= TREE_TYPE (type
); /* function type */
2796 if (ix86_function_regparm (type
, NULL
) >= 3)
2798 /* ??? Need to count the actual number of registers to be used,
2799 not the possible number of registers. Fix later. */
2804 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2805 /* Dllimport'd functions are also called indirectly. */
2806 if (decl
&& DECL_DLLIMPORT_P (decl
)
2807 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2811 /* If we forced aligned the stack, then sibcalling would unalign the
2812 stack, which may break the called function. */
2813 if (cfun
->machine
->force_align_arg_pointer
)
2816 /* Otherwise okay. That also includes certain types of indirect calls. */
2820 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2821 calling convention attributes;
2822 arguments as in struct attribute_spec.handler. */
2825 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2827 int flags ATTRIBUTE_UNUSED
,
2830 if (TREE_CODE (*node
) != FUNCTION_TYPE
2831 && TREE_CODE (*node
) != METHOD_TYPE
2832 && TREE_CODE (*node
) != FIELD_DECL
2833 && TREE_CODE (*node
) != TYPE_DECL
)
2835 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2836 IDENTIFIER_POINTER (name
));
2837 *no_add_attrs
= true;
2841 /* Can combine regparm with all attributes but fastcall. */
2842 if (is_attribute_p ("regparm", name
))
2846 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2848 error ("fastcall and regparm attributes are not compatible");
2851 cst
= TREE_VALUE (args
);
2852 if (TREE_CODE (cst
) != INTEGER_CST
)
2854 warning (OPT_Wattributes
,
2855 "%qs attribute requires an integer constant argument",
2856 IDENTIFIER_POINTER (name
));
2857 *no_add_attrs
= true;
2859 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2861 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2862 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2863 *no_add_attrs
= true;
2867 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2868 TYPE_ATTRIBUTES (*node
))
2869 && compare_tree_int (cst
, REGPARM_MAX
-1))
2871 error ("%s functions limited to %d register parameters",
2872 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2880 warning (OPT_Wattributes
, "%qs attribute ignored",
2881 IDENTIFIER_POINTER (name
));
2882 *no_add_attrs
= true;
2886 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2887 if (is_attribute_p ("fastcall", name
))
2889 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2891 error ("fastcall and cdecl attributes are not compatible");
2893 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2895 error ("fastcall and stdcall attributes are not compatible");
2897 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2899 error ("fastcall and regparm attributes are not compatible");
2903 /* Can combine stdcall with fastcall (redundant), regparm and
2905 else if (is_attribute_p ("stdcall", name
))
2907 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2909 error ("stdcall and cdecl attributes are not compatible");
2911 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2913 error ("stdcall and fastcall attributes are not compatible");
2917 /* Can combine cdecl with regparm and sseregparm. */
2918 else if (is_attribute_p ("cdecl", name
))
2920 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2922 error ("stdcall and cdecl attributes are not compatible");
2924 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2926 error ("fastcall and cdecl attributes are not compatible");
2930 /* Can combine sseregparm with all attributes. */
2935 /* Return 0 if the attributes for two types are incompatible, 1 if they
2936 are compatible, and 2 if they are nearly compatible (which causes a
2937 warning to be generated). */
2940 ix86_comp_type_attributes (tree type1
, tree type2
)
2942 /* Check for mismatch of non-default calling convention. */
2943 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2945 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2948 /* Check for mismatched fastcall/regparm types. */
2949 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2950 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2951 || (ix86_function_regparm (type1
, NULL
)
2952 != ix86_function_regparm (type2
, NULL
)))
2955 /* Check for mismatched sseregparm types. */
2956 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2957 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2960 /* Check for mismatched return types (cdecl vs stdcall). */
2961 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2962 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2968 /* Return the regparm value for a function with the indicated TYPE and DECL.
2969 DECL may be NULL when calling function indirectly
2970 or considering a libcall. */
2973 ix86_function_regparm (tree type
, tree decl
)
2976 int regparm
= ix86_regparm
;
2977 bool user_convention
= false;
2981 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2984 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2985 user_convention
= true;
2988 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2991 user_convention
= true;
2994 /* Use register calling convention for local functions when possible. */
2995 if (!TARGET_64BIT
&& !user_convention
&& decl
2996 && flag_unit_at_a_time
&& !profile_flag
)
2998 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3001 int local_regparm
, globals
= 0, regno
;
3003 /* Make sure no regparm register is taken by a global register
3005 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3006 if (global_regs
[local_regparm
])
3008 /* We can't use regparm(3) for nested functions as these use
3009 static chain pointer in third argument. */
3010 if (local_regparm
== 3
3011 && decl_function_context (decl
)
3012 && !DECL_NO_STATIC_CHAIN (decl
))
3014 /* If the function realigns its stackpointer, the
3015 prologue will clobber %ecx. If we've already
3016 generated code for the callee, the callee
3017 DECL_STRUCT_FUNCTION is gone, so we fall back to
3018 scanning the attributes for the self-realigning
3020 if ((DECL_STRUCT_FUNCTION (decl
)
3021 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
3022 || (!DECL_STRUCT_FUNCTION (decl
)
3023 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3024 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3026 /* Each global register variable increases register preassure,
3027 so the more global reg vars there are, the smaller regparm
3028 optimization use, unless requested by the user explicitly. */
3029 for (regno
= 0; regno
< 6; regno
++)
3030 if (global_regs
[regno
])
3033 = globals
< local_regparm
? local_regparm
- globals
: 0;
3035 if (local_regparm
> regparm
)
3036 regparm
= local_regparm
;
3043 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3044 DFmode (2) arguments in SSE registers for a function with the
3045 indicated TYPE and DECL. DECL may be NULL when calling function
3046 indirectly or considering a libcall. Otherwise return 0. */
3049 ix86_function_sseregparm (tree type
, tree decl
)
3051 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3052 by the sseregparm attribute. */
3053 if (TARGET_SSEREGPARM
3055 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3060 error ("Calling %qD with attribute sseregparm without "
3061 "SSE/SSE2 enabled", decl
);
3063 error ("Calling %qT with attribute sseregparm without "
3064 "SSE/SSE2 enabled", type
);
3071 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3072 (and DFmode for SSE2) arguments in SSE registers,
3073 even for 32-bit targets. */
3074 if (!TARGET_64BIT
&& decl
3075 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3077 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3079 return TARGET_SSE2
? 2 : 1;
3085 /* Return true if EAX is live at the start of the function. Used by
3086 ix86_expand_prologue to determine if we need special help before
3087 calling allocate_stack_worker. */
3090 ix86_eax_live_at_start_p (void)
3092 /* Cheat. Don't bother working forward from ix86_function_regparm
3093 to the function type to whether an actual argument is located in
3094 eax. Instead just look at cfg info, which is still close enough
3095 to correct at this point. This gives false positives for broken
3096 functions that might use uninitialized data that happens to be
3097 allocated in eax, but who cares? */
3098 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3101 /* Value is the number of bytes of arguments automatically
3102 popped when returning from a subroutine call.
3103 FUNDECL is the declaration node of the function (as a tree),
3104 FUNTYPE is the data type of the function (as a tree),
3105 or for a library call it is an identifier node for the subroutine name.
3106 SIZE is the number of bytes of arguments passed on the stack.
3108 On the 80386, the RTD insn may be used to pop them if the number
3109 of args is fixed, but if the number is variable then the caller
3110 must pop them all. RTD can't be used for library calls now
3111 because the library is compiled with the Unix compiler.
3112 Use of RTD is a selectable option, since it is incompatible with
3113 standard Unix calling sequences. If the option is not selected,
3114 the caller must always pop the args.
3116 The attribute stdcall is equivalent to RTD on a per module basis. */
3119 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3121 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3123 /* Cdecl functions override -mrtd, and never pop the stack. */
3124 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3126 /* Stdcall and fastcall functions will pop the stack if not
3128 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3129 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3133 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3134 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3135 == void_type_node
)))
3139 /* Lose any fake structure return argument if it is passed on the stack. */
3140 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3142 && !KEEP_AGGREGATE_RETURN_POINTER
)
3144 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3147 return GET_MODE_SIZE (Pmode
);
3153 /* Argument support functions. */
3155 /* Return true when register may be used to pass function parameters. */
3157 ix86_function_arg_regno_p (int regno
)
3163 return (regno
< REGPARM_MAX
3164 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3166 return (regno
< REGPARM_MAX
3167 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3168 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3169 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3170 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3175 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3180 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3181 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3184 /* RAX is used as hidden argument to va_arg functions. */
3187 for (i
= 0; i
< REGPARM_MAX
; i
++)
3188 if (regno
== x86_64_int_parameter_registers
[i
])
3193 /* Return if we do not know how to pass TYPE solely in registers. */
3196 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3198 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3201 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3202 The layout_type routine is crafty and tries to trick us into passing
3203 currently unsupported vector types on the stack by using TImode. */
3204 return (!TARGET_64BIT
&& mode
== TImode
3205 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3208 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3209 for a call to a function whose data type is FNTYPE.
3210 For a library call, FNTYPE is 0. */
3213 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3214 tree fntype
, /* tree ptr for function decl */
3215 rtx libname
, /* SYMBOL_REF of library name or 0 */
3218 static CUMULATIVE_ARGS zero_cum
;
3219 tree param
, next_param
;
3221 if (TARGET_DEBUG_ARG
)
3223 fprintf (stderr
, "\ninit_cumulative_args (");
3225 fprintf (stderr
, "fntype code = %s, ret code = %s",
3226 tree_code_name
[(int) TREE_CODE (fntype
)],
3227 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3229 fprintf (stderr
, "no fntype");
3232 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3237 /* Set up the number of registers to use for passing arguments. */
3238 cum
->nregs
= ix86_regparm
;
3240 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3242 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3243 cum
->warn_sse
= true;
3244 cum
->warn_mmx
= true;
3245 cum
->maybe_vaarg
= false;
3247 /* Use ecx and edx registers if function has fastcall attribute,
3248 else look for regparm information. */
3249 if (fntype
&& !TARGET_64BIT
)
3251 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3257 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3260 /* Set up the number of SSE registers used for passing SFmode
3261 and DFmode arguments. Warn for mismatching ABI. */
3262 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3264 /* Determine if this function has variable arguments. This is
3265 indicated by the last argument being 'void_type_mode' if there
3266 are no variable arguments. If there are variable arguments, then
3267 we won't pass anything in registers in 32-bit mode. */
3269 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3271 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3272 param
!= 0; param
= next_param
)
3274 next_param
= TREE_CHAIN (param
);
3275 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3285 cum
->float_in_sse
= 0;
3287 cum
->maybe_vaarg
= true;
3291 if ((!fntype
&& !libname
)
3292 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3293 cum
->maybe_vaarg
= true;
3295 if (TARGET_DEBUG_ARG
)
3296 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3301 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3302 But in the case of vector types, it is some vector mode.
3304 When we have only some of our vector isa extensions enabled, then there
3305 are some modes for which vector_mode_supported_p is false. For these
3306 modes, the generic vector support in gcc will choose some non-vector mode
3307 in order to implement the type. By computing the natural mode, we'll
3308 select the proper ABI location for the operand and not depend on whatever
3309 the middle-end decides to do with these vector types. */
3311 static enum machine_mode
3312 type_natural_mode (tree type
)
3314 enum machine_mode mode
= TYPE_MODE (type
);
3316 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3318 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3319 if ((size
== 8 || size
== 16)
3320 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3321 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3323 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3325 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3326 mode
= MIN_MODE_VECTOR_FLOAT
;
3328 mode
= MIN_MODE_VECTOR_INT
;
3330 /* Get the mode which has this inner mode and number of units. */
3331 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3332 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3333 && GET_MODE_INNER (mode
) == innermode
)
3343 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3344 this may not agree with the mode that the type system has chosen for the
3345 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3346 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3349 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3354 if (orig_mode
!= BLKmode
)
3355 tmp
= gen_rtx_REG (orig_mode
, regno
);
3358 tmp
= gen_rtx_REG (mode
, regno
);
3359 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3360 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3366 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3367 of this code is to classify each 8bytes of incoming argument by the register
3368 class and assign registers accordingly. */
3370 /* Return the union class of CLASS1 and CLASS2.
3371 See the x86-64 PS ABI for details. */
3373 static enum x86_64_reg_class
3374 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3376 /* Rule #1: If both classes are equal, this is the resulting class. */
3377 if (class1
== class2
)
3380 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3382 if (class1
== X86_64_NO_CLASS
)
3384 if (class2
== X86_64_NO_CLASS
)
3387 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3388 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3389 return X86_64_MEMORY_CLASS
;
3391 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3392 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3393 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3394 return X86_64_INTEGERSI_CLASS
;
3395 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3396 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3397 return X86_64_INTEGER_CLASS
;
3399 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3401 if (class1
== X86_64_X87_CLASS
3402 || class1
== X86_64_X87UP_CLASS
3403 || class1
== X86_64_COMPLEX_X87_CLASS
3404 || class2
== X86_64_X87_CLASS
3405 || class2
== X86_64_X87UP_CLASS
3406 || class2
== X86_64_COMPLEX_X87_CLASS
)
3407 return X86_64_MEMORY_CLASS
;
3409 /* Rule #6: Otherwise class SSE is used. */
3410 return X86_64_SSE_CLASS
;
3413 /* Classify the argument of type TYPE and mode MODE.
3414 CLASSES will be filled by the register class used to pass each word
3415 of the operand. The number of words is returned. In case the parameter
3416 should be passed in memory, 0 is returned. As a special case for zero
3417 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3419 BIT_OFFSET is used internally for handling records and specifies offset
3420 of the offset in bits modulo 256 to avoid overflow cases.
3422 See the x86-64 PS ABI for details.
3426 classify_argument (enum machine_mode mode
, tree type
,
3427 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3429 HOST_WIDE_INT bytes
=
3430 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3431 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3433 /* Variable sized entities are always passed/returned in memory. */
3437 if (mode
!= VOIDmode
3438 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3441 if (type
&& AGGREGATE_TYPE_P (type
))
3445 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3447 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3451 for (i
= 0; i
< words
; i
++)
3452 classes
[i
] = X86_64_NO_CLASS
;
3454 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3455 signalize memory class, so handle it as special case. */
3458 classes
[0] = X86_64_NO_CLASS
;
3462 /* Classify each field of record and merge classes. */
3463 switch (TREE_CODE (type
))
3466 /* And now merge the fields of structure. */
3467 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3469 if (TREE_CODE (field
) == FIELD_DECL
)
3473 if (TREE_TYPE (field
) == error_mark_node
)
3476 /* Bitfields are always classified as integer. Handle them
3477 early, since later code would consider them to be
3478 misaligned integers. */
3479 if (DECL_BIT_FIELD (field
))
3481 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3482 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3483 + tree_low_cst (DECL_SIZE (field
), 0)
3486 merge_classes (X86_64_INTEGER_CLASS
,
3491 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3492 TREE_TYPE (field
), subclasses
,
3493 (int_bit_position (field
)
3494 + bit_offset
) % 256);
3497 for (i
= 0; i
< num
; i
++)
3500 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3502 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3510 /* Arrays are handled as small records. */
3513 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3514 TREE_TYPE (type
), subclasses
, bit_offset
);
3518 /* The partial classes are now full classes. */
3519 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3520 subclasses
[0] = X86_64_SSE_CLASS
;
3521 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3522 subclasses
[0] = X86_64_INTEGER_CLASS
;
3524 for (i
= 0; i
< words
; i
++)
3525 classes
[i
] = subclasses
[i
% num
];
3530 case QUAL_UNION_TYPE
:
3531 /* Unions are similar to RECORD_TYPE but offset is always 0.
3533 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3535 if (TREE_CODE (field
) == FIELD_DECL
)
3539 if (TREE_TYPE (field
) == error_mark_node
)
3542 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3543 TREE_TYPE (field
), subclasses
,
3547 for (i
= 0; i
< num
; i
++)
3548 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3557 /* Final merger cleanup. */
3558 for (i
= 0; i
< words
; i
++)
3560 /* If one class is MEMORY, everything should be passed in
3562 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3565 /* The X86_64_SSEUP_CLASS should be always preceded by
3566 X86_64_SSE_CLASS. */
3567 if (classes
[i
] == X86_64_SSEUP_CLASS
3568 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3569 classes
[i
] = X86_64_SSE_CLASS
;
3571 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3572 if (classes
[i
] == X86_64_X87UP_CLASS
3573 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3574 classes
[i
] = X86_64_SSE_CLASS
;
3579 /* Compute alignment needed. We align all types to natural boundaries with
3580 exception of XFmode that is aligned to 64bits. */
3581 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3583 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3586 mode_alignment
= 128;
3587 else if (mode
== XCmode
)
3588 mode_alignment
= 256;
3589 if (COMPLEX_MODE_P (mode
))
3590 mode_alignment
/= 2;
3591 /* Misaligned fields are always returned in memory. */
3592 if (bit_offset
% mode_alignment
)
3596 /* for V1xx modes, just use the base mode */
3597 if (VECTOR_MODE_P (mode
)
3598 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3599 mode
= GET_MODE_INNER (mode
);
3601 /* Classification of atomic types. */
3606 classes
[0] = X86_64_SSE_CLASS
;
3609 classes
[0] = X86_64_SSE_CLASS
;
3610 classes
[1] = X86_64_SSEUP_CLASS
;
3619 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3620 classes
[0] = X86_64_INTEGERSI_CLASS
;
3622 classes
[0] = X86_64_INTEGER_CLASS
;
3626 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3631 if (!(bit_offset
% 64))
3632 classes
[0] = X86_64_SSESF_CLASS
;
3634 classes
[0] = X86_64_SSE_CLASS
;
3637 classes
[0] = X86_64_SSEDF_CLASS
;
3640 classes
[0] = X86_64_X87_CLASS
;
3641 classes
[1] = X86_64_X87UP_CLASS
;
3644 classes
[0] = X86_64_SSE_CLASS
;
3645 classes
[1] = X86_64_SSEUP_CLASS
;
3648 classes
[0] = X86_64_SSE_CLASS
;
3651 classes
[0] = X86_64_SSEDF_CLASS
;
3652 classes
[1] = X86_64_SSEDF_CLASS
;
3655 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3658 /* This modes is larger than 16 bytes. */
3666 classes
[0] = X86_64_SSE_CLASS
;
3667 classes
[1] = X86_64_SSEUP_CLASS
;
3673 classes
[0] = X86_64_SSE_CLASS
;
3679 gcc_assert (VECTOR_MODE_P (mode
));
3684 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3686 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3687 classes
[0] = X86_64_INTEGERSI_CLASS
;
3689 classes
[0] = X86_64_INTEGER_CLASS
;
3690 classes
[1] = X86_64_INTEGER_CLASS
;
3691 return 1 + (bytes
> 8);
3695 /* Examine the argument and return set number of register required in each
3696 class. Return 0 iff parameter should be passed in memory. */
3698 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3699 int *int_nregs
, int *sse_nregs
)
3701 enum x86_64_reg_class
class[MAX_CLASSES
];
3702 int n
= classify_argument (mode
, type
, class, 0);
3708 for (n
--; n
>= 0; n
--)
3711 case X86_64_INTEGER_CLASS
:
3712 case X86_64_INTEGERSI_CLASS
:
3715 case X86_64_SSE_CLASS
:
3716 case X86_64_SSESF_CLASS
:
3717 case X86_64_SSEDF_CLASS
:
3720 case X86_64_NO_CLASS
:
3721 case X86_64_SSEUP_CLASS
:
3723 case X86_64_X87_CLASS
:
3724 case X86_64_X87UP_CLASS
:
3728 case X86_64_COMPLEX_X87_CLASS
:
3729 return in_return
? 2 : 0;
3730 case X86_64_MEMORY_CLASS
:
3736 /* Construct container for the argument used by GCC interface. See
3737 FUNCTION_ARG for the detailed description. */
3740 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3741 tree type
, int in_return
, int nintregs
, int nsseregs
,
3742 const int *intreg
, int sse_regno
)
3744 /* The following variables hold the static issued_error state. */
3745 static bool issued_sse_arg_error
;
3746 static bool issued_sse_ret_error
;
3747 static bool issued_x87_ret_error
;
3749 enum machine_mode tmpmode
;
3751 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3752 enum x86_64_reg_class
class[MAX_CLASSES
];
3756 int needed_sseregs
, needed_intregs
;
3757 rtx exp
[MAX_CLASSES
];
3760 n
= classify_argument (mode
, type
, class, 0);
3761 if (TARGET_DEBUG_ARG
)
3764 fprintf (stderr
, "Memory class\n");
3767 fprintf (stderr
, "Classes:");
3768 for (i
= 0; i
< n
; i
++)
3770 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3772 fprintf (stderr
, "\n");
3777 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3780 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3783 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3784 some less clueful developer tries to use floating-point anyway. */
3785 if (needed_sseregs
&& !TARGET_SSE
)
3789 if (!issued_sse_ret_error
)
3791 error ("SSE register return with SSE disabled");
3792 issued_sse_ret_error
= true;
3795 else if (!issued_sse_arg_error
)
3797 error ("SSE register argument with SSE disabled");
3798 issued_sse_arg_error
= true;
3803 /* Likewise, error if the ABI requires us to return values in the
3804 x87 registers and the user specified -mno-80387. */
3805 if (!TARGET_80387
&& in_return
)
3806 for (i
= 0; i
< n
; i
++)
3807 if (class[i
] == X86_64_X87_CLASS
3808 || class[i
] == X86_64_X87UP_CLASS
3809 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3811 if (!issued_x87_ret_error
)
3813 error ("x87 register return with x87 disabled");
3814 issued_x87_ret_error
= true;
3819 /* First construct simple cases. Avoid SCmode, since we want to use
3820 single register to pass this type. */
3821 if (n
== 1 && mode
!= SCmode
)
3824 case X86_64_INTEGER_CLASS
:
3825 case X86_64_INTEGERSI_CLASS
:
3826 return gen_rtx_REG (mode
, intreg
[0]);
3827 case X86_64_SSE_CLASS
:
3828 case X86_64_SSESF_CLASS
:
3829 case X86_64_SSEDF_CLASS
:
3830 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3831 case X86_64_X87_CLASS
:
3832 case X86_64_COMPLEX_X87_CLASS
:
3833 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3834 case X86_64_NO_CLASS
:
3835 /* Zero sized array, struct or class. */
3840 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3842 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3844 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3845 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3846 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3847 && class[1] == X86_64_INTEGER_CLASS
3848 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3849 && intreg
[0] + 1 == intreg
[1])
3850 return gen_rtx_REG (mode
, intreg
[0]);
3852 /* Otherwise figure out the entries of the PARALLEL. */
3853 for (i
= 0; i
< n
; i
++)
3857 case X86_64_NO_CLASS
:
3859 case X86_64_INTEGER_CLASS
:
3860 case X86_64_INTEGERSI_CLASS
:
3861 /* Merge TImodes on aligned occasions here too. */
3862 if (i
* 8 + 8 > bytes
)
3863 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3864 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3868 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3869 if (tmpmode
== BLKmode
)
3871 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3872 gen_rtx_REG (tmpmode
, *intreg
),
3876 case X86_64_SSESF_CLASS
:
3877 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3878 gen_rtx_REG (SFmode
,
3879 SSE_REGNO (sse_regno
)),
3883 case X86_64_SSEDF_CLASS
:
3884 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3885 gen_rtx_REG (DFmode
,
3886 SSE_REGNO (sse_regno
)),
3890 case X86_64_SSE_CLASS
:
3891 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3895 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3896 gen_rtx_REG (tmpmode
,
3897 SSE_REGNO (sse_regno
)),
3899 if (tmpmode
== TImode
)
3908 /* Empty aligned struct, union or class. */
3912 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3913 for (i
= 0; i
< nexps
; i
++)
3914 XVECEXP (ret
, 0, i
) = exp
[i
];
3918 /* Update the data in CUM to advance over an argument
3919 of mode MODE and data type TYPE.
3920 (TYPE is null for libcalls where that information may not be available.) */
3923 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3924 tree type
, int named
)
3927 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3928 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3931 mode
= type_natural_mode (type
);
3933 if (TARGET_DEBUG_ARG
)
3934 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3935 "mode=%s, named=%d)\n\n",
3936 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3937 GET_MODE_NAME (mode
), named
);
3941 int int_nregs
, sse_nregs
;
3942 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3943 cum
->words
+= words
;
3944 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3946 cum
->nregs
-= int_nregs
;
3947 cum
->sse_nregs
-= sse_nregs
;
3948 cum
->regno
+= int_nregs
;
3949 cum
->sse_regno
+= sse_nregs
;
3952 cum
->words
+= words
;
3970 cum
->words
+= words
;
3971 cum
->nregs
-= words
;
3972 cum
->regno
+= words
;
3974 if (cum
->nregs
<= 0)
3982 if (cum
->float_in_sse
< 2)
3985 if (cum
->float_in_sse
< 1)
3996 if (!type
|| !AGGREGATE_TYPE_P (type
))
3998 cum
->sse_words
+= words
;
3999 cum
->sse_nregs
-= 1;
4000 cum
->sse_regno
+= 1;
4001 if (cum
->sse_nregs
<= 0)
4013 if (!type
|| !AGGREGATE_TYPE_P (type
))
4015 cum
->mmx_words
+= words
;
4016 cum
->mmx_nregs
-= 1;
4017 cum
->mmx_regno
+= 1;
4018 if (cum
->mmx_nregs
<= 0)
4029 /* Define where to put the arguments to a function.
4030 Value is zero to push the argument on the stack,
4031 or a hard register in which to store the argument.
4033 MODE is the argument's machine mode.
4034 TYPE is the data type of the argument (as a tree).
4035 This is null for libcalls where that information may
4037 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4038 the preceding args and about the function being called.
4039 NAMED is nonzero if this argument is a named parameter
4040 (otherwise it is an extra parameter matching an ellipsis). */
4043 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
4044 tree type
, int named
)
4046 enum machine_mode mode
= orig_mode
;
4049 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
4050 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4051 static bool warnedsse
, warnedmmx
;
4053 /* To simplify the code below, represent vector types with a vector mode
4054 even if MMX/SSE are not active. */
4055 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4056 mode
= type_natural_mode (type
);
4058 /* Handle a hidden AL argument containing number of registers for varargs
4059 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4061 if (mode
== VOIDmode
)
4064 return GEN_INT (cum
->maybe_vaarg
4065 ? (cum
->sse_nregs
< 0
4073 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4075 &x86_64_int_parameter_registers
[cum
->regno
],
4080 /* For now, pass fp/complex values on the stack. */
4092 if (words
<= cum
->nregs
)
4094 int regno
= cum
->regno
;
4096 /* Fastcall allocates the first two DWORD (SImode) or
4097 smaller arguments to ECX and EDX. */
4100 if (mode
== BLKmode
|| mode
== DImode
)
4103 /* ECX not EAX is the first allocated register. */
4107 ret
= gen_rtx_REG (mode
, regno
);
4111 if (cum
->float_in_sse
< 2)
4114 if (cum
->float_in_sse
< 1)
4124 if (!type
|| !AGGREGATE_TYPE_P (type
))
4126 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4129 warning (0, "SSE vector argument without SSE enabled "
4133 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4134 cum
->sse_regno
+ FIRST_SSE_REG
);
4141 if (!type
|| !AGGREGATE_TYPE_P (type
))
4143 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4146 warning (0, "MMX vector argument without MMX enabled "
4150 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4151 cum
->mmx_regno
+ FIRST_MMX_REG
);
4156 if (TARGET_DEBUG_ARG
)
4159 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4160 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4163 print_simple_rtl (stderr
, ret
);
4165 fprintf (stderr
, ", stack");
4167 fprintf (stderr
, " )\n");
4173 /* A C expression that indicates when an argument must be passed by
4174 reference. If nonzero for an argument, a copy of that argument is
4175 made in memory and a pointer to the argument is passed instead of
4176 the argument itself. The pointer is passed in whatever way is
4177 appropriate for passing a pointer to that type. */
4180 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4181 enum machine_mode mode ATTRIBUTE_UNUSED
,
4182 tree type
, bool named ATTRIBUTE_UNUSED
)
4187 if (type
&& int_size_in_bytes (type
) == -1)
4189 if (TARGET_DEBUG_ARG
)
4190 fprintf (stderr
, "function_arg_pass_by_reference\n");
4197 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4198 ABI. Only called if TARGET_SSE. */
4200 contains_128bit_aligned_vector_p (tree type
)
4202 enum machine_mode mode
= TYPE_MODE (type
);
4203 if (SSE_REG_MODE_P (mode
)
4204 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4206 if (TYPE_ALIGN (type
) < 128)
4209 if (AGGREGATE_TYPE_P (type
))
4211 /* Walk the aggregates recursively. */
4212 switch (TREE_CODE (type
))
4216 case QUAL_UNION_TYPE
:
4220 /* Walk all the structure fields. */
4221 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4223 if (TREE_CODE (field
) == FIELD_DECL
4224 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4231 /* Just for use if some languages passes arrays by value. */
4232 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4243 /* Gives the alignment boundary, in bits, of an argument with the
4244 specified mode and type. */
4247 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4251 align
= TYPE_ALIGN (type
);
4253 align
= GET_MODE_ALIGNMENT (mode
);
4254 if (align
< PARM_BOUNDARY
)
4255 align
= PARM_BOUNDARY
;
4258 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4259 make an exception for SSE modes since these require 128bit
4262 The handling here differs from field_alignment. ICC aligns MMX
4263 arguments to 4 byte boundaries, while structure fields are aligned
4264 to 8 byte boundaries. */
4266 align
= PARM_BOUNDARY
;
4269 if (!SSE_REG_MODE_P (mode
))
4270 align
= PARM_BOUNDARY
;
4274 if (!contains_128bit_aligned_vector_p (type
))
4275 align
= PARM_BOUNDARY
;
4283 /* Return true if N is a possible register number of function value. */
4285 ix86_function_value_regno_p (int regno
)
4291 return ((regno
) == 0
4292 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4293 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4295 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4296 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4297 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4302 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4303 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4307 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4314 /* Define how to find the value returned by a function.
4315 VALTYPE is the data type of the value (as a tree).
4316 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4317 otherwise, FUNC is 0. */
4319 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4320 bool outgoing ATTRIBUTE_UNUSED
)
4322 enum machine_mode natmode
= type_natural_mode (valtype
);
4326 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4327 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4328 x86_64_int_return_registers
, 0);
4329 /* For zero sized structures, construct_container return NULL, but we
4330 need to keep rest of compiler happy by returning meaningful value. */
4332 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4337 tree fn
= NULL_TREE
, fntype
;
4339 && DECL_P (fntype_or_decl
))
4340 fn
= fntype_or_decl
;
4341 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4342 return gen_rtx_REG (TYPE_MODE (valtype
),
4343 ix86_value_regno (natmode
, fn
, fntype
));
4347 /* Return true iff type is returned in memory. */
4349 ix86_return_in_memory (tree type
)
4351 int needed_intregs
, needed_sseregs
, size
;
4352 enum machine_mode mode
= type_natural_mode (type
);
4355 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4357 if (mode
== BLKmode
)
4360 size
= int_size_in_bytes (type
);
4362 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4365 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4367 /* User-created vectors small enough to fit in EAX. */
4371 /* MMX/3dNow values are returned in MM0,
4372 except when it doesn't exits. */
4374 return (TARGET_MMX
? 0 : 1);
4376 /* SSE values are returned in XMM0, except when it doesn't exist. */
4378 return (TARGET_SSE
? 0 : 1);
4392 /* When returning SSE vector types, we have a choice of either
4393 (1) being abi incompatible with a -march switch, or
4394 (2) generating an error.
4395 Given no good solution, I think the safest thing is one warning.
4396 The user won't be able to use -Werror, but....
4398 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4399 called in response to actually generating a caller or callee that
4400 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4401 via aggregate_value_p for general type probing from tree-ssa. */
4404 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4406 static bool warnedsse
, warnedmmx
;
4410 /* Look at the return type of the function, not the function type. */
4411 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4413 if (!TARGET_SSE
&& !warnedsse
)
4416 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4419 warning (0, "SSE vector return without SSE enabled "
4424 if (!TARGET_MMX
&& !warnedmmx
)
4426 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4429 warning (0, "MMX vector return without MMX enabled "
4438 /* Define how to find the value returned by a library function
4439 assuming the value has mode MODE. */
4441 ix86_libcall_value (enum machine_mode mode
)
4455 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4458 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4462 return gen_rtx_REG (mode
, 0);
4466 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4469 /* Given a mode, return the register to use for a return value. */
4472 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4474 gcc_assert (!TARGET_64BIT
);
4476 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4477 we normally prevent this case when mmx is not available. However
4478 some ABIs may require the result to be returned like DImode. */
4479 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4480 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4482 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4483 we prevent this case when sse is not available. However some ABIs
4484 may require the result to be returned like integer TImode. */
4485 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4486 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4488 /* Decimal floating point values can go in %eax, unlike other float modes. */
4489 if (DECIMAL_FLOAT_MODE_P (mode
))
4492 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4493 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4496 /* Floating point return values in %st(0), except for local functions when
4497 SSE math is enabled or for functions with sseregparm attribute. */
4498 if ((func
|| fntype
)
4499 && (mode
== SFmode
|| mode
== DFmode
))
4501 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4502 if ((sse_level
>= 1 && mode
== SFmode
)
4503 || (sse_level
== 2 && mode
== DFmode
))
4504 return FIRST_SSE_REG
;
4507 return FIRST_FLOAT_REG
;
4510 /* Create the va_list data type. */
4513 ix86_build_builtin_va_list (void)
4515 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4517 /* For i386 we use plain pointer to argument area. */
4519 return build_pointer_type (char_type_node
);
4521 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4522 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4524 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4525 unsigned_type_node
);
4526 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4527 unsigned_type_node
);
4528 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4530 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4533 va_list_gpr_counter_field
= f_gpr
;
4534 va_list_fpr_counter_field
= f_fpr
;
4536 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4537 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4538 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4539 DECL_FIELD_CONTEXT (f_sav
) = record
;
4541 TREE_CHAIN (record
) = type_decl
;
4542 TYPE_NAME (record
) = type_decl
;
4543 TYPE_FIELDS (record
) = f_gpr
;
4544 TREE_CHAIN (f_gpr
) = f_fpr
;
4545 TREE_CHAIN (f_fpr
) = f_ovf
;
4546 TREE_CHAIN (f_ovf
) = f_sav
;
4548 layout_type (record
);
4550 /* The correct type is an array type of one element. */
4551 return build_array_type (record
, build_index_type (size_zero_node
));
4554 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4557 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4558 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4561 CUMULATIVE_ARGS next_cum
;
4562 rtx save_area
= NULL_RTX
, mem
;
4575 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4578 /* Indicate to allocate space on the stack for varargs save area. */
4579 ix86_save_varrargs_registers
= 1;
4581 cfun
->stack_alignment_needed
= 128;
4583 fntype
= TREE_TYPE (current_function_decl
);
4584 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4585 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4586 != void_type_node
));
4588 /* For varargs, we do not want to skip the dummy va_dcl argument.
4589 For stdargs, we do want to skip the last named argument. */
4592 function_arg_advance (&next_cum
, mode
, type
, 1);
4595 save_area
= frame_pointer_rtx
;
4597 set
= get_varargs_alias_set ();
4599 for (i
= next_cum
.regno
;
4601 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4604 mem
= gen_rtx_MEM (Pmode
,
4605 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4606 MEM_NOTRAP_P (mem
) = 1;
4607 set_mem_alias_set (mem
, set
);
4608 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4609 x86_64_int_parameter_registers
[i
]));
4612 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4614 /* Now emit code to save SSE registers. The AX parameter contains number
4615 of SSE parameter registers used to call this function. We use
4616 sse_prologue_save insn template that produces computed jump across
4617 SSE saves. We need some preparation work to get this working. */
4619 label
= gen_label_rtx ();
4620 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4622 /* Compute address to jump to :
4623 label - 5*eax + nnamed_sse_arguments*5 */
4624 tmp_reg
= gen_reg_rtx (Pmode
);
4625 nsse_reg
= gen_reg_rtx (Pmode
);
4626 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4627 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4628 gen_rtx_MULT (Pmode
, nsse_reg
,
4630 if (next_cum
.sse_regno
)
4633 gen_rtx_CONST (DImode
,
4634 gen_rtx_PLUS (DImode
,
4636 GEN_INT (next_cum
.sse_regno
* 4))));
4638 emit_move_insn (nsse_reg
, label_ref
);
4639 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4641 /* Compute address of memory block we save into. We always use pointer
4642 pointing 127 bytes after first byte to store - this is needed to keep
4643 instruction size limited by 4 bytes. */
4644 tmp_reg
= gen_reg_rtx (Pmode
);
4645 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4646 plus_constant (save_area
,
4647 8 * REGPARM_MAX
+ 127)));
4648 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4649 MEM_NOTRAP_P (mem
) = 1;
4650 set_mem_alias_set (mem
, set
);
4651 set_mem_align (mem
, BITS_PER_WORD
);
4653 /* And finally do the dirty job! */
4654 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4655 GEN_INT (next_cum
.sse_regno
), label
));
4660 /* Implement va_start. */
4663 ix86_va_start (tree valist
, rtx nextarg
)
4665 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4666 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4667 tree gpr
, fpr
, ovf
, sav
, t
;
4670 /* Only 64bit target needs something special. */
4673 std_expand_builtin_va_start (valist
, nextarg
);
4677 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4678 f_fpr
= TREE_CHAIN (f_gpr
);
4679 f_ovf
= TREE_CHAIN (f_fpr
);
4680 f_sav
= TREE_CHAIN (f_ovf
);
4682 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4683 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4684 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4685 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4686 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4688 /* Count number of gp and fp argument registers used. */
4689 words
= current_function_args_info
.words
;
4690 n_gpr
= current_function_args_info
.regno
;
4691 n_fpr
= current_function_args_info
.sse_regno
;
4693 if (TARGET_DEBUG_ARG
)
4694 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4695 (int) words
, (int) n_gpr
, (int) n_fpr
);
4697 if (cfun
->va_list_gpr_size
)
4699 type
= TREE_TYPE (gpr
);
4700 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4701 build_int_cst (type
, n_gpr
* 8));
4702 TREE_SIDE_EFFECTS (t
) = 1;
4703 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4706 if (cfun
->va_list_fpr_size
)
4708 type
= TREE_TYPE (fpr
);
4709 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4710 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4711 TREE_SIDE_EFFECTS (t
) = 1;
4712 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4715 /* Find the overflow area. */
4716 type
= TREE_TYPE (ovf
);
4717 t
= make_tree (type
, virtual_incoming_args_rtx
);
4719 t
= build2 (PLUS_EXPR
, type
, t
,
4720 build_int_cst (type
, words
* UNITS_PER_WORD
));
4721 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4722 TREE_SIDE_EFFECTS (t
) = 1;
4723 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4725 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4727 /* Find the register save area.
4728 Prologue of the function save it right above stack frame. */
4729 type
= TREE_TYPE (sav
);
4730 t
= make_tree (type
, frame_pointer_rtx
);
4731 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4732 TREE_SIDE_EFFECTS (t
) = 1;
4733 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4737 /* Implement va_arg. */
4740 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4742 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4743 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4744 tree gpr
, fpr
, ovf
, sav
, t
;
4746 tree lab_false
, lab_over
= NULL_TREE
;
4751 enum machine_mode nat_mode
;
4753 /* Only 64bit target needs something special. */
4755 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4757 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4758 f_fpr
= TREE_CHAIN (f_gpr
);
4759 f_ovf
= TREE_CHAIN (f_fpr
);
4760 f_sav
= TREE_CHAIN (f_ovf
);
4762 valist
= build_va_arg_indirect_ref (valist
);
4763 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4764 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4765 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4766 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4768 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4770 type
= build_pointer_type (type
);
4771 size
= int_size_in_bytes (type
);
4772 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4774 nat_mode
= type_natural_mode (type
);
4775 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4776 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4778 /* Pull the value out of the saved registers. */
4780 addr
= create_tmp_var (ptr_type_node
, "addr");
4781 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4785 int needed_intregs
, needed_sseregs
;
4787 tree int_addr
, sse_addr
;
4789 lab_false
= create_artificial_label ();
4790 lab_over
= create_artificial_label ();
4792 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4794 need_temp
= (!REG_P (container
)
4795 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4796 || TYPE_ALIGN (type
) > 128));
4798 /* In case we are passing structure, verify that it is consecutive block
4799 on the register save area. If not we need to do moves. */
4800 if (!need_temp
&& !REG_P (container
))
4802 /* Verify that all registers are strictly consecutive */
4803 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4807 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4809 rtx slot
= XVECEXP (container
, 0, i
);
4810 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4811 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4819 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4821 rtx slot
= XVECEXP (container
, 0, i
);
4822 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4823 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4835 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4836 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4837 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4838 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4841 /* First ensure that we fit completely in registers. */
4844 t
= build_int_cst (TREE_TYPE (gpr
),
4845 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4846 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4847 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4848 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4849 gimplify_and_add (t
, pre_p
);
4853 t
= build_int_cst (TREE_TYPE (fpr
),
4854 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4856 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4857 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4858 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4859 gimplify_and_add (t
, pre_p
);
4862 /* Compute index to start of area used for integer regs. */
4865 /* int_addr = gpr + sav; */
4866 t
= fold_convert (ptr_type_node
, gpr
);
4867 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4868 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4869 gimplify_and_add (t
, pre_p
);
4873 /* sse_addr = fpr + sav; */
4874 t
= fold_convert (ptr_type_node
, fpr
);
4875 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4876 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4877 gimplify_and_add (t
, pre_p
);
4882 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4885 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4886 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4887 gimplify_and_add (t
, pre_p
);
4889 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4891 rtx slot
= XVECEXP (container
, 0, i
);
4892 rtx reg
= XEXP (slot
, 0);
4893 enum machine_mode mode
= GET_MODE (reg
);
4894 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4895 tree addr_type
= build_pointer_type (piece_type
);
4898 tree dest_addr
, dest
;
4900 if (SSE_REGNO_P (REGNO (reg
)))
4902 src_addr
= sse_addr
;
4903 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4907 src_addr
= int_addr
;
4908 src_offset
= REGNO (reg
) * 8;
4910 src_addr
= fold_convert (addr_type
, src_addr
);
4911 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4912 size_int (src_offset
));
4913 src
= build_va_arg_indirect_ref (src_addr
);
4915 dest_addr
= fold_convert (addr_type
, addr
);
4916 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4917 size_int (INTVAL (XEXP (slot
, 1))));
4918 dest
= build_va_arg_indirect_ref (dest_addr
);
4920 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4921 gimplify_and_add (t
, pre_p
);
4927 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4928 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4929 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4930 gimplify_and_add (t
, pre_p
);
4934 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4935 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4936 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4937 gimplify_and_add (t
, pre_p
);
4940 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4941 gimplify_and_add (t
, pre_p
);
4943 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4944 append_to_statement_list (t
, pre_p
);
4947 /* ... otherwise out of the overflow area. */
4949 /* Care for on-stack alignment if needed. */
4950 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4951 || integer_zerop (TYPE_SIZE (type
)))
4955 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4956 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4957 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4958 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4959 build_int_cst (TREE_TYPE (t
), -align
));
4961 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4963 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4964 gimplify_and_add (t2
, pre_p
);
4966 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4967 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4968 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4969 gimplify_and_add (t
, pre_p
);
4973 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4974 append_to_statement_list (t
, pre_p
);
4977 ptrtype
= build_pointer_type (type
);
4978 addr
= fold_convert (ptrtype
, addr
);
4981 addr
= build_va_arg_indirect_ref (addr
);
4982 return build_va_arg_indirect_ref (addr
);
4985 /* Return nonzero if OPNUM's MEM should be matched
4986 in movabs* patterns. */
4989 ix86_check_movabs (rtx insn
, int opnum
)
4993 set
= PATTERN (insn
);
4994 if (GET_CODE (set
) == PARALLEL
)
4995 set
= XVECEXP (set
, 0, 0);
4996 gcc_assert (GET_CODE (set
) == SET
);
4997 mem
= XEXP (set
, opnum
);
4998 while (GET_CODE (mem
) == SUBREG
)
4999 mem
= SUBREG_REG (mem
);
5000 gcc_assert (MEM_P (mem
));
5001 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5004 /* Initialize the table of extra 80387 mathematical constants. */
5007 init_ext_80387_constants (void)
5009 static const char * cst
[5] =
5011 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5012 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5013 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5014 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5015 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5019 for (i
= 0; i
< 5; i
++)
5021 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5022 /* Ensure each constant is rounded to XFmode precision. */
5023 real_convert (&ext_80387_constants_table
[i
],
5024 XFmode
, &ext_80387_constants_table
[i
]);
5027 ext_80387_constants_init
= 1;
5030 /* Return true if the constant is something that can be loaded with
5031 a special instruction. */
5034 standard_80387_constant_p (rtx x
)
5038 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
5041 if (x
== CONST0_RTX (GET_MODE (x
)))
5043 if (x
== CONST1_RTX (GET_MODE (x
)))
5046 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5048 /* For XFmode constants, try to find a special 80387 instruction when
5049 optimizing for size or on those CPUs that benefit from them. */
5050 if (GET_MODE (x
) == XFmode
5051 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5055 if (! ext_80387_constants_init
)
5056 init_ext_80387_constants ();
5058 for (i
= 0; i
< 5; i
++)
5059 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5063 /* Load of the constant -0.0 or -1.0 will be split as
5064 fldz;fchs or fld1;fchs sequence. */
5065 if (real_isnegzero (&r
))
5067 if (real_identical (&r
, &dconstm1
))
5073 /* Return the opcode of the special instruction to be used to load
5077 standard_80387_constant_opcode (rtx x
)
5079 switch (standard_80387_constant_p (x
))
5103 /* Return the CONST_DOUBLE representing the 80387 constant that is
5104 loaded by the specified special instruction. The argument IDX
5105 matches the return value from standard_80387_constant_p. */
5108 standard_80387_constant_rtx (int idx
)
5112 if (! ext_80387_constants_init
)
5113 init_ext_80387_constants ();
5129 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5133 /* Return 1 if mode is a valid mode for sse. */
5135 standard_sse_mode_p (enum machine_mode mode
)
5152 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5155 standard_sse_constant_p (rtx x
)
5157 enum machine_mode mode
= GET_MODE (x
);
5159 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5161 if (vector_all_ones_operand (x
, mode
)
5162 && standard_sse_mode_p (mode
))
5163 return TARGET_SSE2
? 2 : -1;
5168 /* Return the opcode of the special instruction to be used to load
5172 standard_sse_constant_opcode (rtx insn
, rtx x
)
5174 switch (standard_sse_constant_p (x
))
5177 if (get_attr_mode (insn
) == MODE_V4SF
)
5178 return "xorps\t%0, %0";
5179 else if (get_attr_mode (insn
) == MODE_V2DF
)
5180 return "xorpd\t%0, %0";
5182 return "pxor\t%0, %0";
5184 return "pcmpeqd\t%0, %0";
5189 /* Returns 1 if OP contains a symbol reference */
5192 symbolic_reference_mentioned_p (rtx op
)
5197 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5200 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5201 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5207 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5208 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5212 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
/* Decide whether a bare `ret' may be emitted: only after reload, without
   a frame pointer, with a small args-pop amount, and when the computed
   frame needs no allocation and saves no registers.  NOTE(review): the
   intermediate "return 0" statements after the guard conditions
   (original lines 5231-5232, 5237-5238) are missing from this
   extraction.  */
5219 /* Return 1 if it is appropriate to emit `ret' instructions in the
5220 body of a function. Do this only if the epilogue is simple, needing a
5221 couple of insns. Prior to reloading, we can't tell how many registers
5222 must be saved, so return 0 then. Return 0 if there is no frame
5223 marker to de-allocate. */
5226 ix86_can_use_return_insn_p (void)
5228 struct ix86_frame frame
;
5230 if (! reload_completed
|| frame_pointer_needed
)
5233 /* Don't allow more than 32 pop, since that's all we can do
5234 with one instruction. */
5235 if (current_function_pops_args
5236 && current_function_args_size
>= 32768)
5239 ix86_compute_frame_layout (&frame
);
5240 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
/* Target hook: force a frame pointer when previous frames are accessed,
   when the subtarget demands it, for non-leaf functions under
   TARGET_OMIT_LEAF_FRAME_POINTER, or when profiling.  NOTE(review): the
   "return 1" bodies of each guard and the final "return 0" (original
   lines 5253-5254, 5258-5259, 5266-5267, 5269-5271) are missing from
   this extraction.  */
5243 /* Value should be nonzero if functions must have frame pointers.
5244 Zero means the frame pointer need not be set up (and parms may
5245 be accessed via the stack pointer) in functions that seem suitable. */
5248 ix86_frame_pointer_required (void)
5250 /* If we accessed previous frames, then the generated code expects
5251 to be able to access the saved ebp value in our frame. */
5252 if (cfun
->machine
->accesses_prev_frame
)
5255 /* Several x86 os'es need a frame pointer for other reasons,
5256 usually pertaining to setjmp. */
5257 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5260 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5261 the frame pointer by default. Turn it back on now if we've not
5262 got a leaf function. */
5263 if (TARGET_OMIT_LEAF_FRAME_POINTER
5264 && (!current_function_is_leaf
5265 || ix86_current_function_calls_tls_descriptor
))
5268 if (current_function_profile
)
/* Sets the per-function flag read by ix86_frame_pointer_required above;
   called when __builtin_frame_address/return_address reach back to a
   previous frame.  */
5274 /* Record that the current function accesses previous call frames. */
5277 ix86_setup_frame_addresses (void)
5279 cfun
->machine
->accesses_prev_frame
= 1;
/* USE_HIDDEN_LINKONCE selects between emitting PC-thunk helpers as
   hidden link-once functions or as plain local labels (see
   get_pc_thunk_name / ix86_file_end below).  NOTE(review): the #else
   and #endif lines (original 5284, 5286) are missing from this
   extraction.  */
5282 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5283 # define USE_HIDDEN_LINKONCE 1
5285 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of register numbers whose PC-thunk has been requested; set in
   output_set_got, consumed in ix86_file_end.  */
5288 static int pic_labels_used
;
/* Writes the PC-thunk label for REGNO into NAME (caller supplies a
   32-byte buffer): "__i686.get_pc_thunk.<reg>" when hidden link-once
   sections are usable, otherwise an internal "LPR<regno>" label.
   32-bit only (gcc_assert).  NOTE(review): the "else" between the two
   branches (original line 5300) is missing from this extraction.  */
5290 /* Fills in the label name that should be used for a pc thunk for
5291 the given register. */
5294 get_pc_thunk_name (char name
[32], unsigned int regno
)
5296 gcc_assert (!TARGET_64BIT
);
5298 if (USE_HIDDEN_LINKONCE
)
5299 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5301 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
/* End-of-file hook: for each of the 8 integer registers whose bit is set
   in pic_labels_used, emit the PC thunk "mov (%esp), <reg>; ret" —
   either into a Mach-O coalesced / hidden link-once section with full
   symbol decoration, or into the plain text section — then mark the
   stack non-executable if required.  NOTE(review): this extraction
   drops many original lines (e.g. 5310-5313, 5315-5317, 5319-5320,
   5322-5325, 5333-5335, 5337-5339, 5341, 5345, 5348, 5354-5356,
   5359-5360, 5365-5366): local declarations, #if TARGET_MACHO
   conditionals, braces and the build_decl argument tail are missing.  */
5305 /* This function generates code for -fpic that loads %ebx with
5306 the return address of the caller and then returns. */
5309 ix86_file_end (void)
5314 for (regno
= 0; regno
< 8; ++regno
)
5318 if (! ((pic_labels_used
>> regno
) & 1))
5321 get_pc_thunk_name (name
, regno
);
5326 switch_to_section (darwin_sections
[text_coal_section
]);
5327 fputs ("\t.weak_definition\t", asm_out_file
);
5328 assemble_name (asm_out_file
, name
);
5329 fputs ("\n\t.private_extern\t", asm_out_file
);
5330 assemble_name (asm_out_file
, name
);
5331 fputs ("\n", asm_out_file
);
5332 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5336 if (USE_HIDDEN_LINKONCE
)
5340 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5342 TREE_PUBLIC (decl
) = 1;
5343 TREE_STATIC (decl
) = 1;
5344 DECL_ONE_ONLY (decl
) = 1;
5346 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5347 switch_to_section (get_named_section (decl
, NULL
, 0));
5349 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5350 fputs ("\t.hidden\t", asm_out_file
);
5351 assemble_name (asm_out_file
, name
);
5352 fputc ('\n', asm_out_file
);
5353 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5357 switch_to_section (text_section
);
5358 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5361 xops
[0] = gen_rtx_REG (SImode
, regno
);
5362 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5363 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5364 output_asm_insn ("ret", xops
);
5367 if (NEED_INDICATE_EXEC_STACK
)
5368 file_end_indicate_exec_stack ();
/* Emits the assembly for a SET_GOT insn that loads the GOT address into
   DEST.  Visible paths: VxWorks RTP (load via GOTT_BASE/GOTT_INDEX),
   the non-deep-branch-prediction path (call over a label then pop), and
   the PC-thunk path (call the per-register thunk recorded in
   pic_labels_used), finishing with an add of _GLOBAL_OFFSET_TABLE_.
   NOTE(review): this extraction drops many original lines (e.g.
   5372-5373, 5375-5379, 5381, 5386, 5393-5395, 5397, 5399, 5401-5402,
   5404, 5406-5407, 5410, 5412-5413, 5416-5417, 5419-5422, 5425,
   5431-5432, 5434, 5437-5442, 5445, 5447-5448): the xops declaration,
   xops[0] setup, TARGET_MACHO conditionals, braces and returns are
   missing.  */
5371 /* Emit code for the SET_GOT patterns. */
5374 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5380 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5382 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5383 xops
[2] = gen_rtx_MEM (Pmode
,
5384 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5385 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5387 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5388 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5389 an unadorned address. */
5390 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5391 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5392 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5396 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5398 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5400 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5403 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5405 output_asm_insn ("call\t%a2", xops
);
5408 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5409 is what will be referenced by the Mach-O PIC subsystem. */
5411 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5414 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5415 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5418 output_asm_insn ("pop{l}\t%0", xops
);
5423 get_pc_thunk_name (name
, REGNO (dest
));
5424 pic_labels_used
|= 1 << REGNO (dest
);
5426 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5427 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5428 output_asm_insn ("call\t%X2", xops
);
5429 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5430 is what will be referenced by the Mach-O PIC subsystem. */
5433 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5435 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5436 CODE_LABEL_NUMBER (label
));
5443 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5444 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5446 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
/* Builds the RTL SET for pushing ARG: a store through a PRE_DEC of the
   stack pointer.  NOTE(review): the function signature itself (original
   lines 5452-5455) and the SET's operands (5457, 5460-5461) are missing
   from this extraction — only the comment and part of the return
   expression survive.  */
5451 /* Generate an "push" pattern for input ARG. */
5456 return gen_rtx_SET (VOIDmode
,
5458 gen_rtx_PRE_DEC (Pmode
,
5459 stack_pointer_rtx
)),
/* For a leaf, non-profiled, non-TLS-descriptor function, scan registers
   2..0 for one never live in this function and (presumably) return it as
   an alternate PIC register; otherwise return INVALID_REGNUM.
   NOTE(review): the local declaration of i, braces and the "return i"
   inside the loop (original lines 5468, 5471-5472, 5475-5477) are
   missing from this extraction.  */
5463 /* Return >= 0 if there is an unused call-clobbered register available
5464 for the entire function. */
5467 ix86_select_alt_pic_regnum (void)
5469 if (current_function_is_leaf
&& !current_function_profile
5470 && !ix86_current_function_calls_tls_descriptor
)
5473 for (i
= 2; i
>= 0; --i
)
5474 if (!regs_ever_live
[i
])
5478 return INVALID_REGNUM
;
/* Decide whether REGNO must be saved in the prologue: special-cases the
   PIC register (not needed if an alternate PIC reg exists), the
   EH-return data registers when MAYBE_EH_RETURN, and the register
   holding the forced-aligned argument pointer; otherwise a register is
   saved iff it is live, call-saved, not fixed, and not the frame pointer
   when one is in use.  NOTE(review): this extraction drops original
   lines 5484, 5491, 5493-5496, 5498-5501, 5504-5509, 5512-5513 — the
   opening brace, the EH loop header/returns and intermediate returns are
   missing.  */
5481 /* Return 1 if we need to save REGNO. */
5483 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5485 if (pic_offset_table_rtx
5486 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5487 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5488 || current_function_profile
5489 || current_function_calls_eh_return
5490 || current_function_uses_const_pool
))
5492 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5497 if (current_function_calls_eh_return
&& maybe_eh_return
)
5502 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5503 if (test
== INVALID_REGNUM
)
5510 if (cfun
->machine
->force_align_arg_pointer
5511 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5514 return (regs_ever_live
[regno
]
5515 && !call_used_regs
[regno
]
5516 && !fixed_regs
[regno
]
5517 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
/* Count hard registers that ix86_save_reg says must be saved.
   NOTE(review): the declaration/initialization of the counter, the
   increment in the loop body and the final return (original lines
   5524-5527, 5530-5531) are missing from this extraction.  */
5520 /* Return number of registers to be saved on the stack. */
5523 ix86_nsaved_regs (void)
5528 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5529 if (ix86_save_reg (regno
, true))
/* INITIAL_ELIMINATION_OFFSET: computes the distance between the register
   pair (FROM, TO) from the frame layout — arg pointer / frame pointer
   relative to the hard frame pointer or the stack pointer.  The
   gcc_asserts constrain the only legal eliminations.  NOTE(review):
   original lines 5539, 5542, 5548-5549, 5551, 5554, 5557 (braces and the
   "else" structure around the STACK_POINTER_REGNUM branch) are missing
   from this extraction.  */
5534 /* Return the offset between two registers, one to be eliminated, and the other
5535 its replacement, at the start of a routine. */
5538 ix86_initial_elimination_offset (int from
, int to
)
5540 struct ix86_frame frame
;
5541 ix86_compute_frame_layout (&frame
);
5543 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5544 return frame
.hard_frame_pointer_offset
;
5545 else if (from
== FRAME_POINTER_REGNUM
5546 && to
== HARD_FRAME_POINTER_REGNUM
)
5547 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5550 gcc_assert (to
== STACK_POINTER_REGNUM
);
5552 if (from
== ARG_POINTER_REGNUM
)
5553 return frame
.stack_pointer_offset
;
5555 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5556 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
/* Computes the complete stack-frame layout for the current function into
   *FRAME: saved-register count, fast-prologue decision, alignment
   paddings, vararg save area, outgoing args area, the to_allocate size,
   and the x86-64 red-zone adjustment.  The trailing fprintf(stderr,...)
   dump is debug output — presumably guarded by an #if/if that is missing
   from this extraction.  NOTE(review): many original lines are dropped
   (e.g. 5561-5562, 5564, 5570, 5572-5573, 5576, 5581 (the if-condition
   introducing the use_fast_prologue_epilogue recomputation), 5583, 5585,
   5590, 5597, 5603, 5606, 5610, 5612-5613, 5616, 5618, 5622, 5628,
   5631, 5634-5635, 5637, 5640-5641, 5643, 5647, 5649, 5652-5654, 5663,
   5666-5667, 5669, 5671, 5676, 5678, 5680, 5683, 5688, 5692, 5696,
   5702-5703, 5707): else branches, braces and guard conditions must be
   recovered from the original file before editing.  */
5560 /* Fill structure ix86_frame about frame of currently computed function. */
5563 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5565 HOST_WIDE_INT total_size
;
5566 unsigned int stack_alignment_needed
;
5567 HOST_WIDE_INT offset
;
5568 unsigned int preferred_alignment
;
5569 HOST_WIDE_INT size
= get_frame_size ();
5571 frame
->nregs
= ix86_nsaved_regs ();
5574 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5575 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5577 /* During reload iteration the amount of registers saved can change.
5578 Recompute the value as needed. Do not recompute when amount of registers
5579 didn't change as reload does multiple calls to the function and does not
5580 expect the decision to change within single iteration. */
5582 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5584 int count
= frame
->nregs
;
5586 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5587 /* The fast prologue uses move instead of push to save registers. This
5588 is significantly longer, but also executes faster as modern hardware
5589 can execute the moves in parallel, but can't do that for push/pop.
5591 Be careful about choosing what prologue to emit: When function takes
5592 many instructions to execute we may use slow version as well as in
5593 case function is known to be outside hot spot (this is known with
5594 feedback only). Weight the size of function by number of registers
5595 to save as it is cheap to use one or two push instructions but very
5596 slow to use many of them. */
5598 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5599 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5600 || (flag_branch_probabilities
5601 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5602 cfun
->machine
->use_fast_prologue_epilogue
= false;
5604 cfun
->machine
->use_fast_prologue_epilogue
5605 = !expensive_function_p (count
);
5607 if (TARGET_PROLOGUE_USING_MOVE
5608 && cfun
->machine
->use_fast_prologue_epilogue
)
5609 frame
->save_regs_using_mov
= true;
5611 frame
->save_regs_using_mov
= false;
5614 /* Skip return address and saved base pointer. */
5615 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5617 frame
->hard_frame_pointer_offset
= offset
;
5619 /* Do some sanity checking of stack_alignment_needed and
5620 preferred_alignment, since i386 port is the only using those features
5621 that may break easily. */
5623 gcc_assert (!size
|| stack_alignment_needed
);
5624 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5625 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5626 gcc_assert (stack_alignment_needed
5627 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5629 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5630 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5632 /* Register save area */
5633 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5636 if (ix86_save_varrargs_registers
)
5638 offset
+= X86_64_VARARGS_SIZE
;
5639 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5642 frame
->va_arg_size
= 0;
5644 /* Align start of frame for local function. */
5645 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5646 & -stack_alignment_needed
) - offset
;
5648 offset
+= frame
->padding1
;
5650 /* Frame pointer points here. */
5651 frame
->frame_pointer_offset
= offset
;
5655 /* Add outgoing arguments area. Can be skipped if we eliminated
5656 all the function calls as dead code.
5657 Skipping is however impossible when function calls alloca. Alloca
5658 expander assumes that last current_function_outgoing_args_size
5659 of stack frame are unused. */
5660 if (ACCUMULATE_OUTGOING_ARGS
5661 && (!current_function_is_leaf
|| current_function_calls_alloca
5662 || ix86_current_function_calls_tls_descriptor
))
5664 offset
+= current_function_outgoing_args_size
;
5665 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5668 frame
->outgoing_arguments_size
= 0;
5670 /* Align stack boundary. Only needed if we're calling another function
5672 if (!current_function_is_leaf
|| current_function_calls_alloca
5673 || ix86_current_function_calls_tls_descriptor
)
5674 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5675 & -preferred_alignment
) - offset
;
5677 frame
->padding2
= 0;
5679 offset
+= frame
->padding2
;
5681 /* We've reached end of stack frame. */
5682 frame
->stack_pointer_offset
= offset
;
5684 /* Size prologue needs to allocate. */
5685 frame
->to_allocate
=
5686 (size
+ frame
->padding1
+ frame
->padding2
5687 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5689 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5690 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5691 frame
->save_regs_using_mov
= false;
5693 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5694 && current_function_is_leaf
5695 && !ix86_current_function_calls_tls_descriptor
)
5697 frame
->red_zone_size
= frame
->to_allocate
;
5698 if (frame
->save_regs_using_mov
)
5699 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5700 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5701 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5704 frame
->red_zone_size
= 0;
5705 frame
->to_allocate
-= frame
->red_zone_size
;
5706 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5708 fprintf (stderr
, "\n");
5709 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5710 fprintf (stderr
, "size: %ld\n", (long)size
);
5711 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5712 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5713 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5714 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5715 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5716 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5717 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5718 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5719 (long)frame
->hard_frame_pointer_offset
);
5720 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5721 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5722 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5723 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
/* Emit a push insn (marked frame-related for unwind info) for each hard
   register that ix86_save_reg reports must be saved, iterating from the
   highest register number downward.  NOTE(review): the local
   declarations of regno/insn and the braces (original lines 5731-5734,
   5737, 5740-5741) are missing from this extraction.  */
5727 /* Emit code to save registers in the prologue. */
5730 ix86_emit_save_regs (void)
5735 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5736 if (ix86_save_reg (regno
, true))
5738 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5739 RTX_FRAME_RELATED_P (insn
) = 1;
/* Save the to-be-saved registers with frame-related MOV stores at
   successive word offsets from POINTER+OFFSET (the fast-prologue
   alternative to pushes).  NOTE(review): the local declarations, the
   adjust_address mode/offset arguments on the wrapped call (original
   line 5755) and surrounding braces (5745, 5747-5750, 5753, 5759-5760)
   are missing from this extraction.  */
5743 /* Emit code to save registers using MOV insns. First register
5744 is restored from POINTER + OFFSET. */
5746 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5751 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5752 if (ix86_save_reg (regno
, true))
5754 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5756 gen_rtx_REG (Pmode
, regno
));
5757 RTX_FRAME_RELATED_P (insn
) = 1;
5758 offset
+= UNITS_PER_WORD
;
/* Emit a stack adjustment DEST = SRC + OFFSET using the special
   pro_epilogue_adjust_stack patterns; on x86-64, offsets that are not
   valid immediates are loaded into %r11 first (safe per the sibcall
   frame-size comment).  STYLE controls frame-related marking / %r11
   availability as described in the header comment.  NOTE(review):
   original lines 5766-5768, 5770-5773, 5777-5779, 5784, 5787,
   5790-5792 are missing — the rtx insn/r11 declarations, the
   !TARGET_64BIT condition guarding the *_1 pattern, braces, and the
   style checks around the RTX_FRAME_RELATED_P assignments must be
   recovered from the original file.  */
5762 /* Expand prologue or epilogue stack adjustment.
5763 The pattern exist to put a dependency on all ebp-based memory accesses.
5764 STYLE should be negative if instructions should be marked as frame related,
5765 zero if %r11 register is live and cannot be freely used and positive
5769 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5774 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5775 else if (x86_64_immediate_operand (offset
, DImode
))
5776 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5780 /* r11 is used by indirect sibcall return as well, set before the
5781 epilogue and used after the epilogue. ATM indirect sibcall
5782 shouldn't be used together with huge frame sizes in one
5783 function because of the frame_size check in sibcall.c. */
5785 r11
= gen_rtx_REG (DImode
, R11_REG
);
5786 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5788 RTX_FRAME_RELATED_P (insn
) = 1;
5789 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5793 RTX_FRAME_RELATED_P (insn
) = 1;
/* TARGET_INTERNAL_ARG_POINTER hook: when stack realignment is requested
   (main() with forced boundary, -mstackrealign, or the
   force_align_arg_pointer attribute), allocate a register (reg 2, %ecx)
   to carry the incoming argument pointer and return a copy of it;
   nested functions cannot realign (register-windowless trampoline —
   comment truncated at original line 5812) and fall back to
   virtual_incoming_args_rtx with a warning/error.  NOTE(review):
   original lines 5797-5798, 5800, 5810, 5815, 5822, 5825-5826 (return
   type, braces) are missing from this extraction.  */
5796 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5799 ix86_internal_arg_pointer (void)
5801 bool has_force_align_arg_pointer
=
5802 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5803 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5804 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5805 && DECL_NAME (current_function_decl
)
5806 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5807 && DECL_FILE_SCOPE_P (current_function_decl
))
5808 || ix86_force_align_arg_pointer
5809 || has_force_align_arg_pointer
)
5811 /* Nested functions can't realign the stack due to a register
5813 if (DECL_CONTEXT (current_function_decl
)
5814 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5816 if (ix86_force_align_arg_pointer
)
5817 warning (0, "-mstackrealign ignored for nested functions");
5818 if (has_force_align_arg_pointer
)
5819 error ("%s not supported for nested functions",
5820 ix86_force_align_arg_pointer_string
)
;
5821 return virtual_incoming_args_rtx
;
5823 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5824 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5827 return virtual_incoming_args_rtx
;
/* TARGET_DWARF_HANDLE_FRAME_UNSPEC hook: translate the UNSPECs emitted
   by the stack-realignment prologue into DWARF CFI — UNSPEC_REG_SAVE
   records a register save, UNSPEC_DEF_CFA (re)defines the CFA.
   NOTE(review): the switch header, break statements and default case
   (original lines 5833, 5835, 5838-5840, 5844, 5848-5851) are missing
   from this extraction; the INDEX parameter's use is not visible.  */
5830 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5831 This is called from dwarf2out.c to emit call frame instructions
5832 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5834 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5836 rtx unspec
= SET_SRC (pattern
);
5837 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5841 case UNSPEC_REG_SAVE
:
5842 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5843 SET_DEST (pattern
));
5845 case UNSPEC_DEF_CFA
:
5846 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5847 INTVAL (XVECEXP (unspec
, 0, 0)));
/* Expand the full function prologue: optional stack realignment with its
   hand-crafted unwind notes, frame-pointer push/set, register saves
   (push or MOV per frame.save_regs_using_mov), stack allocation (direct
   adjustment or the allocate_stack_worker probe loop with %eax
   save/restore), PIC register setup (set_got / set_got_rex64 /
   CM_LARGE_PIC sequence) with REG_MAYBE_DEAD notes, and a scheduling
   blockage when profiling.  NOTE(review): this extraction drops many
   original lines (e.g. 5855-5856, 5858-5860, 5863, 5865, 5867-5869,
   5875, 5884, 5890, 5893-5894, 5904-5905, 5910-5911, 5914, 5916, 5919,
   5922-5923, 5925, 5928, 5930, 5937-5939, 5943-5944, 5948-5949,
   5951-5953, 5955-5957, 5959, 5966-5968, 5971-5974, 5977-5979, 5981,
   5984, 5987-5988, 5993, 5995, 5998, 6000-6005, 6007, 6011, 6020-6021,
   6023-6024, 6026, 6034-6035): local declarations, the allocate==0 /
   TARGET_STACK_PROBE branch structure, the and-mask operand, braces and
   several else branches must be recovered from the original file.  */
5854 /* Expand the prologue into a bunch of separate insns. */
5857 ix86_expand_prologue (void)
5861 struct ix86_frame frame
;
5862 HOST_WIDE_INT allocate
;
5864 ix86_compute_frame_layout (&frame
);
5866 if (cfun
->machine
->force_align_arg_pointer
)
5870 /* Grab the argument pointer. */
5871 x
= plus_constant (stack_pointer_rtx
, 4);
5872 y
= cfun
->machine
->force_align_arg_pointer
;
5873 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5874 RTX_FRAME_RELATED_P (insn
) = 1;
5876 /* The unwind info consists of two parts: install the fafp as the cfa,
5877 and record the fafp as the "save register" of the stack pointer.
5878 The later is there in order that the unwinder can see where it
5879 should restore the stack pointer across the and insn. */
5880 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5881 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5882 RTX_FRAME_RELATED_P (x
) = 1;
5883 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5885 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5886 RTX_FRAME_RELATED_P (y
) = 1;
5887 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5888 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5889 REG_NOTES (insn
) = x
;
5891 /* Align the stack. */
5892 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5895 /* And here we cheat like madmen with the unwind info. We force the
5896 cfa register back to sp+4, which is exactly what it was at the
5897 start of the function. Re-pushing the return address results in
5898 the return at the same spot relative to the cfa, and thus is
5899 correct wrt the unwind info. */
5900 x
= cfun
->machine
->force_align_arg_pointer
;
5901 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5902 insn
= emit_insn (gen_push (x
));
5903 RTX_FRAME_RELATED_P (insn
) = 1;
5906 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5907 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5908 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5909 REG_NOTES (insn
) = x
;
5912 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5913 slower on all targets. Also sdb doesn't like it. */
5915 if (frame_pointer_needed
)
5917 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5918 RTX_FRAME_RELATED_P (insn
) = 1;
5920 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5921 RTX_FRAME_RELATED_P (insn
) = 1;
5924 allocate
= frame
.to_allocate
;
5926 if (!frame
.save_regs_using_mov
)
5927 ix86_emit_save_regs ();
5929 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5931 /* When using red zone we may start register saving before allocating
5932 the stack frame saving one cycle of the prologue. */
5933 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5934 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5935 : stack_pointer_rtx
,
5936 -frame
.nregs
* UNITS_PER_WORD
);
5940 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5941 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5942 GEN_INT (-allocate
), -1);
5945 /* Only valid for Win32. */
5946 rtx eax
= gen_rtx_REG (SImode
, 0);
5947 bool eax_live
= ix86_eax_live_at_start_p ();
5950 gcc_assert (!TARGET_64BIT
);
5954 emit_insn (gen_push (eax
));
5958 emit_move_insn (eax
, GEN_INT (allocate
));
5960 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5961 RTX_FRAME_RELATED_P (insn
) = 1;
5962 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5963 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5964 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5965 t
, REG_NOTES (insn
));
5969 if (frame_pointer_needed
)
5970 t
= plus_constant (hard_frame_pointer_rtx
,
5973 - frame
.nregs
* UNITS_PER_WORD
);
5975 t
= plus_constant (stack_pointer_rtx
, allocate
);
5976 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5980 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5982 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5983 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5985 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5986 -frame
.nregs
* UNITS_PER_WORD
);
5989 pic_reg_used
= false;
5990 if (pic_offset_table_rtx
5991 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5992 || current_function_profile
))
5994 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5996 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5997 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5999 pic_reg_used
= true;
6006 if (ix86_cmodel
== CM_LARGE_PIC
)
6008 rtx tmp_reg
= gen_rtx_REG (DImode
,
6009 FIRST_REX_INT_REG
+ 3 /* R11 */);
6010 rtx label
= gen_label_rtx ();
6012 LABEL_PRESERVE_P (label
) = 1;
6013 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6014 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6015 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6016 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6017 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6018 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6019 pic_offset_table_rtx
, tmp_reg
));
6022 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6025 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6027 /* Even with accurate pre-reload life analysis, we can wind up
6028 deleting all references to the pic register after reload.
6029 Consider if cross-jumping unifies two sides of a branch
6030 controlled by a comparison vs the only read from a global.
6031 In which case, allow the set_got to be deleted, though we're
6032 too late to do anything about the ebx save in the prologue. */
6033 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6036 /* Prevent function calls from be scheduled before the call to mcount.
6037 In the pic_reg_used case, make sure that the got load isn't deleted. */
6038 if (current_function_profile
)
6039 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
/* Restore saved registers with MOV loads at successive word offsets from
   POINTER+OFFSET; on x86-64, if the offset no longer fits a 32-bit
   displacement, rebase the address through %r11 first.  NOTE(review):
   local declarations (regno, r11), braces and the offset rebase after
   switching to r11 (original lines 6044, 6047-6048, 6050, 6053,
   6057-6059, 6064-6065, 6069-6070) are missing from this extraction.  */
6042 /* Emit code to restore saved registers using MOV insns. First register
6043 is restored from POINTER + OFFSET. */
6045 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6046 int maybe_eh_return
)
6049 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6051 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6052 if (ix86_save_reg (regno
, maybe_eh_return
))
6054 /* Ensure that adjust_address won't be forced to produce pointer
6055 out of range allowed by x86-64 instruction set. */
6056 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6060 r11
= gen_rtx_REG (DImode
, R11_REG
);
6061 emit_move_insn (r11
, GEN_INT (offset
));
6062 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6063 base_address
= gen_rtx_MEM (Pmode
, r11
);
6066 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6067 adjust_address (base_address
, Pmode
, offset
));
6068 offset
+= UNITS_PER_WORD
;
/* Expand the function epilogue for the given STYLE (style == 2 appears
   to be the eh_return path; style's other values feed
   pro_epilogue_adjust_stack).  Two strategies: MOV-restore registers
   then adjust/leave (chosen by the big heuristic condition), or
   deallocate the frame then pop registers.  Also handles the eh_return
   %ecx stack adjustment, undoing forced stack realignment, and the
   final ret / pop-N ret / indirect return for callee-pop (>64K)
   functions.  Sibcall epilogues skip the return (comment at original
   6214; the guarding condition is missing).  NOTE(review): this
   extraction drops many original lines (e.g. 6073-6074, 6076-6077,
   6081, 6083, 6090, 6092, 6096, 6112, 6117-6118, 6122, 6125,
   6127-6128, 6130, 6132, 6136, 6139, 6141-6144, 6149-6150, 6155,
   6160-6161, 6164-6165, 6167, 6169-6172, 6175-6176, 6181, 6185,
   6188-6189, 6191, 6193, 6195, 6202, 6204-6206, 6208, 6211-6213,
   6215-6217, 6219, 6221, 6224-6225, 6227, 6229, 6232, 6236-6237,
   6239-6240): braces, else keywords and several operands must be
   recovered from the original file before editing.  */
6072 /* Restore function stack, frame, and registers. */
6075 ix86_expand_epilogue (int style
)
6078 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6079 struct ix86_frame frame
;
6080 HOST_WIDE_INT offset
;
6082 ix86_compute_frame_layout (&frame
);
6084 /* Calculate start of saved registers relative to ebp. Special care
6085 must be taken for the normal return case of a function using
6086 eh_return: the eax and edx registers are marked as saved, but not
6087 restored along this path. */
6088 offset
= frame
.nregs
;
6089 if (current_function_calls_eh_return
&& style
!= 2)
6091 offset
*= -UNITS_PER_WORD
;
6093 /* If we're only restoring one register and sp is not valid then
6094 using a move instruction to restore the register since it's
6095 less work than reloading sp and popping the register.
6097 The default code result in stack adjustment using add/lea instruction,
6098 while this code results in LEAVE instruction (or discrete equivalent),
6099 so it is profitable in some other cases as well. Especially when there
6100 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6101 and there is exactly one register to pop. This heuristic may need some
6102 tuning in future. */
6103 if ((!sp_valid
&& frame
.nregs
<= 1)
6104 || (TARGET_EPILOGUE_USING_MOVE
6105 && cfun
->machine
->use_fast_prologue_epilogue
6106 && (frame
.nregs
> 1 || frame
.to_allocate
))
6107 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6108 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6109 && cfun
->machine
->use_fast_prologue_epilogue
6110 && frame
.nregs
== 1)
6111 || current_function_calls_eh_return
)
6113 /* Restore registers. We can use ebp or esp to address the memory
6114 locations. If both are available, default to ebp, since offsets
6115 are known to be small. Only exception is esp pointing directly to the
6116 end of block of saved registers, where we may simplify addressing
6119 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6120 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6121 frame
.to_allocate
, style
== 2);
6123 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6124 offset
, style
== 2);
6126 /* eh_return epilogues need %ecx added to the stack pointer. */
6129 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6131 if (frame_pointer_needed
)
6133 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6134 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6135 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6137 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6138 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6140 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6145 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6146 tmp
= plus_constant (tmp
, (frame
.to_allocate
6147 + frame
.nregs
* UNITS_PER_WORD
));
6148 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6151 else if (!frame_pointer_needed
)
6152 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6153 GEN_INT (frame
.to_allocate
6154 + frame
.nregs
* UNITS_PER_WORD
),
6156 /* If not an i386, mov & pop is faster than "leave". */
6157 else if (TARGET_USE_LEAVE
|| optimize_size
6158 || !cfun
->machine
->use_fast_prologue_epilogue
)
6159 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6162 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6163 hard_frame_pointer_rtx
,
6166 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6168 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6173 /* First step is to deallocate the stack frame so that we can
6174 pop the registers. */
6177 gcc_assert (frame_pointer_needed
);
6178 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6179 hard_frame_pointer_rtx
,
6180 GEN_INT (offset
), style
);
6182 else if (frame
.to_allocate
)
6183 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6184 GEN_INT (frame
.to_allocate
), style
);
6186 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6187 if (ix86_save_reg (regno
, false))
6190 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6192 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6194 if (frame_pointer_needed
)
6196 /* Leave results in shorter dependency chains on CPUs that are
6197 able to grok it fast. */
6198 if (TARGET_USE_LEAVE
)
6199 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6200 else if (TARGET_64BIT
)
6201 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6203 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6207 if (cfun
->machine
->force_align_arg_pointer
)
6209 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6210 cfun
->machine
->force_align_arg_pointer
,
6214 /* Sibcall epilogues don't want a return instruction. */
6218 if (current_function_pops_args
&& current_function_args_size
)
6220 rtx popc
= GEN_INT (current_function_pops_args
);
6222 /* i386 can only pop 64K bytes. If asked to pop more, pop
6223 return address, do explicit add, and jump indirectly to the
6226 if (current_function_pops_args
>= 65536)
6228 rtx ecx
= gen_rtx_REG (SImode
, 2);
6230 /* There is no "pascal" calling convention in 64bit ABI. */
6231 gcc_assert (!TARGET_64BIT
);
6233 emit_insn (gen_popsi1 (ecx
));
6234 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6235 emit_jump_insn (gen_return_indirect_internal (ecx
));
6238 emit_jump_insn (gen_return_pop_internal (popc
));
6241 emit_jump_insn (gen_return_internal ());
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restore the PIC register's real
   register number (the prologue may have retargeted it to an alternate
   register), and on Mach-O emit a trailing NOP when the function ends
   with a (deleted) label, since Mach-O cannot place labels at the end
   of an object.  NOTE(review): the #if TARGET_MACHO guard, the
   NOTE_P/while loop structure and closing braces (original lines 6249,
   6252, 6255, 6257-6258, 6261-6263, 6266-6268) are missing from this
   extraction.  */
6244 /* Reset from the function's potential modifications. */
6247 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6248 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6250 if (pic_offset_table_rtx
)
6251 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6253 /* Mach-O doesn't support labels at the end of objects, so if
6254 it looks like we might want one, insert a NOP. */
6256 rtx insn
= get_last_insn ();
6259 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6260 insn
= PREV_INSN (insn
);
6264 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6265 fputs ("\tnop\n", file
);
6271 /* Extract the parts of an RTL expression that is a valid memory address
6272 for an instruction. Return 0 if the structure of the address is
6273 grossly off. Return -1 if the address contains ASHIFT, so it is not
6274 strictly valid, but still used for computing length of lea instruction. */
6277 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6279 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6280 rtx base_reg
, index_reg
;
6281 HOST_WIDE_INT scale
= 1;
6282 rtx scale_rtx
= NULL_RTX
;
6284 enum ix86_address_seg seg
= SEG_DEFAULT
;
6286 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6288 else if (GET_CODE (addr
) == PLUS
)
6298 addends
[n
++] = XEXP (op
, 1);
6301 while (GET_CODE (op
) == PLUS
);
6306 for (i
= n
; i
>= 0; --i
)
6309 switch (GET_CODE (op
))
6314 index
= XEXP (op
, 0);
6315 scale_rtx
= XEXP (op
, 1);
6319 if (XINT (op
, 1) == UNSPEC_TP
6320 && TARGET_TLS_DIRECT_SEG_REFS
6321 && seg
== SEG_DEFAULT
)
6322 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6351 else if (GET_CODE (addr
) == MULT
)
6353 index
= XEXP (addr
, 0); /* index*scale */
6354 scale_rtx
= XEXP (addr
, 1);
6356 else if (GET_CODE (addr
) == ASHIFT
)
6360 /* We're called for lea too, which implements ashift on occasion. */
6361 index
= XEXP (addr
, 0);
6362 tmp
= XEXP (addr
, 1);
6363 if (!CONST_INT_P (tmp
))
6365 scale
= INTVAL (tmp
);
6366 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6372 disp
= addr
; /* displacement */
6374 /* Extract the integral value of scale. */
6377 if (!CONST_INT_P (scale_rtx
))
6379 scale
= INTVAL (scale_rtx
);
6382 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6383 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6385 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6386 if (base_reg
&& index_reg
&& scale
== 1
6387 && (index_reg
== arg_pointer_rtx
6388 || index_reg
== frame_pointer_rtx
6389 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6392 tmp
= base
, base
= index
, index
= tmp
;
6393 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6396 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6397 if ((base_reg
== hard_frame_pointer_rtx
6398 || base_reg
== frame_pointer_rtx
6399 || base_reg
== arg_pointer_rtx
) && !disp
)
6402 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6403 Avoid this by transforming to [%esi+0]. */
6404 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6405 && base_reg
&& !index_reg
&& !disp
6407 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6410 /* Special case: encode reg+reg instead of reg*2. */
6411 if (!base
&& index
&& scale
&& scale
== 2)
6412 base
= index
, base_reg
= index_reg
, scale
= 1;
6414 /* Special case: scaling cannot be encoded without base or displacement. */
6415 if (!base
&& !disp
&& index
&& scale
!= 1)
6427 /* Return cost of the memory address x.
6428 For i386, it is better to use a complex address than let gcc copy
6429 the address into a reg and make a new pseudo. But not if the address
6430 requires two regs - that would mean more pseudos with longer
6433 ix86_address_cost (rtx x
)
6435 struct ix86_address parts
;
6437 int ok
= ix86_decompose_address (x
, &parts
);
6441 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6442 parts
.base
= SUBREG_REG (parts
.base
);
6443 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6444 parts
.index
= SUBREG_REG (parts
.index
);
6446 /* More complex memory references are better. */
6447 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6449 if (parts
.seg
!= SEG_DEFAULT
)
6452 /* Attempt to minimize number of registers in the address. */
6454 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6456 && (!REG_P (parts
.index
)
6457 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6461 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6463 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6464 && parts
.base
!= parts
.index
)
6467 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6468 since its predecode logic can't detect the length of instructions
6469 and it degenerates to vector decoded. Increase cost of such
6470 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6471 to split such addresses or even refuse such addresses at all.
6473 Following addressing modes are affected:
6478 The first and last case may be avoidable by explicitly coding the zero in
6479 memory address, but I don't have an AMD-K6 machine handy to check this
6483 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6484 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6485 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6491 /* If X is a machine specific address (i.e. a symbol or label being
6492 referenced as a displacement from the GOT implemented using an
6493 UNSPEC), then return the base term. Otherwise return X. */
6496 ix86_find_base_term (rtx x
)
6502 if (GET_CODE (x
) != CONST
)
6505 if (GET_CODE (term
) == PLUS
6506 && (CONST_INT_P (XEXP (term
, 1))
6507 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6508 term
= XEXP (term
, 0);
6509 if (GET_CODE (term
) != UNSPEC
6510 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6513 term
= XVECEXP (term
, 0, 0);
6515 if (GET_CODE (term
) != SYMBOL_REF
6516 && GET_CODE (term
) != LABEL_REF
)
6522 term
= ix86_delegitimize_address (x
);
6524 if (GET_CODE (term
) != SYMBOL_REF
6525 && GET_CODE (term
) != LABEL_REF
)
6531 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6532 this is used to form addresses to local data when -fPIC is in
6536 darwin_local_data_pic (rtx disp
)
6538 if (GET_CODE (disp
) == MINUS
)
6540 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6541 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6542 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6544 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6545 if (! strcmp (sym_name
, "<pic base>"))
6553 /* Determine if a given RTX is a valid constant. We already know this
6554 satisfies CONSTANT_P. */
6557 legitimate_constant_p (rtx x
)
6559 switch (GET_CODE (x
))
6564 if (GET_CODE (x
) == PLUS
)
6566 if (!CONST_INT_P (XEXP (x
, 1)))
6571 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6574 /* Only some unspecs are valid as "constants". */
6575 if (GET_CODE (x
) == UNSPEC
)
6576 switch (XINT (x
, 1))
6581 return TARGET_64BIT
;
6584 x
= XVECEXP (x
, 0, 0);
6585 return (GET_CODE (x
) == SYMBOL_REF
6586 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6588 x
= XVECEXP (x
, 0, 0);
6589 return (GET_CODE (x
) == SYMBOL_REF
6590 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6595 /* We must have drilled down to a symbol. */
6596 if (GET_CODE (x
) == LABEL_REF
)
6598 if (GET_CODE (x
) != SYMBOL_REF
)
6603 /* TLS symbols are never valid. */
6604 if (SYMBOL_REF_TLS_MODEL (x
))
6609 if (GET_MODE (x
) == TImode
6610 && x
!= CONST0_RTX (TImode
)
6616 if (x
== CONST0_RTX (GET_MODE (x
)))
6624 /* Otherwise we handle everything else in the move patterns. */
6628 /* Determine if it's legal to put X into the constant pool. This
6629 is not possible for the address of thread-local symbols, which
6630 is checked above. */
6633 ix86_cannot_force_const_mem (rtx x
)
6635 /* We can always put integral constants and vectors in memory. */
6636 switch (GET_CODE (x
))
6646 return !legitimate_constant_p (x
);
6649 /* Determine if a given RTX is a valid constant address. */
6652 constant_address_p (rtx x
)
6654 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6657 /* Nonzero if the constant value X is a legitimate general operand
6658 when generating PIC code. It is given that flag_pic is on and
6659 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6662 legitimate_pic_operand_p (rtx x
)
6666 switch (GET_CODE (x
))
6669 inner
= XEXP (x
, 0);
6670 if (GET_CODE (inner
) == PLUS
6671 && CONST_INT_P (XEXP (inner
, 1)))
6672 inner
= XEXP (inner
, 0);
6674 /* Only some unspecs are valid as "constants". */
6675 if (GET_CODE (inner
) == UNSPEC
)
6676 switch (XINT (inner
, 1))
6681 return TARGET_64BIT
;
6683 x
= XVECEXP (inner
, 0, 0);
6684 return (GET_CODE (x
) == SYMBOL_REF
6685 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6693 return legitimate_pic_address_disp_p (x
);
6700 /* Determine if a given CONST RTX is a valid memory displacement
6704 legitimate_pic_address_disp_p (rtx disp
)
6708 /* In 64bit mode we can allow direct addresses of symbols and labels
6709 when they are not dynamic symbols. */
6712 rtx op0
= disp
, op1
;
6714 switch (GET_CODE (disp
))
6720 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6722 op0
= XEXP (XEXP (disp
, 0), 0);
6723 op1
= XEXP (XEXP (disp
, 0), 1);
6724 if (!CONST_INT_P (op1
)
6725 || INTVAL (op1
) >= 16*1024*1024
6726 || INTVAL (op1
) < -16*1024*1024)
6728 if (GET_CODE (op0
) == LABEL_REF
)
6730 if (GET_CODE (op0
) != SYMBOL_REF
)
6735 /* TLS references should always be enclosed in UNSPEC. */
6736 if (SYMBOL_REF_TLS_MODEL (op0
))
6738 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6739 && ix86_cmodel
!= CM_LARGE_PIC
)
6747 if (GET_CODE (disp
) != CONST
)
6749 disp
= XEXP (disp
, 0);
6753 /* It is not safe to allow PLUS expressions. This limits the allowed distance
6754 of GOT tables. We should not need these anyway. */
6755 if (GET_CODE (disp
) != UNSPEC
6756 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6757 && XINT (disp
, 1) != UNSPEC_GOTOFF
6758 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6761 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6762 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6768 if (GET_CODE (disp
) == PLUS
)
6770 if (!CONST_INT_P (XEXP (disp
, 1)))
6772 disp
= XEXP (disp
, 0);
6776 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6779 if (GET_CODE (disp
) != UNSPEC
)
6782 switch (XINT (disp
, 1))
6787 /* We need to check for both symbols and labels because VxWorks loads
6788 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6790 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6791 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6793 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6794 While the ABI also specifies a 32bit relocation, we don't produce it in
6795 small PIC model at all. */
6796 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6797 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6799 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6801 case UNSPEC_GOTTPOFF
:
6802 case UNSPEC_GOTNTPOFF
:
6803 case UNSPEC_INDNTPOFF
:
6806 disp
= XVECEXP (disp
, 0, 0);
6807 return (GET_CODE (disp
) == SYMBOL_REF
6808 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6810 disp
= XVECEXP (disp
, 0, 0);
6811 return (GET_CODE (disp
) == SYMBOL_REF
6812 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6814 disp
= XVECEXP (disp
, 0, 0);
6815 return (GET_CODE (disp
) == SYMBOL_REF
6816 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6822 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6823 memory address for an instruction. The MODE argument is the machine mode
6824 for the MEM expression that wants to use this address.
6826 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6827 convert common non-canonical forms to canonical form so that they will
6831 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6833 struct ix86_address parts
;
6834 rtx base
, index
, disp
;
6835 HOST_WIDE_INT scale
;
6836 const char *reason
= NULL
;
6837 rtx reason_rtx
= NULL_RTX
;
6839 if (TARGET_DEBUG_ADDR
)
6842 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6843 GET_MODE_NAME (mode
), strict
);
6847 if (ix86_decompose_address (addr
, &parts
) <= 0)
6849 reason
= "decomposition failed";
6854 index
= parts
.index
;
6856 scale
= parts
.scale
;
6858 /* Validate base register.
6860 Don't allow SUBREG's that span more than a word here. It can lead to spill
6861 failures when the base is one word out of a two word structure, which is
6862 represented internally as a DImode int. */
6871 else if (GET_CODE (base
) == SUBREG
6872 && REG_P (SUBREG_REG (base
))
6873 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6875 reg
= SUBREG_REG (base
);
6878 reason
= "base is not a register";
6882 if (GET_MODE (base
) != Pmode
)
6884 reason
= "base is not in Pmode";
6888 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6889 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6891 reason
= "base is not valid";
6896 /* Validate index register.
6898 Don't allow SUBREG's that span more than a word here -- same as above. */
6907 else if (GET_CODE (index
) == SUBREG
6908 && REG_P (SUBREG_REG (index
))
6909 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6911 reg
= SUBREG_REG (index
);
6914 reason
= "index is not a register";
6918 if (GET_MODE (index
) != Pmode
)
6920 reason
= "index is not in Pmode";
6924 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6925 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6927 reason
= "index is not valid";
6932 /* Validate scale factor. */
6935 reason_rtx
= GEN_INT (scale
);
6938 reason
= "scale without index";
6942 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6944 reason
= "scale is not a valid multiplier";
6949 /* Validate displacement. */
6954 if (GET_CODE (disp
) == CONST
6955 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6956 switch (XINT (XEXP (disp
, 0), 1))
6958 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6959 used. While the ABI also specifies 32bit relocations, we don't produce
6960 them at all and use IP relative instead. */
6963 gcc_assert (flag_pic
);
6965 goto is_legitimate_pic
;
6966 reason
= "64bit address unspec";
6969 case UNSPEC_GOTPCREL
:
6970 gcc_assert (flag_pic
);
6971 goto is_legitimate_pic
;
6973 case UNSPEC_GOTTPOFF
:
6974 case UNSPEC_GOTNTPOFF
:
6975 case UNSPEC_INDNTPOFF
:
6981 reason
= "invalid address unspec";
6985 else if (SYMBOLIC_CONST (disp
)
6989 && MACHOPIC_INDIRECT
6990 && !machopic_operand_p (disp
)
6996 if (TARGET_64BIT
&& (index
|| base
))
6998 /* foo@dtpoff(%rX) is ok. */
6999 if (GET_CODE (disp
) != CONST
7000 || GET_CODE (XEXP (disp
, 0)) != PLUS
7001 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7002 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7003 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7004 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7006 reason
= "non-constant pic memory reference";
7010 else if (! legitimate_pic_address_disp_p (disp
))
7012 reason
= "displacement is an invalid pic construct";
7016 /* This code used to verify that a symbolic pic displacement
7017 includes the pic_offset_table_rtx register.
7019 While this is a good idea, unfortunately these constructs may
7020 be created by "adds using lea" optimization for incorrect
7029 This code is nonsensical, but results in addressing
7030 GOT table with pic_offset_table_rtx base. We can't
7031 just refuse it easily, since it gets matched by
7032 "addsi3" pattern, that later gets split to lea in the
7033 case output register differs from input. While this
7034 can be handled by separate addsi pattern for this case
7035 that never results in lea, this seems to be easier and
7036 correct fix for crash to disable this test. */
7038 else if (GET_CODE (disp
) != LABEL_REF
7039 && !CONST_INT_P (disp
)
7040 && (GET_CODE (disp
) != CONST
7041 || !legitimate_constant_p (disp
))
7042 && (GET_CODE (disp
) != SYMBOL_REF
7043 || !legitimate_constant_p (disp
)))
7045 reason
= "displacement is not constant";
7048 else if (TARGET_64BIT
7049 && !x86_64_immediate_operand (disp
, VOIDmode
))
7051 reason
= "displacement is out of range";
7056 /* Everything looks valid. */
7057 if (TARGET_DEBUG_ADDR
)
7058 fprintf (stderr
, "Success.\n");
7062 if (TARGET_DEBUG_ADDR
)
7064 fprintf (stderr
, "Error: %s\n", reason
);
7065 debug_rtx (reason_rtx
);
7070 /* Return a unique alias set for the GOT. */
7072 static HOST_WIDE_INT
7073 ix86_GOT_alias_set (void)
7075 static HOST_WIDE_INT set
= -1;
7077 set
= new_alias_set ();
7081 /* Return a legitimate reference for ORIG (an address) using the
7082 register REG. If REG is 0, a new pseudo is generated.
7084 There are two types of references that must be handled:
7086 1. Global data references must load the address from the GOT, via
7087 the PIC reg. An insn is emitted to do this load, and the reg is
7090 2. Static data references, constant pool addresses, and code labels
7091 compute the address as an offset from the GOT, whose base is in
7092 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7093 differentiate them from global data objects. The returned
7094 address is the PIC reg + an unspec constant.
7096 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7097 reg also appears in the address. */
7100 legitimize_pic_address (rtx orig
, rtx reg
)
7107 if (TARGET_MACHO
&& !TARGET_64BIT
)
7110 reg
= gen_reg_rtx (Pmode
);
7111 /* Use the generic Mach-O PIC machinery. */
7112 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7116 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7118 else if (TARGET_64BIT
7119 && ix86_cmodel
!= CM_SMALL_PIC
7120 && gotoff_operand (addr
, Pmode
))
7123 /* This symbol may be referenced via a displacement from the PIC
7124 base address (@GOTOFF). */
7126 if (reload_in_progress
)
7127 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7128 if (GET_CODE (addr
) == CONST
)
7129 addr
= XEXP (addr
, 0);
7130 if (GET_CODE (addr
) == PLUS
)
7132 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7133 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7136 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7137 new = gen_rtx_CONST (Pmode
, new);
7139 tmpreg
= gen_reg_rtx (Pmode
);
7142 emit_move_insn (tmpreg
, new);
7146 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7147 tmpreg
, 1, OPTAB_DIRECT
);
7150 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7152 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7154 /* This symbol may be referenced via a displacement from the PIC
7155 base address (@GOTOFF). */
7157 if (reload_in_progress
)
7158 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7159 if (GET_CODE (addr
) == CONST
)
7160 addr
= XEXP (addr
, 0);
7161 if (GET_CODE (addr
) == PLUS
)
7163 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7164 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7167 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7168 new = gen_rtx_CONST (Pmode
, new);
7169 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7173 emit_move_insn (reg
, new);
7177 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7178 /* We can't use @GOTOFF for text labels on VxWorks;
7179 see gotoff_operand. */
7180 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7182 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7184 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7185 new = gen_rtx_CONST (Pmode
, new);
7186 new = gen_const_mem (Pmode
, new);
7187 set_mem_alias_set (new, ix86_GOT_alias_set ());
7190 reg
= gen_reg_rtx (Pmode
);
7191 /* Use directly gen_movsi, otherwise the address is loaded
7192 into a register for CSE. We don't want to CSE these addresses,
7193 instead we CSE addresses from the GOT table, so skip this. */
7194 emit_insn (gen_movsi (reg
, new));
7199 /* This symbol must be referenced via a load from the
7200 Global Offset Table (@GOT). */
7202 if (reload_in_progress
)
7203 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7204 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7205 new = gen_rtx_CONST (Pmode
, new);
7207 new = force_reg (Pmode
, new);
7208 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7209 new = gen_const_mem (Pmode
, new);
7210 set_mem_alias_set (new, ix86_GOT_alias_set ());
7213 reg
= gen_reg_rtx (Pmode
);
7214 emit_move_insn (reg
, new);
7220 if (CONST_INT_P (addr
)
7221 && !x86_64_immediate_operand (addr
, VOIDmode
))
7225 emit_move_insn (reg
, addr
);
7229 new = force_reg (Pmode
, addr
);
7231 else if (GET_CODE (addr
) == CONST
)
7233 addr
= XEXP (addr
, 0);
7235 /* We must match stuff we generate before. Assume the only
7236 unspecs that can get here are ours. Not that we could do
7237 anything with them anyway.... */
7238 if (GET_CODE (addr
) == UNSPEC
7239 || (GET_CODE (addr
) == PLUS
7240 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7242 gcc_assert (GET_CODE (addr
) == PLUS
);
7244 if (GET_CODE (addr
) == PLUS
)
7246 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7248 /* Check first to see if this is a constant offset from a @GOTOFF
7249 symbol reference. */
7250 if (gotoff_operand (op0
, Pmode
)
7251 && CONST_INT_P (op1
))
7255 if (reload_in_progress
)
7256 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7257 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7259 new = gen_rtx_PLUS (Pmode
, new, op1
);
7260 new = gen_rtx_CONST (Pmode
, new);
7261 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7265 emit_move_insn (reg
, new);
7271 if (INTVAL (op1
) < -16*1024*1024
7272 || INTVAL (op1
) >= 16*1024*1024)
7274 if (!x86_64_immediate_operand (op1
, Pmode
))
7275 op1
= force_reg (Pmode
, op1
);
7276 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7282 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7283 new = legitimize_pic_address (XEXP (addr
, 1),
7284 base
== reg
? NULL_RTX
: reg
);
7286 if (CONST_INT_P (new))
7287 new = plus_constant (base
, INTVAL (new));
7290 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7292 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7293 new = XEXP (new, 1);
7295 new = gen_rtx_PLUS (Pmode
, base
, new);
7303 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7306 get_thread_pointer (int to_reg
)
7310 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7314 reg
= gen_reg_rtx (Pmode
);
7315 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7316 insn
= emit_insn (insn
);
7321 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7322 false if we expect this to be used for a memory address and true if
7323 we expect to load the address into a register. */
7326 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7328 rtx dest
, base
, off
, pic
, tp
;
7333 case TLS_MODEL_GLOBAL_DYNAMIC
:
7334 dest
= gen_reg_rtx (Pmode
);
7335 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7337 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7339 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7342 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7343 insns
= get_insns ();
7346 emit_libcall_block (insns
, dest
, rax
, x
);
7348 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7349 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7351 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7353 if (TARGET_GNU2_TLS
)
7355 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7357 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7361 case TLS_MODEL_LOCAL_DYNAMIC
:
7362 base
= gen_reg_rtx (Pmode
);
7363 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7365 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7367 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7370 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7371 insns
= get_insns ();
7374 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7375 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7376 emit_libcall_block (insns
, base
, rax
, note
);
7378 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7379 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7381 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7383 if (TARGET_GNU2_TLS
)
7385 rtx x
= ix86_tls_module_base ();
7387 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7388 gen_rtx_MINUS (Pmode
, x
, tp
));
7391 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7392 off
= gen_rtx_CONST (Pmode
, off
);
7394 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7396 if (TARGET_GNU2_TLS
)
7398 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7400 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7405 case TLS_MODEL_INITIAL_EXEC
:
7409 type
= UNSPEC_GOTNTPOFF
;
7413 if (reload_in_progress
)
7414 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7415 pic
= pic_offset_table_rtx
;
7416 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7418 else if (!TARGET_ANY_GNU_TLS
)
7420 pic
= gen_reg_rtx (Pmode
);
7421 emit_insn (gen_set_got (pic
));
7422 type
= UNSPEC_GOTTPOFF
;
7427 type
= UNSPEC_INDNTPOFF
;
7430 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7431 off
= gen_rtx_CONST (Pmode
, off
);
7433 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7434 off
= gen_const_mem (Pmode
, off
);
7435 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7437 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7439 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7440 off
= force_reg (Pmode
, off
);
7441 return gen_rtx_PLUS (Pmode
, base
, off
);
7445 base
= get_thread_pointer (true);
7446 dest
= gen_reg_rtx (Pmode
);
7447 emit_insn (gen_subsi3 (dest
, base
, off
));
7451 case TLS_MODEL_LOCAL_EXEC
:
7452 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7453 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7454 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7455 off
= gen_rtx_CONST (Pmode
, off
);
7457 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7459 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7460 return gen_rtx_PLUS (Pmode
, base
, off
);
7464 base
= get_thread_pointer (true);
7465 dest
= gen_reg_rtx (Pmode
);
7466 emit_insn (gen_subsi3 (dest
, base
, off
));
7477 /* Try machine-dependent ways of modifying an illegitimate address
7478 to be legitimate. If we find one, return the new, valid address.
7479 This macro is used in only one place: `memory_address' in explow.c.
7481 OLDX is the address as it was before break_out_memory_refs was called.
7482 In some cases it is useful to look at this to decide what needs to be done.
7484 MODE and WIN are passed so that this macro can use
7485 GO_IF_LEGITIMATE_ADDRESS.
7487 It is always safe for this macro to do nothing. It exists to recognize
7488 opportunities to optimize the output.
7490 For the 80386, we handle X+REG by loading X into a register R and
7491 using R+REG. R will go in a general reg and indexing will be used.
7492 However, if REG is a broken-out memory address or multiplication,
7493 nothing needs to be done because REG can certainly go in a general reg.
7495 When -fpic is used, special handling is needed for symbolic references.
7496 See comments by legitimize_pic_address in i386.c for details. */
7499 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7504 if (TARGET_DEBUG_ADDR
)
7506 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7507 GET_MODE_NAME (mode
));
7511 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7513 return legitimize_tls_address (x
, log
, false);
7514 if (GET_CODE (x
) == CONST
7515 && GET_CODE (XEXP (x
, 0)) == PLUS
7516 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7517 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7519 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7520 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7523 if (flag_pic
&& SYMBOLIC_CONST (x
))
7524 return legitimize_pic_address (x
, 0);
7526 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7527 if (GET_CODE (x
) == ASHIFT
7528 && CONST_INT_P (XEXP (x
, 1))
7529 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7532 log
= INTVAL (XEXP (x
, 1));
7533 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7534 GEN_INT (1 << log
));
7537 if (GET_CODE (x
) == PLUS
)
7539 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7541 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7542 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7543 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7546 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7547 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7548 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7549 GEN_INT (1 << log
));
7552 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7553 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7554 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7557 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7558 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7559 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7560 GEN_INT (1 << log
));
7563 /* Put multiply first if it isn't already. */
7564 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7566 rtx tmp
= XEXP (x
, 0);
7567 XEXP (x
, 0) = XEXP (x
, 1);
7572 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7573 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7574 created by virtual register instantiation, register elimination, and
7575 similar optimizations. */
7576 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7579 x
= gen_rtx_PLUS (Pmode
,
7580 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7581 XEXP (XEXP (x
, 1), 0)),
7582 XEXP (XEXP (x
, 1), 1));
7586 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7587 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7588 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7589 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7590 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7591 && CONSTANT_P (XEXP (x
, 1)))
7594 rtx other
= NULL_RTX
;
7596 if (CONST_INT_P (XEXP (x
, 1)))
7598 constant
= XEXP (x
, 1);
7599 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7601 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7603 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7604 other
= XEXP (x
, 1);
7612 x
= gen_rtx_PLUS (Pmode
,
7613 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7614 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7615 plus_constant (other
, INTVAL (constant
)));
7619 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7622 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7625 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7628 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7631 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7635 && REG_P (XEXP (x
, 1))
7636 && REG_P (XEXP (x
, 0)))
7639 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7642 x
= legitimize_pic_address (x
, 0);
7645 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7648 if (REG_P (XEXP (x
, 0)))
7650 rtx temp
= gen_reg_rtx (Pmode
);
7651 rtx val
= force_operand (XEXP (x
, 1), temp
);
7653 emit_move_insn (temp
, val
);
7659 else if (REG_P (XEXP (x
, 1)))
7661 rtx temp
= gen_reg_rtx (Pmode
);
7662 rtx val
= force_operand (XEXP (x
, 0), temp
);
7664 emit_move_insn (temp
, val
);
7674 /* Print an integer constant expression in assembler syntax. Addition
7675 and subtraction are the only arithmetic that may appear in these
7676 expressions. FILE is the stdio stream to write to, X is the rtx, and
7677 CODE is the operand print code from the output string. */
7680 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7684 switch (GET_CODE (x
))
7687 gcc_assert (flag_pic
);
7692 if (! TARGET_MACHO
|| TARGET_64BIT
)
7693 output_addr_const (file
, x
);
7696 const char *name
= XSTR (x
, 0);
7698 /* Mark the decl as referenced so that cgraph will output the function. */
7699 if (SYMBOL_REF_DECL (x
))
7700 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7703 if (MACHOPIC_INDIRECT
7704 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7705 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7707 assemble_name (file
, name
);
7709 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7710 fputs ("@PLT", file
);
7717 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7718 assemble_name (asm_out_file
, buf
);
7722 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7726 /* This used to output parentheses around the expression,
7727 but that does not work on the 386 (either ATT or BSD assembler). */
7728 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7732 if (GET_MODE (x
) == VOIDmode
)
7734 /* We can use %d if the number is <32 bits and positive. */
7735 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7736 fprintf (file
, "0x%lx%08lx",
7737 (unsigned long) CONST_DOUBLE_HIGH (x
),
7738 (unsigned long) CONST_DOUBLE_LOW (x
));
7740 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7743 /* We can't handle floating point constants;
7744 PRINT_OPERAND must handle them. */
7745 output_operand_lossage ("floating constant misused");
7749 /* Some assemblers need integer constants to appear first. */
7750 if (CONST_INT_P (XEXP (x
, 0)))
7752 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7754 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7758 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7759 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7761 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7767 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7768 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7770 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7772 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7776 gcc_assert (XVECLEN (x
, 0) == 1);
7777 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7778 switch (XINT (x
, 1))
7781 fputs ("@GOT", file
);
7784 fputs ("@GOTOFF", file
);
7787 fputs ("@PLTOFF", file
);
7789 case UNSPEC_GOTPCREL
:
7790 fputs ("@GOTPCREL(%rip)", file
);
7792 case UNSPEC_GOTTPOFF
:
7793 /* FIXME: This might be @TPOFF in Sun ld too. */
7794 fputs ("@GOTTPOFF", file
);
7797 fputs ("@TPOFF", file
);
7801 fputs ("@TPOFF", file
);
7803 fputs ("@NTPOFF", file
);
7806 fputs ("@DTPOFF", file
);
7808 case UNSPEC_GOTNTPOFF
:
7810 fputs ("@GOTTPOFF(%rip)", file
);
7812 fputs ("@GOTNTPOFF", file
);
7814 case UNSPEC_INDNTPOFF
:
7815 fputs ("@INDNTPOFF", file
);
7818 output_operand_lossage ("invalid UNSPEC as operand");
7824 output_operand_lossage ("invalid expression as operand");
7828 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7829 We need to emit DTP-relative relocations. */
7832 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7834 fputs (ASM_LONG
, file
);
7835 output_addr_const (file
, x
);
7836 fputs ("@DTPOFF", file
);
7842 fputs (", 0", file
);
7849 /* In the name of slightly smaller debug output, and to cater to
7850 general assembler lossage, recognize PIC+GOTOFF and turn it back
7851 into a direct symbol reference.
7853 On Darwin, this is necessary to avoid a crash, because Darwin
7854 has a different PIC label for each routine but the DWARF debugging
7855 information is not associated with any particular routine, so it's
7856 necessary to remove references to the PIC label from RTL stored by
7857 the DWARF output code. */
7860 ix86_delegitimize_address (rtx orig_x
)
7863 /* reg_addend is NULL or a multiple of some register. */
7864 rtx reg_addend
= NULL_RTX
;
7865 /* const_addend is NULL or a const_int. */
7866 rtx const_addend
= NULL_RTX
;
7867 /* This is the result, or NULL. */
7868 rtx result
= NULL_RTX
;
7875 if (GET_CODE (x
) != CONST
7876 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7877 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7880 return XVECEXP (XEXP (x
, 0), 0, 0);
7883 if (GET_CODE (x
) != PLUS
7884 || GET_CODE (XEXP (x
, 1)) != CONST
)
7887 if (REG_P (XEXP (x
, 0))
7888 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7889 /* %ebx + GOT/GOTOFF */
7891 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7893 /* %ebx + %reg * scale + GOT/GOTOFF */
7894 reg_addend
= XEXP (x
, 0);
7895 if (REG_P (XEXP (reg_addend
, 0))
7896 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7897 reg_addend
= XEXP (reg_addend
, 1);
7898 else if (REG_P (XEXP (reg_addend
, 1))
7899 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7900 reg_addend
= XEXP (reg_addend
, 0);
7903 if (!REG_P (reg_addend
)
7904 && GET_CODE (reg_addend
) != MULT
7905 && GET_CODE (reg_addend
) != ASHIFT
)
7911 x
= XEXP (XEXP (x
, 1), 0);
7912 if (GET_CODE (x
) == PLUS
7913 && CONST_INT_P (XEXP (x
, 1)))
7915 const_addend
= XEXP (x
, 1);
7919 if (GET_CODE (x
) == UNSPEC
7920 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7921 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7922 result
= XVECEXP (x
, 0, 0);
7924 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7926 result
= XEXP (x
, 0);
7932 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7934 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7939 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7944 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7946 enum rtx_code second_code
, bypass_code
;
7947 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7948 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7949 code
= ix86_fp_compare_code_to_integer (code
);
7953 code
= reverse_condition (code
);
7964 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7968 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7969 Those same assemblers have the same but opposite lossage on cmov. */
7970 gcc_assert (mode
== CCmode
);
7971 suffix
= fp
? "nbe" : "a";
7991 gcc_assert (mode
== CCmode
);
8013 gcc_assert (mode
== CCmode
);
8014 suffix
= fp
? "nb" : "ae";
8017 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8021 gcc_assert (mode
== CCmode
);
8025 suffix
= fp
? "u" : "p";
8028 suffix
= fp
? "nu" : "np";
8033 fputs (suffix
, file
);
8036 /* Print the name of register X to FILE based on its machine mode and number.
8037 If CODE is 'w', pretend the mode is HImode.
8038 If CODE is 'b', pretend the mode is QImode.
8039 If CODE is 'k', pretend the mode is SImode.
8040 If CODE is 'q', pretend the mode is DImode.
8041 If CODE is 'h', pretend the reg is the 'high' byte register.
8042 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8045 print_reg (rtx x
, int code
, FILE *file
)
8047 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8048 && REGNO (x
) != FRAME_POINTER_REGNUM
8049 && REGNO (x
) != FLAGS_REG
8050 && REGNO (x
) != FPSR_REG
8051 && REGNO (x
) != FPCR_REG
);
8053 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8056 if (code
== 'w' || MMX_REG_P (x
))
8058 else if (code
== 'b')
8060 else if (code
== 'k')
8062 else if (code
== 'q')
8064 else if (code
== 'y')
8066 else if (code
== 'h')
8069 code
= GET_MODE_SIZE (GET_MODE (x
));
8071 /* Irritatingly, AMD extended registers use different naming convention
8072 from the normal registers. */
8073 if (REX_INT_REG_P (x
))
8075 gcc_assert (TARGET_64BIT
);
8079 error ("extended registers have no high halves");
8082 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8085 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8088 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8091 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8094 error ("unsupported operand size for extended register");
8102 if (STACK_TOP_P (x
))
8104 fputs ("st(0)", file
);
8111 if (! ANY_FP_REG_P (x
))
8112 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8117 fputs (hi_reg_name
[REGNO (x
)], file
);
8120 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8122 fputs (qi_reg_name
[REGNO (x
)], file
);
8125 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8127 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8134 /* Locate some local-dynamic symbol still in use by this function
8135 so that we can print its name in some tls_local_dynamic_base
8139 get_some_local_dynamic_name (void)
8143 if (cfun
->machine
->some_ld_name
)
8144 return cfun
->machine
->some_ld_name
;
8146 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8148 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8149 return cfun
->machine
->some_ld_name
;
8155 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8159 if (GET_CODE (x
) == SYMBOL_REF
8160 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8162 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8170 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8171 C -- print opcode suffix for set/cmov insn.
8172 c -- like C, but print reversed condition
8173 F,f -- likewise, but for floating-point.
8174 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8176 R -- print the prefix for register names.
8177 z -- print the opcode suffix for the size of the current operand.
8178 * -- print a star (in certain assembler syntax)
8179 A -- print an absolute memory reference.
8180 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8181 s -- print a shift double count, followed by the assemblers argument
8183 b -- print the QImode name of the register for the indicated operand.
8184 %b0 would print %al if operands[0] is reg 0.
8185 w -- likewise, print the HImode name of the register.
8186 k -- likewise, print the SImode name of the register.
8187 q -- likewise, print the DImode name of the register.
8188 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8189 y -- print "st(0)" instead of "st" as a register.
8190 D -- print condition for SSE cmp instruction.
8191 P -- if PIC, print an @PLT suffix.
8192 X -- don't print any sort of PIC '@' suffix for a symbol.
8193 & -- print some in-use local-dynamic symbol name.
8194 H -- print a memory address offset by 8; used for sse high-parts
8198 print_operand (FILE *file
, rtx x
, int code
)
8205 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8210 assemble_name (file
, get_some_local_dynamic_name ());
8214 switch (ASSEMBLER_DIALECT
)
8221 /* Intel syntax. For absolute addresses, registers should not
8222 be surrounded by braces. */
8226 PRINT_OPERAND (file
, x
, 0);
8236 PRINT_OPERAND (file
, x
, 0);
8241 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8246 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8251 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8256 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8261 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8266 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8271 /* 387 opcodes don't get size suffixes if the operands are
8273 if (STACK_REG_P (x
))
8276 /* Likewise if using Intel opcodes. */
8277 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8280 /* This is the size of op from size of operand. */
8281 switch (GET_MODE_SIZE (GET_MODE (x
)))
8288 #ifdef HAVE_GAS_FILDS_FISTS
8294 if (GET_MODE (x
) == SFmode
)
8309 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8311 #ifdef GAS_MNEMONICS
8337 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8339 PRINT_OPERAND (file
, x
, 0);
8345 /* Little bit of braindamage here. The SSE compare instructions
8346 does use completely different names for the comparisons that the
8347 fp conditional moves. */
8348 switch (GET_CODE (x
))
8363 fputs ("unord", file
);
8367 fputs ("neq", file
);
8371 fputs ("nlt", file
);
8375 fputs ("nle", file
);
8378 fputs ("ord", file
);
8385 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8386 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8388 switch (GET_MODE (x
))
8390 case HImode
: putc ('w', file
); break;
8392 case SFmode
: putc ('l', file
); break;
8394 case DFmode
: putc ('q', file
); break;
8395 default: gcc_unreachable ();
8402 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8405 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8406 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8409 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8412 /* Like above, but reverse condition */
8414 /* Check to see if argument to %c is really a constant
8415 and not a condition code which needs to be reversed. */
8416 if (!COMPARISON_P (x
))
8418 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8421 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8424 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8425 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8428 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8432 /* It doesn't actually matter what mode we use here, as we're
8433 only going to use this for printing. */
8434 x
= adjust_address_nv (x
, DImode
, 8);
8441 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8444 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8447 int pred_val
= INTVAL (XEXP (x
, 0));
8449 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8450 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8452 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8453 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8455 /* Emit hints only in the case default branch prediction
8456 heuristics would fail. */
8457 if (taken
!= cputaken
)
8459 /* We use 3e (DS) prefix for taken branches and
8460 2e (CS) prefix for not taken branches. */
8462 fputs ("ds ; ", file
);
8464 fputs ("cs ; ", file
);
8471 output_operand_lossage ("invalid operand code '%c'", code
);
8476 print_reg (x
, code
, file
);
8480 /* No `byte ptr' prefix for call instructions. */
8481 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8484 switch (GET_MODE_SIZE (GET_MODE (x
)))
8486 case 1: size
= "BYTE"; break;
8487 case 2: size
= "WORD"; break;
8488 case 4: size
= "DWORD"; break;
8489 case 8: size
= "QWORD"; break;
8490 case 12: size
= "XWORD"; break;
8491 case 16: size
= "XMMWORD"; break;
8496 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8499 else if (code
== 'w')
8501 else if (code
== 'k')
8505 fputs (" PTR ", file
);
8509 /* Avoid (%rip) for call operands. */
8510 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8511 && !CONST_INT_P (x
))
8512 output_addr_const (file
, x
);
8513 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8514 output_operand_lossage ("invalid constraints for operand");
8519 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8524 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8525 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8527 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8529 fprintf (file
, "0x%08lx", l
);
8532 /* These float cases don't actually occur as immediate operands. */
8533 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8537 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8538 fprintf (file
, "%s", dstr
);
8541 else if (GET_CODE (x
) == CONST_DOUBLE
8542 && GET_MODE (x
) == XFmode
)
8546 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8547 fprintf (file
, "%s", dstr
);
8552 /* We have patterns that allow zero sets of memory, for instance.
8553 In 64-bit mode, we should probably support all 8-byte vectors,
8554 since we can in fact encode that into an immediate. */
8555 if (GET_CODE (x
) == CONST_VECTOR
)
8557 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8563 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8565 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8568 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8569 || GET_CODE (x
) == LABEL_REF
)
8571 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8574 fputs ("OFFSET FLAT:", file
);
8577 if (CONST_INT_P (x
))
8578 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8580 output_pic_addr_const (file
, x
, code
);
8582 output_addr_const (file
, x
);
8586 /* Print a memory operand whose address is ADDR. */
8589 print_operand_address (FILE *file
, rtx addr
)
8591 struct ix86_address parts
;
8592 rtx base
, index
, disp
;
8594 int ok
= ix86_decompose_address (addr
, &parts
);
8599 index
= parts
.index
;
8601 scale
= parts
.scale
;
8609 if (USER_LABEL_PREFIX
[0] == 0)
8611 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8617 if (!base
&& !index
)
8619 /* Displacement only requires special attention. */
8621 if (CONST_INT_P (disp
))
8623 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8625 if (USER_LABEL_PREFIX
[0] == 0)
8627 fputs ("ds:", file
);
8629 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8632 output_pic_addr_const (file
, disp
, 0);
8634 output_addr_const (file
, disp
);
8636 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8639 if (GET_CODE (disp
) == CONST
8640 && GET_CODE (XEXP (disp
, 0)) == PLUS
8641 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8642 disp
= XEXP (XEXP (disp
, 0), 0);
8643 if (GET_CODE (disp
) == LABEL_REF
8644 || (GET_CODE (disp
) == SYMBOL_REF
8645 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8646 fputs ("(%rip)", file
);
8651 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8656 output_pic_addr_const (file
, disp
, 0);
8657 else if (GET_CODE (disp
) == LABEL_REF
)
8658 output_asm_label (disp
);
8660 output_addr_const (file
, disp
);
8665 print_reg (base
, 0, file
);
8669 print_reg (index
, 0, file
);
8671 fprintf (file
, ",%d", scale
);
8677 rtx offset
= NULL_RTX
;
8681 /* Pull out the offset of a symbol; print any symbol itself. */
8682 if (GET_CODE (disp
) == CONST
8683 && GET_CODE (XEXP (disp
, 0)) == PLUS
8684 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8686 offset
= XEXP (XEXP (disp
, 0), 1);
8687 disp
= gen_rtx_CONST (VOIDmode
,
8688 XEXP (XEXP (disp
, 0), 0));
8692 output_pic_addr_const (file
, disp
, 0);
8693 else if (GET_CODE (disp
) == LABEL_REF
)
8694 output_asm_label (disp
);
8695 else if (CONST_INT_P (disp
))
8698 output_addr_const (file
, disp
);
8704 print_reg (base
, 0, file
);
8707 if (INTVAL (offset
) >= 0)
8709 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8713 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8720 print_reg (index
, 0, file
);
8722 fprintf (file
, "*%d", scale
);
8730 output_addr_const_extra (FILE *file
, rtx x
)
8734 if (GET_CODE (x
) != UNSPEC
)
8737 op
= XVECEXP (x
, 0, 0);
8738 switch (XINT (x
, 1))
8740 case UNSPEC_GOTTPOFF
:
8741 output_addr_const (file
, op
);
8742 /* FIXME: This might be @TPOFF in Sun ld. */
8743 fputs ("@GOTTPOFF", file
);
8746 output_addr_const (file
, op
);
8747 fputs ("@TPOFF", file
);
8750 output_addr_const (file
, op
);
8752 fputs ("@TPOFF", file
);
8754 fputs ("@NTPOFF", file
);
8757 output_addr_const (file
, op
);
8758 fputs ("@DTPOFF", file
);
8760 case UNSPEC_GOTNTPOFF
:
8761 output_addr_const (file
, op
);
8763 fputs ("@GOTTPOFF(%rip)", file
);
8765 fputs ("@GOTNTPOFF", file
);
8767 case UNSPEC_INDNTPOFF
:
8768 output_addr_const (file
, op
);
8769 fputs ("@INDNTPOFF", file
);
8779 /* Split one or more DImode RTL references into pairs of SImode
8780 references. The RTL can be REG, offsettable MEM, integer constant, or
8781 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8782 split and "num" is its length. lo_half and hi_half are output arrays
8783 that parallel "operands". */
8786 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8790 rtx op
= operands
[num
];
8792 /* simplify_subreg refuse to split volatile memory addresses,
8793 but we still have to handle it. */
8796 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8797 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8801 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8802 GET_MODE (op
) == VOIDmode
8803 ? DImode
: GET_MODE (op
), 0);
8804 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8805 GET_MODE (op
) == VOIDmode
8806 ? DImode
: GET_MODE (op
), 4);
8810 /* Split one or more TImode RTL references into pairs of DImode
8811 references. The RTL can be REG, offsettable MEM, integer constant, or
8812 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8813 split and "num" is its length. lo_half and hi_half are output arrays
8814 that parallel "operands". */
8817 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8821 rtx op
= operands
[num
];
8823 /* simplify_subreg refuse to split volatile memory addresses, but we
8824 still have to handle it. */
8827 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8828 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8832 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8833 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8838 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8839 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8840 is the expression of the binary operation. The output may either be
8841 emitted here, or returned to the caller, like all output_* functions.
8843 There is no guarantee that the operands are the same mode, as they
8844 might be within FLOAT or FLOAT_EXTEND expressions. */
8846 #ifndef SYSV386_COMPAT
8847 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8848 wants to fix the assemblers because that causes incompatibility
8849 with gcc. No-one wants to fix gcc because that causes
8850 incompatibility with assemblers... You can use the option of
8851 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8852 #define SYSV386_COMPAT 1
8856 output_387_binary_op (rtx insn
, rtx
*operands
)
8858 static char buf
[30];
8861 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8863 #ifdef ENABLE_CHECKING
8864 /* Even if we do not want to check the inputs, this documents input
8865 constraints. Which helps in understanding the following code. */
8866 if (STACK_REG_P (operands
[0])
8867 && ((REG_P (operands
[1])
8868 && REGNO (operands
[0]) == REGNO (operands
[1])
8869 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8870 || (REG_P (operands
[2])
8871 && REGNO (operands
[0]) == REGNO (operands
[2])
8872 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8873 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8876 gcc_assert (is_sse
);
8879 switch (GET_CODE (operands
[3]))
8882 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8883 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8891 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8892 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8900 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8901 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8909 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8910 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8924 if (GET_MODE (operands
[0]) == SFmode
)
8925 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8927 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8932 switch (GET_CODE (operands
[3]))
8936 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8938 rtx temp
= operands
[2];
8939 operands
[2] = operands
[1];
8943 /* know operands[0] == operands[1]. */
8945 if (MEM_P (operands
[2]))
8951 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8953 if (STACK_TOP_P (operands
[0]))
8954 /* How is it that we are storing to a dead operand[2]?
8955 Well, presumably operands[1] is dead too. We can't
8956 store the result to st(0) as st(0) gets popped on this
8957 instruction. Instead store to operands[2] (which I
8958 think has to be st(1)). st(1) will be popped later.
8959 gcc <= 2.8.1 didn't have this check and generated
8960 assembly code that the Unixware assembler rejected. */
8961 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8963 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8967 if (STACK_TOP_P (operands
[0]))
8968 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8970 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8975 if (MEM_P (operands
[1]))
8981 if (MEM_P (operands
[2]))
8987 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8990 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8991 derived assemblers, confusingly reverse the direction of
8992 the operation for fsub{r} and fdiv{r} when the
8993 destination register is not st(0). The Intel assembler
8994 doesn't have this brain damage. Read !SYSV386_COMPAT to
8995 figure out what the hardware really does. */
8996 if (STACK_TOP_P (operands
[0]))
8997 p
= "{p\t%0, %2|rp\t%2, %0}";
8999 p
= "{rp\t%2, %0|p\t%0, %2}";
9001 if (STACK_TOP_P (operands
[0]))
9002 /* As above for fmul/fadd, we can't store to st(0). */
9003 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9005 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9010 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9013 if (STACK_TOP_P (operands
[0]))
9014 p
= "{rp\t%0, %1|p\t%1, %0}";
9016 p
= "{p\t%1, %0|rp\t%0, %1}";
9018 if (STACK_TOP_P (operands
[0]))
9019 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9021 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9026 if (STACK_TOP_P (operands
[0]))
9028 if (STACK_TOP_P (operands
[1]))
9029 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9031 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9034 else if (STACK_TOP_P (operands
[1]))
9037 p
= "{\t%1, %0|r\t%0, %1}";
9039 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9045 p
= "{r\t%2, %0|\t%0, %2}";
9047 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9060 /* Return needed mode for entity in optimize_mode_switching pass. */
9063 ix86_mode_needed (int entity
, rtx insn
)
9065 enum attr_i387_cw mode
;
9067 /* The mode UNINITIALIZED is used to store control word after a
9068 function call or ASM pattern. The mode ANY specify that function
9069 has no requirements on the control word and make no changes in the
9070 bits we are interested in. */
9073 || (NONJUMP_INSN_P (insn
)
9074 && (asm_noperands (PATTERN (insn
)) >= 0
9075 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9076 return I387_CW_UNINITIALIZED
;
9078 if (recog_memoized (insn
) < 0)
9081 mode
= get_attr_i387_cw (insn
);
9086 if (mode
== I387_CW_TRUNC
)
9091 if (mode
== I387_CW_FLOOR
)
9096 if (mode
== I387_CW_CEIL
)
9101 if (mode
== I387_CW_MASK_PM
)
9112 /* Output code to initialize control word copies used by trunc?f?i and
9113 rounding patterns. CURRENT_MODE is set to current control word,
9114 while NEW_MODE is set to new control word. */
9117 emit_i387_cw_initialization (int mode
)
9119 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9124 rtx reg
= gen_reg_rtx (HImode
);
9126 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9127 emit_move_insn (reg
, copy_rtx (stored_mode
));
9129 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9134 /* round toward zero (truncate) */
9135 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9136 slot
= SLOT_CW_TRUNC
;
9140 /* round down toward -oo */
9141 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9142 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9143 slot
= SLOT_CW_FLOOR
;
9147 /* round up toward +oo */
9148 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9149 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9150 slot
= SLOT_CW_CEIL
;
9153 case I387_CW_MASK_PM
:
9154 /* mask precision exception for nearbyint() */
9155 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9156 slot
= SLOT_CW_MASK_PM
;
9168 /* round toward zero (truncate) */
9169 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9170 slot
= SLOT_CW_TRUNC
;
9174 /* round down toward -oo */
9175 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9176 slot
= SLOT_CW_FLOOR
;
9180 /* round up toward +oo */
9181 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9182 slot
= SLOT_CW_CEIL
;
9185 case I387_CW_MASK_PM
:
9186 /* mask precision exception for nearbyint() */
9187 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9188 slot
= SLOT_CW_MASK_PM
;
9196 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9198 new_mode
= assign_386_stack_local (HImode
, slot
);
9199 emit_move_insn (new_mode
, reg
);
9202 /* Output code for INSN to convert a float to a signed int. OPERANDS
9203 are the insn operands. The output may be [HSD]Imode and the input
9204 operand may be [SDX]Fmode. */
9207 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9209 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9210 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9211 int round_mode
= get_attr_i387_cw (insn
);
9213 /* Jump through a hoop or two for DImode, since the hardware has no
9214 non-popping instruction. We used to do this a different way, but
9215 that was somewhat fragile and broke with post-reload splitters. */
9216 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9217 output_asm_insn ("fld\t%y1", operands
);
9219 gcc_assert (STACK_TOP_P (operands
[1]));
9220 gcc_assert (MEM_P (operands
[0]));
9223 output_asm_insn ("fisttp%z0\t%0", operands
);
9226 if (round_mode
!= I387_CW_ANY
)
9227 output_asm_insn ("fldcw\t%3", operands
);
9228 if (stack_top_dies
|| dimode_p
)
9229 output_asm_insn ("fistp%z0\t%0", operands
);
9231 output_asm_insn ("fist%z0\t%0", operands
);
9232 if (round_mode
!= I387_CW_ANY
)
9233 output_asm_insn ("fldcw\t%2", operands
);
9239 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9240 have the values zero or one, indicates the ffreep insn's operand
9241 from the OPERANDS array. */
9244 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9246 if (TARGET_USE_FFREEP
)
9247 #if HAVE_AS_IX86_FFREEP
9248 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9251 static char retval
[] = ".word\t0xc_df";
9252 int regno
= REGNO (operands
[opno
]);
9254 gcc_assert (FP_REGNO_P (regno
));
9256 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9261 return opno
? "fstp\t%y1" : "fstp\t%y0";
9265 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9266 should be used. UNORDERED_P is true when fucom should be used. */
9269 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9272 rtx cmp_op0
, cmp_op1
;
9273 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9277 cmp_op0
= operands
[0];
9278 cmp_op1
= operands
[1];
9282 cmp_op0
= operands
[1];
9283 cmp_op1
= operands
[2];
9288 if (GET_MODE (operands
[0]) == SFmode
)
9290 return "ucomiss\t{%1, %0|%0, %1}";
9292 return "comiss\t{%1, %0|%0, %1}";
9295 return "ucomisd\t{%1, %0|%0, %1}";
9297 return "comisd\t{%1, %0|%0, %1}";
9300 gcc_assert (STACK_TOP_P (cmp_op0
));
9302 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9304 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9308 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9309 return output_387_ffreep (operands
, 1);
9312 return "ftst\n\tfnstsw\t%0";
9315 if (STACK_REG_P (cmp_op1
)
9317 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9318 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9320 /* If both the top of the 387 stack dies, and the other operand
9321 is also a stack register that dies, then this must be a
9322 `fcompp' float compare */
9326 /* There is no double popping fcomi variant. Fortunately,
9327 eflags is immune from the fstp's cc clobbering. */
9329 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9331 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9332 return output_387_ffreep (operands
, 0);
9337 return "fucompp\n\tfnstsw\t%0";
9339 return "fcompp\n\tfnstsw\t%0";
9344 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9346 static const char * const alt
[16] =
9348 "fcom%z2\t%y2\n\tfnstsw\t%0",
9349 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9350 "fucom%z2\t%y2\n\tfnstsw\t%0",
9351 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9353 "ficom%z2\t%y2\n\tfnstsw\t%0",
9354 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9358 "fcomi\t{%y1, %0|%0, %y1}",
9359 "fcomip\t{%y1, %0|%0, %y1}",
9360 "fucomi\t{%y1, %0|%0, %y1}",
9361 "fucomip\t{%y1, %0|%0, %y1}",
9372 mask
= eflags_p
<< 3;
9373 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9374 mask
|= unordered_p
<< 1;
9375 mask
|= stack_top_dies
;
9377 gcc_assert (mask
< 16);
9386 ix86_output_addr_vec_elt (FILE *file
, int value
)
9388 const char *directive
= ASM_LONG
;
9392 directive
= ASM_QUAD
;
9394 gcc_assert (!TARGET_64BIT
);
9397 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9401 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9403 const char *directive
= ASM_LONG
;
9406 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9407 directive
= ASM_QUAD
;
9409 gcc_assert (!TARGET_64BIT
);
9411 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9412 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9413 fprintf (file
, "%s%s%d-%s%d\n",
9414 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9415 else if (HAVE_AS_GOTOFF_IN_DATA
)
9416 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9418 else if (TARGET_MACHO
)
9420 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9421 machopic_output_function_base_name (file
);
9422 fprintf(file
, "\n");
9426 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9427 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9430 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9434 ix86_expand_clear (rtx dest
)
9438 /* We play register width games, which are only valid after reload. */
9439 gcc_assert (reload_completed
);
9441 /* Avoid HImode and its attendant prefix byte. */
9442 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9443 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9445 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9447 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9448 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9450 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9451 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9457 /* X is an unchanging MEM. If it is a constant pool reference, return
9458 the constant pool rtx, else NULL. */
9461 maybe_get_pool_constant (rtx x
)
9463 x
= ix86_delegitimize_address (XEXP (x
, 0));
9465 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9466 return get_pool_constant (x
);
9472 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9474 int strict
= (reload_in_progress
|| reload_completed
);
9476 enum tls_model model
;
9481 if (GET_CODE (op1
) == SYMBOL_REF
)
9483 model
= SYMBOL_REF_TLS_MODEL (op1
);
9486 op1
= legitimize_tls_address (op1
, model
, true);
9487 op1
= force_operand (op1
, op0
);
9492 else if (GET_CODE (op1
) == CONST
9493 && GET_CODE (XEXP (op1
, 0)) == PLUS
9494 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9496 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9499 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9500 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9501 op1
= force_operand (op1
, NULL
);
9502 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9503 op0
, 1, OPTAB_DIRECT
);
9509 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9511 if (TARGET_MACHO
&& !TARGET_64BIT
)
9516 rtx temp
= ((reload_in_progress
9517 || ((op0
&& REG_P (op0
))
9519 ? op0
: gen_reg_rtx (Pmode
));
9520 op1
= machopic_indirect_data_reference (op1
, temp
);
9521 op1
= machopic_legitimize_pic_address (op1
, mode
,
9522 temp
== op1
? 0 : temp
);
9524 else if (MACHOPIC_INDIRECT
)
9525 op1
= machopic_indirect_data_reference (op1
, 0);
9533 op1
= force_reg (Pmode
, op1
);
9534 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9536 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9537 op1
= legitimize_pic_address (op1
, reg
);
9546 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9547 || !push_operand (op0
, mode
))
9549 op1
= force_reg (mode
, op1
);
9551 if (push_operand (op0
, mode
)
9552 && ! general_no_elim_operand (op1
, mode
))
9553 op1
= copy_to_mode_reg (mode
, op1
);
9555 /* Force large constants in 64bit compilation into register
9556 to get them CSEed. */
9557 if (TARGET_64BIT
&& mode
== DImode
9558 && immediate_operand (op1
, mode
)
9559 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9560 && !register_operand (op0
, mode
)
9561 && optimize
&& !reload_completed
&& !reload_in_progress
)
9562 op1
= copy_to_mode_reg (mode
, op1
);
9564 if (FLOAT_MODE_P (mode
))
9566 /* If we are loading a floating point constant to a register,
9567 force the value to memory now, since we'll get better code
9568 out the back end. */
9572 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9574 op1
= validize_mem (force_const_mem (mode
, op1
));
9575 if (!register_operand (op0
, mode
))
9577 rtx temp
= gen_reg_rtx (mode
);
9578 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9579 emit_move_insn (op0
, temp
);
9586 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9590 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9592 rtx op0
= operands
[0], op1
= operands
[1];
9594 /* Force constants other than zero into memory. We do not know how
9595 the instructions used to build constants modify the upper 64 bits
9596 of the register, once we have that information we may be able
9597 to handle some of them more efficiently. */
9598 if ((reload_in_progress
| reload_completed
) == 0
9599 && register_operand (op0
, mode
)
9601 && standard_sse_constant_p (op1
) <= 0)
9602 op1
= validize_mem (force_const_mem (mode
, op1
));
9604 /* Make operand1 a register if it isn't already. */
9606 && !register_operand (op0
, mode
)
9607 && !register_operand (op1
, mode
))
9609 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9613 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9616 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9617 straight to ix86_expand_vector_move. */
9618 /* Code generation for scalar reg-reg moves of single and double precision data:
9619 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9623 if (x86_sse_partial_reg_dependency == true)
9628 Code generation for scalar loads of double precision data:
9629 if (x86_sse_split_regs == true)
9630 movlpd mem, reg (gas syntax)
9634 Code generation for unaligned packed loads of single precision data
9635 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9636 if (x86_sse_unaligned_move_optimal)
9639 if (x86_sse_partial_reg_dependency == true)
9651 Code generation for unaligned packed loads of double precision data
9652 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9653 if (x86_sse_unaligned_move_optimal)
9656 if (x86_sse_split_regs == true)
9669 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9678 /* If we're optimizing for size, movups is the smallest. */
9681 op0
= gen_lowpart (V4SFmode
, op0
);
9682 op1
= gen_lowpart (V4SFmode
, op1
);
9683 emit_insn (gen_sse_movups (op0
, op1
));
9687 /* ??? If we have typed data, then it would appear that using
9688 movdqu is the only way to get unaligned data loaded with
9690 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9692 op0
= gen_lowpart (V16QImode
, op0
);
9693 op1
= gen_lowpart (V16QImode
, op1
);
9694 emit_insn (gen_sse2_movdqu (op0
, op1
));
9698 if (TARGET_SSE2
&& mode
== V2DFmode
)
9702 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9704 op0
= gen_lowpart (V2DFmode
, op0
);
9705 op1
= gen_lowpart (V2DFmode
, op1
);
9706 emit_insn (gen_sse2_movupd (op0
, op1
));
9710 /* When SSE registers are split into halves, we can avoid
9711 writing to the top half twice. */
9712 if (TARGET_SSE_SPLIT_REGS
)
9714 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9719 /* ??? Not sure about the best option for the Intel chips.
9720 The following would seem to satisfy; the register is
9721 entirely cleared, breaking the dependency chain. We
9722 then store to the upper half, with a dependency depth
9723 of one. A rumor has it that Intel recommends two movsd
9724 followed by an unpacklpd, but this is unconfirmed. And
9725 given that the dependency depth of the unpacklpd would
9726 still be one, I'm not sure why this would be better. */
9727 zero
= CONST0_RTX (V2DFmode
);
9730 m
= adjust_address (op1
, DFmode
, 0);
9731 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9732 m
= adjust_address (op1
, DFmode
, 8);
9733 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9737 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9739 op0
= gen_lowpart (V4SFmode
, op0
);
9740 op1
= gen_lowpart (V4SFmode
, op1
);
9741 emit_insn (gen_sse_movups (op0
, op1
));
9745 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9746 emit_move_insn (op0
, CONST0_RTX (mode
));
9748 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9750 if (mode
!= V4SFmode
)
9751 op0
= gen_lowpart (V4SFmode
, op0
);
9752 m
= adjust_address (op1
, V2SFmode
, 0);
9753 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9754 m
= adjust_address (op1
, V2SFmode
, 8);
9755 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9758 else if (MEM_P (op0
))
9760 /* If we're optimizing for size, movups is the smallest. */
9763 op0
= gen_lowpart (V4SFmode
, op0
);
9764 op1
= gen_lowpart (V4SFmode
, op1
);
9765 emit_insn (gen_sse_movups (op0
, op1
));
9769 /* ??? Similar to above, only less clear because of quote
9770 typeless stores unquote. */
9771 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9772 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9774 op0
= gen_lowpart (V16QImode
, op0
);
9775 op1
= gen_lowpart (V16QImode
, op1
);
9776 emit_insn (gen_sse2_movdqu (op0
, op1
));
9780 if (TARGET_SSE2
&& mode
== V2DFmode
)
9782 m
= adjust_address (op0
, DFmode
, 0);
9783 emit_insn (gen_sse2_storelpd (m
, op1
));
9784 m
= adjust_address (op0
, DFmode
, 8);
9785 emit_insn (gen_sse2_storehpd (m
, op1
));
9789 if (mode
!= V4SFmode
)
9790 op1
= gen_lowpart (V4SFmode
, op1
);
9791 m
= adjust_address (op0
, V2SFmode
, 0);
9792 emit_insn (gen_sse_storelps (m
, op1
));
9793 m
= adjust_address (op0
, V2SFmode
, 8);
9794 emit_insn (gen_sse_storehps (m
, op1
));
9801 /* Expand a push in MODE. This is some mode for which we do not support
9802 proper push instructions, at least from the registers that we expect
9803 the value to live in. */
9806 ix86_expand_push (enum machine_mode mode
, rtx x
)
9810 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9811 GEN_INT (-GET_MODE_SIZE (mode
)),
9812 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9813 if (tmp
!= stack_pointer_rtx
)
9814 emit_move_insn (stack_pointer_rtx
, tmp
);
9816 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9817 emit_move_insn (tmp
, x
);
9820 /* Helper function of ix86_fixup_binary_operands to canonicalize
9821 operand order. Returns true if the operands should be swapped. */
9824 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9827 rtx dst
= operands
[0];
9828 rtx src1
= operands
[1];
9829 rtx src2
= operands
[2];
9831 /* If the operation is not commutative, we can't do anything. */
9832 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9835 /* Highest priority is that src1 should match dst. */
9836 if (rtx_equal_p (dst
, src1
))
9838 if (rtx_equal_p (dst
, src2
))
9841 /* Next highest priority is that immediate constants come second. */
9842 if (immediate_operand (src2
, mode
))
9844 if (immediate_operand (src1
, mode
))
9847 /* Lowest priority is that memory references should come second. */
9857 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9858 destination to use for the operation. If different from the true
9859 destination in operands[0], a copy operation will be required. */
9862 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9865 rtx dst
= operands
[0];
9866 rtx src1
= operands
[1];
9867 rtx src2
= operands
[2];
9869 /* Canonicalize operand order. */
9870 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9877 /* Both source operands cannot be in memory. */
9878 if (MEM_P (src1
) && MEM_P (src2
))
9880 /* Optimization: Only read from memory once. */
9881 if (rtx_equal_p (src1
, src2
))
9883 src2
= force_reg (mode
, src2
);
9887 src2
= force_reg (mode
, src2
);
9890 /* If the destination is memory, and we do not have matching source
9891 operands, do things in registers. */
9892 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9893 dst
= gen_reg_rtx (mode
);
9895 /* Source 1 cannot be a constant. */
9896 if (CONSTANT_P (src1
))
9897 src1
= force_reg (mode
, src1
);
9899 /* Source 1 cannot be a non-matching memory. */
9900 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9901 src1
= force_reg (mode
, src1
);
9908 /* Similarly, but assume that the destination has already been
9912 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9913 enum machine_mode mode
, rtx operands
[])
9915 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9916 gcc_assert (dst
== operands
[0]);
9919 /* Attempt to expand a binary operator. Make the expansion closer to the
9920 actual machine, then just general_operand, which will allow 3 separate
9921 memory references (one output, two input) in a single insn. */
9924 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9927 rtx src1
, src2
, dst
, op
, clob
;
9929 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9933 /* Emit the instruction. */
9935 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9936 if (reload_in_progress
)
9938 /* Reload doesn't know about the flags register, and doesn't know that
9939 it doesn't want to clobber it. We can only do this with PLUS. */
9940 gcc_assert (code
== PLUS
);
9945 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9946 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9949 /* Fix up the destination if needed. */
9950 if (dst
!= operands
[0])
9951 emit_move_insn (operands
[0], dst
);
9954 /* Return TRUE or FALSE depending on whether the binary operator meets the
9955 appropriate constraints. */
9958 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9961 rtx dst
= operands
[0];
9962 rtx src1
= operands
[1];
9963 rtx src2
= operands
[2];
9965 /* Both source operands cannot be in memory. */
9966 if (MEM_P (src1
) && MEM_P (src2
))
9969 /* Canonicalize operand order for commutative operators. */
9970 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9977 /* If the destination is memory, we must have a matching source operand. */
9978 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9981 /* Source 1 cannot be a constant. */
9982 if (CONSTANT_P (src1
))
9985 /* Source 1 cannot be a non-matching memory. */
9986 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9992 /* Attempt to expand a unary operator. Make the expansion closer to the
9993 actual machine, then just general_operand, which will allow 2 separate
9994 memory references (one output, one input) in a single insn. */
9997 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10000 int matching_memory
;
10001 rtx src
, dst
, op
, clob
;
10006 /* If the destination is memory, and we do not have matching source
10007 operands, do things in registers. */
10008 matching_memory
= 0;
10011 if (rtx_equal_p (dst
, src
))
10012 matching_memory
= 1;
10014 dst
= gen_reg_rtx (mode
);
10017 /* When source operand is memory, destination must match. */
10018 if (MEM_P (src
) && !matching_memory
)
10019 src
= force_reg (mode
, src
);
10021 /* Emit the instruction. */
10023 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10024 if (reload_in_progress
|| code
== NOT
)
10026 /* Reload doesn't know about the flags register, and doesn't know that
10027 it doesn't want to clobber it. */
10028 gcc_assert (code
== NOT
);
10033 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10034 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10037 /* Fix up the destination if needed. */
10038 if (dst
!= operands
[0])
10039 emit_move_insn (operands
[0], dst
);
10042 /* Return TRUE or FALSE depending on whether the unary operator meets the
10043 appropriate constraints. */
10046 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10047 enum machine_mode mode ATTRIBUTE_UNUSED
,
10048 rtx operands
[2] ATTRIBUTE_UNUSED
)
10050 /* If one of operands is memory, source and destination must match. */
10051 if ((MEM_P (operands
[0])
10052 || MEM_P (operands
[1]))
10053 && ! rtx_equal_p (operands
[0], operands
[1]))
10058 /* Post-reload splitter for converting an SF or DFmode value in an
10059 SSE register into an unsigned SImode. */
10062 ix86_split_convert_uns_si_sse (rtx operands
[])
10064 enum machine_mode vecmode
;
10065 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10067 large
= operands
[1];
10068 zero_or_two31
= operands
[2];
10069 input
= operands
[3];
10070 two31
= operands
[4];
10071 vecmode
= GET_MODE (large
);
10072 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10074 /* Load up the value into the low element. We must ensure that the other
10075 elements are valid floats -- zero is the easiest such value. */
10078 if (vecmode
== V4SFmode
)
10079 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10081 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10085 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10086 emit_move_insn (value
, CONST0_RTX (vecmode
));
10087 if (vecmode
== V4SFmode
)
10088 emit_insn (gen_sse_movss (value
, value
, input
));
10090 emit_insn (gen_sse2_movsd (value
, value
, input
));
10093 emit_move_insn (large
, two31
);
10094 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10096 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10097 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10099 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10100 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10102 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10103 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10105 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10106 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10108 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10109 if (vecmode
== V4SFmode
)
10110 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10112 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10115 emit_insn (gen_xorv4si3 (value
, value
, large
));
10118 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10119 Expects the 64-bit DImode to be supplied in a pair of integral
10120 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10121 -mfpmath=sse, !optimize_size only. */
10124 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10126 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10127 rtx int_xmm
, fp_xmm
;
10128 rtx biases
, exponents
;
10131 int_xmm
= gen_reg_rtx (V4SImode
);
10132 if (TARGET_INTER_UNIT_MOVES
)
10133 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10134 else if (TARGET_SSE_SPLIT_REGS
)
10136 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10137 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10141 x
= gen_reg_rtx (V2DImode
);
10142 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10143 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10146 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10147 gen_rtvec (4, GEN_INT (0x43300000UL
),
10148 GEN_INT (0x45300000UL
),
10149 const0_rtx
, const0_rtx
));
10150 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10152 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10153 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10155 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10156 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10157 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10158 (0x1.0p84 + double(fp_value_hi_xmm)).
10159 Note these exponents differ by 32. */
10161 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10163 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10164 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10165 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10166 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10167 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10168 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10169 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10170 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10171 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10173 /* Add the upper and lower DFmode values together. */
10175 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10178 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10179 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10180 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10183 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10186 /* Convert an unsigned SImode value into a DFmode. Only currently used
10187 for SSE, but applicable anywhere. */
10190 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10192 REAL_VALUE_TYPE TWO31r
;
10195 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10196 NULL
, 1, OPTAB_DIRECT
);
10198 fp
= gen_reg_rtx (DFmode
);
10199 emit_insn (gen_floatsidf2 (fp
, x
));
10201 real_ldexp (&TWO31r
, &dconst1
, 31);
10202 x
= const_double_from_real_value (TWO31r
, DFmode
);
10204 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10206 emit_move_insn (target
, x
);
10209 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10210 32-bit mode; otherwise we have a direct convert instruction. */
10213 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10215 REAL_VALUE_TYPE TWO32r
;
10216 rtx fp_lo
, fp_hi
, x
;
10218 fp_lo
= gen_reg_rtx (DFmode
);
10219 fp_hi
= gen_reg_rtx (DFmode
);
10221 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10223 real_ldexp (&TWO32r
, &dconst1
, 32);
10224 x
= const_double_from_real_value (TWO32r
, DFmode
);
10225 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10227 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10229 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10232 emit_move_insn (target
, x
);
10235 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10236 For x86_32, -mfpmath=sse, !optimize_size only. */
10238 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10240 REAL_VALUE_TYPE ONE16r
;
10241 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10243 real_ldexp (&ONE16r
, &dconst1
, 16);
10244 x
= const_double_from_real_value (ONE16r
, SFmode
);
10245 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10246 NULL
, 0, OPTAB_DIRECT
);
10247 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10248 NULL
, 0, OPTAB_DIRECT
);
10249 fp_hi
= gen_reg_rtx (SFmode
);
10250 fp_lo
= gen_reg_rtx (SFmode
);
10251 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10252 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10253 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10255 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10257 if (!rtx_equal_p (target
, fp_hi
))
10258 emit_move_insn (target
, fp_hi
);
10261 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10262 then replicate the value for all elements of the vector
10266 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10273 v
= gen_rtvec (4, value
, value
, value
, value
);
10275 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10276 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10277 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10281 v
= gen_rtvec (2, value
, value
);
10283 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10284 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10287 gcc_unreachable ();
10291 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10292 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10293 true, then replicate the mask for all elements of the vector register.
10294 If INVERT is true, then create a mask excluding the sign bit. */
10297 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10299 enum machine_mode vec_mode
;
10300 HOST_WIDE_INT hi
, lo
;
10305 /* Find the sign bit, sign extended to 2*HWI. */
10306 if (mode
== SFmode
)
10307 lo
= 0x80000000, hi
= lo
< 0;
10308 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10309 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10311 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10314 lo
= ~lo
, hi
= ~hi
;
10316 /* Force this value into the low part of a fp vector constant. */
10317 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10318 mask
= gen_lowpart (mode
, mask
);
10320 v
= ix86_build_const_vector (mode
, vect
, mask
);
10321 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10322 return force_reg (vec_mode
, v
);
10325 /* Generate code for floating point ABS or NEG. */
10328 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10331 rtx mask
, set
, use
, clob
, dst
, src
;
10332 bool matching_memory
;
10333 bool use_sse
= false;
10334 bool vector_mode
= VECTOR_MODE_P (mode
);
10335 enum machine_mode elt_mode
= mode
;
10339 elt_mode
= GET_MODE_INNER (mode
);
10342 else if (TARGET_SSE_MATH
)
10343 use_sse
= SSE_FLOAT_MODE_P (mode
);
10345 /* NEG and ABS performed with SSE use bitwise mask operations.
10346 Create the appropriate mask now. */
10348 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10355 /* If the destination is memory, and we don't have matching source
10356 operands or we're using the x87, do things in registers. */
10357 matching_memory
= false;
10360 if (use_sse
&& rtx_equal_p (dst
, src
))
10361 matching_memory
= true;
10363 dst
= gen_reg_rtx (mode
);
10365 if (MEM_P (src
) && !matching_memory
)
10366 src
= force_reg (mode
, src
);
10370 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10371 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10376 set
= gen_rtx_fmt_e (code
, mode
, src
);
10377 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10380 use
= gen_rtx_USE (VOIDmode
, mask
);
10381 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10382 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10383 gen_rtvec (3, set
, use
, clob
)));
10389 if (dst
!= operands
[0])
10390 emit_move_insn (operands
[0], dst
);
10393 /* Expand a copysign operation. Special case operand 0 being a constant. */
10396 ix86_expand_copysign (rtx operands
[])
10398 enum machine_mode mode
, vmode
;
10399 rtx dest
, op0
, op1
, mask
, nmask
;
10401 dest
= operands
[0];
10405 mode
= GET_MODE (dest
);
10406 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10408 if (GET_CODE (op0
) == CONST_DOUBLE
)
10412 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10413 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10415 if (op0
== CONST0_RTX (mode
))
10416 op0
= CONST0_RTX (vmode
);
10419 if (mode
== SFmode
)
10420 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10421 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10423 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10424 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10427 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10429 if (mode
== SFmode
)
10430 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10432 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10436 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10437 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10439 if (mode
== SFmode
)
10440 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10442 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10446 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10447 be a constant, and so has already been expanded into a vector constant. */
10450 ix86_split_copysign_const (rtx operands
[])
10452 enum machine_mode mode
, vmode
;
10453 rtx dest
, op0
, op1
, mask
, x
;
10455 dest
= operands
[0];
10458 mask
= operands
[3];
10460 mode
= GET_MODE (dest
);
10461 vmode
= GET_MODE (mask
);
10463 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10464 x
= gen_rtx_AND (vmode
, dest
, mask
);
10465 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10467 if (op0
!= CONST0_RTX (vmode
))
10469 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10470 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10474 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10475 so we have to do two masks. */
10478 ix86_split_copysign_var (rtx operands
[])
10480 enum machine_mode mode
, vmode
;
10481 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10483 dest
= operands
[0];
10484 scratch
= operands
[1];
10487 nmask
= operands
[4];
10488 mask
= operands
[5];
10490 mode
= GET_MODE (dest
);
10491 vmode
= GET_MODE (mask
);
10493 if (rtx_equal_p (op0
, op1
))
10495 /* Shouldn't happen often (it's useless, obviously), but when it does
10496 we'd generate incorrect code if we continue below. */
10497 emit_move_insn (dest
, op0
);
10501 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10503 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10505 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10506 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10509 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10510 x
= gen_rtx_NOT (vmode
, dest
);
10511 x
= gen_rtx_AND (vmode
, x
, op0
);
10512 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10516 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10518 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10520 else /* alternative 2,4 */
10522 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10523 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10524 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10526 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10528 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10530 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10531 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10533 else /* alternative 3,4 */
10535 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10537 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10538 x
= gen_rtx_AND (vmode
, dest
, op0
);
10540 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10543 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10544 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10547 /* Return TRUE or FALSE depending on whether the first SET in INSN
10548 has source and destination with matching CC modes, and that the
10549 CC mode is at least as constrained as REQ_MODE. */
10552 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10555 enum machine_mode set_mode
;
10557 set
= PATTERN (insn
);
10558 if (GET_CODE (set
) == PARALLEL
)
10559 set
= XVECEXP (set
, 0, 0);
10560 gcc_assert (GET_CODE (set
) == SET
);
10561 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10563 set_mode
= GET_MODE (SET_DEST (set
));
10567 if (req_mode
!= CCNOmode
10568 && (req_mode
!= CCmode
10569 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10573 if (req_mode
== CCGCmode
)
10577 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10581 if (req_mode
== CCZmode
)
10588 gcc_unreachable ();
10591 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10594 /* Generate insn patterns to do an integer compare of OPERANDS. */
10597 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10599 enum machine_mode cmpmode
;
10602 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10603 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10605 /* This is very simple, but making the interface the same as in the
10606 FP case makes the rest of the code easier. */
10607 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10608 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10610 /* Return the test that should be put into the flags user, i.e.
10611 the bcc, scc, or cmov instruction. */
10612 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10615 /* Figure out whether to use ordered or unordered fp comparisons.
10616 Return the appropriate mode to use. */
10619 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10621 /* ??? In order to make all comparisons reversible, we do all comparisons
10622 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10623 all forms trapping and nontrapping comparisons, we can make inequality
10624 comparisons trapping again, since it results in better code when using
10625 FCOM based compares. */
10626 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10630 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10632 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10633 return ix86_fp_compare_mode (code
);
10636 /* Only zero flag is needed. */
10637 case EQ
: /* ZF=0 */
10638 case NE
: /* ZF!=0 */
10640 /* Codes needing carry flag. */
10641 case GEU
: /* CF=0 */
10642 case GTU
: /* CF=0 & ZF=0 */
10643 case LTU
: /* CF=1 */
10644 case LEU
: /* CF=1 | ZF=1 */
10646 /* Codes possibly doable only with sign flag when
10647 comparing against zero. */
10648 case GE
: /* SF=OF or SF=0 */
10649 case LT
: /* SF<>OF or SF=1 */
10650 if (op1
== const0_rtx
)
10653 /* For other cases Carry flag is not required. */
10655 /* Codes doable only with sign flag when comparing
10656 against zero, but we miss jump instruction for it
10657 so we need to use relational tests against overflow
10658 that thus needs to be zero. */
10659 case GT
: /* ZF=0 & SF=OF */
10660 case LE
: /* ZF=1 | SF<>OF */
10661 if (op1
== const0_rtx
)
10665 /* strcmp pattern do (use flags) and combine may ask us for proper
10670 gcc_unreachable ();
10674 /* Return the fixed registers used for condition codes. */
10677 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10684 /* If two condition code modes are compatible, return a condition code
10685 mode which is compatible with both. Otherwise, return
10688 static enum machine_mode
10689 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10694 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10697 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10698 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10704 gcc_unreachable ();
10726 /* These are only compatible with themselves, which we already
10732 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10735 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10737 enum rtx_code swapped_code
= swap_condition (code
);
10738 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10739 || (ix86_fp_comparison_cost (swapped_code
)
10740 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10743 /* Swap, force into registers, or otherwise massage the two operands
10744 to a fp comparison. The operands are updated in place; the new
10745 comparison code is returned. */
10747 static enum rtx_code
10748 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10750 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10751 rtx op0
= *pop0
, op1
= *pop1
;
10752 enum machine_mode op_mode
= GET_MODE (op0
);
10753 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10755 /* All of the unordered compare instructions only work on registers.
10756 The same is true of the fcomi compare instructions. The XFmode
10757 compare instructions require registers except when comparing
10758 against zero or when converting operand 1 from fixed point to
10762 && (fpcmp_mode
== CCFPUmode
10763 || (op_mode
== XFmode
10764 && ! (standard_80387_constant_p (op0
) == 1
10765 || standard_80387_constant_p (op1
) == 1)
10766 && GET_CODE (op1
) != FLOAT
)
10767 || ix86_use_fcomi_compare (code
)))
10769 op0
= force_reg (op_mode
, op0
);
10770 op1
= force_reg (op_mode
, op1
);
10774 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10775 things around if they appear profitable, otherwise force op0
10776 into a register. */
10778 if (standard_80387_constant_p (op0
) == 0
10780 && ! (standard_80387_constant_p (op1
) == 0
10784 tmp
= op0
, op0
= op1
, op1
= tmp
;
10785 code
= swap_condition (code
);
10789 op0
= force_reg (op_mode
, op0
);
10791 if (CONSTANT_P (op1
))
10793 int tmp
= standard_80387_constant_p (op1
);
10795 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10799 op1
= force_reg (op_mode
, op1
);
10802 op1
= force_reg (op_mode
, op1
);
10806 /* Try to rearrange the comparison to make it cheaper. */
10807 if (ix86_fp_comparison_cost (code
)
10808 > ix86_fp_comparison_cost (swap_condition (code
))
10809 && (REG_P (op1
) || !no_new_pseudos
))
10812 tmp
= op0
, op0
= op1
, op1
= tmp
;
10813 code
= swap_condition (code
);
10815 op0
= force_reg (op_mode
, op0
);
10823 /* Convert comparison codes we use to represent FP comparison to integer
10824 code that will result in proper branch. Return UNKNOWN if no such code
10828 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10857 /* Split comparison code CODE into comparisons we can do using branch
10858 instructions. BYPASS_CODE is comparison code for branch that will
10859 branch around FIRST_CODE and SECOND_CODE. If some of branches
10860 is not required, set value to UNKNOWN.
10861 We never require more than two branches. */
10864 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10865 enum rtx_code
*first_code
,
10866 enum rtx_code
*second_code
)
10868 *first_code
= code
;
10869 *bypass_code
= UNKNOWN
;
10870 *second_code
= UNKNOWN
;
10872 /* The fcomi comparison sets flags as follows:
10882 case GT
: /* GTU - CF=0 & ZF=0 */
10883 case GE
: /* GEU - CF=0 */
10884 case ORDERED
: /* PF=0 */
10885 case UNORDERED
: /* PF=1 */
10886 case UNEQ
: /* EQ - ZF=1 */
10887 case UNLT
: /* LTU - CF=1 */
10888 case UNLE
: /* LEU - CF=1 | ZF=1 */
10889 case LTGT
: /* EQ - ZF=0 */
10891 case LT
: /* LTU - CF=1 - fails on unordered */
10892 *first_code
= UNLT
;
10893 *bypass_code
= UNORDERED
;
10895 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10896 *first_code
= UNLE
;
10897 *bypass_code
= UNORDERED
;
10899 case EQ
: /* EQ - ZF=1 - fails on unordered */
10900 *first_code
= UNEQ
;
10901 *bypass_code
= UNORDERED
;
10903 case NE
: /* NE - ZF=0 - fails on unordered */
10904 *first_code
= LTGT
;
10905 *second_code
= UNORDERED
;
10907 case UNGE
: /* GEU - CF=0 - fails on unordered */
10909 *second_code
= UNORDERED
;
10911 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10913 *second_code
= UNORDERED
;
10916 gcc_unreachable ();
10918 if (!TARGET_IEEE_FP
)
10920 *second_code
= UNKNOWN
;
10921 *bypass_code
= UNKNOWN
;
10925 /* Return cost of comparison done fcom + arithmetics operations on AX.
10926 All following functions do use number of instructions as a cost metrics.
10927 In future this should be tweaked to compute bytes for optimize_size and
10928 take into account performance of various instructions on various CPUs. */
10930 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10932 if (!TARGET_IEEE_FP
)
10934 /* The cost of code output by ix86_expand_fp_compare. */
10958 gcc_unreachable ();
10962 /* Return cost of comparison done using fcomi operation.
10963 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10965 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10967 enum rtx_code bypass_code
, first_code
, second_code
;
10968 /* Return arbitrarily high cost when instruction is not supported - this
10969 prevents gcc from using it. */
10972 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10973 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10976 /* Return cost of comparison done using sahf operation.
10977 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10979 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10981 enum rtx_code bypass_code
, first_code
, second_code
;
10982 /* Return arbitrarily high cost when instruction is not preferred - this
10983 avoids gcc from using it. */
10984 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
10986 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10987 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10990 /* Compute cost of the comparison done using any method.
10991 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10993 ix86_fp_comparison_cost (enum rtx_code code
)
10995 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10998 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10999 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11001 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11002 if (min
> sahf_cost
)
11004 if (min
> fcomi_cost
)
11009 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11012 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11013 rtx
*second_test
, rtx
*bypass_test
)
11015 enum machine_mode fpcmp_mode
, intcmp_mode
;
11017 int cost
= ix86_fp_comparison_cost (code
);
11018 enum rtx_code bypass_code
, first_code
, second_code
;
11020 fpcmp_mode
= ix86_fp_compare_mode (code
);
11021 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11024 *second_test
= NULL_RTX
;
11026 *bypass_test
= NULL_RTX
;
11028 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11030 /* Do fcomi/sahf based test when profitable. */
11031 if ((TARGET_CMOVE
|| TARGET_SAHF
)
11032 && (bypass_code
== UNKNOWN
|| bypass_test
)
11033 && (second_code
== UNKNOWN
|| second_test
)
11034 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
11038 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11039 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11045 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11046 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11048 scratch
= gen_reg_rtx (HImode
);
11049 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11050 emit_insn (gen_x86_sahf_1 (scratch
));
11053 /* The FP codes work out to act like unsigned. */
11054 intcmp_mode
= fpcmp_mode
;
11056 if (bypass_code
!= UNKNOWN
)
11057 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11058 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11060 if (second_code
!= UNKNOWN
)
11061 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11062 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11067 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11068 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11069 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11071 scratch
= gen_reg_rtx (HImode
);
11072 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11074 /* In the unordered case, we have to check C2 for NaN's, which
11075 doesn't happen to work out to anything nice combination-wise.
11076 So do some bit twiddling on the value we've got in AH to come
11077 up with an appropriate set of condition codes. */
11079 intcmp_mode
= CCNOmode
;
11084 if (code
== GT
|| !TARGET_IEEE_FP
)
11086 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11091 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11092 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11093 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11094 intcmp_mode
= CCmode
;
11100 if (code
== LT
&& TARGET_IEEE_FP
)
11102 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11103 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11104 intcmp_mode
= CCmode
;
11109 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11115 if (code
== GE
|| !TARGET_IEEE_FP
)
11117 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11122 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11123 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11130 if (code
== LE
&& TARGET_IEEE_FP
)
11132 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11133 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11134 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11135 intcmp_mode
= CCmode
;
11140 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11146 if (code
== EQ
&& TARGET_IEEE_FP
)
11148 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11149 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11150 intcmp_mode
= CCmode
;
11155 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11162 if (code
== NE
&& TARGET_IEEE_FP
)
11164 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11165 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11171 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11177 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11181 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11186 gcc_unreachable ();
11190 /* Return the test that should be put into the flags user, i.e.
11191 the bcc, scc, or cmov instruction. */
11192 return gen_rtx_fmt_ee (code
, VOIDmode
,
11193 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11198 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11201 op0
= ix86_compare_op0
;
11202 op1
= ix86_compare_op1
;
11205 *second_test
= NULL_RTX
;
11207 *bypass_test
= NULL_RTX
;
11209 if (ix86_compare_emitted
)
11211 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11212 ix86_compare_emitted
= NULL_RTX
;
11214 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11215 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11216 second_test
, bypass_test
);
11218 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11223 /* Return true if the CODE will result in nontrivial jump sequence. */
11225 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11227 enum rtx_code bypass_code
, first_code
, second_code
;
11230 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11231 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11235 ix86_expand_branch (enum rtx_code code
, rtx label
)
11239 /* If we have emitted a compare insn, go straight to simple.
11240 ix86_expand_compare won't emit anything if ix86_compare_emitted
11242 if (ix86_compare_emitted
)
11245 switch (GET_MODE (ix86_compare_op0
))
11251 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11252 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11253 gen_rtx_LABEL_REF (VOIDmode
, label
),
11255 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11264 enum rtx_code bypass_code
, first_code
, second_code
;
11266 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11267 &ix86_compare_op1
);
11269 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11271 /* Check whether we will use the natural sequence with one jump. If
11272 so, we can expand jump early. Otherwise delay expansion by
11273 creating compound insn to not confuse optimizers. */
11274 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11277 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11278 gen_rtx_LABEL_REF (VOIDmode
, label
),
11279 pc_rtx
, NULL_RTX
, NULL_RTX
);
11283 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11284 ix86_compare_op0
, ix86_compare_op1
);
11285 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11286 gen_rtx_LABEL_REF (VOIDmode
, label
),
11288 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11290 use_fcomi
= ix86_use_fcomi_compare (code
);
11291 vec
= rtvec_alloc (3 + !use_fcomi
);
11292 RTVEC_ELT (vec
, 0) = tmp
;
11294 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11296 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11299 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11301 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11310 /* Expand DImode branch into multiple compare+branch. */
11312 rtx lo
[2], hi
[2], label2
;
11313 enum rtx_code code1
, code2
, code3
;
11314 enum machine_mode submode
;
11316 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11318 tmp
= ix86_compare_op0
;
11319 ix86_compare_op0
= ix86_compare_op1
;
11320 ix86_compare_op1
= tmp
;
11321 code
= swap_condition (code
);
11323 if (GET_MODE (ix86_compare_op0
) == DImode
)
11325 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11326 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11331 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11332 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11336 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11337 avoid two branches. This costs one extra insn, so disable when
11338 optimizing for size. */
11340 if ((code
== EQ
|| code
== NE
)
11342 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11347 if (hi
[1] != const0_rtx
)
11348 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11349 NULL_RTX
, 0, OPTAB_WIDEN
);
11352 if (lo
[1] != const0_rtx
)
11353 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11354 NULL_RTX
, 0, OPTAB_WIDEN
);
11356 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11357 NULL_RTX
, 0, OPTAB_WIDEN
);
11359 ix86_compare_op0
= tmp
;
11360 ix86_compare_op1
= const0_rtx
;
11361 ix86_expand_branch (code
, label
);
11365 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11366 op1 is a constant and the low word is zero, then we can just
11367 examine the high word. */
11369 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11372 case LT
: case LTU
: case GE
: case GEU
:
11373 ix86_compare_op0
= hi
[0];
11374 ix86_compare_op1
= hi
[1];
11375 ix86_expand_branch (code
, label
);
11381 /* Otherwise, we need two or three jumps. */
11383 label2
= gen_label_rtx ();
11386 code2
= swap_condition (code
);
11387 code3
= unsigned_condition (code
);
11391 case LT
: case GT
: case LTU
: case GTU
:
11394 case LE
: code1
= LT
; code2
= GT
; break;
11395 case GE
: code1
= GT
; code2
= LT
; break;
11396 case LEU
: code1
= LTU
; code2
= GTU
; break;
11397 case GEU
: code1
= GTU
; code2
= LTU
; break;
11399 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11400 case NE
: code2
= UNKNOWN
; break;
11403 gcc_unreachable ();
11408 * if (hi(a) < hi(b)) goto true;
11409 * if (hi(a) > hi(b)) goto false;
11410 * if (lo(a) < lo(b)) goto true;
11414 ix86_compare_op0
= hi
[0];
11415 ix86_compare_op1
= hi
[1];
11417 if (code1
!= UNKNOWN
)
11418 ix86_expand_branch (code1
, label
);
11419 if (code2
!= UNKNOWN
)
11420 ix86_expand_branch (code2
, label2
);
11422 ix86_compare_op0
= lo
[0];
11423 ix86_compare_op1
= lo
[1];
11424 ix86_expand_branch (code3
, label
);
11426 if (code2
!= UNKNOWN
)
11427 emit_label (label2
);
11432 gcc_unreachable ();
11436 /* Split branch based on floating point condition. */
11438 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11439 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11441 rtx second
, bypass
;
11442 rtx label
= NULL_RTX
;
11444 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11447 if (target2
!= pc_rtx
)
11450 code
= reverse_condition_maybe_unordered (code
);
11455 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11456 tmp
, &second
, &bypass
);
11458 /* Remove pushed operand from stack. */
11460 ix86_free_from_memory (GET_MODE (pushed
));
11462 if (split_branch_probability
>= 0)
11464 /* Distribute the probabilities across the jumps.
11465 Assume the BYPASS and SECOND to be always test
11467 probability
= split_branch_probability
;
11469 /* Value of 1 is low enough to make no need for probability
11470 to be updated. Later we may run some experiments and see
11471 if unordered values are more frequent in practice. */
11473 bypass_probability
= 1;
11475 second_probability
= 1;
11477 if (bypass
!= NULL_RTX
)
11479 label
= gen_label_rtx ();
11480 i
= emit_jump_insn (gen_rtx_SET
11482 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11484 gen_rtx_LABEL_REF (VOIDmode
,
11487 if (bypass_probability
>= 0)
11489 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11490 GEN_INT (bypass_probability
),
11493 i
= emit_jump_insn (gen_rtx_SET
11495 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11496 condition
, target1
, target2
)));
11497 if (probability
>= 0)
11499 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11500 GEN_INT (probability
),
11502 if (second
!= NULL_RTX
)
11504 i
= emit_jump_insn (gen_rtx_SET
11506 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11508 if (second_probability
>= 0)
11510 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11511 GEN_INT (second_probability
),
11514 if (label
!= NULL_RTX
)
11515 emit_label (label
);
11519 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11521 rtx ret
, tmp
, tmpreg
, equiv
;
11522 rtx second_test
, bypass_test
;
11524 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11525 return 0; /* FAIL */
11527 gcc_assert (GET_MODE (dest
) == QImode
);
11529 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11530 PUT_MODE (ret
, QImode
);
11535 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11536 if (bypass_test
|| second_test
)
11538 rtx test
= second_test
;
11540 rtx tmp2
= gen_reg_rtx (QImode
);
11543 gcc_assert (!second_test
);
11544 test
= bypass_test
;
11546 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11548 PUT_MODE (test
, QImode
);
11549 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11552 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11554 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11557 /* Attach a REG_EQUAL note describing the comparison result. */
11558 if (ix86_compare_op0
&& ix86_compare_op1
)
11560 equiv
= simplify_gen_relational (code
, QImode
,
11561 GET_MODE (ix86_compare_op0
),
11562 ix86_compare_op0
, ix86_compare_op1
);
11563 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11566 return 1; /* DONE */
11569 /* Expand comparison setting or clearing carry flag. Return true when
11570 successful and set pop for the operation. */
11572 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11574 enum machine_mode mode
=
11575 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11577 /* Do not handle DImode compares that go through special path. Also we can't
11578 deal with FP compares yet. This is possible to add. */
11579 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11581 if (FLOAT_MODE_P (mode
))
11583 rtx second_test
= NULL
, bypass_test
= NULL
;
11584 rtx compare_op
, compare_seq
;
11586 /* Shortcut: following common codes never translate into carry flag compares. */
11587 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11588 || code
== ORDERED
|| code
== UNORDERED
)
11591 /* These comparisons require zero flag; swap operands so they won't. */
11592 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11593 && !TARGET_IEEE_FP
)
11598 code
= swap_condition (code
);
11601 /* Try to expand the comparison and verify that we end up with carry flag
11602 based comparison. This is fails to be true only when we decide to expand
11603 comparison using arithmetic that is not too common scenario. */
11605 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11606 &second_test
, &bypass_test
);
11607 compare_seq
= get_insns ();
11610 if (second_test
|| bypass_test
)
11612 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11613 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11614 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11616 code
= GET_CODE (compare_op
);
11617 if (code
!= LTU
&& code
!= GEU
)
11619 emit_insn (compare_seq
);
11623 if (!INTEGRAL_MODE_P (mode
))
11631 /* Convert a==0 into (unsigned)a<1. */
11634 if (op1
!= const0_rtx
)
11637 code
= (code
== EQ
? LTU
: GEU
);
11640 /* Convert a>b into b<a or a>=b-1. */
11643 if (CONST_INT_P (op1
))
11645 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11646 /* Bail out on overflow. We still can swap operands but that
11647 would force loading of the constant into register. */
11648 if (op1
== const0_rtx
11649 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11651 code
= (code
== GTU
? GEU
: LTU
);
11658 code
= (code
== GTU
? LTU
: GEU
);
11662 /* Convert a>=0 into (unsigned)a<0x80000000. */
11665 if (mode
== DImode
|| op1
!= const0_rtx
)
11667 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11668 code
= (code
== LT
? GEU
: LTU
);
11672 if (mode
== DImode
|| op1
!= constm1_rtx
)
11674 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11675 code
= (code
== LE
? GEU
: LTU
);
11681 /* Swapping operands may cause constant to appear as first operand. */
11682 if (!nonimmediate_operand (op0
, VOIDmode
))
11684 if (no_new_pseudos
)
11686 op0
= force_reg (mode
, op0
);
11688 ix86_compare_op0
= op0
;
11689 ix86_compare_op1
= op1
;
11690 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11691 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11696 ix86_expand_int_movcc (rtx operands
[])
11698 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11699 rtx compare_seq
, compare_op
;
11700 rtx second_test
, bypass_test
;
11701 enum machine_mode mode
= GET_MODE (operands
[0]);
11702 bool sign_bit_compare_p
= false;;
11705 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11706 compare_seq
= get_insns ();
11709 compare_code
= GET_CODE (compare_op
);
11711 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11712 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11713 sign_bit_compare_p
= true;
11715 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11716 HImode insns, we'd be swallowed in word prefix ops. */
11718 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11719 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11720 && CONST_INT_P (operands
[2])
11721 && CONST_INT_P (operands
[3]))
11723 rtx out
= operands
[0];
11724 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11725 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11726 HOST_WIDE_INT diff
;
11729 /* Sign bit compares are better done using shifts than we do by using
11731 if (sign_bit_compare_p
11732 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11733 ix86_compare_op1
, &compare_op
))
11735 /* Detect overlap between destination and compare sources. */
11738 if (!sign_bit_compare_p
)
11740 bool fpcmp
= false;
11742 compare_code
= GET_CODE (compare_op
);
11744 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11745 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11748 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11751 /* To simplify rest of code, restrict to the GEU case. */
11752 if (compare_code
== LTU
)
11754 HOST_WIDE_INT tmp
= ct
;
11757 compare_code
= reverse_condition (compare_code
);
11758 code
= reverse_condition (code
);
11763 PUT_CODE (compare_op
,
11764 reverse_condition_maybe_unordered
11765 (GET_CODE (compare_op
)));
11767 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11771 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11772 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11773 tmp
= gen_reg_rtx (mode
);
11775 if (mode
== DImode
)
11776 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11778 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11782 if (code
== GT
|| code
== GE
)
11783 code
= reverse_condition (code
);
11786 HOST_WIDE_INT tmp
= ct
;
11791 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11792 ix86_compare_op1
, VOIDmode
, 0, -1);
11805 tmp
= expand_simple_binop (mode
, PLUS
,
11807 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11818 tmp
= expand_simple_binop (mode
, IOR
,
11820 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11822 else if (diff
== -1 && ct
)
11832 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11834 tmp
= expand_simple_binop (mode
, PLUS
,
11835 copy_rtx (tmp
), GEN_INT (cf
),
11836 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11844 * andl cf - ct, dest
11854 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11857 tmp
= expand_simple_binop (mode
, AND
,
11859 gen_int_mode (cf
- ct
, mode
),
11860 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11862 tmp
= expand_simple_binop (mode
, PLUS
,
11863 copy_rtx (tmp
), GEN_INT (ct
),
11864 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11867 if (!rtx_equal_p (tmp
, out
))
11868 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11870 return 1; /* DONE */
11876 tmp
= ct
, ct
= cf
, cf
= tmp
;
11878 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11880 /* We may be reversing unordered compare to normal compare, that
11881 is not valid in general (we may convert non-trapping condition
11882 to trapping one), however on i386 we currently emit all
11883 comparisons unordered. */
11884 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11885 code
= reverse_condition_maybe_unordered (code
);
11889 compare_code
= reverse_condition (compare_code
);
11890 code
= reverse_condition (code
);
11894 compare_code
= UNKNOWN
;
11895 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11896 && CONST_INT_P (ix86_compare_op1
))
11898 if (ix86_compare_op1
== const0_rtx
11899 && (code
== LT
|| code
== GE
))
11900 compare_code
= code
;
11901 else if (ix86_compare_op1
== constm1_rtx
)
11905 else if (code
== GT
)
11910 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11911 if (compare_code
!= UNKNOWN
11912 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11913 && (cf
== -1 || ct
== -1))
11915 /* If lea code below could be used, only optimize
11916 if it results in a 2 insn sequence. */
11918 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11919 || diff
== 3 || diff
== 5 || diff
== 9)
11920 || (compare_code
== LT
&& ct
== -1)
11921 || (compare_code
== GE
&& cf
== -1))
11924 * notl op1 (if necessary)
11932 code
= reverse_condition (code
);
11935 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11936 ix86_compare_op1
, VOIDmode
, 0, -1);
11938 out
= expand_simple_binop (mode
, IOR
,
11940 out
, 1, OPTAB_DIRECT
);
11941 if (out
!= operands
[0])
11942 emit_move_insn (operands
[0], out
);
11944 return 1; /* DONE */
11949 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11950 || diff
== 3 || diff
== 5 || diff
== 9)
11951 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11953 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11959 * lea cf(dest*(ct-cf)),dest
11963 * This also catches the degenerate setcc-only case.
11969 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11970 ix86_compare_op1
, VOIDmode
, 0, 1);
11973 /* On x86_64 the lea instruction operates on Pmode, so we need
11974 to get arithmetics done in proper mode to match. */
11976 tmp
= copy_rtx (out
);
11980 out1
= copy_rtx (out
);
11981 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11985 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11991 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11994 if (!rtx_equal_p (tmp
, out
))
11997 out
= force_operand (tmp
, copy_rtx (out
));
11999 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12001 if (!rtx_equal_p (out
, operands
[0]))
12002 emit_move_insn (operands
[0], copy_rtx (out
));
12004 return 1; /* DONE */
12008 * General case: Jumpful:
12009 * xorl dest,dest cmpl op1, op2
12010 * cmpl op1, op2 movl ct, dest
12011 * setcc dest jcc 1f
12012 * decl dest movl cf, dest
12013 * andl (cf-ct),dest 1:
12016 * Size 20. Size 14.
12018 * This is reasonably steep, but branch mispredict costs are
12019 * high on modern cpus, so consider failing only if optimizing
12023 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12024 && BRANCH_COST
>= 2)
12030 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
12031 /* We may be reversing unordered compare to normal compare,
12032 that is not valid in general (we may convert non-trapping
12033 condition to trapping one), however on i386 we currently
12034 emit all comparisons unordered. */
12035 code
= reverse_condition_maybe_unordered (code
);
12038 code
= reverse_condition (code
);
12039 if (compare_code
!= UNKNOWN
)
12040 compare_code
= reverse_condition (compare_code
);
12044 if (compare_code
!= UNKNOWN
)
12046 /* notl op1 (if needed)
12051 For x < 0 (resp. x <= -1) there will be no notl,
12052 so if possible swap the constants to get rid of the
12054 True/false will be -1/0 while code below (store flag
12055 followed by decrement) is 0/-1, so the constants need
12056 to be exchanged once more. */
12058 if (compare_code
== GE
|| !cf
)
12060 code
= reverse_condition (code
);
12065 HOST_WIDE_INT tmp
= cf
;
12070 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12071 ix86_compare_op1
, VOIDmode
, 0, -1);
12075 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12076 ix86_compare_op1
, VOIDmode
, 0, 1);
12078 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12079 copy_rtx (out
), 1, OPTAB_DIRECT
);
12082 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12083 gen_int_mode (cf
- ct
, mode
),
12084 copy_rtx (out
), 1, OPTAB_DIRECT
);
12086 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12087 copy_rtx (out
), 1, OPTAB_DIRECT
);
12088 if (!rtx_equal_p (out
, operands
[0]))
12089 emit_move_insn (operands
[0], copy_rtx (out
));
12091 return 1; /* DONE */
12095 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12097 /* Try a few things more with specific constants and a variable. */
12100 rtx var
, orig_out
, out
, tmp
;
12102 if (BRANCH_COST
<= 2)
12103 return 0; /* FAIL */
12105 /* If one of the two operands is an interesting constant, load a
12106 constant with the above and mask it in with a logical operation. */
12108 if (CONST_INT_P (operands
[2]))
12111 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12112 operands
[3] = constm1_rtx
, op
= and_optab
;
12113 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12114 operands
[3] = const0_rtx
, op
= ior_optab
;
12116 return 0; /* FAIL */
12118 else if (CONST_INT_P (operands
[3]))
12121 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12122 operands
[2] = constm1_rtx
, op
= and_optab
;
12123 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12124 operands
[2] = const0_rtx
, op
= ior_optab
;
12126 return 0; /* FAIL */
12129 return 0; /* FAIL */
12131 orig_out
= operands
[0];
12132 tmp
= gen_reg_rtx (mode
);
12135 /* Recurse to get the constant loaded. */
12136 if (ix86_expand_int_movcc (operands
) == 0)
12137 return 0; /* FAIL */
12139 /* Mask in the interesting variable. */
12140 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12142 if (!rtx_equal_p (out
, orig_out
))
12143 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12145 return 1; /* DONE */
12149 * For comparison with above,
12159 if (! nonimmediate_operand (operands
[2], mode
))
12160 operands
[2] = force_reg (mode
, operands
[2]);
12161 if (! nonimmediate_operand (operands
[3], mode
))
12162 operands
[3] = force_reg (mode
, operands
[3]);
12164 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12166 rtx tmp
= gen_reg_rtx (mode
);
12167 emit_move_insn (tmp
, operands
[3]);
12170 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12172 rtx tmp
= gen_reg_rtx (mode
);
12173 emit_move_insn (tmp
, operands
[2]);
12177 if (! register_operand (operands
[2], VOIDmode
)
12179 || ! register_operand (operands
[3], VOIDmode
)))
12180 operands
[2] = force_reg (mode
, operands
[2]);
12183 && ! register_operand (operands
[3], VOIDmode
))
12184 operands
[3] = force_reg (mode
, operands
[3]);
12186 emit_insn (compare_seq
);
12187 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12188 gen_rtx_IF_THEN_ELSE (mode
,
12189 compare_op
, operands
[2],
12192 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12193 gen_rtx_IF_THEN_ELSE (mode
,
12195 copy_rtx (operands
[3]),
12196 copy_rtx (operands
[0]))));
12198 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12199 gen_rtx_IF_THEN_ELSE (mode
,
12201 copy_rtx (operands
[2]),
12202 copy_rtx (operands
[0]))));
12204 return 1; /* DONE */
12207 /* Swap, force into registers, or otherwise massage the two operands
12208 to an sse comparison with a mask result. Thus we differ a bit from
12209 ix86_prepare_fp_compare_args which expects to produce a flags result.
12211 The DEST operand exists to help determine whether to commute commutative
12212 operators. The POP0/POP1 operands are updated in place. The new
12213 comparison code is returned, or UNKNOWN if not implementable. */
12215 static enum rtx_code
12216 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12217 rtx
*pop0
, rtx
*pop1
)
12225 /* We have no LTGT as an operator. We could implement it with
12226 NE & ORDERED, but this requires an extra temporary. It's
12227 not clear that it's worth it. */
12234 /* These are supported directly. */
12241 /* For commutative operators, try to canonicalize the destination
12242 operand to be first in the comparison - this helps reload to
12243 avoid extra moves. */
12244 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12252 /* These are not supported directly. Swap the comparison operands
12253 to transform into something that is supported. */
12257 code
= swap_condition (code
);
12261 gcc_unreachable ();
12267 /* Detect conditional moves that exactly match min/max operational
12268 semantics. Note that this is IEEE safe, as long as we don't
12269 interchange the operands.
12271 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12272 and TRUE if the operation is successful and instructions are emitted. */
12275 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12276 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12278 enum machine_mode mode
;
12284 else if (code
== UNGE
)
12287 if_true
= if_false
;
12293 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12295 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12300 mode
= GET_MODE (dest
);
12302 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12303 but MODE may be a vector mode and thus not appropriate. */
12304 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12306 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12309 if_true
= force_reg (mode
, if_true
);
12310 v
= gen_rtvec (2, if_true
, if_false
);
12311 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12315 code
= is_min
? SMIN
: SMAX
;
12316 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12319 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12323 /* Expand an sse vector comparison. Return the register with the result. */
12326 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12327 rtx op_true
, rtx op_false
)
12329 enum machine_mode mode
= GET_MODE (dest
);
12332 cmp_op0
= force_reg (mode
, cmp_op0
);
12333 if (!nonimmediate_operand (cmp_op1
, mode
))
12334 cmp_op1
= force_reg (mode
, cmp_op1
);
12337 || reg_overlap_mentioned_p (dest
, op_true
)
12338 || reg_overlap_mentioned_p (dest
, op_false
))
12339 dest
= gen_reg_rtx (mode
);
12341 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12342 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12347 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12348 operations. This is used for both scalar and vector conditional moves. */
12351 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12353 enum machine_mode mode
= GET_MODE (dest
);
12356 if (op_false
== CONST0_RTX (mode
))
12358 op_true
= force_reg (mode
, op_true
);
12359 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12360 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12362 else if (op_true
== CONST0_RTX (mode
))
12364 op_false
= force_reg (mode
, op_false
);
12365 x
= gen_rtx_NOT (mode
, cmp
);
12366 x
= gen_rtx_AND (mode
, x
, op_false
);
12367 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12371 op_true
= force_reg (mode
, op_true
);
12372 op_false
= force_reg (mode
, op_false
);
12374 t2
= gen_reg_rtx (mode
);
12376 t3
= gen_reg_rtx (mode
);
12380 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12381 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12383 x
= gen_rtx_NOT (mode
, cmp
);
12384 x
= gen_rtx_AND (mode
, x
, op_false
);
12385 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12387 x
= gen_rtx_IOR (mode
, t3
, t2
);
12388 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12392 /* Expand a floating-point conditional move. Return true if successful. */
12395 ix86_expand_fp_movcc (rtx operands
[])
12397 enum machine_mode mode
= GET_MODE (operands
[0]);
12398 enum rtx_code code
= GET_CODE (operands
[1]);
12399 rtx tmp
, compare_op
, second_test
, bypass_test
;
12401 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12403 enum machine_mode cmode
;
12405 /* Since we've no cmove for sse registers, don't force bad register
12406 allocation just to gain access to it. Deny movcc when the
12407 comparison mode doesn't match the move mode. */
12408 cmode
= GET_MODE (ix86_compare_op0
);
12409 if (cmode
== VOIDmode
)
12410 cmode
= GET_MODE (ix86_compare_op1
);
12414 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12416 &ix86_compare_op1
);
12417 if (code
== UNKNOWN
)
12420 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12421 ix86_compare_op1
, operands
[2],
12425 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12426 ix86_compare_op1
, operands
[2], operands
[3]);
12427 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12431 /* The floating point conditional move instructions don't directly
12432 support conditions resulting from a signed integer comparison. */
12434 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12436 /* The floating point conditional move instructions don't directly
12437 support signed integer comparisons. */
12439 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12441 gcc_assert (!second_test
&& !bypass_test
);
12442 tmp
= gen_reg_rtx (QImode
);
12443 ix86_expand_setcc (code
, tmp
);
12445 ix86_compare_op0
= tmp
;
12446 ix86_compare_op1
= const0_rtx
;
12447 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12449 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12451 tmp
= gen_reg_rtx (mode
);
12452 emit_move_insn (tmp
, operands
[3]);
12455 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12457 tmp
= gen_reg_rtx (mode
);
12458 emit_move_insn (tmp
, operands
[2]);
12462 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12463 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12464 operands
[2], operands
[3])));
12466 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12467 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12468 operands
[3], operands
[0])));
12470 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12471 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12472 operands
[2], operands
[0])));
12477 /* Expand a floating-point vector conditional move; a vcond operation
12478 rather than a movcc operation. */
12481 ix86_expand_fp_vcond (rtx operands
[])
12483 enum rtx_code code
= GET_CODE (operands
[3]);
12486 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12487 &operands
[4], &operands
[5]);
12488 if (code
== UNKNOWN
)
12491 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12492 operands
[5], operands
[1], operands
[2]))
12495 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12496 operands
[1], operands
[2]);
12497 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12501 /* Expand a signed integral vector conditional move. */
12504 ix86_expand_int_vcond (rtx operands
[])
12506 enum machine_mode mode
= GET_MODE (operands
[0]);
12507 enum rtx_code code
= GET_CODE (operands
[3]);
12508 bool negate
= false;
12511 cop0
= operands
[4];
12512 cop1
= operands
[5];
12514 /* Canonicalize the comparison to EQ, GT, GTU. */
12525 code
= reverse_condition (code
);
12531 code
= reverse_condition (code
);
12537 code
= swap_condition (code
);
12538 x
= cop0
, cop0
= cop1
, cop1
= x
;
12542 gcc_unreachable ();
12545 /* Unsigned parallel compare is not supported by the hardware. Play some
12546 tricks to turn this into a signed comparison against 0. */
12549 cop0
= force_reg (mode
, cop0
);
12557 /* Perform a parallel modulo subtraction. */
12558 t1
= gen_reg_rtx (mode
);
12559 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12561 /* Extract the original sign bit of op0. */
12562 mask
= GEN_INT (-0x80000000);
12563 mask
= gen_rtx_CONST_VECTOR (mode
,
12564 gen_rtvec (4, mask
, mask
, mask
, mask
));
12565 mask
= force_reg (mode
, mask
);
12566 t2
= gen_reg_rtx (mode
);
12567 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12569 /* XOR it back into the result of the subtraction. This results
12570 in the sign bit set iff we saw unsigned underflow. */
12571 x
= gen_reg_rtx (mode
);
12572 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12580 /* Perform a parallel unsigned saturating subtraction. */
12581 x
= gen_reg_rtx (mode
);
12582 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12583 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12590 gcc_unreachable ();
12594 cop1
= CONST0_RTX (mode
);
12597 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12598 operands
[1+negate
], operands
[2-negate
]);
12600 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12601 operands
[2-negate
]);
12605 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12606 true if we should do zero extension, else sign extension. HIGH_P is
12607 true if we want the N/2 high elements, else the low elements. */
12610 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12612 enum machine_mode imode
= GET_MODE (operands
[1]);
12613 rtx (*unpack
)(rtx
, rtx
, rtx
);
12620 unpack
= gen_vec_interleave_highv16qi
;
12622 unpack
= gen_vec_interleave_lowv16qi
;
12626 unpack
= gen_vec_interleave_highv8hi
;
12628 unpack
= gen_vec_interleave_lowv8hi
;
12632 unpack
= gen_vec_interleave_highv4si
;
12634 unpack
= gen_vec_interleave_lowv4si
;
12637 gcc_unreachable ();
12640 dest
= gen_lowpart (imode
, operands
[0]);
12643 se
= force_reg (imode
, CONST0_RTX (imode
));
12645 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12646 operands
[1], pc_rtx
, pc_rtx
);
12648 emit_insn (unpack (dest
, operands
[1], se
));
12651 /* Expand conditional increment or decrement using adb/sbb instructions.
12652 The default case using setcc followed by the conditional move can be
12653 done by generic code. */
12655 ix86_expand_int_addcc (rtx operands
[])
12657 enum rtx_code code
= GET_CODE (operands
[1]);
12659 rtx val
= const0_rtx
;
12660 bool fpcmp
= false;
12661 enum machine_mode mode
= GET_MODE (operands
[0]);
12663 if (operands
[3] != const1_rtx
12664 && operands
[3] != constm1_rtx
)
12666 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12667 ix86_compare_op1
, &compare_op
))
12669 code
= GET_CODE (compare_op
);
12671 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12672 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12675 code
= ix86_fp_compare_code_to_integer (code
);
12682 PUT_CODE (compare_op
,
12683 reverse_condition_maybe_unordered
12684 (GET_CODE (compare_op
)));
12686 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12688 PUT_MODE (compare_op
, mode
);
12690 /* Construct either adc or sbb insn. */
12691 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12693 switch (GET_MODE (operands
[0]))
12696 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12699 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12702 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12705 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12708 gcc_unreachable ();
12713 switch (GET_MODE (operands
[0]))
12716 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12719 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12722 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12725 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12728 gcc_unreachable ();
12731 return 1; /* DONE */
12735 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12736 works for floating pointer parameters and nonoffsetable memories.
12737 For pushes, it returns just stack offsets; the values will be saved
12738 in the right order. Maximally three parts are generated. */
12741 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12746 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12748 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12750 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12751 gcc_assert (size
>= 2 && size
<= 3);
12753 /* Optimize constant pool reference to immediates. This is used by fp
12754 moves, that force all constants to memory to allow combining. */
12755 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12757 rtx tmp
= maybe_get_pool_constant (operand
);
12762 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12764 /* The only non-offsetable memories we handle are pushes. */
12765 int ok
= push_operand (operand
, VOIDmode
);
12769 operand
= copy_rtx (operand
);
12770 PUT_MODE (operand
, Pmode
);
12771 parts
[0] = parts
[1] = parts
[2] = operand
;
12775 if (GET_CODE (operand
) == CONST_VECTOR
)
12777 enum machine_mode imode
= int_mode_for_mode (mode
);
12778 /* Caution: if we looked through a constant pool memory above,
12779 the operand may actually have a different mode now. That's
12780 ok, since we want to pun this all the way back to an integer. */
12781 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12782 gcc_assert (operand
!= NULL
);
12788 if (mode
== DImode
)
12789 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12792 if (REG_P (operand
))
12794 gcc_assert (reload_completed
);
12795 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12796 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12798 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12800 else if (offsettable_memref_p (operand
))
12802 operand
= adjust_address (operand
, SImode
, 0);
12803 parts
[0] = operand
;
12804 parts
[1] = adjust_address (operand
, SImode
, 4);
12806 parts
[2] = adjust_address (operand
, SImode
, 8);
12808 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12813 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12817 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12818 parts
[2] = gen_int_mode (l
[2], SImode
);
12821 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12824 gcc_unreachable ();
12826 parts
[1] = gen_int_mode (l
[1], SImode
);
12827 parts
[0] = gen_int_mode (l
[0], SImode
);
12830 gcc_unreachable ();
12835 if (mode
== TImode
)
12836 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12837 if (mode
== XFmode
|| mode
== TFmode
)
12839 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12840 if (REG_P (operand
))
12842 gcc_assert (reload_completed
);
12843 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12844 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12846 else if (offsettable_memref_p (operand
))
12848 operand
= adjust_address (operand
, DImode
, 0);
12849 parts
[0] = operand
;
12850 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12852 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12857 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12858 real_to_target (l
, &r
, mode
);
12860 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12861 if (HOST_BITS_PER_WIDE_INT
>= 64)
12864 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12865 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12868 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12870 if (upper_mode
== SImode
)
12871 parts
[1] = gen_int_mode (l
[2], SImode
);
12872 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12875 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12876 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12879 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12882 gcc_unreachable ();
12889 /* Emit insns to perform a move or push of DI, DF, and XF values.
12890 Return false when normal moves are needed; true when all required
12891 insns have been emitted. Operands 2-4 contain the input values
12892 int the correct order; operands 5-7 contain the output values. */
12895 ix86_split_long_move (rtx operands
[])
12900 int collisions
= 0;
12901 enum machine_mode mode
= GET_MODE (operands
[0]);
12903 /* The DFmode expanders may ask us to move double.
12904 For 64bit target this is single move. By hiding the fact
12905 here we simplify i386.md splitters. */
12906 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12908 /* Optimize constant pool reference to immediates. This is used by
12909 fp moves, that force all constants to memory to allow combining. */
12911 if (MEM_P (operands
[1])
12912 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12913 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12914 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12915 if (push_operand (operands
[0], VOIDmode
))
12917 operands
[0] = copy_rtx (operands
[0]);
12918 PUT_MODE (operands
[0], Pmode
);
12921 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12922 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12923 emit_move_insn (operands
[0], operands
[1]);
12927 /* The only non-offsettable memory we handle is push. */
12928 if (push_operand (operands
[0], VOIDmode
))
12931 gcc_assert (!MEM_P (operands
[0])
12932 || offsettable_memref_p (operands
[0]));
12934 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12935 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12937 /* When emitting push, take care for source operands on the stack. */
12938 if (push
&& MEM_P (operands
[1])
12939 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12942 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12943 XEXP (part
[1][2], 0));
12944 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12945 XEXP (part
[1][1], 0));
12948 /* We need to do copy in the right order in case an address register
12949 of the source overlaps the destination. */
12950 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12952 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12954 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12957 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12960 /* Collision in the middle part can be handled by reordering. */
12961 if (collisions
== 1 && nparts
== 3
12962 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12965 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12966 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12969 /* If there are more collisions, we can't handle it by reordering.
12970 Do an lea to the last part and use only one colliding move. */
12971 else if (collisions
> 1)
12977 base
= part
[0][nparts
- 1];
12979 /* Handle the case when the last part isn't valid for lea.
12980 Happens in 64-bit mode storing the 12-byte XFmode. */
12981 if (GET_MODE (base
) != Pmode
)
12982 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12984 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12985 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12986 part
[1][1] = replace_equiv_address (part
[1][1],
12987 plus_constant (base
, UNITS_PER_WORD
));
12989 part
[1][2] = replace_equiv_address (part
[1][2],
12990 plus_constant (base
, 8));
13000 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13001 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13002 emit_move_insn (part
[0][2], part
[1][2]);
13007 /* In 64bit mode we don't have 32bit push available. In case this is
13008 register, it is OK - we will just use larger counterpart. We also
13009 retype memory - these comes from attempt to avoid REX prefix on
13010 moving of second half of TFmode value. */
13011 if (GET_MODE (part
[1][1]) == SImode
)
13013 switch (GET_CODE (part
[1][1]))
13016 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13020 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13024 gcc_unreachable ();
13027 if (GET_MODE (part
[1][0]) == SImode
)
13028 part
[1][0] = part
[1][1];
13031 emit_move_insn (part
[0][1], part
[1][1]);
13032 emit_move_insn (part
[0][0], part
[1][0]);
13036 /* Choose correct order to not overwrite the source before it is copied. */
13037 if ((REG_P (part
[0][0])
13038 && REG_P (part
[1][1])
13039 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13041 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13043 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13047 operands
[2] = part
[0][2];
13048 operands
[3] = part
[0][1];
13049 operands
[4] = part
[0][0];
13050 operands
[5] = part
[1][2];
13051 operands
[6] = part
[1][1];
13052 operands
[7] = part
[1][0];
13056 operands
[2] = part
[0][1];
13057 operands
[3] = part
[0][0];
13058 operands
[5] = part
[1][1];
13059 operands
[6] = part
[1][0];
13066 operands
[2] = part
[0][0];
13067 operands
[3] = part
[0][1];
13068 operands
[4] = part
[0][2];
13069 operands
[5] = part
[1][0];
13070 operands
[6] = part
[1][1];
13071 operands
[7] = part
[1][2];
13075 operands
[2] = part
[0][0];
13076 operands
[3] = part
[0][1];
13077 operands
[5] = part
[1][0];
13078 operands
[6] = part
[1][1];
13082 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13085 if (CONST_INT_P (operands
[5])
13086 && operands
[5] != const0_rtx
13087 && REG_P (operands
[2]))
13089 if (CONST_INT_P (operands
[6])
13090 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13091 operands
[6] = operands
[2];
13094 && CONST_INT_P (operands
[7])
13095 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13096 operands
[7] = operands
[2];
13100 && CONST_INT_P (operands
[6])
13101 && operands
[6] != const0_rtx
13102 && REG_P (operands
[3])
13103 && CONST_INT_P (operands
[7])
13104 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13105 operands
[7] = operands
[3];
13108 emit_move_insn (operands
[2], operands
[5]);
13109 emit_move_insn (operands
[3], operands
[6]);
13111 emit_move_insn (operands
[4], operands
[7]);
13116 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13117 left shift by a constant, either using a single shift or
13118 a sequence of add instructions. */
13121 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13125 emit_insn ((mode
== DImode
13127 : gen_adddi3
) (operand
, operand
, operand
));
13129 else if (!optimize_size
13130 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13133 for (i
=0; i
<count
; i
++)
13135 emit_insn ((mode
== DImode
13137 : gen_adddi3
) (operand
, operand
, operand
));
13141 emit_insn ((mode
== DImode
13143 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13147 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13149 rtx low
[2], high
[2];
13151 const int single_width
= mode
== DImode
? 32 : 64;
13153 if (CONST_INT_P (operands
[2]))
13155 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13156 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13158 if (count
>= single_width
)
13160 emit_move_insn (high
[0], low
[1]);
13161 emit_move_insn (low
[0], const0_rtx
);
13163 if (count
> single_width
)
13164 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13168 if (!rtx_equal_p (operands
[0], operands
[1]))
13169 emit_move_insn (operands
[0], operands
[1]);
13170 emit_insn ((mode
== DImode
13172 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13173 ix86_expand_ashl_const (low
[0], count
, mode
);
13178 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13180 if (operands
[1] == const1_rtx
)
13182 /* Assuming we've chosen a QImode capable registers, then 1 << N
13183 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13184 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13186 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13188 ix86_expand_clear (low
[0]);
13189 ix86_expand_clear (high
[0]);
13190 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13192 d
= gen_lowpart (QImode
, low
[0]);
13193 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13194 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13195 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13197 d
= gen_lowpart (QImode
, high
[0]);
13198 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13199 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13200 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13203 /* Otherwise, we can get the same results by manually performing
13204 a bit extract operation on bit 5/6, and then performing the two
13205 shifts. The two methods of getting 0/1 into low/high are exactly
13206 the same size. Avoiding the shift in the bit extract case helps
13207 pentium4 a bit; no one else seems to care much either way. */
13212 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13213 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13215 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13216 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13218 emit_insn ((mode
== DImode
13220 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13221 emit_insn ((mode
== DImode
13223 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13224 emit_move_insn (low
[0], high
[0]);
13225 emit_insn ((mode
== DImode
13227 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13230 emit_insn ((mode
== DImode
13232 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13233 emit_insn ((mode
== DImode
13235 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13239 if (operands
[1] == constm1_rtx
)
13241 /* For -1 << N, we can avoid the shld instruction, because we
13242 know that we're shifting 0...31/63 ones into a -1. */
13243 emit_move_insn (low
[0], constm1_rtx
);
13245 emit_move_insn (high
[0], low
[0]);
13247 emit_move_insn (high
[0], constm1_rtx
);
13251 if (!rtx_equal_p (operands
[0], operands
[1]))
13252 emit_move_insn (operands
[0], operands
[1]);
13254 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13255 emit_insn ((mode
== DImode
13257 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13260 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13262 if (TARGET_CMOVE
&& scratch
)
13264 ix86_expand_clear (scratch
);
13265 emit_insn ((mode
== DImode
13266 ? gen_x86_shift_adj_1
13267 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13270 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13274 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13276 rtx low
[2], high
[2];
13278 const int single_width
= mode
== DImode
? 32 : 64;
13280 if (CONST_INT_P (operands
[2]))
13282 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13283 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13285 if (count
== single_width
* 2 - 1)
13287 emit_move_insn (high
[0], high
[1]);
13288 emit_insn ((mode
== DImode
13290 : gen_ashrdi3
) (high
[0], high
[0],
13291 GEN_INT (single_width
- 1)));
13292 emit_move_insn (low
[0], high
[0]);
13295 else if (count
>= single_width
)
13297 emit_move_insn (low
[0], high
[1]);
13298 emit_move_insn (high
[0], low
[0]);
13299 emit_insn ((mode
== DImode
13301 : gen_ashrdi3
) (high
[0], high
[0],
13302 GEN_INT (single_width
- 1)));
13303 if (count
> single_width
)
13304 emit_insn ((mode
== DImode
13306 : gen_ashrdi3
) (low
[0], low
[0],
13307 GEN_INT (count
- single_width
)));
13311 if (!rtx_equal_p (operands
[0], operands
[1]))
13312 emit_move_insn (operands
[0], operands
[1]);
13313 emit_insn ((mode
== DImode
13315 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13316 emit_insn ((mode
== DImode
13318 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13323 if (!rtx_equal_p (operands
[0], operands
[1]))
13324 emit_move_insn (operands
[0], operands
[1]);
13326 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13328 emit_insn ((mode
== DImode
13330 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13331 emit_insn ((mode
== DImode
13333 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13335 if (TARGET_CMOVE
&& scratch
)
13337 emit_move_insn (scratch
, high
[0]);
13338 emit_insn ((mode
== DImode
13340 : gen_ashrdi3
) (scratch
, scratch
,
13341 GEN_INT (single_width
- 1)));
13342 emit_insn ((mode
== DImode
13343 ? gen_x86_shift_adj_1
13344 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13348 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13353 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13355 rtx low
[2], high
[2];
13357 const int single_width
= mode
== DImode
? 32 : 64;
13359 if (CONST_INT_P (operands
[2]))
13361 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13362 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13364 if (count
>= single_width
)
13366 emit_move_insn (low
[0], high
[1]);
13367 ix86_expand_clear (high
[0]);
13369 if (count
> single_width
)
13370 emit_insn ((mode
== DImode
13372 : gen_lshrdi3
) (low
[0], low
[0],
13373 GEN_INT (count
- single_width
)));
13377 if (!rtx_equal_p (operands
[0], operands
[1]))
13378 emit_move_insn (operands
[0], operands
[1]);
13379 emit_insn ((mode
== DImode
13381 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13382 emit_insn ((mode
== DImode
13384 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13389 if (!rtx_equal_p (operands
[0], operands
[1]))
13390 emit_move_insn (operands
[0], operands
[1]);
13392 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13394 emit_insn ((mode
== DImode
13396 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13397 emit_insn ((mode
== DImode
13399 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13401 /* Heh. By reversing the arguments, we can reuse this pattern. */
13402 if (TARGET_CMOVE
&& scratch
)
13404 ix86_expand_clear (scratch
);
13405 emit_insn ((mode
== DImode
13406 ? gen_x86_shift_adj_1
13407 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13411 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13415 /* Predict just emitted jump instruction to be taken with probability PROB. */
13417 predict_jump (int prob
)
13419 rtx insn
= get_last_insn ();
13420 gcc_assert (JUMP_P (insn
));
13422 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13427 /* Helper function for the string operations below. Dest VARIABLE whether
13428 it is aligned to VALUE bytes. If true, jump to the label. */
13430 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13432 rtx label
= gen_label_rtx ();
13433 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13434 if (GET_MODE (variable
) == DImode
)
13435 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13437 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13438 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13441 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13443 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13447 /* Adjust COUNTER by the VALUE. */
13449 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13451 if (GET_MODE (countreg
) == DImode
)
13452 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13454 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13457 /* Zero extend possibly SImode EXP to Pmode register. */
13459 ix86_zero_extend_to_Pmode (rtx exp
)
13462 if (GET_MODE (exp
) == VOIDmode
)
13463 return force_reg (Pmode
, exp
);
13464 if (GET_MODE (exp
) == Pmode
)
13465 return copy_to_mode_reg (Pmode
, exp
);
13466 r
= gen_reg_rtx (Pmode
);
13467 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13471 /* Divide COUNTREG by SCALE. */
13473 scale_counter (rtx countreg
, int scale
)
13476 rtx piece_size_mask
;
13480 if (CONST_INT_P (countreg
))
13481 return GEN_INT (INTVAL (countreg
) / scale
);
13482 gcc_assert (REG_P (countreg
));
13484 piece_size_mask
= GEN_INT (scale
- 1);
13485 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13486 GEN_INT (exact_log2 (scale
)),
13487 NULL
, 1, OPTAB_DIRECT
);
13491 /* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
13492 for constant loop counts. */
13494 static enum machine_mode
13495 counter_mode (rtx count_exp
)
13497 if (GET_MODE (count_exp
) != VOIDmode
)
13498 return GET_MODE (count_exp
);
13499 if (GET_CODE (count_exp
) != CONST_INT
)
13501 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13506 /* When SRCPTR is non-NULL, output simple loop to move memory
13507 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13508 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13509 equivalent loop to set memory by VALUE (supposed to be in MODE).
13511 The size is rounded down to whole number of chunk size moved at once.
13512 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13516 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13517 rtx destptr
, rtx srcptr
, rtx value
,
13518 rtx count
, enum machine_mode mode
, int unroll
,
13521 rtx out_label
, top_label
, iter
, tmp
;
13522 enum machine_mode iter_mode
= counter_mode (count
);
13523 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13524 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13530 top_label
= gen_label_rtx ();
13531 out_label
= gen_label_rtx ();
13532 iter
= gen_reg_rtx (iter_mode
);
13534 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13535 NULL
, 1, OPTAB_DIRECT
);
13536 /* Those two should combine. */
13537 if (piece_size
== const1_rtx
)
13539 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13541 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13543 emit_move_insn (iter
, const0_rtx
);
13545 emit_label (top_label
);
13547 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13548 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13549 destmem
= change_address (destmem
, mode
, x_addr
);
13553 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13554 srcmem
= change_address (srcmem
, mode
, y_addr
);
13556 /* When unrolling for chips that reorder memory reads and writes,
13557 we can save registers by using single temporary.
13558 Also using 4 temporaries is overkill in 32bit mode. */
13559 if (!TARGET_64BIT
&& 0)
13561 for (i
= 0; i
< unroll
; i
++)
13566 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13568 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13570 emit_move_insn (destmem
, srcmem
);
13576 gcc_assert (unroll
<= 4);
13577 for (i
= 0; i
< unroll
; i
++)
13579 tmpreg
[i
] = gen_reg_rtx (mode
);
13583 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13585 emit_move_insn (tmpreg
[i
], srcmem
);
13587 for (i
= 0; i
< unroll
; i
++)
13592 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13594 emit_move_insn (destmem
, tmpreg
[i
]);
13599 for (i
= 0; i
< unroll
; i
++)
13603 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13604 emit_move_insn (destmem
, value
);
13607 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13608 true, OPTAB_LIB_WIDEN
);
13610 emit_move_insn (iter
, tmp
);
13612 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13614 if (expected_size
!= -1)
13616 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13617 if (expected_size
== 0)
13619 else if (expected_size
> REG_BR_PROB_BASE
)
13620 predict_jump (REG_BR_PROB_BASE
- 1);
13622 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13625 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13626 iter
= ix86_zero_extend_to_Pmode (iter
);
13627 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13628 true, OPTAB_LIB_WIDEN
);
13629 if (tmp
!= destptr
)
13630 emit_move_insn (destptr
, tmp
);
13633 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13634 true, OPTAB_LIB_WIDEN
);
13636 emit_move_insn (srcptr
, tmp
);
13638 emit_label (out_label
);
13641 /* Output "rep; mov" instruction.
13642 Arguments have same meaning as for previous function */
13644 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13645 rtx destptr
, rtx srcptr
,
13647 enum machine_mode mode
)
13653 /* If the size is known, it is shorter to use rep movs. */
13654 if (mode
== QImode
&& CONST_INT_P (count
)
13655 && !(INTVAL (count
) & 3))
13658 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13659 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13660 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13661 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13662 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13663 if (mode
!= QImode
)
13665 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13666 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13667 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13668 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13669 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13670 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13674 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13675 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13677 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13681 /* Output "rep; stos" instruction.
13682 Arguments have same meaning as for previous function */
13684 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13686 enum machine_mode mode
)
13691 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13692 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13693 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13694 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13695 if (mode
!= QImode
)
13697 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13698 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13699 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13702 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13703 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13707 emit_strmov (rtx destmem
, rtx srcmem
,
13708 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13710 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13711 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13712 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13715 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13717 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13718 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13721 if (CONST_INT_P (count
))
13723 HOST_WIDE_INT countval
= INTVAL (count
);
13726 if ((countval
& 0x10) && max_size
> 16)
13730 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13731 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13734 gcc_unreachable ();
13737 if ((countval
& 0x08) && max_size
> 8)
13740 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13743 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13744 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13748 if ((countval
& 0x04) && max_size
> 4)
13750 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13753 if ((countval
& 0x02) && max_size
> 2)
13755 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13758 if ((countval
& 0x01) && max_size
> 1)
13760 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13767 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13768 count
, 1, OPTAB_DIRECT
);
13769 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13770 count
, QImode
, 1, 4);
13774 /* When there are stringops, we can cheaply increase dest and src pointers.
13775 Otherwise we save code size by maintaining offset (zero is readily
13776 available from preceding rep operation) and using x86 addressing modes.
13778 if (TARGET_SINGLE_STRINGOP
)
13782 rtx label
= ix86_expand_aligntest (count
, 4, true);
13783 src
= change_address (srcmem
, SImode
, srcptr
);
13784 dest
= change_address (destmem
, SImode
, destptr
);
13785 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13786 emit_label (label
);
13787 LABEL_NUSES (label
) = 1;
13791 rtx label
= ix86_expand_aligntest (count
, 2, true);
13792 src
= change_address (srcmem
, HImode
, srcptr
);
13793 dest
= change_address (destmem
, HImode
, destptr
);
13794 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13795 emit_label (label
);
13796 LABEL_NUSES (label
) = 1;
13800 rtx label
= ix86_expand_aligntest (count
, 1, true);
13801 src
= change_address (srcmem
, QImode
, srcptr
);
13802 dest
= change_address (destmem
, QImode
, destptr
);
13803 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13804 emit_label (label
);
13805 LABEL_NUSES (label
) = 1;
13810 rtx offset
= force_reg (Pmode
, const0_rtx
);
13815 rtx label
= ix86_expand_aligntest (count
, 4, true);
13816 src
= change_address (srcmem
, SImode
, srcptr
);
13817 dest
= change_address (destmem
, SImode
, destptr
);
13818 emit_move_insn (dest
, src
);
13819 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13820 true, OPTAB_LIB_WIDEN
);
13822 emit_move_insn (offset
, tmp
);
13823 emit_label (label
);
13824 LABEL_NUSES (label
) = 1;
13828 rtx label
= ix86_expand_aligntest (count
, 2, true);
13829 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13830 src
= change_address (srcmem
, HImode
, tmp
);
13831 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13832 dest
= change_address (destmem
, HImode
, tmp
);
13833 emit_move_insn (dest
, src
);
13834 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13835 true, OPTAB_LIB_WIDEN
);
13837 emit_move_insn (offset
, tmp
);
13838 emit_label (label
);
13839 LABEL_NUSES (label
) = 1;
13843 rtx label
= ix86_expand_aligntest (count
, 1, true);
13844 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13845 src
= change_address (srcmem
, QImode
, tmp
);
13846 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13847 dest
= change_address (destmem
, QImode
, tmp
);
13848 emit_move_insn (dest
, src
);
13849 emit_label (label
);
13850 LABEL_NUSES (label
) = 1;
13855 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13857 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13858 rtx count
, int max_size
)
13861 expand_simple_binop (counter_mode (count
), AND
, count
,
13862 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13863 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13864 gen_lowpart (QImode
, value
), count
, QImode
,
13868 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13870 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13874 if (CONST_INT_P (count
))
13876 HOST_WIDE_INT countval
= INTVAL (count
);
13879 if ((countval
& 0x10) && max_size
> 16)
13883 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13884 emit_insn (gen_strset (destptr
, dest
, value
));
13885 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13886 emit_insn (gen_strset (destptr
, dest
, value
));
13889 gcc_unreachable ();
13892 if ((countval
& 0x08) && max_size
> 8)
13896 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13897 emit_insn (gen_strset (destptr
, dest
, value
));
13901 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13902 emit_insn (gen_strset (destptr
, dest
, value
));
13903 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13904 emit_insn (gen_strset (destptr
, dest
, value
));
13908 if ((countval
& 0x04) && max_size
> 4)
13910 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13911 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13914 if ((countval
& 0x02) && max_size
> 2)
13916 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13917 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13920 if ((countval
& 0x01) && max_size
> 1)
13922 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13923 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13930 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13935 rtx label
= ix86_expand_aligntest (count
, 16, true);
13938 dest
= change_address (destmem
, DImode
, destptr
);
13939 emit_insn (gen_strset (destptr
, dest
, value
));
13940 emit_insn (gen_strset (destptr
, dest
, value
));
13944 dest
= change_address (destmem
, SImode
, destptr
);
13945 emit_insn (gen_strset (destptr
, dest
, value
));
13946 emit_insn (gen_strset (destptr
, dest
, value
));
13947 emit_insn (gen_strset (destptr
, dest
, value
));
13948 emit_insn (gen_strset (destptr
, dest
, value
));
13950 emit_label (label
);
13951 LABEL_NUSES (label
) = 1;
13955 rtx label
= ix86_expand_aligntest (count
, 8, true);
13958 dest
= change_address (destmem
, DImode
, destptr
);
13959 emit_insn (gen_strset (destptr
, dest
, value
));
13963 dest
= change_address (destmem
, SImode
, destptr
);
13964 emit_insn (gen_strset (destptr
, dest
, value
));
13965 emit_insn (gen_strset (destptr
, dest
, value
));
13967 emit_label (label
);
13968 LABEL_NUSES (label
) = 1;
13972 rtx label
= ix86_expand_aligntest (count
, 4, true);
13973 dest
= change_address (destmem
, SImode
, destptr
);
13974 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13975 emit_label (label
);
13976 LABEL_NUSES (label
) = 1;
13980 rtx label
= ix86_expand_aligntest (count
, 2, true);
13981 dest
= change_address (destmem
, HImode
, destptr
);
13982 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13983 emit_label (label
);
13984 LABEL_NUSES (label
) = 1;
13988 rtx label
= ix86_expand_aligntest (count
, 1, true);
13989 dest
= change_address (destmem
, QImode
, destptr
);
13990 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13991 emit_label (label
);
13992 LABEL_NUSES (label
) = 1;
13996 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13997 DESIRED_ALIGNMENT. */
13999 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14000 rtx destptr
, rtx srcptr
, rtx count
,
14001 int align
, int desired_alignment
)
14003 if (align
<= 1 && desired_alignment
> 1)
14005 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14006 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14007 destmem
= change_address (destmem
, QImode
, destptr
);
14008 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14009 ix86_adjust_counter (count
, 1);
14010 emit_label (label
);
14011 LABEL_NUSES (label
) = 1;
14013 if (align
<= 2 && desired_alignment
> 2)
14015 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14016 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14017 destmem
= change_address (destmem
, HImode
, destptr
);
14018 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14019 ix86_adjust_counter (count
, 2);
14020 emit_label (label
);
14021 LABEL_NUSES (label
) = 1;
14023 if (align
<= 4 && desired_alignment
> 4)
14025 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14026 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14027 destmem
= change_address (destmem
, SImode
, destptr
);
14028 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14029 ix86_adjust_counter (count
, 4);
14030 emit_label (label
);
14031 LABEL_NUSES (label
) = 1;
14033 gcc_assert (desired_alignment
<= 8);
14036 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14037 DESIRED_ALIGNMENT. */
14039 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14040 int align
, int desired_alignment
)
14042 if (align
<= 1 && desired_alignment
> 1)
14044 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14045 destmem
= change_address (destmem
, QImode
, destptr
);
14046 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14047 ix86_adjust_counter (count
, 1);
14048 emit_label (label
);
14049 LABEL_NUSES (label
) = 1;
14051 if (align
<= 2 && desired_alignment
> 2)
14053 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14054 destmem
= change_address (destmem
, HImode
, destptr
);
14055 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14056 ix86_adjust_counter (count
, 2);
14057 emit_label (label
);
14058 LABEL_NUSES (label
) = 1;
14060 if (align
<= 4 && desired_alignment
> 4)
14062 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14063 destmem
= change_address (destmem
, SImode
, destptr
);
14064 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14065 ix86_adjust_counter (count
, 4);
14066 emit_label (label
);
14067 LABEL_NUSES (label
) = 1;
14069 gcc_assert (desired_alignment
<= 8);
14072 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14073 static enum stringop_alg
14074 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14075 int *dynamic_check
)
14077 const struct stringop_algs
* algs
;
14079 *dynamic_check
= -1;
14081 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14083 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14084 if (stringop_alg
!= no_stringop
)
14085 return stringop_alg
;
14086 /* rep; movq or rep; movl is the smallest variant. */
14087 else if (optimize_size
)
14089 if (!count
|| (count
& 3))
14090 return rep_prefix_1_byte
;
14092 return rep_prefix_4_byte
;
14094 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14096 else if (expected_size
!= -1 && expected_size
< 4)
14097 return loop_1_byte
;
14098 else if (expected_size
!= -1)
14101 enum stringop_alg alg
= libcall
;
14102 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14104 gcc_assert (algs
->size
[i
].max
);
14105 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14107 if (algs
->size
[i
].alg
!= libcall
)
14108 alg
= algs
->size
[i
].alg
;
14109 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14110 last non-libcall inline algorithm. */
14111 if (TARGET_INLINE_ALL_STRINGOPS
)
14113 /* When the current size is best to be copied by a libcall,
14114 but we are still forced to inline, run the heuristic bellow
14115 that will pick code for medium sized blocks. */
14116 if (alg
!= libcall
)
14121 return algs
->size
[i
].alg
;
14124 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14126 /* When asked to inline the call anyway, try to pick meaningful choice.
14127 We look for maximal size of block that is faster to copy by hand and
14128 take blocks of at most of that size guessing that average size will
14129 be roughly half of the block.
14131 If this turns out to be bad, we might simply specify the preferred
14132 choice in ix86_costs. */
14133 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14134 && algs
->unknown_size
== libcall
)
14137 enum stringop_alg alg
;
14140 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14141 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14142 max
= algs
->size
[i
].max
;
14145 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14146 gcc_assert (*dynamic_check
== -1);
14147 gcc_assert (alg
!= libcall
);
14148 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14149 *dynamic_check
= max
;
14152 return algs
->unknown_size
;
14155 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14156 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14158 decide_alignment (int align
,
14159 enum stringop_alg alg
,
14162 int desired_align
= 0;
14166 gcc_unreachable ();
14168 case unrolled_loop
:
14169 desired_align
= GET_MODE_SIZE (Pmode
);
14171 case rep_prefix_8_byte
:
14174 case rep_prefix_4_byte
:
14175 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14176 copying whole cacheline at once. */
14177 if (TARGET_PENTIUMPRO
)
14182 case rep_prefix_1_byte
:
14183 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14184 copying whole cacheline at once. */
14185 if (TARGET_PENTIUMPRO
)
14199 if (desired_align
< align
)
14200 desired_align
= align
;
14201 if (expected_size
!= -1 && expected_size
< 4)
14202 desired_align
= align
;
14203 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14216 /* Expand string move (memcpy) operation. Use i386 string operations when
14217 profitable. expand_clrmem contains similar code. The code depends upon
14218 architecture, block size and alignment, but always has the same
14221 1) Prologue guard: Conditional that jumps up to epilogues for small
14222 blocks that can be handled by epilogue alone. This is faster but
14223 also needed for correctness, since prologue assume the block is larger
14224 than the desired alignment.
14226 Optional dynamic check for size and libcall for large
14227 blocks is emitted here too, with -minline-stringops-dynamically.
14229 2) Prologue: copy first few bytes in order to get destination aligned
14230 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14231 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14232 We emit either a jump tree on power of two sized blocks, or a byte loop.
14234 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14235 with specified algorithm.
14237 4) Epilogue: code copying tail of the block that is too small to be
14238 handled by main body (or up to size guarded by prologue guard). */
14241 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14242 rtx expected_align_exp
, rtx expected_size_exp
)
14248 rtx jump_around_label
= NULL
;
14249 HOST_WIDE_INT align
= 1;
14250 unsigned HOST_WIDE_INT count
= 0;
14251 HOST_WIDE_INT expected_size
= -1;
14252 int size_needed
= 0, epilogue_size_needed
;
14253 int desired_align
= 0;
14254 enum stringop_alg alg
;
14257 if (CONST_INT_P (align_exp
))
14258 align
= INTVAL (align_exp
);
14259 /* i386 can do misaligned access on reasonably increased cost. */
14260 if (CONST_INT_P (expected_align_exp
)
14261 && INTVAL (expected_align_exp
) > align
)
14262 align
= INTVAL (expected_align_exp
);
14263 if (CONST_INT_P (count_exp
))
14264 count
= expected_size
= INTVAL (count_exp
);
14265 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14266 expected_size
= INTVAL (expected_size_exp
);
14268 /* Step 0: Decide on preferred algorithm, desired alignment and
14269 size of chunks to be copied by main loop. */
14271 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14272 desired_align
= decide_alignment (align
, alg
, expected_size
);
14274 if (!TARGET_ALIGN_STRINGOPS
)
14275 align
= desired_align
;
14277 if (alg
== libcall
)
14279 gcc_assert (alg
!= no_stringop
);
14281 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14282 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14283 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14288 gcc_unreachable ();
14290 size_needed
= GET_MODE_SIZE (Pmode
);
14292 case unrolled_loop
:
14293 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14295 case rep_prefix_8_byte
:
14298 case rep_prefix_4_byte
:
14301 case rep_prefix_1_byte
:
14307 epilogue_size_needed
= size_needed
;
14309 /* Step 1: Prologue guard. */
14311 /* Alignment code needs count to be in register. */
14312 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14314 enum machine_mode mode
= SImode
;
14315 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14317 count_exp
= force_reg (mode
, count_exp
);
14319 gcc_assert (desired_align
>= 1 && align
>= 1);
14321 /* Ensure that alignment prologue won't copy past end of block. */
14322 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14324 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14325 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14326 Make sure it is power of 2. */
14327 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14329 label
= gen_label_rtx ();
14330 emit_cmp_and_jump_insns (count_exp
,
14331 GEN_INT (epilogue_size_needed
),
14332 LTU
, 0, counter_mode (count_exp
), 1, label
);
14333 if (GET_CODE (count_exp
) == CONST_INT
)
14335 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14336 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14338 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14340 /* Emit code to decide on runtime whether library call or inline should be
14342 if (dynamic_check
!= -1)
14344 rtx hot_label
= gen_label_rtx ();
14345 jump_around_label
= gen_label_rtx ();
14346 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14347 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14348 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14349 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14350 emit_jump (jump_around_label
);
14351 emit_label (hot_label
);
14354 /* Step 2: Alignment prologue. */
14356 if (desired_align
> align
)
14358 /* Except for the first move in epilogue, we no longer know
14359 constant offset in aliasing info. It don't seems to worth
14360 the pain to maintain it for the first move, so throw away
14362 src
= change_address (src
, BLKmode
, srcreg
);
14363 dst
= change_address (dst
, BLKmode
, destreg
);
14364 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14367 if (label
&& size_needed
== 1)
14369 emit_label (label
);
14370 LABEL_NUSES (label
) = 1;
14374 /* Step 3: Main loop. */
14380 gcc_unreachable ();
14382 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14383 count_exp
, QImode
, 1, expected_size
);
14386 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14387 count_exp
, Pmode
, 1, expected_size
);
14389 case unrolled_loop
:
14390 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14391 registers for 4 temporaries anyway. */
14392 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14393 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14396 case rep_prefix_8_byte
:
14397 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14400 case rep_prefix_4_byte
:
14401 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14404 case rep_prefix_1_byte
:
14405 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14409 /* Adjust properly the offset of src and dest memory for aliasing. */
14410 if (CONST_INT_P (count_exp
))
14412 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14413 (count
/ size_needed
) * size_needed
);
14414 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14415 (count
/ size_needed
) * size_needed
);
14419 src
= change_address (src
, BLKmode
, srcreg
);
14420 dst
= change_address (dst
, BLKmode
, destreg
);
14423 /* Step 4: Epilogue to copy the remaining bytes. */
14427 /* When the main loop is done, COUNT_EXP might hold original count,
14428 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14429 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14430 bytes. Compensate if needed. */
14432 if (size_needed
< epilogue_size_needed
)
14435 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14436 GEN_INT (size_needed
- 1), count_exp
, 1,
14438 if (tmp
!= count_exp
)
14439 emit_move_insn (count_exp
, tmp
);
14441 emit_label (label
);
14442 LABEL_NUSES (label
) = 1;
14445 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14446 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14447 epilogue_size_needed
);
14448 if (jump_around_label
)
14449 emit_label (jump_around_label
);
14453 /* Helper function for memcpy. For QImode value 0xXY produce
14454 0xXYXYXYXY of wide specified by MODE. This is essentially
14455 a * 0x10101010, but we can do slightly better than
14456 synth_mult by unwinding the sequence by hand on CPUs with
14459 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14461 enum machine_mode valmode
= GET_MODE (val
);
14463 int nops
= mode
== DImode
? 3 : 2;
14465 gcc_assert (mode
== SImode
|| mode
== DImode
);
14466 if (val
== const0_rtx
)
14467 return copy_to_mode_reg (mode
, const0_rtx
);
14468 if (CONST_INT_P (val
))
14470 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14474 if (mode
== DImode
)
14475 v
|= (v
<< 16) << 16;
14476 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14479 if (valmode
== VOIDmode
)
14481 if (valmode
!= QImode
)
14482 val
= gen_lowpart (QImode
, val
);
14483 if (mode
== QImode
)
14485 if (!TARGET_PARTIAL_REG_STALL
)
14487 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14488 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14489 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14490 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14492 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14493 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14494 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14499 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14501 if (!TARGET_PARTIAL_REG_STALL
)
14502 if (mode
== SImode
)
14503 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14505 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14508 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14509 NULL
, 1, OPTAB_DIRECT
);
14511 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14513 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14514 NULL
, 1, OPTAB_DIRECT
);
14515 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14516 if (mode
== SImode
)
14518 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14519 NULL
, 1, OPTAB_DIRECT
);
14520 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
/* NOTE(review): line-mangled extraction; tokens kept byte-identical, only
   comments added.  The condition opening the first `if` (original line
   14533, before "&& (size_needed > 4 ...)") was dropped -- presumably a
   64-bit-target test, TODO confirm against the original file.
   Purpose: pick the widest mode (DI/SI/HImode) actually needed by the main
   loop and the alignment prologue, and promote VAL to it once.  */
14525 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14526 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14527 alignment from ALIGN to DESIRED_ALIGN. */
14529 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
/* Widest case: more than 4 bytes per chunk, or prologue may store 8 bytes.  */
14534 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14535 promoted_val
= promote_duplicated_reg (DImode
, val
);
14536 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14537 promoted_val
= promote_duplicated_reg (SImode
, val
);
14538 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14539 promoted_val
= promote_duplicated_reg (HImode
, val
);
/* Single-byte stores only: no promotion needed.  */
14541 promoted_val
= val
;
14543 return promoted_val
;
/* NOTE(review): line-mangled extraction -- statements are split across
   physical lines and many original lines were dropped (braces, `switch`
   heads, `break`s, trailing arguments).  Code tokens below are preserved
   byte-for-byte; only comments were added.
   Purpose (per the visible header comment): expand memset/bzero inline,
   mirroring the step structure of expand_movmem: 0) choose algorithm and
   alignment, 1) small-size guard, 2) alignment prologue, 3) main loop,
   4) epilogue for the remaining bytes.  */
14546 /* Expand string clear operation (bzero). Use i386 string operations when
14547 profitable. See expand_movmem comment for explanation of individual
14548 steps performed. */
14550 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14551 rtx expected_align_exp
, rtx expected_size_exp
)
14556 rtx jump_around_label
= NULL
;
14557 HOST_WIDE_INT align
= 1;
14558 unsigned HOST_WIDE_INT count
= 0;
14559 HOST_WIDE_INT expected_size
= -1;
14560 int size_needed
= 0, epilogue_size_needed
;
14561 int desired_align
= 0;
14562 enum stringop_alg alg
;
14563 rtx promoted_val
= NULL
;
14564 bool force_loopy_epilogue
= false;
/* Harvest compile-time knowledge about alignment and size.  */
14567 if (CONST_INT_P (align_exp
))
14568 align
= INTVAL (align_exp
);
14569 /* i386 can do misaligned access on reasonably increased cost. */
14570 if (CONST_INT_P (expected_align_exp
)
14571 && INTVAL (expected_align_exp
) > align
)
14572 align
= INTVAL (expected_align_exp
);
14573 if (CONST_INT_P (count_exp
))
14574 count
= expected_size
= INTVAL (count_exp
);
14575 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14576 expected_size
= INTVAL (expected_size_exp
);
14578 /* Step 0: Decide on preferred algorithm, desired alignment and
14579 size of chunks to be copied by main loop. */
14581 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14582 desired_align
= decide_alignment (align
, alg
, expected_size
);
14584 if (!TARGET_ALIGN_STRINGOPS
)
14585 align
= desired_align
;
/* libcall chosen: nothing to expand inline (body dropped by extraction).  */
14587 if (alg
== libcall
)
14589 gcc_assert (alg
!= no_stringop
);
14591 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14592 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
/* Per-algorithm chunk size (switch head lines were dropped).  */
14597 gcc_unreachable ();
14599 size_needed
= GET_MODE_SIZE (Pmode
);
14601 case unrolled_loop
:
14602 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14604 case rep_prefix_8_byte
:
14607 case rep_prefix_4_byte
:
14610 case rep_prefix_1_byte
:
14615 epilogue_size_needed
= size_needed
;
14617 /* Step 1: Prologue guard. */
14619 /* Alignment code needs count to be in register. */
14620 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14622 enum machine_mode mode
= SImode
;
14623 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14625 count_exp
= force_reg (mode
, count_exp
);
14627 /* Do the cheap promotion to allow better CSE across the
14628 main loop and epilogue (ie one load of the big constant in the
14629 front of all code. */
14630 if (CONST_INT_P (val_exp
))
14631 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14632 desired_align
, align
);
14633 /* Ensure that alignment prologue won't copy past end of block. */
14634 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14636 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14637 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14638 Make sure it is power of 2. */
14639 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14641 /* To improve performance of small blocks, we jump around the VAL
14642 promoting mode. This mean that if the promoted VAL is not constant,
14643 we might not use it in the epilogue and have to use byte
14645 if (epilogue_size_needed
> 2 && !promoted_val
)
14646 force_loopy_epilogue
= true;
14647 label
= gen_label_rtx ();
14648 emit_cmp_and_jump_insns (count_exp
,
14649 GEN_INT (epilogue_size_needed
),
14650 LTU
, 0, counter_mode (count_exp
), 1, label
);
/* Branch prediction hints for the small-block bypass.  */
14651 if (GET_CODE (count_exp
) == CONST_INT
)
14653 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14654 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14656 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
/* Dynamic check: small sizes go to the library call.  */
14658 if (dynamic_check
!= -1)
14660 rtx hot_label
= gen_label_rtx ();
14661 jump_around_label
= gen_label_rtx ();
14662 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14663 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14664 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14665 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14666 emit_jump (jump_around_label
);
14667 emit_label (hot_label
);
14670 /* Step 2: Alignment prologue. */
14672 /* Do the expensive promotion once we branched off the small blocks. */
14674 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14675 desired_align
, align
);
14676 gcc_assert (desired_align
>= 1 && align
>= 1);
14678 if (desired_align
> align
)
14680 /* Except for the first move in epilogue, we no longer know
14681 constant offset in aliasing info. It don't seems to worth
14682 the pain to maintain it for the first move, so throw away
14684 dst
= change_address (dst
, BLKmode
, destreg
);
14685 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
/* If all the work was done by the prologue, close the small-size guard.  */
14688 if (label
&& size_needed
== 1)
14690 emit_label (label
);
14691 LABEL_NUSES (label
) = 1;
14695 /* Step 3: Main loop. */
14701 gcc_unreachable ();
14703 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14704 count_exp
, QImode
, 1, expected_size
);
14707 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14708 count_exp
, Pmode
, 1, expected_size
);
14710 case unrolled_loop
:
14711 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14712 count_exp
, Pmode
, 4, expected_size
);
14714 case rep_prefix_8_byte
:
14715 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14718 case rep_prefix_4_byte
:
14719 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14722 case rep_prefix_1_byte
:
14723 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14727 /* Adjust properly the offset of src and dest memory for aliasing. */
14728 if (CONST_INT_P (count_exp
))
14729 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14730 (count
/ size_needed
) * size_needed
);
14732 dst
= change_address (dst
, BLKmode
, destreg
);
14734 /* Step 4: Epilogue to copy the remaining bytes. */
14738 /* When the main loop is done, COUNT_EXP might hold original count,
14739 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14740 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14741 bytes. Compensate if needed. */
14743 if (size_needed
< desired_align
- align
)
14746 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14747 GEN_INT (size_needed
- 1), count_exp
, 1,
14749 size_needed
= desired_align
- align
+ 1;
14750 if (tmp
!= count_exp
)
14751 emit_move_insn (count_exp
, tmp
);
14753 emit_label (label
);
14754 LABEL_NUSES (label
) = 1;
14756 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
/* Non-constant VAL with a wide epilogue: fall back to a byte loop.  */
14758 if (force_loopy_epilogue
)
14759 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14762 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14765 if (jump_around_label
)
14766 emit_label (jump_around_label
);
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Expands strlen inline: either the unrolled SImode
   scanner (ix86_expand_strlensi_unroll_1) or a repne-scasb sequence via
   UNSPEC_SCAS.  Several original lines (braces, `else`, returns) were
   dropped by the extraction.  */
14770 /* Expand strlen. */
14772 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14774 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14776 /* The generic case of strlen expander is long. Avoid it's
14777 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
/* Guard for choosing the unrolled variant; note the condition is repeated
   at 14788 below -- original control structure lines were dropped here.  */
14779 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14780 && !TARGET_INLINE_ALL_STRINGOPS
14782 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14785 addr
= force_reg (Pmode
, XEXP (src
, 0));
14786 scratch1
= gen_reg_rtx (Pmode
);
14788 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14791 /* Well it seems that some optimizer does not combine a call like
14792 foo(strlen(bar), strlen(bar));
14793 when the move and the subtraction is done here. It does calculate
14794 the length just once when these instructions are done inside of
14795 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14796 often used and I use one fewer register for the lifetime of
14797 output_strlen_unroll() this is better. */
14799 emit_move_insn (out
, addr
);
14801 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14803 /* strlensi_unroll_1 returns the address of the zero at the end of
14804 the string, like memchr(), so compute the length by subtracting
14805 the start address. */
/* 64-bit vs 32-bit subtraction (the TARGET_64BIT test line was dropped).  */
14807 emit_insn (gen_subdi3 (out
, out
, addr
));
14809 emit_insn (gen_subsi3 (out
, out
, addr
));
/* scasb path: scratch4 = -1 is the maximal count for repne scasb.  */
14814 scratch2
= gen_reg_rtx (Pmode
);
14815 scratch3
= gen_reg_rtx (Pmode
);
14816 scratch4
= force_reg (Pmode
, constm1_rtx
);
14818 emit_move_insn (scratch3
, addr
);
14819 eoschar
= force_reg (QImode
, eoschar
);
14821 src
= replace_equiv_address_nv (src
, scratch3
);
14823 /* If .md starts supporting :P, this can be done in .md. */
14824 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14825 scratch4
), UNSPEC_SCAS
);
14826 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
/* length = ~scratch1 - 1 (scratch1 holds remaining count after scasb).  */
14829 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14830 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14834 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14835 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Body of the unrolled strlen: byte-scan up to a
   4-byte boundary, then scan 4 bytes per iteration using the classic
   "(x - 0x01010101) & ~x & 0x80808080" zero-byte test, then fix up OUT to
   point at the terminating zero.  Many structural lines (braces, labels,
   `else`, TARGET_64BIT guards) were dropped by the extraction.  */
14841 /* Expand the appropriate insns for doing strlen if not just doing
14844 out = result, initialized with the start address
14845 align_rtx = alignment of the address.
14846 scratch = scratch register, initialized with the startaddress when
14847 not aligned, otherwise undefined
14849 This is just the body. It needs the initializations mentioned above and
14850 some address computing at the end. These things are done in i386.md. */
14853 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14857 rtx align_2_label
= NULL_RTX
;
14858 rtx align_3_label
= NULL_RTX
;
14859 rtx align_4_label
= gen_label_rtx ();
14860 rtx end_0_label
= gen_label_rtx ();
14862 rtx tmpreg
= gen_reg_rtx (SImode
);
14863 rtx scratch
= gen_reg_rtx (SImode
);
14867 if (CONST_INT_P (align_rtx
))
14868 align
= INTVAL (align_rtx
);
14870 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14872 /* Is there a known alignment and is it less than 4? */
14875 rtx scratch1
= gen_reg_rtx (Pmode
);
14876 emit_move_insn (scratch1
, out
);
14877 /* Is there a known alignment and is it not 2? */
14880 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14881 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14883 /* Leave just the 3 lower bits. */
14884 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14885 NULL_RTX
, 0, OPTAB_WIDEN
);
/* Dispatch on (addr & 3): 0 -> aligned, 2 -> 2-byte case, >2 -> 3-byte.  */
14887 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14888 Pmode
, 1, align_4_label
);
14889 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14890 Pmode
, 1, align_2_label
);
14891 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14892 Pmode
, 1, align_3_label
);
14896 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14897 check if is aligned to 4 - byte. */
14899 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14900 NULL_RTX
, 0, OPTAB_WIDEN
);
14902 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14903 Pmode
, 1, align_4_label
);
14906 mem
= change_address (src
, QImode
, out
);
14908 /* Now compare the bytes. */
14910 /* Compare the first n unaligned byte on a byte per byte basis. */
14911 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14912 QImode
, 1, end_0_label
);
14914 /* Increment the address. */
/* 64-bit vs 32-bit add; the TARGET_64BIT guard lines were dropped.  */
14916 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14918 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14920 /* Not needed with an alignment of 2 */
14923 emit_label (align_2_label
);
14925 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14929 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14931 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14933 emit_label (align_3_label
);
14936 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14940 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14942 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14945 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14946 align this loop. It gives only huge programs, but does not help to
14948 emit_label (align_4_label
);
14950 mem
= change_address (src
, SImode
, out
);
14951 emit_move_insn (scratch
, mem
);
14953 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14955 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14957 /* This formula yields a nonzero result iff one of the bytes is zero.
14958 This saves three branches inside loop and many cycles. */
14960 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14961 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14962 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14963 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14964 gen_int_mode (0x80808080, SImode
)));
14965 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
/* Fix-up: locate which of the four bytes was zero.  CMOV variant.  */
14970 rtx reg
= gen_reg_rtx (SImode
);
14971 rtx reg2
= gen_reg_rtx (Pmode
);
14972 emit_move_insn (reg
, tmpreg
);
14973 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14975 /* If zero is not in the first two bytes, move two bytes forward. */
14976 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14977 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14978 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14979 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14980 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14983 /* Emit lea manually to avoid clobbering of flags. */
14984 emit_insn (gen_rtx_SET (SImode
, reg2
,
14985 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14987 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14988 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14989 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14990 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
/* Branchy variant (no CMOV).  */
14997 rtx end_2_label
= gen_label_rtx ();
14998 /* Is zero in the first two bytes? */
15000 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15001 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15002 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15003 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15004 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15006 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15007 JUMP_LABEL (tmp
) = end_2_label
;
15009 /* Not in the first two. Move two bytes forward. */
15010 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15012 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15014 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15016 emit_label (end_2_label
);
15020 /* Avoid branch in fixing the byte. */
15021 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15022 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
/* NOTE(review): hard-coded register 17 here is the flags register number
   on this target -- FLAGS_REG elsewhere in the file; verify.  */
15023 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
15025 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15027 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15029 emit_label (end_0_label
);
15032 /* For given symbol (function) construct code to compute address of it's PLT
15033 entry in large x86-64 PIC model. */
15035 construct_plt_address (rtx symbol
)
15037 rtx tmp
= gen_reg_rtx (Pmode
);
15038 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15040 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15041 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15043 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15044 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Expands a call insn: fixes up the call address
   (Mach-O indirection, large-PIC PLT address, forcing into a register),
   records register uses (PIC register, AL for varargs), attaches an
   optional stack-pop, and emits the CALL_INSN.  The leading comment and
   return-type lines were dropped by the extraction.  */
15049 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15050 rtx callarg2 ATTRIBUTE_UNUSED
,
15051 rtx pop
, int sibcall
)
15053 rtx use
= NULL
, call
;
/* A const0_rtx pop means "no bytes to pop" (body line dropped).  */
15055 if (pop
== const0_rtx
)
15057 gcc_assert (!TARGET_64BIT
|| !pop
);
15059 if (TARGET_MACHO
&& !TARGET_64BIT
)
15062 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15063 fnaddr
= machopic_indirect_call_target (fnaddr
);
15068 /* Static functions and indirect calls don't need the pic register. */
15069 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15070 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15071 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15072 use_reg (&use
, pic_offset_table_rtx
);
/* x86-64 varargs: AL carries the number of SSE registers used.  */
15075 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15077 rtx al
= gen_rtx_REG (QImode
, 0);
15078 emit_move_insn (al
, callarg2
);
15079 use_reg (&use
, al
);
/* Large PIC model: route non-local symbols through the PLT address.  */
15082 if (ix86_cmodel
== CM_LARGE_PIC
15083 && GET_CODE (fnaddr
) == MEM
15084 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15085 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15086 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15087 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15089 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15090 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
/* 64-bit sibcalls need the address in a call-clobbered register (R11).  */
15092 if (sibcall
&& TARGET_64BIT
15093 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15096 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15097 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15098 emit_move_insn (fnaddr
, addr
);
15099 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15102 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
/* Wrap in a SET when the call has a return value (guard line dropped).  */
15104 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
/* Attach the callee-pop stack adjustment as a parallel.  */
15107 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15108 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15109 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15112 call
= emit_call_insn (call
);
15114 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15118 /* Clear stack slot assignments remembered from previous functions.
15119 This is called from INIT_EXPANDERS once before RTL is emitted for each
15122 static struct machine_function
*
15123 ix86_init_machine_status (void)
15125 struct machine_function
*f
;
15127 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
15128 f
->use_fast_prologue_epilogue_nregs
= -1;
15129 f
->tls_descriptor_call_expanded_p
= 0;
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Returns a cached (or newly allocated) stack slot
   for (MODE, N); cached entries are returned via copy_rtx so callers may
   modify the MEM.  The lines storing `mode` and `n` into the new entry
   and the final return (original lines around 15153-15160) were dropped
   by the extraction.  */
15134 /* Return a MEM corresponding to a stack slot with mode MODE.
15135 Allocate a new slot if necessary.
15137 The RTL for a function can have several slots available: N is
15138 which slot to use. */
15141 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15143 struct stack_local_entry
*s
;
15145 gcc_assert (n
< MAX_386_STACK_LOCALS
);
/* Linear search of the per-function cache of slots.  */
15147 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15148 if (s
->mode
== mode
&& s
->n
== n
)
/* Copy so the caller can adjust the MEM without corrupting the cache.  */
15149 return copy_rtx (s
->rtl
);
15151 s
= (struct stack_local_entry
*)
15152 ggc_alloc (sizeof (struct stack_local_entry
));
15155 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
/* Push the new entry on the front of the cache list.  */
15157 s
->next
= ix86_stack_locals
;
15158 ix86_stack_locals
= s
;
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Lazily builds and caches the SYMBOL_REF for the
   TLS helper routine; the name depends on TARGET_ANY_GNU_TLS (GNU-style
   "___tls_get_addr" vs. "__tls_get_addr").  One condition line between
   15172 and 15174 was dropped by the extraction -- verify the full
   predicate against the pristine file.  */
15162 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* GTY(()) roots the cached rtx for the garbage collector.  */
15164 static GTY(()) rtx ix86_tls_symbol
;
15166 ix86_tls_get_addr (void)
15169 if (!ix86_tls_symbol
)
15171 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15172 (TARGET_ANY_GNU_TLS
15174 ? "___tls_get_addr"
15175 : "__tls_get_addr");
15178 return ix86_tls_symbol
;
15181 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15183 static GTY(()) rtx ix86_tls_module_base_symbol
;
15185 ix86_tls_module_base (void)
15188 if (!ix86_tls_module_base_symbol
)
15190 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15191 "_TLS_MODULE_BASE_");
15192 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15193 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15196 return ix86_tls_module_base_symbol
;
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Computes the encoded length of a memory operand's
   address (SIB byte + displacement bytes), excluding modrm/opcode/prefix.
   The lines accumulating into the length variable and the returns were
   dropped by the extraction.  */
15199 /* Calculate the length of the memory address in the instruction
15200 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15203 memory_address_length (rtx addr
)
15205 struct ix86_address parts
;
15206 rtx base
, index
, disp
;
/* Auto-modifying addresses encode no extra address bytes here.  */
15210 if (GET_CODE (addr
) == PRE_DEC
15211 || GET_CODE (addr
) == POST_INC
15212 || GET_CODE (addr
) == PRE_MODIFY
15213 || GET_CODE (addr
) == POST_MODIFY
)
15216 ok
= ix86_decompose_address (addr
, &parts
);
/* Strip SUBREGs so the register-identity tests below work.  */
15219 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15220 parts
.base
= SUBREG_REG (parts
.base
);
15221 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15222 parts
.index
= SUBREG_REG (parts
.index
);
15225 index
= parts
.index
;
15230 - esp as the base always wants an index,
15231 - ebp as the base always wants a displacement. */
15233 /* Register Indirect. */
15234 if (base
&& !index
&& !disp
)
15236 /* esp (for its index) and ebp (for its displacement) need
15237 the two-byte modrm form. */
15238 if (addr
== stack_pointer_rtx
15239 || addr
== arg_pointer_rtx
15240 || addr
== frame_pointer_rtx
15241 || addr
== hard_frame_pointer_rtx
)
15245 /* Direct Addressing. */
15246 else if (disp
&& !base
&& !index
)
15251 /* Find the length of the displacement constant. */
/* satisfies_constraint_K: displacement fits in a signed 8-bit byte.  */
15254 if (base
&& satisfies_constraint_K (disp
))
15259 /* ebp always wants a displacement. */
15260 else if (base
== hard_frame_pointer_rtx
)
15263 /* An index requires the two-byte modrm form.... */
15265 /* ...like esp, which always wants an index. */
15266 || base
== stack_pointer_rtx
15267 || base
== arg_pointer_rtx
15268 || base
== frame_pointer_rtx
)
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Computes the "length_immediate" insn attribute by
   scanning the recognized operands for a constant; with SHORTFORM set, an
   8-bit-representable constant (constraint K) uses the short encoding.
   The per-mode return values of the switch were dropped by the
   extraction.  */
15275 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15276 is set, expect that insn have 8bit immediate alternative. */
15278 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15282 extract_insn_cached (insn
);
/* Scan operands backwards for the (single) constant operand.  */
15283 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15284 if (CONSTANT_P (recog_data
.operand
[i
]))
/* K constraint: value fits a signed 8-bit immediate.  */
15287 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15291 switch (get_attr_mode (insn
))
15302 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15307 fatal_insn ("unknown insn mode", insn
);
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Computes the "length_address" attribute: for LEA
   the length of the SET_SRC address, otherwise the length of the first
   MEM operand found; structural lines (braces, final return) were
   dropped by the extraction.  */
15313 /* Compute default value for "length_address" attribute. */
15315 ix86_attr_length_address_default (rtx insn
)
/* LEA encodes its "address" in the SET_SRC rather than in a MEM.  */
15319 if (get_attr_type (insn
) == TYPE_LEA
)
15321 rtx set
= PATTERN (insn
);
15323 if (GET_CODE (set
) == PARALLEL
)
15324 set
= XVECEXP (set
, 0, 0);
15326 gcc_assert (GET_CODE (set
) == SET
);
15328 return memory_address_length (SET_SRC (set
));
15331 extract_insn_cached (insn
);
/* First MEM operand determines the address length.  */
15332 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15333 if (MEM_P (recog_data
.operand
[i
]))
15335 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Per-processor issue width for the scheduler; the
   `switch (ix86_tune)` head, the returned constants and the default case
   were dropped by the extraction -- the grouping of cases below is all
   that survives.  */
15341 /* Return the maximum number of instructions a cpu can issue. */
15344 ix86_issue_rate (void)
15348 case PROCESSOR_PENTIUM
:
15352 case PROCESSOR_PENTIUMPRO
:
15353 case PROCESSOR_PENTIUM4
:
15354 case PROCESSOR_ATHLON
:
15356 case PROCESSOR_AMDFAM10
:
15357 case PROCESSOR_NOCONA
:
15358 case PROCESSOR_GENERIC32
:
15359 case PROCESSOR_GENERIC64
:
15362 case PROCESSOR_CORE2
:
15370 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15371 by DEP_INSN and nothing set by DEP_INSN. */
15374 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15378 /* Simplify the test for uninteresting insns. */
15379 if (insn_type
!= TYPE_SETCC
15380 && insn_type
!= TYPE_ICMOV
15381 && insn_type
!= TYPE_FCMOV
15382 && insn_type
!= TYPE_IBR
)
15385 if ((set
= single_set (dep_insn
)) != 0)
15387 set
= SET_DEST (set
);
15390 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15391 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15392 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15393 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15395 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15396 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15401 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15404 /* This test is true if the dependent insn reads the flags but
15405 not any other potentially set register. */
15406 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15409 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Returns whether INSN's memory address uses a
   register written by DEP_INSN (address generation interlock).  For LEA
   the address is the SET_SRC; otherwise the first MEM operand's address
   is used.  Several structural lines (returns, braces, the loop `break`)
   were dropped by the extraction.  */
15415 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15416 address with operands set by DEP_INSN. */
15419 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
/* LEA computes an address without a MEM; pull it from the pattern.  */
15423 if (insn_type
== TYPE_LEA
15426 addr
= PATTERN (insn
);
15428 if (GET_CODE (addr
) == PARALLEL
)
15429 addr
= XVECEXP (addr
, 0, 0);
15431 gcc_assert (GET_CODE (addr
) == SET
);
15433 addr
= SET_SRC (addr
);
/* Otherwise scan the recognized operands for the first MEM.  */
15438 extract_insn_cached (insn
);
15439 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15440 if (MEM_P (recog_data
.operand
[i
]))
15442 addr
= XEXP (recog_data
.operand
[i
], 0);
/* True iff DEP_INSN writes any register appearing in the address.  */
15449 return modified_in_p (addr
, dep_insn
);
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Scheduler cost hook: adjusts the latency COST of
   the dependence LINK between DEP_INSN and INSN per CPU family (AGI
   stalls on Pentium, FP/store and load-hiding adjustments on PPro and
   later, load-latency hiding on AMD/generic).  Cost assignments, `break`s
   and the final return were dropped by the extraction.  */
15453 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15455 enum attr_type insn_type
, dep_insn_type
;
15456 enum attr_memory memory
;
15458 int dep_insn_code_number
;
15460 /* Anti and output dependencies have zero cost on all CPUs. */
15461 if (REG_NOTE_KIND (link
) != 0)
15464 dep_insn_code_number
= recog_memoized (dep_insn
);
15466 /* If we can't recognize the insns, we can't really do anything. */
15467 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15470 insn_type
= get_attr_type (insn
);
15471 dep_insn_type
= get_attr_type (dep_insn
);
/* Dispatch on ix86_tune (switch head dropped by extraction).  */
15475 case PROCESSOR_PENTIUM
:
15476 /* Address Generation Interlock adds a cycle of latency. */
15477 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15480 /* ??? Compares pair with jump/setcc. */
15481 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15484 /* Floating point stores require value to be ready one cycle earlier. */
15485 if (insn_type
== TYPE_FMOV
15486 && get_attr_memory (insn
) == MEMORY_STORE
15487 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15491 case PROCESSOR_PENTIUMPRO
:
15492 memory
= get_attr_memory (insn
);
15494 /* INT->FP conversion is expensive. */
15495 if (get_attr_fp_int_src (dep_insn
))
15498 /* There is one cycle extra latency between an FP op and a store. */
15499 if (insn_type
== TYPE_FMOV
15500 && (set
= single_set (dep_insn
)) != NULL_RTX
15501 && (set2
= single_set (insn
)) != NULL_RTX
15502 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15503 && MEM_P (SET_DEST (set2
)))
15506 /* Show ability of reorder buffer to hide latency of load by executing
15507 in parallel with previous instruction in case
15508 previous instruction is not needed to compute the address. */
15509 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15510 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15512 /* Claim moves to take one cycle, as core can issue one load
15513 at time and the next load can start cycle later. */
15514 if (dep_insn_type
== TYPE_IMOV
15515 || dep_insn_type
== TYPE_FMOV
)
/* Next case arm (label line dropped by extraction).  */
15523 memory
= get_attr_memory (insn
);
15525 /* The esp dependency is resolved before the instruction is really
15527 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15528 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15531 /* INT->FP conversion is expensive. */
15532 if (get_attr_fp_int_src (dep_insn
))
15535 /* Show ability of reorder buffer to hide latency of load by executing
15536 in parallel with previous instruction in case
15537 previous instruction is not needed to compute the address. */
15538 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15539 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15541 /* Claim moves to take one cycle, as core can issue one load
15542 at time and the next load can start cycle later. */
15543 if (dep_insn_type
== TYPE_IMOV
15544 || dep_insn_type
== TYPE_FMOV
)
15553 case PROCESSOR_ATHLON
:
15555 case PROCESSOR_AMDFAM10
:
15556 case PROCESSOR_GENERIC32
:
15557 case PROCESSOR_GENERIC64
:
15558 memory
= get_attr_memory (insn
);
15560 /* Show ability of reorder buffer to hide latency of load by executing
15561 in parallel with previous instruction in case
15562 previous instruction is not needed to compute the address. */
15563 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15564 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15566 enum attr_unit unit
= get_attr_unit (insn
);
15569 /* Because of the difference between the length of integer and
15570 floating unit pipeline preparation stages, the memory operands
15571 for floating point are cheaper.
15573 ??? For Athlon it the difference is most probably 2. */
15574 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15577 loadcost
= TARGET_ATHLON
? 2 : 0;
15579 if (cost
>= loadcost
)
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Per-CPU DFA lookahead depth for the scheduler;
   the returned constants were dropped by the extraction.  */
15592 /* How many alternative schedules to try. This should be as wide as the
15593 scheduling freedom in the DFA, but no wider. Making this value too
15594 large results extra work for the scheduler. */
15597 ia32_multipass_dfa_lookahead (void)
15599 if (ix86_tune
== PROCESSOR_PENTIUM
)
15602 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15603 || ix86_tune
== PROCESSOR_K6
)
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Raises the alignment of constants placed in
   memory: DFmode reals to 64 bits, 128-bit-mode reals to 128 bits, and
   long string constants to word alignment.  The return constants for the
   REAL_CST branches were dropped by the extraction.  */
15611 /* Compute the alignment given to a constant that is being placed in memory.
15612 EXP is the constant and ALIGN is the alignment that the object would
15614 The value of this function is used instead of that alignment to align
15618 ix86_constant_alignment (tree exp
, int align
)
15620 if (TREE_CODE (exp
) == REAL_CST
)
15622 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15624 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
/* Word-align sufficiently long strings (>= 31 chars) unless sizing.  */
15627 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15628 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15629 return BITS_PER_WORD
;
/* NOTE(review): line-mangled extraction; tokens kept byte-identical,
   comments only added.  Computes static-variable alignment: caps large
   aggregates at max_align, 16-byte-aligns large aggregates per the
   x86-64 ABI, and raises DFmode/128-bit-mode data (arrays, complex,
   records, scalars) to 64/128 bits.  The returned values and some
   TARGET_64BIT guards were dropped by the extraction.  */
15634 /* Compute the alignment for a static variable.
15635 TYPE is the data type, and ALIGN is the alignment that
15636 the object would ordinarily have. The value of this function is used
15637 instead of that alignment to align the object. */
15640 ix86_data_alignment (tree type
, int align
)
/* When optimizing for size, never align beyond the word size.  */
15642 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15644 if (AGGREGATE_TYPE_P (type
)
15645 && TYPE_SIZE (type
)
15646 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15647 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15648 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15649 && align
< max_align
)
15652 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15653 to 16byte boundary. */
15656 if (AGGREGATE_TYPE_P (type
)
15657 && TYPE_SIZE (type
)
15658 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15659 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15660 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15664 if (TREE_CODE (type
) == ARRAY_TYPE
)
15666 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15668 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15671 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15674 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15676 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15679 else if ((TREE_CODE (type
) == RECORD_TYPE
15680 || TREE_CODE (type
) == UNION_TYPE
15681 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15682 && TYPE_FIELDS (type
))
/* For records, the first field's mode drives the alignment bump.  */
15684 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15686 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15689 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15690 || TREE_CODE (type
) == INTEGER_TYPE
)
15692 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15694 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15701 /* Compute the alignment for a local variable.
15702 TYPE is the data type, and ALIGN is the alignment that
15703 the object would ordinarily have. The value of this macro is used
15704 instead of that alignment to align the object. */
15707 ix86_local_alignment (tree type
, int align
)
15709 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15710 to 16byte boundary. */
15713 if (AGGREGATE_TYPE_P (type
)
15714 && TYPE_SIZE (type
)
15715 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15716 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15717 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15720 if (TREE_CODE (type
) == ARRAY_TYPE
)
15722 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15724 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15727 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15729 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15731 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15734 else if ((TREE_CODE (type
) == RECORD_TYPE
15735 || TREE_CODE (type
) == UNION_TYPE
15736 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15737 && TYPE_FIELDS (type
))
15739 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15741 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15744 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15745 || TREE_CODE (type
) == INTEGER_TYPE
)
15748 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15750 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15756 /* Emit RTL insns to initialize the variable parts of a trampoline.
15757 FNADDR is an RTX for the address of the function's pure code.
15758 CXT is an RTX for the static chain value for the function. */
15760 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15764 /* Compute offset from the end of the jmp to the target function. */
15765 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15766 plus_constant (tramp
, 10),
15767 NULL_RTX
, 1, OPTAB_DIRECT
);
15768 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15769 gen_int_mode (0xb9, QImode
));
15770 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15771 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15772 gen_int_mode (0xe9, QImode
));
15773 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15778 /* Try to load address using shorter movl instead of movabs.
15779 We may want to support movq for kernel mode, but kernel does not use
15780 trampolines at the moment. */
15781 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15783 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15784 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15785 gen_int_mode (0xbb41, HImode
));
15786 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15787 gen_lowpart (SImode
, fnaddr
));
15792 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15793 gen_int_mode (0xbb49, HImode
));
15794 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15798 /* Load static chain using movabs to r10. */
15799 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15800 gen_int_mode (0xba49, HImode
));
15801 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15804 /* Jump to the r11 */
15805 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15806 gen_int_mode (0xff49, HImode
));
15807 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15808 gen_int_mode (0xe3, QImode
));
15810 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15813 #ifdef ENABLE_EXECUTE_STACK
15814 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15815 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  /* SSE.  */
  IX86_BUILTIN_ADDPS, IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS, IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS, IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS, IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS, IX86_BUILTIN_CMPLTPS, IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS, IX86_BUILTIN_CMPGEPS, IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS, IX86_BUILTIN_CMPNLEPS, IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS, IX86_BUILTIN_CMPORDPS, IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS, IX86_BUILTIN_CMPLTSS, IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS, IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS, IX86_BUILTIN_COMILTSS, IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS, IX86_BUILTIN_COMIGESS, IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS, IX86_BUILTIN_UCOMILTSS, IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS, IX86_BUILTIN_UCOMIGESS, IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS, IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS, IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI, IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI, IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS, IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS, IX86_BUILTIN_STOREUPS, IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS, IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS, IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS, IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ, IX86_BUILTIN_MOVMSKPS, IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS, IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU, IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB, IX86_BUILTIN_PACKSSDW, IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB, IX86_BUILTIN_PADDW, IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ, IX86_BUILTIN_PADDSB, IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB, IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB, IX86_BUILTIN_PSUBW, IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ, IX86_BUILTIN_PSUBSB, IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB, IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND, IX86_BUILTIN_PANDN, IX86_BUILTIN_POR, IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB, IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB, IX86_BUILTIN_PCMPEQW, IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB, IX86_BUILTIN_PCMPGTW, IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW, IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW, IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW, IX86_BUILTIN_PMULHW, IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW, IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW, IX86_BUILTIN_PSLLD, IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW, IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW, IX86_BUILTIN_PSRLD, IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI, IX86_BUILTIN_PSLLDI, IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI, IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI, IX86_BUILTIN_PSRLDI, IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW, IX86_BUILTIN_PUNPCKHWD, IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW, IX86_BUILTIN_PUNPCKLWD, IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS, IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS, IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS, IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS, IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS, IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS, IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR, IX86_BUILTIN_STMXCSR, IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS, IX86_BUILTIN_PAVGUSB, IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC, IX86_BUILTIN_PFADD, IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE, IX86_BUILTIN_PFCMPGT, IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN, IX86_BUILTIN_PFMUL, IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1, IX86_BUILTIN_PFRCPIT2, IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT, IX86_BUILTIN_PFSUB, IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD, IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW, IX86_BUILTIN_PFNACC, IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW, IX86_BUILTIN_PSWAPDSI, IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD, IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD, IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD, IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD, IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD, IX86_BUILTIN_CMPLTPD, IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD, IX86_BUILTIN_CMPGEPD, IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD, IX86_BUILTIN_CMPNLEPD, IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD, IX86_BUILTIN_CMPORDPD, IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD, IX86_BUILTIN_CMPLTSD, IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD, IX86_BUILTIN_CMPNLTSD, IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD, IX86_BUILTIN_CMPUNORDSD, IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD, IX86_BUILTIN_COMILTSD, IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD, IX86_BUILTIN_COMIGESD, IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD, IX86_BUILTIN_UCOMILTSD, IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD, IX86_BUILTIN_UCOMIGESD, IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD, IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD, IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD, IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD, IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD, IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD, IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD, IX86_BUILTIN_STOREUPD, IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD, IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD, IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ, IX86_BUILTIN_CVTPD2PI, IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ, IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD, IX86_BUILTIN_CVTSI2SD, IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI, IX86_BUILTIN_CVTSD2SI64, IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD, IX86_BUILTIN_CVTTSD2SI, IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ, IX86_BUILTIN_CVTPS2PD, IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI, IX86_BUILTIN_MOVNTPD, IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU, IX86_BUILTIN_MOVMSKPD, IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128, IX86_BUILTIN_PACKSSDW128, IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128, IX86_BUILTIN_PADDW128, IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128, IX86_BUILTIN_PADDSB128, IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128, IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128, IX86_BUILTIN_PSUBW128, IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128, IX86_BUILTIN_PSUBSB128, IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128, IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128, IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128, IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128, IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128, IX86_BUILTIN_PCMPEQW128, IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128, IX86_BUILTIN_PCMPGTW128, IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128, IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128, IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ, IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128, IX86_BUILTIN_PMULHW128, IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW, IX86_BUILTIN_PSHUFLW, IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128, IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128, IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128, IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128, IX86_BUILTIN_PSLLWI128, IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128, IX86_BUILTIN_PSRAWI128, IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128, IX86_BUILTIN_PSRLWI128, IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128, IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128, IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128, IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH, IX86_BUILTIN_MFENCE, IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS, IX86_BUILTIN_HADDPS, IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP, IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD, IX86_BUILTIN_HADDPD, IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD, IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD, IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW, IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB, IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND, IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW, IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128, IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128, IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128, IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128, IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128, IX86_BUILTIN_PABSW128, IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD, IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI, IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI, IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF, IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF, IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI, IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI, IX86_BUILTIN_VEC_SET_V4HI,

  IX86_BUILTIN_MAX
};
16292 /* Table for the ix86 builtin decls. */
16293 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16295 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16296 * if the target_flags include one of MASK. Stores the function decl
16297 * in the ix86_builtins array.
16298 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16301 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16303 tree decl
= NULL_TREE
;
16305 if (mask
& target_flags
16306 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16308 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16310 ix86_builtins
[(int) code
] = decl
;
16316 /* Like def_builtin, but also marks the function decl "const". */
16319 def_builtin_const (int mask
, const char *name
, tree type
,
16320 enum ix86_builtins code
)
16322 tree decl
= def_builtin (mask
, name
, type
, code
);
16324 TREE_READONLY (decl
) = 1;
16328 /* Bits for builtin_description.flag. */
16330 /* Set when we don't support the comparison natively, and should
16331 swap_comparison in order to support it. */
16332 #define BUILTIN_DESC_SWAP_OPERANDS 1
16334 struct builtin_description
16336 const unsigned int mask
;
16337 const enum insn_code icode
;
16338 const char *const name
;
16339 const enum ix86_builtins code
;
16340 const enum rtx_code comparison
;
16341 const unsigned int flag
;
16344 static const struct builtin_description bdesc_comi
[] =
16346 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16347 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16348 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16349 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16350 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16351 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16352 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16353 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16354 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16355 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16356 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16357 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16358 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16359 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16360 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16361 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16362 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16363 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16364 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16365 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16366 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16367 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16368 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16369 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16372 static const struct builtin_description bdesc_2arg
[] =
16375 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16376 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16377 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16378 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16379 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16380 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16381 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16382 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16384 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16385 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16386 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16387 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16388 BUILTIN_DESC_SWAP_OPERANDS
},
16389 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16390 BUILTIN_DESC_SWAP_OPERANDS
},
16391 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16392 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16393 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16394 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16395 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16396 BUILTIN_DESC_SWAP_OPERANDS
},
16397 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16398 BUILTIN_DESC_SWAP_OPERANDS
},
16399 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16400 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16401 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16402 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16403 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16404 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16405 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16406 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16407 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16408 BUILTIN_DESC_SWAP_OPERANDS
},
16409 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16410 BUILTIN_DESC_SWAP_OPERANDS
},
16411 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16413 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16414 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16415 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16416 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16418 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16419 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16420 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16421 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16423 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16424 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16425 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16426 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16427 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16430 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16431 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16432 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16433 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16434 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16435 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16436 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16437 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16439 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16440 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16441 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16442 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16443 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16444 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16445 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16446 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16448 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16449 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16450 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16452 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16453 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16454 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16455 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16457 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16458 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16460 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16461 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16462 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16463 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16464 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16465 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16467 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16468 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16469 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16470 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16472 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16473 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16474 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16475 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16476 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16477 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16480 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16481 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16482 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16484 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16485 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16486 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16488 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16489 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16490 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16491 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16492 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16493 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16495 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16496 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16497 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16498 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16499 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16500 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16502 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16503 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16504 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16505 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16507 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16508 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16511 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16512 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16513 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16514 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16515 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16516 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16517 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16518 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16520 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16521 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16522 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16523 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16524 BUILTIN_DESC_SWAP_OPERANDS
},
16525 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16526 BUILTIN_DESC_SWAP_OPERANDS
},
16527 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16528 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16529 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16530 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16531 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16532 BUILTIN_DESC_SWAP_OPERANDS
},
16533 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16534 BUILTIN_DESC_SWAP_OPERANDS
},
16535 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16536 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16537 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16538 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16539 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16540 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16541 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16542 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16543 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16545 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16546 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16547 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16548 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16550 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16551 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16552 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16553 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16555 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16556 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16557 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16560 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16561 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16562 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16563 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16564 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16565 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16566 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16567 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16569 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16570 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16571 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16572 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16573 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16574 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16575 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16576 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16578 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16579 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16581 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16582 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16583 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16584 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16586 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16587 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16589 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16590 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16591 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16592 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16593 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16594 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16596 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16597 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16598 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16599 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16601 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16602 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16603 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16604 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16605 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16606 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16607 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16608 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16610 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16611 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16612 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16614 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16615 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16617 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16618 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16620 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16621 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16622 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16624 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16625 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16626 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16628 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16629 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16631 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16633 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16634 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16635 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16636 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16639 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16640 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16641 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16642 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16643 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16644 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16647 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16648 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16649 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16650 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16651 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16652 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16653 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16654 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16655 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16656 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16657 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16658 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16659 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16660 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16661 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16662 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16663 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16664 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16665 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16666 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16667 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16668 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16669 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16670 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16673 static const struct builtin_description bdesc_1arg
[] =
16675 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16676 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16678 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16679 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16680 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16682 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16683 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16684 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16685 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16686 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16687 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16689 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16690 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16692 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16694 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16695 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16697 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16698 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16699 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16700 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16701 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16703 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16705 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16706 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16707 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16708 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16710 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16711 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16712 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16715 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16716 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16719 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16720 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16721 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16722 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16723 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16724 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16728 ix86_init_builtins (void)
16731 ix86_init_mmx_sse_builtins ();
16734 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16735 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16738 ix86_init_mmx_sse_builtins (void)
16740 const struct builtin_description
* d
;
16743 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16744 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16745 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16746 tree V2DI_type_node
16747 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16748 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16749 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16750 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16751 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16752 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16753 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16755 tree pchar_type_node
= build_pointer_type (char_type_node
);
16756 tree pcchar_type_node
= build_pointer_type (
16757 build_type_variant (char_type_node
, 1, 0));
16758 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16759 tree pcfloat_type_node
= build_pointer_type (
16760 build_type_variant (float_type_node
, 1, 0));
16761 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16762 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16763 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16766 tree int_ftype_v4sf_v4sf
16767 = build_function_type_list (integer_type_node
,
16768 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16769 tree v4si_ftype_v4sf_v4sf
16770 = build_function_type_list (V4SI_type_node
,
16771 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16772 /* MMX/SSE/integer conversions. */
16773 tree int_ftype_v4sf
16774 = build_function_type_list (integer_type_node
,
16775 V4SF_type_node
, NULL_TREE
);
16776 tree int64_ftype_v4sf
16777 = build_function_type_list (long_long_integer_type_node
,
16778 V4SF_type_node
, NULL_TREE
);
16779 tree int_ftype_v8qi
16780 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16781 tree v4sf_ftype_v4sf_int
16782 = build_function_type_list (V4SF_type_node
,
16783 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16784 tree v4sf_ftype_v4sf_int64
16785 = build_function_type_list (V4SF_type_node
,
16786 V4SF_type_node
, long_long_integer_type_node
,
16788 tree v4sf_ftype_v4sf_v2si
16789 = build_function_type_list (V4SF_type_node
,
16790 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16792 /* Miscellaneous. */
16793 tree v8qi_ftype_v4hi_v4hi
16794 = build_function_type_list (V8QI_type_node
,
16795 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16796 tree v4hi_ftype_v2si_v2si
16797 = build_function_type_list (V4HI_type_node
,
16798 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16799 tree v4sf_ftype_v4sf_v4sf_int
16800 = build_function_type_list (V4SF_type_node
,
16801 V4SF_type_node
, V4SF_type_node
,
16802 integer_type_node
, NULL_TREE
);
16803 tree v2si_ftype_v4hi_v4hi
16804 = build_function_type_list (V2SI_type_node
,
16805 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16806 tree v4hi_ftype_v4hi_int
16807 = build_function_type_list (V4HI_type_node
,
16808 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16809 tree v4hi_ftype_v4hi_di
16810 = build_function_type_list (V4HI_type_node
,
16811 V4HI_type_node
, long_long_unsigned_type_node
,
16813 tree v2si_ftype_v2si_di
16814 = build_function_type_list (V2SI_type_node
,
16815 V2SI_type_node
, long_long_unsigned_type_node
,
16817 tree void_ftype_void
16818 = build_function_type (void_type_node
, void_list_node
);
16819 tree void_ftype_unsigned
16820 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16821 tree void_ftype_unsigned_unsigned
16822 = build_function_type_list (void_type_node
, unsigned_type_node
,
16823 unsigned_type_node
, NULL_TREE
);
16824 tree void_ftype_pcvoid_unsigned_unsigned
16825 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16826 unsigned_type_node
, unsigned_type_node
,
16828 tree unsigned_ftype_void
16829 = build_function_type (unsigned_type_node
, void_list_node
);
16830 tree v2si_ftype_v4sf
16831 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16832 /* Loads/stores. */
16833 tree void_ftype_v8qi_v8qi_pchar
16834 = build_function_type_list (void_type_node
,
16835 V8QI_type_node
, V8QI_type_node
,
16836 pchar_type_node
, NULL_TREE
);
16837 tree v4sf_ftype_pcfloat
16838 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16839 /* @@@ the type is bogus */
16840 tree v4sf_ftype_v4sf_pv2si
16841 = build_function_type_list (V4SF_type_node
,
16842 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16843 tree void_ftype_pv2si_v4sf
16844 = build_function_type_list (void_type_node
,
16845 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16846 tree void_ftype_pfloat_v4sf
16847 = build_function_type_list (void_type_node
,
16848 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16849 tree void_ftype_pdi_di
16850 = build_function_type_list (void_type_node
,
16851 pdi_type_node
, long_long_unsigned_type_node
,
16853 tree void_ftype_pv2di_v2di
16854 = build_function_type_list (void_type_node
,
16855 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16856 /* Normal vector unops. */
16857 tree v4sf_ftype_v4sf
16858 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16859 tree v16qi_ftype_v16qi
16860 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16861 tree v8hi_ftype_v8hi
16862 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16863 tree v4si_ftype_v4si
16864 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16865 tree v8qi_ftype_v8qi
16866 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16867 tree v4hi_ftype_v4hi
16868 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16870 /* Normal vector binops. */
16871 tree v4sf_ftype_v4sf_v4sf
16872 = build_function_type_list (V4SF_type_node
,
16873 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16874 tree v8qi_ftype_v8qi_v8qi
16875 = build_function_type_list (V8QI_type_node
,
16876 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16877 tree v4hi_ftype_v4hi_v4hi
16878 = build_function_type_list (V4HI_type_node
,
16879 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16880 tree v2si_ftype_v2si_v2si
16881 = build_function_type_list (V2SI_type_node
,
16882 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16883 tree di_ftype_di_di
16884 = build_function_type_list (long_long_unsigned_type_node
,
16885 long_long_unsigned_type_node
,
16886 long_long_unsigned_type_node
, NULL_TREE
);
16888 tree di_ftype_di_di_int
16889 = build_function_type_list (long_long_unsigned_type_node
,
16890 long_long_unsigned_type_node
,
16891 long_long_unsigned_type_node
,
16892 integer_type_node
, NULL_TREE
);
16894 tree v2si_ftype_v2sf
16895 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16896 tree v2sf_ftype_v2si
16897 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16898 tree v2si_ftype_v2si
16899 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16900 tree v2sf_ftype_v2sf
16901 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16902 tree v2sf_ftype_v2sf_v2sf
16903 = build_function_type_list (V2SF_type_node
,
16904 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16905 tree v2si_ftype_v2sf_v2sf
16906 = build_function_type_list (V2SI_type_node
,
16907 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16908 tree pint_type_node
= build_pointer_type (integer_type_node
);
16909 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16910 tree pcdouble_type_node
= build_pointer_type (
16911 build_type_variant (double_type_node
, 1, 0));
16912 tree int_ftype_v2df_v2df
16913 = build_function_type_list (integer_type_node
,
16914 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16916 tree void_ftype_pcvoid
16917 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16918 tree v4sf_ftype_v4si
16919 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16920 tree v4si_ftype_v4sf
16921 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16922 tree v2df_ftype_v4si
16923 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16924 tree v4si_ftype_v2df
16925 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16926 tree v2si_ftype_v2df
16927 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16928 tree v4sf_ftype_v2df
16929 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16930 tree v2df_ftype_v2si
16931 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16932 tree v2df_ftype_v4sf
16933 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16934 tree int_ftype_v2df
16935 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16936 tree int64_ftype_v2df
16937 = build_function_type_list (long_long_integer_type_node
,
16938 V2DF_type_node
, NULL_TREE
);
16939 tree v2df_ftype_v2df_int
16940 = build_function_type_list (V2DF_type_node
,
16941 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16942 tree v2df_ftype_v2df_int64
16943 = build_function_type_list (V2DF_type_node
,
16944 V2DF_type_node
, long_long_integer_type_node
,
16946 tree v4sf_ftype_v4sf_v2df
16947 = build_function_type_list (V4SF_type_node
,
16948 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16949 tree v2df_ftype_v2df_v4sf
16950 = build_function_type_list (V2DF_type_node
,
16951 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16952 tree v2df_ftype_v2df_v2df_int
16953 = build_function_type_list (V2DF_type_node
,
16954 V2DF_type_node
, V2DF_type_node
,
16957 tree v2df_ftype_v2df_pcdouble
16958 = build_function_type_list (V2DF_type_node
,
16959 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16960 tree void_ftype_pdouble_v2df
16961 = build_function_type_list (void_type_node
,
16962 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16963 tree void_ftype_pint_int
16964 = build_function_type_list (void_type_node
,
16965 pint_type_node
, integer_type_node
, NULL_TREE
);
16966 tree void_ftype_v16qi_v16qi_pchar
16967 = build_function_type_list (void_type_node
,
16968 V16QI_type_node
, V16QI_type_node
,
16969 pchar_type_node
, NULL_TREE
);
16970 tree v2df_ftype_pcdouble
16971 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16972 tree v2df_ftype_v2df_v2df
16973 = build_function_type_list (V2DF_type_node
,
16974 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16975 tree v16qi_ftype_v16qi_v16qi
16976 = build_function_type_list (V16QI_type_node
,
16977 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16978 tree v8hi_ftype_v8hi_v8hi
16979 = build_function_type_list (V8HI_type_node
,
16980 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16981 tree v4si_ftype_v4si_v4si
16982 = build_function_type_list (V4SI_type_node
,
16983 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16984 tree v2di_ftype_v2di_v2di
16985 = build_function_type_list (V2DI_type_node
,
16986 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16987 tree v2di_ftype_v2df_v2df
16988 = build_function_type_list (V2DI_type_node
,
16989 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16990 tree v2df_ftype_v2df
16991 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16992 tree v2di_ftype_v2di_int
16993 = build_function_type_list (V2DI_type_node
,
16994 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16995 tree v2di_ftype_v2di_v2di_int
16996 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16997 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16998 tree v4si_ftype_v4si_int
16999 = build_function_type_list (V4SI_type_node
,
17000 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17001 tree v8hi_ftype_v8hi_int
17002 = build_function_type_list (V8HI_type_node
,
17003 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17004 tree v8hi_ftype_v8hi_v2di
17005 = build_function_type_list (V8HI_type_node
,
17006 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
17007 tree v4si_ftype_v4si_v2di
17008 = build_function_type_list (V4SI_type_node
,
17009 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
17010 tree v4si_ftype_v8hi_v8hi
17011 = build_function_type_list (V4SI_type_node
,
17012 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17013 tree di_ftype_v8qi_v8qi
17014 = build_function_type_list (long_long_unsigned_type_node
,
17015 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17016 tree di_ftype_v2si_v2si
17017 = build_function_type_list (long_long_unsigned_type_node
,
17018 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17019 tree v2di_ftype_v16qi_v16qi
17020 = build_function_type_list (V2DI_type_node
,
17021 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17022 tree v2di_ftype_v4si_v4si
17023 = build_function_type_list (V2DI_type_node
,
17024 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17025 tree int_ftype_v16qi
17026 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17027 tree v16qi_ftype_pcchar
17028 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17029 tree void_ftype_pchar_v16qi
17030 = build_function_type_list (void_type_node
,
17031 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17033 tree v2di_ftype_v2di_unsigned_unsigned
17034 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17035 unsigned_type_node
, unsigned_type_node
,
17037 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17038 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17039 unsigned_type_node
, unsigned_type_node
,
17041 tree v2di_ftype_v2di_v16qi
17042 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17046 tree float128_type
;
17049 /* The __float80 type. */
17050 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17051 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17055 /* The __float80 type. */
17056 float80_type
= make_node (REAL_TYPE
);
17057 TYPE_PRECISION (float80_type
) = 80;
17058 layout_type (float80_type
);
17059 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
17064 float128_type
= make_node (REAL_TYPE
);
17065 TYPE_PRECISION (float128_type
) = 128;
17066 layout_type (float128_type
);
17067 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
17070 /* Add all builtins that are more or less simple operations on two
17072 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17074 /* Use one of the operands; the target can have a different mode for
17075 mask-generating compares. */
17076 enum machine_mode mode
;
17081 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17086 type
= v16qi_ftype_v16qi_v16qi
;
17089 type
= v8hi_ftype_v8hi_v8hi
;
17092 type
= v4si_ftype_v4si_v4si
;
17095 type
= v2di_ftype_v2di_v2di
;
17098 type
= v2df_ftype_v2df_v2df
;
17101 type
= v4sf_ftype_v4sf_v4sf
;
17104 type
= v8qi_ftype_v8qi_v8qi
;
17107 type
= v4hi_ftype_v4hi_v4hi
;
17110 type
= v2si_ftype_v2si_v2si
;
17113 type
= di_ftype_di_di
;
17117 gcc_unreachable ();
17120 /* Override for comparisons. */
17121 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17122 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17123 type
= v4si_ftype_v4sf_v4sf
;
17125 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17126 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17127 type
= v2di_ftype_v2df_v2df
;
17129 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17132 /* Add all builtins that are more or less simple operations on 1 operand. */
17133 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17135 enum machine_mode mode
;
17140 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17145 type
= v16qi_ftype_v16qi
;
17148 type
= v8hi_ftype_v8hi
;
17151 type
= v4si_ftype_v4si
;
17154 type
= v2df_ftype_v2df
;
17157 type
= v4sf_ftype_v4sf
;
17160 type
= v8qi_ftype_v8qi
;
17163 type
= v4hi_ftype_v4hi
;
17166 type
= v2si_ftype_v2si
;
17173 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17176 /* Add the remaining MMX insns with somewhat more complicated types. */
17177 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
17178 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
17179 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
17180 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
17182 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
17183 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
17184 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
17186 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17187 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17189 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17190 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17192 /* comi/ucomi insns. */
17193 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17194 if (d
->mask
== MASK_SSE2
)
17195 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17197 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17199 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17200 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17201 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17203 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17204 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17205 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17206 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17207 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17208 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17209 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17210 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17211 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17212 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17213 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17215 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17217 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17218 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17220 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17221 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17222 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17223 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17225 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17226 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17227 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17228 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17230 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17232 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17234 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17235 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17236 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17237 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17238 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17239 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17241 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17243 /* Original 3DNow! */
17244 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17245 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17246 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17247 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17248 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17249 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17250 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17251 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17252 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17253 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17254 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17255 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17256 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17257 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17258 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17259 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17260 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17261 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17262 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17263 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17265 /* 3DNow! extension as used in the Athlon CPU. */
17266 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17267 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17268 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17269 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17270 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17271 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17274 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17276 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17277 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17279 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17280 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17282 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17283 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17284 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17285 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17286 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17288 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17289 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17290 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17291 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17293 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17294 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17296 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17298 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17299 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17301 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17302 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17303 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17304 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17305 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17307 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17309 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17310 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17311 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17312 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17314 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17315 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17316 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17318 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17319 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17320 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17321 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17323 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17324 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17325 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17327 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17328 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17330 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17331 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17333 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17334 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17335 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17337 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17338 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17339 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17341 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17342 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17344 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17345 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17346 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17347 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17349 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17350 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17351 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17352 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17354 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17355 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17357 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17359 /* Prescott New Instructions. */
17360 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17361 void_ftype_pcvoid_unsigned_unsigned
,
17362 IX86_BUILTIN_MONITOR
);
17363 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17364 void_ftype_unsigned_unsigned
,
17365 IX86_BUILTIN_MWAIT
);
17366 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17367 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17370 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17371 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17372 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17373 IX86_BUILTIN_PALIGNR
);
17375 /* AMDFAM10 SSE4A New built-ins */
17376 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17377 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17378 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17379 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17380 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17381 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17382 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17383 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17384 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17385 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17386 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17387 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17389 /* Access to the vec_init patterns. */
17390 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17391 integer_type_node
, NULL_TREE
);
17392 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17393 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17395 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17396 short_integer_type_node
,
17397 short_integer_type_node
,
17398 short_integer_type_node
, NULL_TREE
);
17399 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17400 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17402 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17403 char_type_node
, char_type_node
,
17404 char_type_node
, char_type_node
,
17405 char_type_node
, char_type_node
,
17406 char_type_node
, NULL_TREE
);
17407 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17408 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17410 /* Access to the vec_extract patterns. */
17411 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17412 integer_type_node
, NULL_TREE
);
17413 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17414 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17416 ftype
= build_function_type_list (long_long_integer_type_node
,
17417 V2DI_type_node
, integer_type_node
,
17419 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17420 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17422 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17423 integer_type_node
, NULL_TREE
);
17424 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17425 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17427 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17428 integer_type_node
, NULL_TREE
);
17429 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17430 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17432 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17433 integer_type_node
, NULL_TREE
);
17434 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17435 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17437 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17438 integer_type_node
, NULL_TREE
);
17439 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17440 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17442 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17443 integer_type_node
, NULL_TREE
);
17444 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17445 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17447 /* Access to the vec_set patterns. */
17448 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17450 integer_type_node
, NULL_TREE
);
17451 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17452 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17454 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17456 integer_type_node
, NULL_TREE
);
17457 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17458 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17461 /* Errors in the source file can cause expand_expr to return const0_rtx
17462 where we expect a vector. To avoid crashing, use one of the vector
17463 clear instructions. */
17465 safe_vector_operand (rtx x
, enum machine_mode mode
)
17467 if (x
== const0_rtx
)
17468 x
= CONST0_RTX (mode
);
17472 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand vector builtin call EXP through insn pattern ICODE,
   placing the result in TARGET (or a fresh pseudo) and returning it.
   NOTE(review): this chunk looks like a lossy extraction of GCC i386.c --
   the declarations of `pat'/`xops', several braces, and the trailing
   emit_insn/return appear to have been dropped; verify against upstream
   before relying on the text verbatim.  */
17475 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
/* Expand both call arguments and fetch the modes the pattern wants for
   the result (operand 0) and the two inputs (operands 1 and 2).  */
17478 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17479 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17480 rtx op0
= expand_normal (arg0
);
17481 rtx op1
= expand_normal (arg1
);
17482 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17483 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17484 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
/* Map erroneous const0_rtx operands to all-zero vector constants.  */
17486 if (VECTOR_MODE_P (mode0
))
17487 op0
= safe_vector_operand (op0
, mode0
);
17488 if (VECTOR_MODE_P (mode1
))
17489 op1
= safe_vector_operand (op1
, mode1
);
/* Use TARGET only when it already has the right mode and satisfies the
   result predicate; otherwise allocate a fresh pseudo.  */
17491 if (optimize
|| !target
17492 || GET_MODE (target
) != tmode
17493 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17494 target
= gen_reg_rtx (tmode
);
/* A SImode value destined for a TImode operand (e.g. a shift count) is
   loaded into the low element of a V4SI register via sse2_loadd and then
   viewed as TImode.  */
17496 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17498 rtx x
= gen_reg_rtx (V4SImode
);
17499 emit_insn (gen_sse2_loadd (x
, op1
));
17500 op1
= gen_lowpart (TImode
, x
);
17503 /* The insn must want input operands in the same modes as the
17505 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17506 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
/* Copy operands into registers when the pattern's predicates reject the
   forms produced by expansion.  */
17508 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17509 op0
= copy_to_mode_reg (mode0
, op0
);
17510 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17511 op1
= copy_to_mode_reg (mode1
, op1
);
17513 /* ??? Using ix86_fixup_binary_operands is problematic when
17514 we've got mismatched modes. Fake it. */
/* Matching modes: let ix86_fixup_binary_operands canonicalize the
   operand triple; mismatched modes: just force everything into
   registers and use a fresh result pseudo.
   NOTE(review): the lines filling the `xops' array before/after this
   appear to have been dropped by the extraction -- confirm upstream.  */
17520 if (tmode
== mode0
&& tmode
== mode1
)
17522 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17526 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17528 op0
= force_reg (mode0
, op0
);
17529 op1
= force_reg (mode1
, op1
);
17530 target
= gen_reg_rtx (tmode
);
/* Generate the insn from the pattern; upstream emits it and returns
   TARGET (tail apparently dropped here).  */
17533 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17540 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin EXP via insn pattern ICODE: arg 0 is the
   destination pointer, arg 1 the value to store.
   NOTE(review): lossy extraction -- the `pat' declaration and the
   trailing emit_insn/return appear to have been dropped.  */
17543 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17546 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17547 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17548 rtx op0
= expand_normal (arg0
);
17549 rtx op1
= expand_normal (arg1
);
17550 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17551 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
/* Map an erroneous const0_rtx source value to an all-zero vector.  */
17553 if (VECTOR_MODE_P (mode1
))
17554 op1
= safe_vector_operand (op1
, mode1
);
/* Wrap the destination address in a MEM of the store mode; the stored
   value must live in a register of the value mode.  */
17556 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17557 op1
= copy_to_mode_reg (mode1
, op1
);
17559 pat
= GEN_FCN (icode
) (op0
, op1
);
17565 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-operand builtin EXP via ICODE into TARGET.  DO_LOAD
   nonzero means the argument is a pointer and must be dereferenced.
   NOTE(review): upstream guards the gen_rtx_MEM below with
   `if (do_load)'; that guard line appears dropped by the extraction,
   as do the `pat' declaration and trailing emit_insn/return --
   verify against upstream i386.c.  */
17568 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17569 rtx target
, int do_load
)
17572 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17573 rtx op0
= expand_normal (arg0
);
17574 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17575 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* Reuse TARGET only when it has the right mode and passes the result
   predicate; otherwise allocate a fresh pseudo.  */
17577 if (optimize
|| !target
17578 || GET_MODE (target
) != tmode
17579 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17580 target
= gen_reg_rtx (tmode
);
/* Dereference the pointer argument (do_load case).  */
17582 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17585 if (VECTOR_MODE_P (mode0
))
17586 op0
= safe_vector_operand (op0
, mode0
);
/* Force the operand into a register when optimizing or when the
   pattern's predicate rejects it.  */
17588 if ((optimize
&& !register_operand (op0
, mode0
))
17589 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17590 op0
= copy_to_mode_reg (mode0
, op0
);
17593 pat
= GEN_FCN (icode
) (target
, op0
);
17600 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17601 sqrtss, rsqrtss, rcpss. */
/* These scalar insns take the same vector for both input operands (the
   upper elements pass through from operand 1).
   NOTE(review): upstream initializes `op1 = op0;' (original line 17624)
   before the operand-2 predicate check below; that line appears dropped
   by the extraction -- as shown, op1 would be read uninitialized.
   The trailing emit_insn/return also seems dropped.  Verify upstream.  */
17604 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17607 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17608 rtx op1
, op0
= expand_normal (arg0
);
17609 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17610 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* Reuse TARGET only when mode and predicate agree.  */
17612 if (optimize
|| !target
17613 || GET_MODE (target
) != tmode
17614 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17615 target
= gen_reg_rtx (tmode
);
17617 if (VECTOR_MODE_P (mode0
))
17618 op0
= safe_vector_operand (op0
, mode0
);
17620 if ((optimize
&& !register_operand (op0
, mode0
))
17621 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17622 op0
= copy_to_mode_reg (mode0
, op0
);
/* Second input operand (same value as the first for these insns).  */
17625 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17626 op1
= copy_to_mode_reg (mode0
, op1
);
17628 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17635 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17638 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17642 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17643 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17644 rtx op0
= expand_normal (arg0
);
17645 rtx op1
= expand_normal (arg1
);
17647 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17648 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17649 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17650 enum rtx_code comparison
= d
->comparison
;
17652 if (VECTOR_MODE_P (mode0
))
17653 op0
= safe_vector_operand (op0
, mode0
);
17654 if (VECTOR_MODE_P (mode1
))
17655 op1
= safe_vector_operand (op1
, mode1
);
17657 /* Swap operands if we have a comparison that isn't available in
17659 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17661 rtx tmp
= gen_reg_rtx (mode1
);
17662 emit_move_insn (tmp
, op1
);
17667 if (optimize
|| !target
17668 || GET_MODE (target
) != tmode
17669 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17670 target
= gen_reg_rtx (tmode
);
17672 if ((optimize
&& !register_operand (op0
, mode0
))
17673 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17674 op0
= copy_to_mode_reg (mode0
, op0
);
17675 if ((optimize
&& !register_operand (op1
, mode1
))
17676 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17677 op1
= copy_to_mode_reg (mode1
, op1
);
17679 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17680 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17687 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin described by D: the compare sets EFLAGS,
   and the boolean result is materialized into the low byte of a fresh
   SImode pseudo via a STRICT_LOW_PART set; the SImode register is
   returned.
   NOTE(review): lossy extraction -- the `rtx target)' tail of the
   signature, the body of the operand-swap branch, and the
   emit_insn (pat) / null-pat check around line 17730 appear dropped.
   Verify upstream.  */
17690 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17694 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17695 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17696 rtx op0
= expand_normal (arg0
);
17697 rtx op1
= expand_normal (arg1
);
17699 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17700 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17701 enum rtx_code comparison
= d
->comparison
;
17703 if (VECTOR_MODE_P (mode0
))
17704 op0
= safe_vector_operand (op0
, mode0
);
17705 if (VECTOR_MODE_P (mode1
))
17706 op1
= safe_vector_operand (op1
, mode1
);
17708 /* Swap operands if we have a comparison that isn't available in
17710 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
/* Zero an SImode pseudo, then view its low byte as the QImode
   destination for the flag-materializing set below.  */
17717 target
= gen_reg_rtx (SImode
);
17718 emit_move_insn (target
, const0_rtx
);
17719 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17721 if ((optimize
&& !register_operand (op0
, mode0
))
17722 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17723 op0
= copy_to_mode_reg (mode0
, op0
);
17724 if ((optimize
&& !register_operand (op1
, mode1
))
17725 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17726 op1
= copy_to_mode_reg (mode1
, op1
);
/* Emit the flag-setting compare, then set the low byte of TARGET from
   the comparison of the flags register.  */
17728 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17729 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17733 emit_insn (gen_rtx_SET (VOIDmode
,
17734 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17735 gen_rtx_fmt_ee (comparison
, QImode
,
/* Return the underlying SImode pseudo, not the QI subreg.  */
17739 return SUBREG_REG (target
);
17742 /* Return the integer constant in ARG. Constrain it to be in the range
17743 of the subparts of VEC_TYPE; issue an error if not. */
17746 get_element_number (tree vec_type
, tree arg
)
17748 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17750 if (!host_integerp (arg
, 1)
17751 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17753 error ("selector must be an integer constant in the range 0..%wi", max
);
17760 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17761 ix86_expand_vector_init. We DO have language-level syntax for this, in
17762 the form of (type){ init-list }. Except that since we can't place emms
17763 instructions from inside the compiler, we can't allow the use of MMX
17764 registers unless the user explicitly asks for it. So we do *not* define
17765 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17766 we have builtins invoked by mmintrin.h that gives us license to emit
17767 these sorts of instructions. */
/* TYPE is the vector type being built, EXP the builtin call whose
   arguments are the element initializers, TARGET the suggested result.
   NOTE(review): the closing brace / `return target;' tail appears to
   have been dropped by the extraction.  */
17770 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17772 enum machine_mode tmode
= TYPE_MODE (type
);
17773 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17774 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17775 rtvec v
= rtvec_alloc (n_elt
);
/* Each call argument supplies exactly one vector element.  */
17777 gcc_assert (VECTOR_MODE_P (tmode
));
17778 gcc_assert (call_expr_nargs (exp
) == n_elt
);
/* Expand each argument and coerce it to the element mode.  */
17780 for (i
= 0; i
< n_elt
; ++i
)
17782 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17783 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17786 if (!target
|| !register_operand (target
, tmode
))
17787 target
= gen_reg_rtx (tmode
);
17789 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17793 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17794 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17795 had a language-level syntax for referencing vector elements. */
/* EXP: arg 0 is the vector, arg 1 the constant element selector.
   NOTE(review): the declarations of arg0/arg1/op0/elt and the
   `return target;' tail appear to have been dropped by the
   extraction.  */
17798 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17800 enum machine_mode tmode
, mode0
;
17805 arg0
= CALL_EXPR_ARG (exp
, 0);
17806 arg1
= CALL_EXPR_ARG (exp
, 1);
17808 op0
= expand_normal (arg0
);
/* Validate the selector; get_element_number diagnoses out-of-range or
   non-constant selectors.  */
17809 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
/* tmode is the element mode, mode0 the whole-vector mode.  */
17811 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17812 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17813 gcc_assert (VECTOR_MODE_P (mode0
));
17815 op0
= force_reg (mode0
, op0
);
17817 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17818 target
= gen_reg_rtx (tmode
);
17820 ix86_expand_vector_extract (true, target
, op0
, elt
);
17825 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17826 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17827 a language-level syntax for referencing vector elements. */
/* EXP: arg 0 is the vector, arg 1 the new element value, arg 2 the
   constant element selector.  Sets the element in place and (per the
   upstream source) returns the updated vector register; the
   declarations of op0/op1/elt and the `return op0;' tail appear to
   have been dropped by the extraction -- verify upstream.  */
17830 ix86_expand_vec_set_builtin (tree exp
)
17832 enum machine_mode tmode
, mode1
;
17833 tree arg0
, arg1
, arg2
;
17837 arg0
= CALL_EXPR_ARG (exp
, 0);
17838 arg1
= CALL_EXPR_ARG (exp
, 1);
17839 arg2
= CALL_EXPR_ARG (exp
, 2);
/* tmode is the whole-vector mode, mode1 the element mode.  */
17841 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17842 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17843 gcc_assert (VECTOR_MODE_P (tmode
));
17845 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17846 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
/* Validate the selector before emitting any code that uses it.  */
17847 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
/* Coerce the new element value to the element mode if expansion
   produced a different (non-VOID) mode.  */
17849 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17850 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17852 op0
= force_reg (tmode
, op0
);
17853 op1
= force_reg (mode1
, op1
);
17855 ix86_expand_vector_set (true, op0
, op1
, elt
);
17860 /* Expand an expression EXP that calls a built-in function,
17861 with result going to TARGET if that's convenient
17862 (and in mode MODE if that's convenient).
17863 SUBTARGET may be used as the target for computing one of EXP's operands.
17864 IGNORE is nonzero if the value is to be ignored. */
17867 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17868 enum machine_mode mode ATTRIBUTE_UNUSED
,
17869 int ignore ATTRIBUTE_UNUSED
)
17871 const struct builtin_description
*d
;
17873 enum insn_code icode
;
17874 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17875 tree arg0
, arg1
, arg2
, arg3
;
17876 rtx op0
, op1
, op2
, op3
, pat
;
17877 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17878 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17882 case IX86_BUILTIN_EMMS
:
17883 emit_insn (gen_mmx_emms ());
17886 case IX86_BUILTIN_SFENCE
:
17887 emit_insn (gen_sse_sfence ());
17890 case IX86_BUILTIN_MASKMOVQ
:
17891 case IX86_BUILTIN_MASKMOVDQU
:
17892 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17893 ? CODE_FOR_mmx_maskmovq
17894 : CODE_FOR_sse2_maskmovdqu
);
17895 /* Note the arg order is different from the operand order. */
17896 arg1
= CALL_EXPR_ARG (exp
, 0);
17897 arg2
= CALL_EXPR_ARG (exp
, 1);
17898 arg0
= CALL_EXPR_ARG (exp
, 2);
17899 op0
= expand_normal (arg0
);
17900 op1
= expand_normal (arg1
);
17901 op2
= expand_normal (arg2
);
17902 mode0
= insn_data
[icode
].operand
[0].mode
;
17903 mode1
= insn_data
[icode
].operand
[1].mode
;
17904 mode2
= insn_data
[icode
].operand
[2].mode
;
17906 op0
= force_reg (Pmode
, op0
);
17907 op0
= gen_rtx_MEM (mode1
, op0
);
17909 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17910 op0
= copy_to_mode_reg (mode0
, op0
);
17911 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17912 op1
= copy_to_mode_reg (mode1
, op1
);
17913 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17914 op2
= copy_to_mode_reg (mode2
, op2
);
17915 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17921 case IX86_BUILTIN_SQRTSS
:
17922 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17923 case IX86_BUILTIN_RSQRTSS
:
17924 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17925 case IX86_BUILTIN_RCPSS
:
17926 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17928 case IX86_BUILTIN_LOADUPS
:
17929 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17931 case IX86_BUILTIN_STOREUPS
:
17932 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17934 case IX86_BUILTIN_LOADHPS
:
17935 case IX86_BUILTIN_LOADLPS
:
17936 case IX86_BUILTIN_LOADHPD
:
17937 case IX86_BUILTIN_LOADLPD
:
17938 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17939 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17940 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17941 : CODE_FOR_sse2_loadlpd
);
17942 arg0
= CALL_EXPR_ARG (exp
, 0);
17943 arg1
= CALL_EXPR_ARG (exp
, 1);
17944 op0
= expand_normal (arg0
);
17945 op1
= expand_normal (arg1
);
17946 tmode
= insn_data
[icode
].operand
[0].mode
;
17947 mode0
= insn_data
[icode
].operand
[1].mode
;
17948 mode1
= insn_data
[icode
].operand
[2].mode
;
17950 op0
= force_reg (mode0
, op0
);
17951 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17952 if (optimize
|| target
== 0
17953 || GET_MODE (target
) != tmode
17954 || !register_operand (target
, tmode
))
17955 target
= gen_reg_rtx (tmode
);
17956 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17962 case IX86_BUILTIN_STOREHPS
:
17963 case IX86_BUILTIN_STORELPS
:
17964 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17965 : CODE_FOR_sse_storelps
);
17966 arg0
= CALL_EXPR_ARG (exp
, 0);
17967 arg1
= CALL_EXPR_ARG (exp
, 1);
17968 op0
= expand_normal (arg0
);
17969 op1
= expand_normal (arg1
);
17970 mode0
= insn_data
[icode
].operand
[0].mode
;
17971 mode1
= insn_data
[icode
].operand
[1].mode
;
17973 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17974 op1
= force_reg (mode1
, op1
);
17976 pat
= GEN_FCN (icode
) (op0
, op1
);
17982 case IX86_BUILTIN_MOVNTPS
:
17983 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
17984 case IX86_BUILTIN_MOVNTQ
:
17985 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
17987 case IX86_BUILTIN_LDMXCSR
:
17988 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
17989 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17990 emit_move_insn (target
, op0
);
17991 emit_insn (gen_sse_ldmxcsr (target
));
17994 case IX86_BUILTIN_STMXCSR
:
17995 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17996 emit_insn (gen_sse_stmxcsr (target
));
17997 return copy_to_mode_reg (SImode
, target
);
17999 case IX86_BUILTIN_SHUFPS
:
18000 case IX86_BUILTIN_SHUFPD
:
18001 icode
= (fcode
== IX86_BUILTIN_SHUFPS
18002 ? CODE_FOR_sse_shufps
18003 : CODE_FOR_sse2_shufpd
);
18004 arg0
= CALL_EXPR_ARG (exp
, 0);
18005 arg1
= CALL_EXPR_ARG (exp
, 1);
18006 arg2
= CALL_EXPR_ARG (exp
, 2);
18007 op0
= expand_normal (arg0
);
18008 op1
= expand_normal (arg1
);
18009 op2
= expand_normal (arg2
);
18010 tmode
= insn_data
[icode
].operand
[0].mode
;
18011 mode0
= insn_data
[icode
].operand
[1].mode
;
18012 mode1
= insn_data
[icode
].operand
[2].mode
;
18013 mode2
= insn_data
[icode
].operand
[3].mode
;
18015 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18016 op0
= copy_to_mode_reg (mode0
, op0
);
18017 if ((optimize
&& !register_operand (op1
, mode1
))
18018 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18019 op1
= copy_to_mode_reg (mode1
, op1
);
18020 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18022 /* @@@ better error message */
18023 error ("mask must be an immediate");
18024 return gen_reg_rtx (tmode
);
18026 if (optimize
|| target
== 0
18027 || GET_MODE (target
) != tmode
18028 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18029 target
= gen_reg_rtx (tmode
);
18030 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18036 case IX86_BUILTIN_PSHUFW
:
18037 case IX86_BUILTIN_PSHUFD
:
18038 case IX86_BUILTIN_PSHUFHW
:
18039 case IX86_BUILTIN_PSHUFLW
:
18040 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
18041 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
18042 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
18043 : CODE_FOR_mmx_pshufw
);
18044 arg0
= CALL_EXPR_ARG (exp
, 0);
18045 arg1
= CALL_EXPR_ARG (exp
, 1);
18046 op0
= expand_normal (arg0
);
18047 op1
= expand_normal (arg1
);
18048 tmode
= insn_data
[icode
].operand
[0].mode
;
18049 mode1
= insn_data
[icode
].operand
[1].mode
;
18050 mode2
= insn_data
[icode
].operand
[2].mode
;
18052 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18053 op0
= copy_to_mode_reg (mode1
, op0
);
18054 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18056 /* @@@ better error message */
18057 error ("mask must be an immediate");
18061 || GET_MODE (target
) != tmode
18062 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18063 target
= gen_reg_rtx (tmode
);
18064 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18070 case IX86_BUILTIN_PSLLDQI128
:
18071 case IX86_BUILTIN_PSRLDQI128
:
18072 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
18073 : CODE_FOR_sse2_lshrti3
);
18074 arg0
= CALL_EXPR_ARG (exp
, 0);
18075 arg1
= CALL_EXPR_ARG (exp
, 1);
18076 op0
= expand_normal (arg0
);
18077 op1
= expand_normal (arg1
);
18078 tmode
= insn_data
[icode
].operand
[0].mode
;
18079 mode1
= insn_data
[icode
].operand
[1].mode
;
18080 mode2
= insn_data
[icode
].operand
[2].mode
;
18082 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18084 op0
= copy_to_reg (op0
);
18085 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18087 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18089 error ("shift must be an immediate");
18092 target
= gen_reg_rtx (V2DImode
);
18093 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
18099 case IX86_BUILTIN_FEMMS
:
18100 emit_insn (gen_mmx_femms ());
18103 case IX86_BUILTIN_PAVGUSB
:
18104 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
18106 case IX86_BUILTIN_PF2ID
:
18107 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
18109 case IX86_BUILTIN_PFACC
:
18110 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
18112 case IX86_BUILTIN_PFADD
:
18113 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
18115 case IX86_BUILTIN_PFCMPEQ
:
18116 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
18118 case IX86_BUILTIN_PFCMPGE
:
18119 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
18121 case IX86_BUILTIN_PFCMPGT
:
18122 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
18124 case IX86_BUILTIN_PFMAX
:
18125 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
18127 case IX86_BUILTIN_PFMIN
:
18128 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
18130 case IX86_BUILTIN_PFMUL
:
18131 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
18133 case IX86_BUILTIN_PFRCP
:
18134 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
18136 case IX86_BUILTIN_PFRCPIT1
:
18137 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
18139 case IX86_BUILTIN_PFRCPIT2
:
18140 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
18142 case IX86_BUILTIN_PFRSQIT1
:
18143 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
18145 case IX86_BUILTIN_PFRSQRT
:
18146 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
18148 case IX86_BUILTIN_PFSUB
:
18149 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
18151 case IX86_BUILTIN_PFSUBR
:
18152 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
18154 case IX86_BUILTIN_PI2FD
:
18155 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
18157 case IX86_BUILTIN_PMULHRW
:
18158 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
18160 case IX86_BUILTIN_PF2IW
:
18161 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18163 case IX86_BUILTIN_PFNACC
:
18164 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18166 case IX86_BUILTIN_PFPNACC
:
18167 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18169 case IX86_BUILTIN_PI2FW
:
18170 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18172 case IX86_BUILTIN_PSWAPDSI
:
18173 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18175 case IX86_BUILTIN_PSWAPDSF
:
18176 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18178 case IX86_BUILTIN_SQRTSD
:
18179 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18180 case IX86_BUILTIN_LOADUPD
:
18181 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18182 case IX86_BUILTIN_STOREUPD
:
18183 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18185 case IX86_BUILTIN_MFENCE
:
18186 emit_insn (gen_sse2_mfence ());
18188 case IX86_BUILTIN_LFENCE
:
18189 emit_insn (gen_sse2_lfence ());
18192 case IX86_BUILTIN_CLFLUSH
:
18193 arg0
= CALL_EXPR_ARG (exp
, 0);
18194 op0
= expand_normal (arg0
);
18195 icode
= CODE_FOR_sse2_clflush
;
18196 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18197 op0
= copy_to_mode_reg (Pmode
, op0
);
18199 emit_insn (gen_sse2_clflush (op0
));
18202 case IX86_BUILTIN_MOVNTPD
:
18203 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18204 case IX86_BUILTIN_MOVNTDQ
:
18205 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18206 case IX86_BUILTIN_MOVNTI
:
18207 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18209 case IX86_BUILTIN_LOADDQU
:
18210 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18211 case IX86_BUILTIN_STOREDQU
:
18212 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18214 case IX86_BUILTIN_MONITOR
:
18215 arg0
= CALL_EXPR_ARG (exp
, 0);
18216 arg1
= CALL_EXPR_ARG (exp
, 1);
18217 arg2
= CALL_EXPR_ARG (exp
, 2);
18218 op0
= expand_normal (arg0
);
18219 op1
= expand_normal (arg1
);
18220 op2
= expand_normal (arg2
);
18222 op0
= copy_to_mode_reg (Pmode
, op0
);
18224 op1
= copy_to_mode_reg (SImode
, op1
);
18226 op2
= copy_to_mode_reg (SImode
, op2
);
18228 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18230 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18233 case IX86_BUILTIN_MWAIT
:
18234 arg0
= CALL_EXPR_ARG (exp
, 0);
18235 arg1
= CALL_EXPR_ARG (exp
, 1);
18236 op0
= expand_normal (arg0
);
18237 op1
= expand_normal (arg1
);
18239 op0
= copy_to_mode_reg (SImode
, op0
);
18241 op1
= copy_to_mode_reg (SImode
, op1
);
18242 emit_insn (gen_sse3_mwait (op0
, op1
));
18245 case IX86_BUILTIN_LDDQU
:
18246 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18249 case IX86_BUILTIN_PALIGNR
:
18250 case IX86_BUILTIN_PALIGNR128
:
18251 if (fcode
== IX86_BUILTIN_PALIGNR
)
18253 icode
= CODE_FOR_ssse3_palignrdi
;
18258 icode
= CODE_FOR_ssse3_palignrti
;
18261 arg0
= CALL_EXPR_ARG (exp
, 0);
18262 arg1
= CALL_EXPR_ARG (exp
, 1);
18263 arg2
= CALL_EXPR_ARG (exp
, 2);
18264 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18265 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18266 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18267 tmode
= insn_data
[icode
].operand
[0].mode
;
18268 mode1
= insn_data
[icode
].operand
[1].mode
;
18269 mode2
= insn_data
[icode
].operand
[2].mode
;
18270 mode3
= insn_data
[icode
].operand
[3].mode
;
18272 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18274 op0
= copy_to_reg (op0
);
18275 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18277 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18279 op1
= copy_to_reg (op1
);
18280 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18282 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18284 error ("shift must be an immediate");
18287 target
= gen_reg_rtx (mode
);
18288 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18295 case IX86_BUILTIN_MOVNTSD
:
18296 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18298 case IX86_BUILTIN_MOVNTSS
:
18299 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18301 case IX86_BUILTIN_INSERTQ
:
18302 case IX86_BUILTIN_EXTRQ
:
18303 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18304 ? CODE_FOR_sse4a_extrq
18305 : CODE_FOR_sse4a_insertq
);
18306 arg0
= CALL_EXPR_ARG (exp
, 0);
18307 arg1
= CALL_EXPR_ARG (exp
, 1);
18308 op0
= expand_normal (arg0
);
18309 op1
= expand_normal (arg1
);
18310 tmode
= insn_data
[icode
].operand
[0].mode
;
18311 mode1
= insn_data
[icode
].operand
[1].mode
;
18312 mode2
= insn_data
[icode
].operand
[2].mode
;
18313 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18314 op0
= copy_to_mode_reg (mode1
, op0
);
18315 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18316 op1
= copy_to_mode_reg (mode2
, op1
);
18317 if (optimize
|| target
== 0
18318 || GET_MODE (target
) != tmode
18319 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18320 target
= gen_reg_rtx (tmode
);
18321 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18327 case IX86_BUILTIN_EXTRQI
:
18328 icode
= CODE_FOR_sse4a_extrqi
;
18329 arg0
= CALL_EXPR_ARG (exp
, 0);
18330 arg1
= CALL_EXPR_ARG (exp
, 1);
18331 arg2
= CALL_EXPR_ARG (exp
, 2);
18332 op0
= expand_normal (arg0
);
18333 op1
= expand_normal (arg1
);
18334 op2
= expand_normal (arg2
);
18335 tmode
= insn_data
[icode
].operand
[0].mode
;
18336 mode1
= insn_data
[icode
].operand
[1].mode
;
18337 mode2
= insn_data
[icode
].operand
[2].mode
;
18338 mode3
= insn_data
[icode
].operand
[3].mode
;
18339 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18340 op0
= copy_to_mode_reg (mode1
, op0
);
18341 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18343 error ("index mask must be an immediate");
18344 return gen_reg_rtx (tmode
);
18346 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18348 error ("length mask must be an immediate");
18349 return gen_reg_rtx (tmode
);
18351 if (optimize
|| target
== 0
18352 || GET_MODE (target
) != tmode
18353 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18354 target
= gen_reg_rtx (tmode
);
18355 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18361 case IX86_BUILTIN_INSERTQI
:
18362 icode
= CODE_FOR_sse4a_insertqi
;
18363 arg0
= CALL_EXPR_ARG (exp
, 0);
18364 arg1
= CALL_EXPR_ARG (exp
, 1);
18365 arg2
= CALL_EXPR_ARG (exp
, 2);
18366 arg3
= CALL_EXPR_ARG (exp
, 3);
18367 op0
= expand_normal (arg0
);
18368 op1
= expand_normal (arg1
);
18369 op2
= expand_normal (arg2
);
18370 op3
= expand_normal (arg3
);
18371 tmode
= insn_data
[icode
].operand
[0].mode
;
18372 mode1
= insn_data
[icode
].operand
[1].mode
;
18373 mode2
= insn_data
[icode
].operand
[2].mode
;
18374 mode3
= insn_data
[icode
].operand
[3].mode
;
18375 mode4
= insn_data
[icode
].operand
[4].mode
;
18377 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18378 op0
= copy_to_mode_reg (mode1
, op0
);
18380 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18381 op1
= copy_to_mode_reg (mode2
, op1
);
18383 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18385 error ("index mask must be an immediate");
18386 return gen_reg_rtx (tmode
);
18388 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18390 error ("length mask must be an immediate");
18391 return gen_reg_rtx (tmode
);
18393 if (optimize
|| target
== 0
18394 || GET_MODE (target
) != tmode
18395 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18396 target
= gen_reg_rtx (tmode
);
18397 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18403 case IX86_BUILTIN_VEC_INIT_V2SI
:
18404 case IX86_BUILTIN_VEC_INIT_V4HI
:
18405 case IX86_BUILTIN_VEC_INIT_V8QI
:
18406 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18408 case IX86_BUILTIN_VEC_EXT_V2DF
:
18409 case IX86_BUILTIN_VEC_EXT_V2DI
:
18410 case IX86_BUILTIN_VEC_EXT_V4SF
:
18411 case IX86_BUILTIN_VEC_EXT_V4SI
:
18412 case IX86_BUILTIN_VEC_EXT_V8HI
:
18413 case IX86_BUILTIN_VEC_EXT_V2SI
:
18414 case IX86_BUILTIN_VEC_EXT_V4HI
:
18415 return ix86_expand_vec_ext_builtin (exp
, target
);
18417 case IX86_BUILTIN_VEC_SET_V8HI
:
18418 case IX86_BUILTIN_VEC_SET_V4HI
:
18419 return ix86_expand_vec_set_builtin (exp
);
18425 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18426 if (d
->code
== fcode
)
18428 /* Compares are treated specially. */
18429 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18430 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18431 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18432 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18433 return ix86_expand_sse_compare (d
, exp
, target
);
18435 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18438 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18439 if (d
->code
== fcode
)
18440 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18442 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18443 if (d
->code
== fcode
)
18444 return ix86_expand_sse_comi (d
, exp
, target
);
18446 gcc_unreachable ();
18449 /* Returns a function decl for a vectorized version of the builtin function
18450 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18451 if it is not available. */
18454 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18457 enum machine_mode in_mode
, out_mode
;
18460 if (TREE_CODE (type_out
) != VECTOR_TYPE
18461 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18464 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18465 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18466 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18467 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18471 case BUILT_IN_SQRT
:
18472 if (out_mode
== DFmode
&& out_n
== 2
18473 && in_mode
== DFmode
&& in_n
== 2)
18474 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18477 case BUILT_IN_SQRTF
:
18478 if (out_mode
== SFmode
&& out_n
== 4
18479 && in_mode
== SFmode
&& in_n
== 4)
18480 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18483 case BUILT_IN_LRINTF
:
18484 if (out_mode
== SImode
&& out_n
== 4
18485 && in_mode
== SFmode
&& in_n
== 4)
18486 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18496 /* Returns a decl of a function that implements conversion of the
18497 input vector of type TYPE, or NULL_TREE if it is not available. */
18500 ix86_builtin_conversion (enum tree_code code
, tree type
)
18502 if (TREE_CODE (type
) != VECTOR_TYPE
)
18508 switch (TYPE_MODE (type
))
18511 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
18516 case FIX_TRUNC_EXPR
:
18517 switch (TYPE_MODE (type
))
18520 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18530 /* Store OPERAND to the memory after reload is completed. This means
18531 that we can't easily use assign_stack_local. */
18533 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18537 gcc_assert (reload_completed
);
18538 if (TARGET_RED_ZONE
)
18540 result
= gen_rtx_MEM (mode
,
18541 gen_rtx_PLUS (Pmode
,
18543 GEN_INT (-RED_ZONE_SIZE
)));
18544 emit_move_insn (result
, operand
);
18546 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18552 operand
= gen_lowpart (DImode
, operand
);
18556 gen_rtx_SET (VOIDmode
,
18557 gen_rtx_MEM (DImode
,
18558 gen_rtx_PRE_DEC (DImode
,
18559 stack_pointer_rtx
)),
18563 gcc_unreachable ();
18565 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18574 split_di (&operand
, 1, operands
, operands
+ 1);
18576 gen_rtx_SET (VOIDmode
,
18577 gen_rtx_MEM (SImode
,
18578 gen_rtx_PRE_DEC (Pmode
,
18579 stack_pointer_rtx
)),
18582 gen_rtx_SET (VOIDmode
,
18583 gen_rtx_MEM (SImode
,
18584 gen_rtx_PRE_DEC (Pmode
,
18585 stack_pointer_rtx
)),
18590 /* Store HImodes as SImodes. */
18591 operand
= gen_lowpart (SImode
, operand
);
18595 gen_rtx_SET (VOIDmode
,
18596 gen_rtx_MEM (GET_MODE (operand
),
18597 gen_rtx_PRE_DEC (SImode
,
18598 stack_pointer_rtx
)),
18602 gcc_unreachable ();
18604 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18609 /* Free operand from the memory. */
18611 ix86_free_from_memory (enum machine_mode mode
)
18613 if (!TARGET_RED_ZONE
)
18617 if (mode
== DImode
|| TARGET_64BIT
)
18621 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18622 to pop or add instruction if registers are available. */
18623 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18624 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18629 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18630 QImode must go into class Q_REGS.
18631 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18632 movdf to do mem-to-mem moves through integer regs. */
18634 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18636 enum machine_mode mode
= GET_MODE (x
);
18638 /* We're only allowed to return a subclass of CLASS. Many of the
18639 following checks fail for NO_REGS, so eliminate that early. */
18640 if (class == NO_REGS
)
18643 /* All classes can load zeros. */
18644 if (x
== CONST0_RTX (mode
))
18647 /* Force constants into memory if we are loading a (nonzero) constant into
18648 an MMX or SSE register. This is because there are no MMX/SSE instructions
18649 to load from a constant. */
18651 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18654 /* Prefer SSE regs only, if we can use them for math. */
18655 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18656 return SSE_CLASS_P (class) ? class : NO_REGS
;
18658 /* Floating-point constants need more complex checks. */
18659 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18661 /* General regs can load everything. */
18662 if (reg_class_subset_p (class, GENERAL_REGS
))
18665 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18666 zero above. We only want to wind up preferring 80387 registers if
18667 we plan on doing computation with them. */
18669 && standard_80387_constant_p (x
))
18671 /* Limit class to non-sse. */
18672 if (class == FLOAT_SSE_REGS
)
18674 if (class == FP_TOP_SSE_REGS
)
18676 if (class == FP_SECOND_SSE_REGS
)
18677 return FP_SECOND_REG
;
18678 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18685 /* Generally when we see PLUS here, it's the function invariant
18686 (plus soft-fp const_int). Which can only be computed into general
18688 if (GET_CODE (x
) == PLUS
)
18689 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18691 /* QImode constants are easy to load, but non-constant QImode data
18692 must go into Q_REGS. */
18693 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18695 if (reg_class_subset_p (class, Q_REGS
))
18697 if (reg_class_subset_p (Q_REGS
, class))
18705 /* Discourage putting floating-point values in SSE registers unless
18706 SSE math is being used, and likewise for the 387 registers. */
18708 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18710 enum machine_mode mode
= GET_MODE (x
);
18712 /* Restrict the output reload class to the register bank that we are doing
18713 math on. If we would like not to return a subset of CLASS, reject this
18714 alternative: if reload cannot do this, it will still use its choice. */
18715 mode
= GET_MODE (x
);
18716 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18717 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18719 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18721 if (class == FP_TOP_SSE_REGS
)
18723 else if (class == FP_SECOND_SSE_REGS
)
18724 return FP_SECOND_REG
;
18726 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18732 /* If we are copying between general and FP registers, we need a memory
18733 location. The same is true for SSE and MMX registers.
18735 The macro can't work reliably when one of the CLASSES is class containing
18736 registers from multiple units (SSE, MMX, integer). We avoid this by never
18737 combining those units in single alternative in the machine description.
18738 Ensure that this constraint holds to avoid unexpected surprises.
18740 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18741 enforce these sanity checks. */
18744 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18745 enum machine_mode mode
, int strict
)
18747 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18748 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18749 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18750 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18751 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18752 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18754 gcc_assert (!strict
);
18758 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18761 /* ??? This is a lie. We do have moves between mmx/general, and for
18762 mmx/sse2. But by saying we need secondary memory we discourage the
18763 register allocator from using the mmx registers unless needed. */
18764 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18767 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18769 /* SSE1 doesn't have any direct moves from other classes. */
18773 /* If the target says that inter-unit moves are more expensive
18774 than moving through memory, then don't generate them. */
18775 if (!TARGET_INTER_UNIT_MOVES
)
18778 /* Between SSE and general, we have moves no larger than word size. */
18779 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18786 /* Return true if the registers in CLASS cannot represent the change from
18787 modes FROM to TO. */
18790 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18791 enum reg_class
class)
18796 /* x87 registers can't do subreg at all, as all values are reformatted
18797 to extended precision. */
18798 if (MAYBE_FLOAT_CLASS_P (class))
18801 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18803 /* Vector registers do not support QI or HImode loads. If we don't
18804 disallow a change to these modes, reload will assume it's ok to
18805 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18806 the vec_dupv4hi pattern. */
18807 if (GET_MODE_SIZE (from
) < 4)
18810 /* Vector registers do not support subreg with nonzero offsets, which
18811 are otherwise valid for integer registers. Since we can't see
18812 whether we have a nonzero offset from here, prohibit all
18813 nonparadoxical subregs changing size. */
18814 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18821 /* Return the cost of moving data from a register in class CLASS1 to
18822 one in class CLASS2.
18824 It is not required that the cost always equal 2 when FROM is the same as TO;
18825 on some machines it is expensive to move between registers if they are not
18826 general registers. */
18829 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18830 enum reg_class class2
)
18832 /* In case we require secondary memory, compute cost of the store followed
18833 by load. In order to avoid bad register allocation choices, we need
18834 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18836 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
18840 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18841 MEMORY_MOVE_COST (mode
, class1
, 1));
18842 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18843 MEMORY_MOVE_COST (mode
, class2
, 1));
18845 /* In case of copying from general_purpose_register we may emit multiple
18846 stores followed by single load causing memory size mismatch stall.
18847 Count this as arbitrarily high cost of 20. */
18848 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18851 /* In the case of FP/MMX moves, the registers actually overlap, and we
18852 have to switch modes in order to treat them differently. */
18853 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18854 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18860 /* Moves between SSE/MMX and integer unit are expensive. */
18861 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18862 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18863 return ix86_cost
->mmxsse_to_integer
;
18864 if (MAYBE_FLOAT_CLASS_P (class1
))
18865 return ix86_cost
->fp_move
;
18866 if (MAYBE_SSE_CLASS_P (class1
))
18867 return ix86_cost
->sse_move
;
18868 if (MAYBE_MMX_CLASS_P (class1
))
18869 return ix86_cost
->mmx_move
;
18873 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18876 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18878 /* Flags and only flags can only hold CCmode values. */
18879 if (CC_REGNO_P (regno
))
18880 return GET_MODE_CLASS (mode
) == MODE_CC
;
18881 if (GET_MODE_CLASS (mode
) == MODE_CC
18882 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18883 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18885 if (FP_REGNO_P (regno
))
18886 return VALID_FP_MODE_P (mode
);
18887 if (SSE_REGNO_P (regno
))
18889 /* We implement the move patterns for all vector modes into and
18890 out of SSE registers, even when no operation instructions
18892 return (VALID_SSE_REG_MODE (mode
)
18893 || VALID_SSE2_REG_MODE (mode
)
18894 || VALID_MMX_REG_MODE (mode
)
18895 || VALID_MMX_REG_MODE_3DNOW (mode
));
18897 if (MMX_REGNO_P (regno
))
18899 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18900 so if the register is available at all, then we can move data of
18901 the given mode into or out of it. */
18902 return (VALID_MMX_REG_MODE (mode
)
18903 || VALID_MMX_REG_MODE_3DNOW (mode
));
18906 if (mode
== QImode
)
18908 /* Take care for QImode values - they can be in non-QI regs,
18909 but then they do cause partial register stalls. */
18910 if (regno
< 4 || TARGET_64BIT
)
18912 if (!TARGET_PARTIAL_REG_STALL
)
18914 return reload_in_progress
|| reload_completed
;
18916 /* We handle both integer and floats in the general purpose registers. */
18917 else if (VALID_INT_MODE_P (mode
))
18919 else if (VALID_FP_MODE_P (mode
))
18921 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18922 on to use that value in smaller contexts, this can easily force a
18923 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18924 supporting DImode, allow it. */
18925 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18931 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18932 tieable integer mode. */
18935 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18944 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18947 return TARGET_64BIT
;
18954 /* Return true if MODE1 is accessible in a register that can hold MODE2
18955 without copying. That is, all register classes that can hold MODE2
18956 can also hold MODE1. */
18959 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18961 if (mode1
== mode2
)
18964 if (ix86_tieable_integer_mode_p (mode1
)
18965 && ix86_tieable_integer_mode_p (mode2
))
18968 /* MODE2 being XFmode implies fp stack or general regs, which means we
18969 can tie any smaller floating point modes to it. Note that we do not
18970 tie this with TFmode. */
18971 if (mode2
== XFmode
)
18972 return mode1
== SFmode
|| mode1
== DFmode
;
18974 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18975 that we can tie it with SFmode. */
18976 if (mode2
== DFmode
)
18977 return mode1
== SFmode
;
18979 /* If MODE2 is only appropriate for an SSE register, then tie with
18980 any other mode acceptable to SSE registers. */
18981 if (GET_MODE_SIZE (mode2
) == 16
18982 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18983 return (GET_MODE_SIZE (mode1
) == 16
18984 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18986 /* If MODE2 is appropriate for an MMX register, then tie
18987 with any other mode acceptable to MMX registers. */
18988 if (GET_MODE_SIZE (mode2
) == 8
18989 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18990 return (GET_MODE_SIZE (mode1
) == 8
18991 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
18996 /* Return the cost of moving data of mode M between a
18997 register and memory. A value of 2 is the default; this cost is
18998 relative to those in `REGISTER_MOVE_COST'.
19000 If moving between registers and memory is more expensive than
19001 between two registers, you should define this macro to express the
19004 Model also increased moving costs of QImode registers in non
19008 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
19010 if (FLOAT_CLASS_P (class))
19027 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
19029 if (SSE_CLASS_P (class))
19032 switch (GET_MODE_SIZE (mode
))
19046 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
19048 if (MMX_CLASS_P (class))
19051 switch (GET_MODE_SIZE (mode
))
19062 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
19064 switch (GET_MODE_SIZE (mode
))
19068 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
19069 : ix86_cost
->movzbl_load
);
19071 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
19072 : ix86_cost
->int_store
[0] + 4);
19075 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
19077 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19078 if (mode
== TFmode
)
19080 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
19081 * (((int) GET_MODE_SIZE (mode
)
19082 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
19086 /* Compute a (partial) cost for rtx X. Return true if the complete
19087 cost has been computed, and false if subexpressions should be
19088 scanned. In either case, *TOTAL contains the cost result. */
19091 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
19093 enum machine_mode mode
= GET_MODE (x
);
19101 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
19103 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
19105 else if (flag_pic
&& SYMBOLIC_CONST (x
)
19107 || (!GET_CODE (x
) != LABEL_REF
19108 && (GET_CODE (x
) != SYMBOL_REF
19109 || !SYMBOL_REF_LOCAL_P (x
)))))
19116 if (mode
== VOIDmode
)
19119 switch (standard_80387_constant_p (x
))
19124 default: /* Other constants */
19129 /* Start with (MEM (SYMBOL_REF)), since that's where
19130 it'll probably end up. Add a penalty for size. */
19131 *total
= (COSTS_N_INSNS (1)
19132 + (flag_pic
!= 0 && !TARGET_64BIT
)
19133 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
19139 /* The zero extensions is often completely free on x86_64, so make
19140 it as cheap as possible. */
19141 if (TARGET_64BIT
&& mode
== DImode
19142 && GET_MODE (XEXP (x
, 0)) == SImode
)
19144 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19145 *total
= ix86_cost
->add
;
19147 *total
= ix86_cost
->movzx
;
19151 *total
= ix86_cost
->movsx
;
19155 if (CONST_INT_P (XEXP (x
, 1))
19156 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
19158 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19161 *total
= ix86_cost
->add
;
19164 if ((value
== 2 || value
== 3)
19165 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19167 *total
= ix86_cost
->lea
;
19177 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19179 if (CONST_INT_P (XEXP (x
, 1)))
19181 if (INTVAL (XEXP (x
, 1)) > 32)
19182 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19184 *total
= ix86_cost
->shift_const
* 2;
19188 if (GET_CODE (XEXP (x
, 1)) == AND
)
19189 *total
= ix86_cost
->shift_var
* 2;
19191 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19196 if (CONST_INT_P (XEXP (x
, 1)))
19197 *total
= ix86_cost
->shift_const
;
19199 *total
= ix86_cost
->shift_var
;
19204 if (FLOAT_MODE_P (mode
))
19206 *total
= ix86_cost
->fmul
;
19211 rtx op0
= XEXP (x
, 0);
19212 rtx op1
= XEXP (x
, 1);
19214 if (CONST_INT_P (XEXP (x
, 1)))
19216 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19217 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19221 /* This is arbitrary. */
19224 /* Compute costs correctly for widening multiplication. */
19225 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19226 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19227 == GET_MODE_SIZE (mode
))
19229 int is_mulwiden
= 0;
19230 enum machine_mode inner_mode
= GET_MODE (op0
);
19232 if (GET_CODE (op0
) == GET_CODE (op1
))
19233 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19234 else if (CONST_INT_P (op1
))
19236 if (GET_CODE (op0
) == SIGN_EXTEND
)
19237 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19240 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19244 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19247 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19248 + nbits
* ix86_cost
->mult_bit
19249 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19258 if (FLOAT_MODE_P (mode
))
19259 *total
= ix86_cost
->fdiv
;
19261 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19265 if (FLOAT_MODE_P (mode
))
19266 *total
= ix86_cost
->fadd
;
19267 else if (GET_MODE_CLASS (mode
) == MODE_INT
19268 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19270 if (GET_CODE (XEXP (x
, 0)) == PLUS
19271 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19272 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19273 && CONSTANT_P (XEXP (x
, 1)))
19275 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19276 if (val
== 2 || val
== 4 || val
== 8)
19278 *total
= ix86_cost
->lea
;
19279 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19280 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19282 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19286 else if (GET_CODE (XEXP (x
, 0)) == MULT
19287 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19289 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19290 if (val
== 2 || val
== 4 || val
== 8)
19292 *total
= ix86_cost
->lea
;
19293 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19294 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19298 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19300 *total
= ix86_cost
->lea
;
19301 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19302 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19303 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19310 if (FLOAT_MODE_P (mode
))
19312 *total
= ix86_cost
->fadd
;
19320 if (!TARGET_64BIT
&& mode
== DImode
)
19322 *total
= (ix86_cost
->add
* 2
19323 + (rtx_cost (XEXP (x
, 0), outer_code
)
19324 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19325 + (rtx_cost (XEXP (x
, 1), outer_code
)
19326 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19332 if (FLOAT_MODE_P (mode
))
19334 *total
= ix86_cost
->fchs
;
19340 if (!TARGET_64BIT
&& mode
== DImode
)
19341 *total
= ix86_cost
->add
* 2;
19343 *total
= ix86_cost
->add
;
19347 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19348 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19349 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19350 && XEXP (x
, 1) == const0_rtx
)
19352 /* This kind of construct is implemented using test[bwl].
19353 Treat it as if we had an AND. */
19354 *total
= (ix86_cost
->add
19355 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19356 + rtx_cost (const1_rtx
, outer_code
));
19362 if (!TARGET_SSE_MATH
19364 || (mode
== DFmode
&& !TARGET_SSE2
))
19369 if (FLOAT_MODE_P (mode
))
19370 *total
= ix86_cost
->fabs
;
19374 if (FLOAT_MODE_P (mode
))
19375 *total
= ix86_cost
->fsqrt
;
19379 if (XINT (x
, 1) == UNSPEC_TP
)
19390 static int current_machopic_label_num
;
19392 /* Given a symbol name and its associated stub, write out the
19393 definition of the stub. */
19396 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19398 unsigned int length
;
19399 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19400 int label
= ++current_machopic_label_num
;
19402 /* For 64-bit we shouldn't get here. */
19403 gcc_assert (!TARGET_64BIT
);
19405 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19406 symb
= (*targetm
.strip_name_encoding
) (symb
);
19408 length
= strlen (stub
);
19409 binder_name
= alloca (length
+ 32);
19410 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19412 length
= strlen (symb
);
19413 symbol_name
= alloca (length
+ 32);
19414 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19416 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19419 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19421 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19423 fprintf (file
, "%s:\n", stub
);
19424 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19428 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19429 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19430 fprintf (file
, "\tjmp\t*%%edx\n");
19433 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19435 fprintf (file
, "%s:\n", binder_name
);
19439 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19440 fprintf (file
, "\tpushl\t%%eax\n");
19443 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19445 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19447 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19448 fprintf (file
, "%s:\n", lazy_ptr_name
);
19449 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19450 fprintf (file
, "\t.long %s\n", binder_name
);
/* Darwin variant of TARGET_ASM_FILE_END: emit the Mach-O stub/lazy-pointer
   sections, then the generic i386 end-of-file output.  */
static void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
19459 #endif /* TARGET_MACHO */
19461 /* Order the registers for register allocator. */
19464 x86_order_regs_for_local_alloc (void)
19469 /* First allocate the local general purpose registers. */
19470 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19471 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19472 reg_alloc_order
[pos
++] = i
;
19474 /* Global general purpose registers. */
19475 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19476 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19477 reg_alloc_order
[pos
++] = i
;
19479 /* x87 registers come first in case we are doing FP math
19481 if (!TARGET_SSE_MATH
)
19482 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19483 reg_alloc_order
[pos
++] = i
;
19485 /* SSE registers. */
19486 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19487 reg_alloc_order
[pos
++] = i
;
19488 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19489 reg_alloc_order
[pos
++] = i
;
19491 /* x87 registers. */
19492 if (TARGET_SSE_MATH
)
19493 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19494 reg_alloc_order
[pos
++] = i
;
19496 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19497 reg_alloc_order
[pos
++] = i
;
19499 /* Initialize the rest of array as we do not allocate some registers
19501 while (pos
< FIRST_PSEUDO_REGISTER
)
19502 reg_alloc_order
[pos
++] = 0;
19505 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19506 struct attribute_spec.handler. */
19508 ix86_handle_struct_attribute (tree
*node
, tree name
,
19509 tree args ATTRIBUTE_UNUSED
,
19510 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19513 if (DECL_P (*node
))
19515 if (TREE_CODE (*node
) == TYPE_DECL
)
19516 type
= &TREE_TYPE (*node
);
19521 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19522 || TREE_CODE (*type
) == UNION_TYPE
)))
19524 warning (OPT_Wattributes
, "%qs attribute ignored",
19525 IDENTIFIER_POINTER (name
));
19526 *no_add_attrs
= true;
19529 else if ((is_attribute_p ("ms_struct", name
)
19530 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19531 || ((is_attribute_p ("gcc_struct", name
)
19532 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19534 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19535 IDENTIFIER_POINTER (name
));
19536 *no_add_attrs
= true;
19543 ix86_ms_bitfield_layout_p (tree record_type
)
19545 return (TARGET_MS_BITFIELD_LAYOUT
&&
19546 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19547 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19550 /* Returns an expression indicating where the this parameter is
19551 located on entry to the FUNCTION. */
19554 x86_this_parameter (tree function
)
19556 tree type
= TREE_TYPE (function
);
19560 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19561 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19564 if (ix86_function_regparm (type
, function
) > 0)
19568 parm
= TYPE_ARG_TYPES (type
);
19569 /* Figure out whether or not the function has a variable number of
19571 for (; parm
; parm
= TREE_CHAIN (parm
))
19572 if (TREE_VALUE (parm
) == void_type_node
)
19574 /* If not, the this parameter is in the first argument. */
19578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19580 return gen_rtx_REG (SImode
, regno
);
19584 if (aggregate_value_p (TREE_TYPE (type
), type
))
19585 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19587 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19590 /* Determine whether x86_output_mi_thunk can succeed. */
19593 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19594 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19595 HOST_WIDE_INT vcall_offset
, tree function
)
19597 /* 64-bit can handle anything. */
19601 /* For 32-bit, everything's fine if we have one free register. */
19602 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19605 /* Need a free register for vcall_offset. */
19609 /* Need a free register for GOT references. */
19610 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19613 /* Otherwise ok. */
19617 /* Output the assembler code for a thunk function. THUNK_DECL is the
19618 declaration for the thunk function itself, FUNCTION is the decl for
19619 the target function. DELTA is an immediate constant offset to be
19620 added to THIS. If VCALL_OFFSET is nonzero, the word at
19621 *(*this + vcall_offset) should be added to THIS. */
19624 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19625 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19626 HOST_WIDE_INT vcall_offset
, tree function
)
19629 rtx
this = x86_this_parameter (function
);
19632 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19633 pull it in now and let DELTA benefit. */
19636 else if (vcall_offset
)
19638 /* Put the this parameter into %eax. */
19640 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19641 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19644 this_reg
= NULL_RTX
;
19646 /* Adjust the this parameter by a fixed constant. */
19649 xops
[0] = GEN_INT (delta
);
19650 xops
[1] = this_reg
? this_reg
: this;
19653 if (!x86_64_general_operand (xops
[0], DImode
))
19655 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19657 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19661 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19664 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19667 /* Adjust the this parameter by a value stored in the vtable. */
19671 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19674 int tmp_regno
= 2 /* ECX */;
19675 if (lookup_attribute ("fastcall",
19676 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19677 tmp_regno
= 0 /* EAX */;
19678 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19681 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19684 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19686 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19688 /* Adjust the this parameter. */
19689 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19690 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19692 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19693 xops
[0] = GEN_INT (vcall_offset
);
19695 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19696 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19698 xops
[1] = this_reg
;
19700 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19702 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19705 /* If necessary, drop THIS back to its stack slot. */
19706 if (this_reg
&& this_reg
!= this)
19708 xops
[0] = this_reg
;
19710 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19713 xops
[0] = XEXP (DECL_RTL (function
), 0);
19716 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19717 output_asm_insn ("jmp\t%P0", xops
);
19720 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19721 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19722 tmp
= gen_rtx_MEM (QImode
, tmp
);
19724 output_asm_insn ("jmp\t%A0", xops
);
19729 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19730 output_asm_insn ("jmp\t%P0", xops
);
19735 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19736 tmp
= (gen_rtx_SYMBOL_REF
19738 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19739 tmp
= gen_rtx_MEM (QImode
, tmp
);
19741 output_asm_insn ("jmp\t%0", xops
);
19744 #endif /* TARGET_MACHO */
19746 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19747 output_set_got (tmp
, NULL_RTX
);
19750 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19751 output_asm_insn ("jmp\t{*}%1", xops
);
19757 x86_file_start (void)
19759 default_file_start ();
19761 darwin_file_start ();
19763 if (X86_FILE_START_VERSION_DIRECTIVE
)
19764 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19765 if (X86_FILE_START_FLTUSED
)
19766 fputs ("\t.global\t__fltused\n", asm_out_file
);
19767 if (ix86_asm_dialect
== ASM_INTEL
)
19768 fputs ("\t.intel_syntax\n", asm_out_file
);
19772 x86_field_alignment (tree field
, int computed
)
19774 enum machine_mode mode
;
19775 tree type
= TREE_TYPE (field
);
19777 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19779 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19780 ? get_inner_array_type (type
) : type
);
19781 if (mode
== DFmode
|| mode
== DCmode
19782 || GET_MODE_CLASS (mode
) == MODE_INT
19783 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19784 return MIN (32, computed
);
19788 /* Output assembler code to FILE to increment profiler label # LABELNO
19789 for profiling a function entry. */
19791 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19796 #ifndef NO_PROFILE_COUNTERS
19797 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19799 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19803 #ifndef NO_PROFILE_COUNTERS
19804 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19806 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19810 #ifndef NO_PROFILE_COUNTERS
19811 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19812 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19814 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19818 #ifndef NO_PROFILE_COUNTERS
19819 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19820 PROFILE_COUNT_REGISTER
);
19822 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19826 /* We don't have exact information about the insn sizes, but we may assume
19827 quite safely that we are informed about all 1 byte insns and memory
19828 address sizes. This is enough to eliminate unnecessary padding in
19832 min_insn_size (rtx insn
)
19836 if (!INSN_P (insn
) || !active_insn_p (insn
))
19839 /* Discard alignments we've emit and jump instructions. */
19840 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19841 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19844 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19845 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19848 /* Important case - calls are always 5 bytes.
19849 It is common to have many calls in the row. */
19851 && symbolic_reference_mentioned_p (PATTERN (insn
))
19852 && !SIBLING_CALL_P (insn
))
19854 if (get_attr_length (insn
) <= 1)
19857 /* For normal instructions we may rely on the sizes of addresses
19858 and the presence of symbol to require 4 bytes of encoding.
19859 This is not the case for jumps where references are PC relative. */
19860 if (!JUMP_P (insn
))
19862 l
= get_attr_length_address (insn
);
19863 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19872 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19876 ix86_avoid_jump_misspredicts (void)
19878 rtx insn
, start
= get_insns ();
19879 int nbytes
= 0, njumps
= 0;
19882 /* Look for all minimal intervals of instructions containing 4 jumps.
19883 The intervals are bounded by START and INSN. NBYTES is the total
19884 size of instructions in the interval including INSN and not including
19885 START. When the NBYTES is smaller than 16 bytes, it is possible
19886 that the end of START and INSN ends up in the same 16byte page.
19888 The smallest offset in the page INSN can start is the case where START
19889 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19890 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19892 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19895 nbytes
+= min_insn_size (insn
);
19897 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19898 INSN_UID (insn
), min_insn_size (insn
));
19900 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19901 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19909 start
= NEXT_INSN (start
);
19910 if ((JUMP_P (start
)
19911 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19912 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19914 njumps
--, isjump
= 1;
19917 nbytes
-= min_insn_size (start
);
19919 gcc_assert (njumps
>= 0);
19921 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19922 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19924 if (njumps
== 3 && isjump
&& nbytes
< 16)
19926 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19929 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19930 INSN_UID (insn
), padsize
);
19931 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19936 /* AMD Athlon works faster
19937 when RET is not destination of conditional jump or directly preceded
19938 by other jump instruction. We avoid the penalty by inserting NOP just
19939 before the RET instructions in such cases. */
19941 ix86_pad_returns (void)
19946 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19948 basic_block bb
= e
->src
;
19949 rtx ret
= BB_END (bb
);
19951 bool replace
= false;
19953 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19954 || !maybe_hot_bb_p (bb
))
19956 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19957 if (active_insn_p (prev
) || LABEL_P (prev
))
19959 if (prev
&& LABEL_P (prev
))
19964 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19965 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19966 && !(e
->flags
& EDGE_FALLTHRU
))
19971 prev
= prev_active_insn (ret
);
19973 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19976 /* Empty functions get branch mispredict even when the jump destination
19977 is not visible to us. */
19978 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19983 emit_insn_before (gen_return_internal_long (), ret
);
19989 /* Implement machine specific optimizations. We implement padding of returns
19990 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19994 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19995 ix86_pad_returns ();
19996 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19997 ix86_avoid_jump_misspredicts ();
20000 /* Return nonzero when QImode register that must be represented via REX prefix
20003 x86_extended_QIreg_mentioned_p (rtx insn
)
20006 extract_insn_cached (insn
);
20007 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20008 if (REG_P (recog_data
.operand
[i
])
20009 && REGNO (recog_data
.operand
[i
]) >= 4)
20014 /* Return nonzero when P points to register encoded via REX prefix.
20015 Called via for_each_rtx. */
20017 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
20019 unsigned int regno
;
20022 regno
= REGNO (*p
);
20023 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
20026 /* Return true when INSN mentions register that must be encoded using REX
20029 x86_extended_reg_mentioned_p (rtx insn
)
20031 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
20034 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20035 optabs would emit if we didn't have TFmode patterns. */
20038 x86_emit_floatuns (rtx operands
[2])
20040 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
20041 enum machine_mode mode
, inmode
;
20043 inmode
= GET_MODE (operands
[1]);
20044 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20047 in
= force_reg (inmode
, operands
[1]);
20048 mode
= GET_MODE (out
);
20049 neglab
= gen_label_rtx ();
20050 donelab
= gen_label_rtx ();
20051 f0
= gen_reg_rtx (mode
);
20053 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20055 expand_float (out
, in
, 0);
20057 emit_jump_insn (gen_jump (donelab
));
20060 emit_label (neglab
);
20062 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20064 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20066 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20068 expand_float (f0
, i0
, 0);
20070 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20072 emit_label (donelab
);
20075 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20076 with all elements equal to VAR. Return true if successful. */
20079 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
20080 rtx target
, rtx val
)
20082 enum machine_mode smode
, wsmode
, wvmode
;
20097 val
= force_reg (GET_MODE_INNER (mode
), val
);
20098 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20099 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20105 if (TARGET_SSE
|| TARGET_3DNOW_A
)
20107 val
= gen_lowpart (SImode
, val
);
20108 x
= gen_rtx_TRUNCATE (HImode
, val
);
20109 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
20110 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20132 /* Extend HImode to SImode using a paradoxical SUBREG. */
20133 tmp1
= gen_reg_rtx (SImode
);
20134 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20135 /* Insert the SImode value as low element of V4SImode vector. */
20136 tmp2
= gen_reg_rtx (V4SImode
);
20137 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20138 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20139 CONST0_RTX (V4SImode
),
20141 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20142 /* Cast the V4SImode vector back to a V8HImode vector. */
20143 tmp1
= gen_reg_rtx (V8HImode
);
20144 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
20145 /* Duplicate the low short through the whole low SImode word. */
20146 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
20147 /* Cast the V8HImode vector back to a V4SImode vector. */
20148 tmp2
= gen_reg_rtx (V4SImode
);
20149 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20150 /* Replicate the low element of the V4SImode vector. */
20151 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20152 /* Cast the V2SImode back to V8HImode, and store in target. */
20153 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20164 /* Extend QImode to SImode using a paradoxical SUBREG. */
20165 tmp1
= gen_reg_rtx (SImode
);
20166 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20167 /* Insert the SImode value as low element of V4SImode vector. */
20168 tmp2
= gen_reg_rtx (V4SImode
);
20169 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20170 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20171 CONST0_RTX (V4SImode
),
20173 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20174 /* Cast the V4SImode vector back to a V16QImode vector. */
20175 tmp1
= gen_reg_rtx (V16QImode
);
20176 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20177 /* Duplicate the low byte through the whole low SImode word. */
20178 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20179 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20180 /* Cast the V16QImode vector back to a V4SImode vector. */
20181 tmp2
= gen_reg_rtx (V4SImode
);
20182 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20183 /* Replicate the low element of the V4SImode vector. */
20184 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20185 /* Cast the V2SImode back to V16QImode, and store in target. */
20186 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20194 /* Replicate the value once into the next wider mode and recurse. */
20195 val
= convert_modes (wsmode
, smode
, val
, true);
20196 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20197 GEN_INT (GET_MODE_BITSIZE (smode
)),
20198 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20199 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20201 x
= gen_reg_rtx (wvmode
);
20202 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20203 gcc_unreachable ();
20204 emit_move_insn (target
, gen_lowpart (mode
, x
));
20212 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20213 whose ONE_VAR element is VAR, and other elements are zero. Return true
20217 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20218 rtx target
, rtx var
, int one_var
)
20220 enum machine_mode vsimode
;
20236 var
= force_reg (GET_MODE_INNER (mode
), var
);
20237 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20238 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20243 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20244 new_target
= gen_reg_rtx (mode
);
20246 new_target
= target
;
20247 var
= force_reg (GET_MODE_INNER (mode
), var
);
20248 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20249 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20250 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20253 /* We need to shuffle the value to the correct position, so
20254 create a new pseudo to store the intermediate result. */
20256 /* With SSE2, we can use the integer shuffle insns. */
20257 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20259 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20261 GEN_INT (one_var
== 1 ? 0 : 1),
20262 GEN_INT (one_var
== 2 ? 0 : 1),
20263 GEN_INT (one_var
== 3 ? 0 : 1)));
20264 if (target
!= new_target
)
20265 emit_move_insn (target
, new_target
);
20269 /* Otherwise convert the intermediate result to V4SFmode and
20270 use the SSE1 shuffle instructions. */
20271 if (mode
!= V4SFmode
)
20273 tmp
= gen_reg_rtx (V4SFmode
);
20274 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20279 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20281 GEN_INT (one_var
== 1 ? 0 : 1),
20282 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20283 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20285 if (mode
!= V4SFmode
)
20286 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20287 else if (tmp
!= target
)
20288 emit_move_insn (target
, tmp
);
20290 else if (target
!= new_target
)
20291 emit_move_insn (target
, new_target
);
20296 vsimode
= V4SImode
;
20302 vsimode
= V2SImode
;
20308 /* Zero extend the variable element to SImode and recurse. */
20309 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20311 x
= gen_reg_rtx (vsimode
);
20312 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20314 gcc_unreachable ();
20316 emit_move_insn (target
, gen_lowpart (mode
, x
));
20324 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20325 consisting of the values in VALS. It is known that all elements
20326 except ONE_VAR are constants. Return true if successful. */
20329 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20330 rtx target
, rtx vals
, int one_var
)
20332 rtx var
= XVECEXP (vals
, 0, one_var
);
20333 enum machine_mode wmode
;
20336 const_vec
= copy_rtx (vals
);
20337 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20338 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20346 /* For the two element vectors, it's just as easy to use
20347 the general case. */
20363 /* There's no way to set one QImode entry easily. Combine
20364 the variable value with its adjacent constant value, and
20365 promote to an HImode set. */
20366 x
= XVECEXP (vals
, 0, one_var
^ 1);
20369 var
= convert_modes (HImode
, QImode
, var
, true);
20370 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20371 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20372 x
= GEN_INT (INTVAL (x
) & 0xff);
20376 var
= convert_modes (HImode
, QImode
, var
, true);
20377 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20379 if (x
!= const0_rtx
)
20380 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20381 1, OPTAB_LIB_WIDEN
);
20383 x
= gen_reg_rtx (wmode
);
20384 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20385 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20387 emit_move_insn (target
, gen_lowpart (mode
, x
));
20394 emit_move_insn (target
, const_vec
);
20395 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20399 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20400 all values variable, and none identical. */
20403 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20404 rtx target
, rtx vals
)
20406 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20407 rtx op0
= NULL
, op1
= NULL
;
20408 bool use_vec_concat
= false;
20414 if (!mmx_ok
&& !TARGET_SSE
)
20420 /* For the two element vectors, we always implement VEC_CONCAT. */
20421 op0
= XVECEXP (vals
, 0, 0);
20422 op1
= XVECEXP (vals
, 0, 1);
20423 use_vec_concat
= true;
20427 half_mode
= V2SFmode
;
20430 half_mode
= V2SImode
;
20436 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20437 Recurse to load the two halves. */
20439 op0
= gen_reg_rtx (half_mode
);
20440 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20441 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20443 op1
= gen_reg_rtx (half_mode
);
20444 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20445 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20447 use_vec_concat
= true;
20458 gcc_unreachable ();
20461 if (use_vec_concat
)
20463 if (!register_operand (op0
, half_mode
))
20464 op0
= force_reg (half_mode
, op0
);
20465 if (!register_operand (op1
, half_mode
))
20466 op1
= force_reg (half_mode
, op1
);
20468 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20469 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20473 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20474 enum machine_mode inner_mode
;
20475 rtx words
[4], shift
;
20477 inner_mode
= GET_MODE_INNER (mode
);
20478 n_elts
= GET_MODE_NUNITS (mode
);
20479 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20480 n_elt_per_word
= n_elts
/ n_words
;
20481 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20483 for (i
= 0; i
< n_words
; ++i
)
20485 rtx word
= NULL_RTX
;
20487 for (j
= 0; j
< n_elt_per_word
; ++j
)
20489 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20490 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20496 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20497 word
, 1, OPTAB_LIB_WIDEN
);
20498 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20499 word
, 1, OPTAB_LIB_WIDEN
);
20507 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20508 else if (n_words
== 2)
20510 rtx tmp
= gen_reg_rtx (mode
);
20511 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20512 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20513 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20514 emit_move_insn (target
, tmp
);
20516 else if (n_words
== 4)
20518 rtx tmp
= gen_reg_rtx (V4SImode
);
20519 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20520 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20521 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20524 gcc_unreachable ();
20528 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20529 instructions unless MMX_OK is true. */
20532 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20534 enum machine_mode mode
= GET_MODE (target
);
20535 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20536 int n_elts
= GET_MODE_NUNITS (mode
);
20537 int n_var
= 0, one_var
= -1;
20538 bool all_same
= true, all_const_zero
= true;
20542 for (i
= 0; i
< n_elts
; ++i
)
20544 x
= XVECEXP (vals
, 0, i
);
20545 if (!CONSTANT_P (x
))
20546 n_var
++, one_var
= i
;
20547 else if (x
!= CONST0_RTX (inner_mode
))
20548 all_const_zero
= false;
20549 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20553 /* Constants are best loaded from the constant pool. */
20556 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20560 /* If all values are identical, broadcast the value. */
20562 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20563 XVECEXP (vals
, 0, 0)))
20566 /* Values where only one field is non-constant are best loaded from
20567 the pool and overwritten via move later. */
20571 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20572 XVECEXP (vals
, 0, one_var
),
20576 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20580 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20584 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20586 enum machine_mode mode
= GET_MODE (target
);
20587 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20588 bool use_vec_merge
= false;
20597 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20598 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20600 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20602 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20603 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20613 /* For the two element vectors, we implement a VEC_CONCAT with
20614 the extraction of the other element. */
20616 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20617 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20620 op0
= val
, op1
= tmp
;
20622 op0
= tmp
, op1
= val
;
20624 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20625 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20633 use_vec_merge
= true;
20637 /* tmp = target = A B C D */
20638 tmp
= copy_to_reg (target
);
20639 /* target = A A B B */
20640 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20641 /* target = X A B B */
20642 ix86_expand_vector_set (false, target
, val
, 0);
20643 /* target = A X C D */
20644 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20645 GEN_INT (1), GEN_INT (0),
20646 GEN_INT (2+4), GEN_INT (3+4)));
20650 /* tmp = target = A B C D */
20651 tmp
= copy_to_reg (target
);
20652 /* tmp = X B C D */
20653 ix86_expand_vector_set (false, tmp
, val
, 0);
20654 /* target = A B X D */
20655 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20656 GEN_INT (0), GEN_INT (1),
20657 GEN_INT (0+4), GEN_INT (3+4)));
20661 /* tmp = target = A B C D */
20662 tmp
= copy_to_reg (target
);
20663 /* tmp = X B C D */
20664 ix86_expand_vector_set (false, tmp
, val
, 0);
20665 /* target = A B X D */
20666 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20667 GEN_INT (0), GEN_INT (1),
20668 GEN_INT (2+4), GEN_INT (0+4)));
20672 gcc_unreachable ();
20677 /* Element 0 handled by vec_merge below. */
20680 use_vec_merge
= true;
20686 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20687 store into element 0, then shuffle them back. */
20691 order
[0] = GEN_INT (elt
);
20692 order
[1] = const1_rtx
;
20693 order
[2] = const2_rtx
;
20694 order
[3] = GEN_INT (3);
20695 order
[elt
] = const0_rtx
;
20697 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20698 order
[1], order
[2], order
[3]));
20700 ix86_expand_vector_set (false, target
, val
, 0);
20702 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20703 order
[1], order
[2], order
[3]));
20707 /* For SSE1, we have to reuse the V4SF code. */
20708 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20709 gen_lowpart (SFmode
, val
), elt
);
20714 use_vec_merge
= TARGET_SSE2
;
20717 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20728 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20729 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20730 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20734 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20736 emit_move_insn (mem
, target
);
20738 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20739 emit_move_insn (tmp
, val
);
20741 emit_move_insn (target
, mem
);
20746 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20748 enum machine_mode mode
= GET_MODE (vec
);
20749 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20750 bool use_vec_extr
= false;
20763 use_vec_extr
= true;
20775 tmp
= gen_reg_rtx (mode
);
20776 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20777 GEN_INT (elt
), GEN_INT (elt
),
20778 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20782 tmp
= gen_reg_rtx (mode
);
20783 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20787 gcc_unreachable ();
20790 use_vec_extr
= true;
20805 tmp
= gen_reg_rtx (mode
);
20806 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20807 GEN_INT (elt
), GEN_INT (elt
),
20808 GEN_INT (elt
), GEN_INT (elt
)));
20812 tmp
= gen_reg_rtx (mode
);
20813 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20817 gcc_unreachable ();
20820 use_vec_extr
= true;
20825 /* For SSE1, we have to reuse the V4SF code. */
20826 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20827 gen_lowpart (V4SFmode
, vec
), elt
);
20833 use_vec_extr
= TARGET_SSE2
;
20836 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20841 /* ??? Could extract the appropriate HImode element and shift. */
20848 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20849 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20851 /* Let the rtl optimizers know about the zero extension performed. */
20852 if (inner_mode
== HImode
)
20854 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20855 target
= gen_lowpart (SImode
, target
);
20858 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20862 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20864 emit_move_insn (mem
, vec
);
20866 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20867 emit_move_insn (target
, tmp
);
20871 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20872 pattern to reduce; DEST is the destination; IN is the input vector. */
20875 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20877 rtx tmp1
, tmp2
, tmp3
;
20879 tmp1
= gen_reg_rtx (V4SFmode
);
20880 tmp2
= gen_reg_rtx (V4SFmode
);
20881 tmp3
= gen_reg_rtx (V4SFmode
);
20883 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20884 emit_insn (fn (tmp2
, tmp1
, in
));
20886 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20887 GEN_INT (1), GEN_INT (1),
20888 GEN_INT (1+4), GEN_INT (1+4)));
20889 emit_insn (fn (dest
, tmp2
, tmp3
));
20892 /* Target hook for scalar_mode_supported_p. */
20894 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20896 if (DECIMAL_FLOAT_MODE_P (mode
))
20899 return default_scalar_mode_supported_p (mode
);
20902 /* Implements target hook vector_mode_supported_p. */
20904 ix86_vector_mode_supported_p (enum machine_mode mode
)
20906 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20908 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20910 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20912 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20917 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20919 We do this in the new i386 backend to maintain source compatibility
20920 with the old cc0-based compiler. */
20923 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20924 tree inputs ATTRIBUTE_UNUSED
,
20927 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20929 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20934 /* Return true if this goes in small data/bss. */
20937 ix86_in_large_data_p (tree exp
)
20939 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20942 /* Functions are never large data. */
20943 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20946 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20948 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20949 if (strcmp (section
, ".ldata") == 0
20950 || strcmp (section
, ".lbss") == 0)
20956 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20958 /* If this is an incomplete type with size 0, then we can't put it
20959 in data because it might be too big when completed. */
20960 if (!size
|| size
> ix86_section_threshold
)
20967 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20969 default_encode_section_info (decl
, rtl
, first
);
20971 if (TREE_CODE (decl
) == VAR_DECL
20972 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20973 && ix86_in_large_data_p (decl
))
20974 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20977 /* Worker function for REVERSE_CONDITION. */
20980 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20982 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20983 ? reverse_condition (code
)
20984 : reverse_condition_maybe_unordered (code
));
20987 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20991 output_387_reg_move (rtx insn
, rtx
*operands
)
20993 if (REG_P (operands
[1])
20994 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20996 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20997 return output_387_ffreep (operands
, 0);
20998 return "fstp\t%y0";
21000 if (STACK_TOP_P (operands
[0]))
21001 return "fld%z1\t%y1";
21005 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21006 FP status register is set. */
21009 ix86_emit_fp_unordered_jump (rtx label
)
21011 rtx reg
= gen_reg_rtx (HImode
);
21014 emit_insn (gen_x86_fnstsw_1 (reg
));
21016 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
21018 emit_insn (gen_x86_sahf_1 (reg
));
21020 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
21021 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
21025 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
21027 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21028 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
21031 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
21032 gen_rtx_LABEL_REF (VOIDmode
, label
),
21034 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
21035 emit_jump_insn (temp
);
21038 /* Output code to perform a log1p XFmode calculation. */
21040 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
21042 rtx label1
= gen_label_rtx ();
21043 rtx label2
= gen_label_rtx ();
21045 rtx tmp
= gen_reg_rtx (XFmode
);
21046 rtx tmp2
= gen_reg_rtx (XFmode
);
21048 emit_insn (gen_absxf2 (tmp
, op1
));
21049 emit_insn (gen_cmpxf (tmp
,
21050 CONST_DOUBLE_FROM_REAL_VALUE (
21051 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
21053 emit_jump_insn (gen_bge (label1
));
21055 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21056 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
21057 emit_jump (label2
);
21059 emit_label (label1
);
21060 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
21061 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
21062 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21063 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
21065 emit_label (label2
);
21068 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21071 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
21074 /* With Binutils 2.15, the "@unwind" marker must be specified on
21075 every occurrence of the ".eh_frame" section, not just the first
21078 && strcmp (name
, ".eh_frame") == 0)
21080 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
21081 flags
& SECTION_WRITE
? "aw" : "a");
21084 default_elf_asm_named_section (name
, flags
, decl
);
21087 /* Return the mangling of TYPE if it is an extended fundamental type. */
21089 static const char *
21090 ix86_mangle_fundamental_type (tree type
)
21092 switch (TYPE_MODE (type
))
21095 /* __float128 is "g". */
21098 /* "long double" or __float80 is "e". */
21105 /* For 32-bit code we can save PIC register setup by using
21106 __stack_chk_fail_local hidden function instead of calling
21107 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21108 register, so it is better to call __stack_chk_fail directly. */
21111 ix86_stack_protect_fail (void)
21113 return TARGET_64BIT
21114 ? default_external_stack_protect_fail ()
21115 : default_hidden_stack_protect_fail ();
21118 /* Select a format to encode pointers in exception handling data. CODE
21119 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21120 true if the symbol may be affected by dynamic relocations.
21122 ??? All x86 object file formats are capable of representing this.
21123 After all, the relocation needed is the same as for the call insn.
21124 Whether or not a particular assembler allows us to enter such, I
21125 guess we'll have to see. */
21127 asm_preferred_eh_data_format (int code
, int global
)
21131 int type
= DW_EH_PE_sdata8
;
21133 || ix86_cmodel
== CM_SMALL_PIC
21134 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21135 type
= DW_EH_PE_sdata4
;
21136 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21138 if (ix86_cmodel
== CM_SMALL
21139 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21140 return DW_EH_PE_udata4
;
21141 return DW_EH_PE_absptr
;
21144 /* Expand copysign from SIGN to the positive value ABS_VALUE
21145 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21148 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
21150 enum machine_mode mode
= GET_MODE (sign
);
21151 rtx sgn
= gen_reg_rtx (mode
);
21152 if (mask
== NULL_RTX
)
21154 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
21155 if (!VECTOR_MODE_P (mode
))
21157 /* We need to generate a scalar mode mask in this case. */
21158 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21159 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21160 mask
= gen_reg_rtx (mode
);
21161 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21165 mask
= gen_rtx_NOT (mode
, mask
);
21166 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
21167 gen_rtx_AND (mode
, mask
, sign
)));
21168 emit_insn (gen_rtx_SET (VOIDmode
, result
,
21169 gen_rtx_IOR (mode
, abs_value
, sgn
)));
21172 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21173 mask for masking out the sign-bit is stored in *SMASK, if that is
21176 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
21178 enum machine_mode mode
= GET_MODE (op0
);
21181 xa
= gen_reg_rtx (mode
);
21182 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
21183 if (!VECTOR_MODE_P (mode
))
21185 /* We need to generate a scalar mode mask in this case. */
21186 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21187 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21188 mask
= gen_reg_rtx (mode
);
21189 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21191 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21192 gen_rtx_AND (mode
, op0
, mask
)));
21200 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21201 swapping the operands if SWAP_OPERANDS is true. The expanded
21202 code is a forward jump to a newly created label in case the
21203 comparison is true. The generated label rtx is returned. */
21205 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21206 bool swap_operands
)
21217 label
= gen_label_rtx ();
21218 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21219 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21220 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21221 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21222 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21223 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21224 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21225 JUMP_LABEL (tmp
) = label
;
21230 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21231 using comparison code CODE. Operands are swapped for the comparison if
21232 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21234 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21235 bool swap_operands
)
21237 enum machine_mode mode
= GET_MODE (op0
);
21238 rtx mask
= gen_reg_rtx (mode
);
21247 if (mode
== DFmode
)
21248 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21249 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21251 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21252 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21257 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21258 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21260 ix86_gen_TWO52 (enum machine_mode mode
)
21262 REAL_VALUE_TYPE TWO52r
;
21265 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21266 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21267 TWO52
= force_reg (mode
, TWO52
);
21272 /* Expand SSE sequence for computing lround from OP1 storing
21275 ix86_expand_lround (rtx op0
, rtx op1
)
21277 /* C code for the stuff we're doing below:
21278 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21281 enum machine_mode mode
= GET_MODE (op1
);
21282 const struct real_format
*fmt
;
21283 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21286 /* load nextafter (0.5, 0.0) */
21287 fmt
= REAL_MODE_FORMAT (mode
);
21288 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21289 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21291 /* adj = copysign (0.5, op1) */
21292 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21293 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21295 /* adj = op1 + adj */
21296 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21298 /* op0 = (imode)adj */
21299 expand_fix (op0
, adj
, 0);
21302 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21305 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21307 /* C code for the stuff we're doing below (for do_floor):
21309 xi -= (double)xi > op1 ? 1 : 0;
21312 enum machine_mode fmode
= GET_MODE (op1
);
21313 enum machine_mode imode
= GET_MODE (op0
);
21314 rtx ireg
, freg
, label
, tmp
;
21316 /* reg = (long)op1 */
21317 ireg
= gen_reg_rtx (imode
);
21318 expand_fix (ireg
, op1
, 0);
21320 /* freg = (double)reg */
21321 freg
= gen_reg_rtx (fmode
);
21322 expand_float (freg
, ireg
, 0);
21324 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21325 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21326 freg
, op1
, !do_floor
);
21327 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21328 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21329 emit_move_insn (ireg
, tmp
);
21331 emit_label (label
);
21332 LABEL_NUSES (label
) = 1;
21334 emit_move_insn (op0
, ireg
);
21337 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21338 result in OPERAND0. */
21340 ix86_expand_rint (rtx operand0
, rtx operand1
)
21342 /* C code for the stuff we're doing below:
21343 xa = fabs (operand1);
21344 if (!isless (xa, 2**52))
21346 xa = xa + 2**52 - 2**52;
21347 return copysign (xa, operand1);
21349 enum machine_mode mode
= GET_MODE (operand0
);
21350 rtx res
, xa
, label
, TWO52
, mask
;
21352 res
= gen_reg_rtx (mode
);
21353 emit_move_insn (res
, operand1
);
21355 /* xa = abs (operand1) */
21356 xa
= ix86_expand_sse_fabs (res
, &mask
);
21358 /* if (!isless (xa, TWO52)) goto label; */
21359 TWO52
= ix86_gen_TWO52 (mode
);
21360 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21362 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21363 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21365 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21367 emit_label (label
);
21368 LABEL_NUSES (label
) = 1;
21370 emit_move_insn (operand0
, res
);
21373 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21376 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21378 /* C code for the stuff we expand below.
21379 double xa = fabs (x), x2;
21380 if (!isless (xa, TWO52))
21382 xa = xa + TWO52 - TWO52;
21383 x2 = copysign (xa, x);
21392 enum machine_mode mode
= GET_MODE (operand0
);
21393 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21395 TWO52
= ix86_gen_TWO52 (mode
);
21397 /* Temporary for holding the result, initialized to the input
21398 operand to ease control flow. */
21399 res
= gen_reg_rtx (mode
);
21400 emit_move_insn (res
, operand1
);
21402 /* xa = abs (operand1) */
21403 xa
= ix86_expand_sse_fabs (res
, &mask
);
21405 /* if (!isless (xa, TWO52)) goto label; */
21406 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21408 /* xa = xa + TWO52 - TWO52; */
21409 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21410 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21412 /* xa = copysign (xa, operand1) */
21413 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21415 /* generate 1.0 or -1.0 */
21416 one
= force_reg (mode
,
21417 const_double_from_real_value (do_floor
21418 ? dconst1
: dconstm1
, mode
));
21420 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21421 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21422 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21423 gen_rtx_AND (mode
, one
, tmp
)));
21424 /* We always need to subtract here to preserve signed zero. */
21425 tmp
= expand_simple_binop (mode
, MINUS
,
21426 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21427 emit_move_insn (res
, tmp
);
21429 emit_label (label
);
21430 LABEL_NUSES (label
) = 1;
21432 emit_move_insn (operand0
, res
);
21435 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21438 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21440 /* C code for the stuff we expand below.
21441 double xa = fabs (x), x2;
21442 if (!isless (xa, TWO52))
21444 x2 = (double)(long)x;
21451 if (HONOR_SIGNED_ZEROS (mode))
21452 return copysign (x2, x);
21455 enum machine_mode mode
= GET_MODE (operand0
);
21456 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21458 TWO52
= ix86_gen_TWO52 (mode
);
21460 /* Temporary for holding the result, initialized to the input
21461 operand to ease control flow. */
21462 res
= gen_reg_rtx (mode
);
21463 emit_move_insn (res
, operand1
);
21465 /* xa = abs (operand1) */
21466 xa
= ix86_expand_sse_fabs (res
, &mask
);
21468 /* if (!isless (xa, TWO52)) goto label; */
21469 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21471 /* xa = (double)(long)x */
21472 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21473 expand_fix (xi
, res
, 0);
21474 expand_float (xa
, xi
, 0);
21477 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21479 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21480 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21481 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21482 gen_rtx_AND (mode
, one
, tmp
)));
21483 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21484 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21485 emit_move_insn (res
, tmp
);
21487 if (HONOR_SIGNED_ZEROS (mode
))
21488 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21490 emit_label (label
);
21491 LABEL_NUSES (label
) = 1;
21493 emit_move_insn (operand0
, res
);
21496 /* Expand SSE sequence for computing round from OPERAND1 storing
21497 into OPERAND0. Sequence that works without relying on DImode truncation
21498 via cvttsd2siq that is only available on 64bit targets. */
21500 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21502 /* C code for the stuff we expand below.
21503 double xa = fabs (x), xa2, x2;
21504 if (!isless (xa, TWO52))
21506 Using the absolute value and copying back sign makes
21507 -0.0 -> -0.0 correct.
21508 xa2 = xa + TWO52 - TWO52;
21513 else if (dxa > 0.5)
21515 x2 = copysign (xa2, x);
21518 enum machine_mode mode
= GET_MODE (operand0
);
21519 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21521 TWO52
= ix86_gen_TWO52 (mode
);
21523 /* Temporary for holding the result, initialized to the input
21524 operand to ease control flow. */
21525 res
= gen_reg_rtx (mode
);
21526 emit_move_insn (res
, operand1
);
21528 /* xa = abs (operand1) */
21529 xa
= ix86_expand_sse_fabs (res
, &mask
);
21531 /* if (!isless (xa, TWO52)) goto label; */
21532 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21534 /* xa2 = xa + TWO52 - TWO52; */
21535 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21536 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21538 /* dxa = xa2 - xa; */
21539 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21541 /* generate 0.5, 1.0 and -0.5 */
21542 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21543 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21544 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21548 tmp
= gen_reg_rtx (mode
);
21549 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21550 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21551 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21552 gen_rtx_AND (mode
, one
, tmp
)));
21553 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21554 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21555 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21556 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21557 gen_rtx_AND (mode
, one
, tmp
)));
21558 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21560 /* res = copysign (xa2, operand1) */
21561 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21563 emit_label (label
);
21564 LABEL_NUSES (label
) = 1;
21566 emit_move_insn (operand0
, res
);
21569 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21572 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21574 /* C code for SSE variant we expand below.
21575 double xa = fabs (x), x2;
21576 if (!isless (xa, TWO52))
21578 x2 = (double)(long)x;
21579 if (HONOR_SIGNED_ZEROS (mode))
21580 return copysign (x2, x);
21583 enum machine_mode mode
= GET_MODE (operand0
);
21584 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21586 TWO52
= ix86_gen_TWO52 (mode
);
21588 /* Temporary for holding the result, initialized to the input
21589 operand to ease control flow. */
21590 res
= gen_reg_rtx (mode
);
21591 emit_move_insn (res
, operand1
);
21593 /* xa = abs (operand1) */
21594 xa
= ix86_expand_sse_fabs (res
, &mask
);
21596 /* if (!isless (xa, TWO52)) goto label; */
21597 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21599 /* x = (double)(long)x */
21600 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21601 expand_fix (xi
, res
, 0);
21602 expand_float (res
, xi
, 0);
21604 if (HONOR_SIGNED_ZEROS (mode
))
21605 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21607 emit_label (label
);
21608 LABEL_NUSES (label
) = 1;
21610 emit_move_insn (operand0
, res
);
21613 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21616 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21618 enum machine_mode mode
= GET_MODE (operand0
);
21619 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21621 /* C code for SSE variant we expand below.
21622 double xa = fabs (x), x2;
21623 if (!isless (xa, TWO52))
21625 xa2 = xa + TWO52 - TWO52;
21629 x2 = copysign (xa2, x);
21633 TWO52
= ix86_gen_TWO52 (mode
);
21635 /* Temporary for holding the result, initialized to the input
21636 operand to ease control flow. */
21637 res
= gen_reg_rtx (mode
);
21638 emit_move_insn (res
, operand1
);
21640 /* xa = abs (operand1) */
21641 xa
= ix86_expand_sse_fabs (res
, &smask
);
21643 /* if (!isless (xa, TWO52)) goto label; */
21644 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21646 /* res = xa + TWO52 - TWO52; */
21647 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21648 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21649 emit_move_insn (res
, tmp
);
21652 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21654 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21655 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21656 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21657 gen_rtx_AND (mode
, mask
, one
)));
21658 tmp
= expand_simple_binop (mode
, MINUS
,
21659 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21660 emit_move_insn (res
, tmp
);
21662 /* res = copysign (res, operand1) */
21663 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21665 emit_label (label
);
21666 LABEL_NUSES (label
) = 1;
21668 emit_move_insn (operand0
, res
);
21671 /* Expand SSE sequence for computing round from OPERAND1 storing
21674 ix86_expand_round (rtx operand0
, rtx operand1
)
21676 /* C code for the stuff we're doing below:
21677 double xa = fabs (x);
21678 if (!isless (xa, TWO52))
21680 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21681 return copysign (xa, x);
21683 enum machine_mode mode
= GET_MODE (operand0
);
21684 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21685 const struct real_format
*fmt
;
21686 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21688 /* Temporary for holding the result, initialized to the input
21689 operand to ease control flow. */
21690 res
= gen_reg_rtx (mode
);
21691 emit_move_insn (res
, operand1
);
21693 TWO52
= ix86_gen_TWO52 (mode
);
21694 xa
= ix86_expand_sse_fabs (res
, &mask
);
21695 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21697 /* load nextafter (0.5, 0.0) */
21698 fmt
= REAL_MODE_FORMAT (mode
);
21699 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21700 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21702 /* xa = xa + 0.5 */
21703 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21704 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21706 /* xa = (double)(int64_t)xa */
21707 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21708 expand_fix (xi
, xa
, 0);
21709 expand_float (xa
, xi
, 0);
21711 /* res = copysign (xa, operand1) */
21712 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21714 emit_label (label
);
21715 LABEL_NUSES (label
) = 1;
21717 emit_move_insn (operand0
, res
);
21720 #include "gt-i386.h"