/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Stack-probe limit used when -fstack-limit is in effect; -1 means no
   limit.  Targets may pre-define this before including this file.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode..DImode map to 0..3; anything else falls into the "other"
   slot (index 4).  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
67 /* Processor costs (relative to an add) */
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of loading integer registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table the rest of the back-end consults; defaults to the Pentium
   table and is presumably repointed at the selected CPU's table during
   option processing — the assignment site is not visible in this chunk.  */
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
982 /* Processor feature/optimization bitmasks. */
/* One bit per processor, used to build the per-CPU feature/tuning masks
   below.  The m_*_* composites name commonly-used unions of CPUs.  */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling for Generic64 seems like good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro base chips. */
1013 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1017 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1022 /* X86_TUNE_USE_BIT_TEST */
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1030 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation result. But after P4 was made, no performance benefit
1034 was observed with branch hints. It also increases the code size.
1035 As a result, icc never generates branch hints. */
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_GENERIC32
,
1043 /* | m_GENERIC | m_ATHLON_K8 ? */
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies */
1047 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1048 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on Generic32 compilation setting as well. However
1052 in current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and is in conflict with partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2
| m_GENERIC
,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386
| m_486
| m_K6_GEODE
,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1069 /* X86_TUNE_USE_MOV0 */
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1084 /* X86_TUNE_READ_MODIFY */
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1089 | m_GENERIC
/* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT
| m_486
| m_386
),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386
| m_PENT4
| m_NOCONA
,
1097 /* X86_TUNE_QIMODE_MATH */
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1117 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1124 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1129 | m_GENERIC
| m_GEODE
),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here in between PPro/Pentium4 based chips that thread 128bit
1136 SSE registers as single units versus K8 based chips that divide SSE
1137 registers to two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 shows that disabling this option on P4 brings over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10
,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO
| m_PENT4
| m_NOCONA
,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1169 /* X86_TUNE_SHIFT1 */
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10
,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10
,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
1198 /* Feature tests against the various architecture variations. */
1199 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1200 /* X86_ARCH_CMOVE */
1201 m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
,
1203 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1206 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1209 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1212 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1216 static const unsigned int x86_accumulate_outgoing_args
1217 = m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
1219 static const unsigned int x86_arch_always_fancy_math_387
1220 = m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1221 | m_NOCONA
| m_CORE2
| m_GENERIC
;
/* User-forced string-operation strategy; no_stringop means pick from the
   cost tables.  NOTE(review): presumably set from a -mstringop-strategy=
   style option — the assignment is not visible in this chunk; confirm.  */
1223 static enum stringop_alg stringop_alg
= no_stringop
;
1225 /* In case the average insn count for single function invocation is
1226 lower than this constant, emit fast (but longer) prologue and
1228 #define FAST_PROLOGUE_INSN_COUNT 20
1230 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1231 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1232 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1233 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1235 /* Array of the smallest class containing reg number REGNO, indexed by
1236 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1238 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1240 /* ax, dx, cx, bx */
1241 AREG
, DREG
, CREG
, BREG
,
1242 /* si, di, bp, sp */
1243 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1245 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1246 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1249 /* flags, fpsr, fpcr, frame */
1250 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1251 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1253 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1255 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1256 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1257 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1261 /* The "default" register map used in 32bit mode. */
1263 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1265 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1266 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1268 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1269 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1270 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1274 static int const x86_64_int_parameter_registers
[6] =
1276 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1277 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Hard registers used to return integer values under the x86-64 ABI.
   gcc regno 1 is %edx/%rdx (see the ax, dx, cx, bx ordering of the
   register class map and the DWARF numbering comment below), so the
   entry for register 1 is RDX, not RDI as the old comment claimed.  */
1280 static int const x86_64_int_return_registers
[4] =
1282 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1285 /* The "default" register map used in 64bit mode. */
1286 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1288 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1289 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1290 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1291 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1292 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1293 8,9,10,11,12,13,14,15, /* extended integer registers */
1294 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1297 /* Define the register numbers to be used in Dwarf debugging information.
1298 The SVR4 reference port C compiler uses the following register numbers
1299 in its Dwarf output code:
1300 0 for %eax (gcc regno = 0)
1301 1 for %ecx (gcc regno = 2)
1302 2 for %edx (gcc regno = 1)
1303 3 for %ebx (gcc regno = 3)
1304 4 for %esp (gcc regno = 7)
1305 5 for %ebp (gcc regno = 6)
1306 6 for %esi (gcc regno = 4)
1307 7 for %edi (gcc regno = 5)
1308 The following three DWARF register numbers are never generated by
1309 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1310 believes these numbers have these meanings.
1311 8 for %eip (no gcc equivalent)
1312 9 for %eflags (gcc regno = 17)
1313 10 for %trapno (no gcc equivalent)
1314 It is not at all clear how we should number the FP stack registers
1315 for the x86 architecture. If the version of SDB on x86/svr4 were
1316 a bit less brain dead with respect to floating-point then we would
1317 have a precedent to follow with respect to DWARF register numbers
1318 for x86 FP registers, but the SDB on x86/svr4 is so completely
1319 broken with respect to FP registers that it is hardly worth thinking
1320 of it as something to strive for compatibility with.
1321 The version of x86/svr4 SDB I have at the moment does (partially)
1322 seem to believe that DWARF register number 11 is associated with
1323 the x86 register %st(0), but that's about all. Higher DWARF
1324 register numbers don't seem to be associated with anything in
1325 particular, and even for DWARF regno 11, SDB only seems to under-
1326 stand that it should say that a variable lives in %st(0) (when
1327 asked via an `=' command) if we said it was in DWARF regno 11,
1328 but SDB still prints garbage when asked for the value of the
1329 variable in question (via a `/' command).
1330 (Also note that the labels SDB prints for various FP stack regs
1331 when doing an `x' command are all wrong.)
1332 Note that these problems generally don't affect the native SVR4
1333 C compiler because it doesn't allow the use of -O with -g and
1334 because when it is *not* optimizing, it allocates a memory
1335 location for each floating-point variable, and the memory
1336 location is what gets described in the DWARF AT_location
1337 attribute for the variable in question.
1338 Regardless of the severe mental illness of the x86/svr4 SDB, we
1339 do something sensible here and we use the following DWARF
1340 register numbers. Note that these are all stack-top-relative
1342 11 for %st(0) (gcc regno = 8)
1343 12 for %st(1) (gcc regno = 9)
1344 13 for %st(2) (gcc regno = 10)
1345 14 for %st(3) (gcc regno = 11)
1346 15 for %st(4) (gcc regno = 12)
1347 16 for %st(5) (gcc regno = 13)
1348 17 for %st(6) (gcc regno = 14)
1349 18 for %st(7) (gcc regno = 15)
1351 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1353 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1354 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1355 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1362 /* Test and compare insns in i386.md store the information needed to
1363 generate branch and scc insns here. */
1365 rtx ix86_compare_op0
= NULL_RTX
;
1366 rtx ix86_compare_op1
= NULL_RTX
;
1367 rtx ix86_compare_emitted
= NULL_RTX
;
1369 /* Size of the register save area. */
1370 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1372 /* Define the structure for the machine field in struct function. */
1374 struct stack_local_entry
GTY(())
1376 unsigned short mode
;
1379 struct stack_local_entry
*next
;
1382 /* Structure describing stack frame layout.
1383 Stack grows downward:
1389 saved frame pointer if frame_pointer_needed
1390 <- HARD_FRAME_POINTER
1395 [va_arg registers] (
1396 > to_allocate <- FRAME_POINTER
1406 HOST_WIDE_INT frame
;
1408 int outgoing_arguments_size
;
1411 HOST_WIDE_INT to_allocate
;
1412 /* The offsets relative to ARG_POINTER. */
1413 HOST_WIDE_INT frame_pointer_offset
;
1414 HOST_WIDE_INT hard_frame_pointer_offset
;
1415 HOST_WIDE_INT stack_pointer_offset
;
1417 /* When save_regs_using_mov is set, emit prologue using
1418 move instead of push instructions. */
1419 bool save_regs_using_mov
;
1422 /* Code model option. */
1423 enum cmodel ix86_cmodel
;
1425 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1427 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1429 /* Which unit we are generating floating point math for. */
1430 enum fpmath_unit ix86_fpmath
;
1432 /* Which cpu are we scheduling for. */
1433 enum processor_type ix86_tune
;
1435 /* Which instruction set architecture to use. */
1436 enum processor_type ix86_arch
;
1438 /* true if sse prefetch instruction is not NOOP. */
1439 int x86_prefetch_sse
;
1441 /* true if cmpxchg16b is supported. */
1444 /* ix86_regparm_string as a number */
1445 static int ix86_regparm
;
1447 /* -mstackrealign option */
1448 extern int ix86_force_align_arg_pointer
;
1449 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1451 /* Preferred alignment for stack boundary in bits. */
1452 unsigned int ix86_preferred_stack_boundary
;
1454 /* Values 1-5: see jump.c */
1455 int ix86_branch_cost
;
1457 /* Variables which are this size or smaller are put in the data/bss
1458 or ldata/lbss sections. */
1460 int ix86_section_threshold
= 65536;
1462 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1463 char internal_label_prefix
[16];
1464 int internal_label_prefix_len
;
1466 static bool ix86_handle_option (size_t, const char *, int);
1467 static void output_pic_addr_const (FILE *, rtx
, int);
1468 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1470 static const char *get_some_local_dynamic_name (void);
1471 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1472 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1473 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1475 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1476 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1478 static rtx
get_thread_pointer (int);
1479 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1480 static void get_pc_thunk_name (char [32], unsigned int);
1481 static rtx
gen_push (rtx
);
1482 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1483 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1484 static struct machine_function
* ix86_init_machine_status (void);
1485 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1486 static int ix86_nsaved_regs (void);
1487 static void ix86_emit_save_regs (void);
1488 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1489 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1490 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1491 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1492 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1493 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1494 static int ix86_issue_rate (void);
1495 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1496 static int ia32_multipass_dfa_lookahead (void);
1497 static void ix86_init_mmx_sse_builtins (void);
1498 static rtx
x86_this_parameter (tree
);
1499 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1500 HOST_WIDE_INT
, tree
);
1501 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1502 static void x86_file_start (void);
1503 static void ix86_reorg (void);
1504 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1505 static tree
ix86_build_builtin_va_list (void);
1506 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1508 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1509 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1510 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1512 static int ix86_address_cost (rtx
);
1513 static bool ix86_cannot_force_const_mem (rtx
);
1514 static rtx
ix86_delegitimize_address (rtx
);
1516 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1518 struct builtin_description
;
1519 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1521 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1523 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1524 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1525 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1526 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1527 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1528 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1529 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1530 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1531 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1532 static int ix86_fp_comparison_cost (enum rtx_code code
);
1533 static unsigned int ix86_select_alt_pic_regnum (void);
1534 static int ix86_save_reg (unsigned int, int);
1535 static void ix86_compute_frame_layout (struct ix86_frame
*);
1536 static int ix86_comp_type_attributes (tree
, tree
);
1537 static int ix86_function_regparm (tree
, tree
);
1538 const struct attribute_spec ix86_attribute_table
[];
1539 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1540 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1541 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1542 static bool contains_128bit_aligned_vector_p (tree
);
1543 static rtx
ix86_struct_value_rtx (tree
, int);
1544 static bool ix86_ms_bitfield_layout_p (tree
);
1545 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1546 static int extended_reg_mentioned_1 (rtx
*, void *);
1547 static bool ix86_rtx_costs (rtx
, int, int, int *);
1548 static int min_insn_size (rtx
);
1549 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1550 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1551 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1553 static void ix86_init_builtins (void);
1554 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1555 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1556 static tree
ix86_builtin_conversion (enum tree_code
, tree
);
1557 static const char *ix86_mangle_fundamental_type (tree
);
1558 static tree
ix86_stack_protect_fail (void);
1559 static rtx
ix86_internal_arg_pointer (void);
1560 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1561 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1564 /* This function is only used on Solaris. */
1565 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1568 /* Register class used for passing given 64bit part of the argument.
1569 These represent classes as documented by the PS ABI, with the exception
1570 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1571 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1573 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1574 whenever possible (upper half does contain padding).
1576 enum x86_64_reg_class
1579 X86_64_INTEGER_CLASS
,
1580 X86_64_INTEGERSI_CLASS
,
1587 X86_64_COMPLEX_X87_CLASS
,
1590 static const char * const x86_64_reg_class_name
[] = {
1591 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1592 "sseup", "x87", "x87up", "cplx87", "no"
1595 #define MAX_CLASSES 4
1597 /* Table of constants used by fldpi, fldln2, etc.... */
1598 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1599 static bool ext_80387_constants_init
= 0;
1600 static void init_ext_80387_constants (void);
1601 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1602 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1603 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1604 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1605 unsigned HOST_WIDE_INT align
)
/* NOTE(review): extraction-damaged region -- several lines are missing
   (e.g. the #endif directives closing the #if/#ifndef/#ifdef blocks
   below, and the first line of the TARGET_DEFAULT_TARGET_FLAGS
   initializer).  Tokens are preserved byte-for-byte; only comments
   are added.  */
1608 /* Initialize the GCC target structure. */
1609 #undef TARGET_ATTRIBUTE_TABLE
1610 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
/* dllimport/dllexport attribute merging (Windows-style targets); the
   matching #endif for this #if was dropped by the extraction.  */
1611 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1612 # undef TARGET_MERGE_DECL_ATTRIBUTES
1613 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1616 #undef TARGET_COMP_TYPE_ATTRIBUTES
1617 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function hooks.  */
1619 #undef TARGET_INIT_BUILTINS
1620 #define TARGET_INIT_BUILTINS ix86_init_builtins
1621 #undef TARGET_EXPAND_BUILTIN
1622 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
/* Vectorizer hooks.  */
1624 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1625 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1626 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1627 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1629 #undef TARGET_ASM_FUNCTION_EPILOGUE
1630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* Let a subtarget override section-info encoding; the #else/#endif
   lines between the two #define variants were lost in extraction.  */
1632 #undef TARGET_ENCODE_SECTION_INFO
1633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1639 #undef TARGET_ASM_OPEN_PAREN
1640 #define TARGET_ASM_OPEN_PAREN ""
1641 #undef TARGET_ASM_CLOSE_PAREN
1642 #define TARGET_ASM_CLOSE_PAREN ""
/* Assembler directives for integer output; the unaligned ops simply
   reuse the aligned ones.  */
1644 #undef TARGET_ASM_ALIGNED_HI_OP
1645 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1646 #undef TARGET_ASM_ALIGNED_SI_OP
1647 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1649 #undef TARGET_ASM_ALIGNED_DI_OP
1650 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1653 #undef TARGET_ASM_UNALIGNED_HI_OP
1654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1655 #undef TARGET_ASM_UNALIGNED_SI_OP
1656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1657 #undef TARGET_ASM_UNALIGNED_DI_OP
1658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Scheduler hooks.  */
1660 #undef TARGET_SCHED_ADJUST_COST
1661 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1662 #undef TARGET_SCHED_ISSUE_RATE
1663 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1666 ia32_multipass_dfa_lookahead
1668 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1669 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1672 #undef TARGET_HAVE_TLS
1673 #define TARGET_HAVE_TLS true
1675 #undef TARGET_CANNOT_FORCE_CONST_MEM
1676 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1677 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1678 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1680 #undef TARGET_DELEGITIMIZE_ADDRESS
1681 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1683 #undef TARGET_MS_BITFIELD_LAYOUT_P
1684 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): darwin_binds_local_p appears unconditionally here, but
   a guarding #if TARGET_MACHO (original lines around 1686/1689) was
   presumably lost in extraction -- confirm against the original file.  */
1687 #undef TARGET_BINDS_LOCAL_P
1688 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1691 #undef TARGET_ASM_OUTPUT_MI_THUNK
1692 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1693 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1694 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1696 #undef TARGET_ASM_FILE_START
1697 #define TARGET_ASM_FILE_START x86_file_start
/* Default target flags; the first line of this initializer expression
   (original line 1701) is missing here.  */
1699 #undef TARGET_DEFAULT_TARGET_FLAGS
1700 #define TARGET_DEFAULT_TARGET_FLAGS \
1702 | TARGET_64BIT_DEFAULT \
1703 | TARGET_SUBTARGET_DEFAULT \
1704 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1706 #undef TARGET_HANDLE_OPTION
1707 #define TARGET_HANDLE_OPTION ix86_handle_option
/* Cost-model hooks.  */
1709 #undef TARGET_RTX_COSTS
1710 #define TARGET_RTX_COSTS ix86_rtx_costs
1711 #undef TARGET_ADDRESS_COST
1712 #define TARGET_ADDRESS_COST ix86_address_cost
1714 #undef TARGET_FIXED_CONDITION_CODE_REGS
1715 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1716 #undef TARGET_CC_MODES_COMPATIBLE
1717 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1719 #undef TARGET_MACHINE_DEPENDENT_REORG
1720 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
/* Varargs and argument-passing hooks.  */
1722 #undef TARGET_BUILD_BUILTIN_VA_LIST
1723 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1725 #undef TARGET_MD_ASM_CLOBBERS
1726 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1728 #undef TARGET_PROMOTE_PROTOTYPES
1729 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1730 #undef TARGET_STRUCT_VALUE_RTX
1731 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1732 #undef TARGET_SETUP_INCOMING_VARARGS
1733 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1734 #undef TARGET_MUST_PASS_IN_STACK
1735 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1736 #undef TARGET_PASS_BY_REFERENCE
1737 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1738 #undef TARGET_INTERNAL_ARG_POINTER
1739 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1740 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1741 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1743 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1744 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1746 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1747 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1749 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1750 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1753 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1754 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Allow subtargets to add their own attributes.  */
1757 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1758 #undef TARGET_INSERT_ATTRIBUTES
1759 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1762 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1763 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1765 #undef TARGET_STACK_PROTECT_FAIL
1766 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1768 #undef TARGET_FUNCTION_VALUE
1769 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The target hook vector itself, assembled from the macros above.  */
1771 struct gcc_target targetm
= TARGET_INITIALIZER
;
1774 /* The svr4 ABI for the i386 says that records and unions are returned
1776 #ifndef DEFAULT_PCC_STRUCT_RETURN
1777 #define DEFAULT_PCC_STRUCT_RETURN 1
1780 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): extraction-damaged -- the function's return type, its
   switch (code) statement, the per-option case labels and the return
   statements are missing from this view; only the mask-clearing bodies
   survive.  Tokens are preserved byte-for-byte; comments only.  */
1783 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
/* Fragment 1: disabling one ISA extension also clears the extensions
   that depend on it, and records them in target_flags_explicit so a
   later default-setting pass will not silently re-enable them.
   Presumably this first pair belongs to the -mno-3dnow case (3DNow!
   implies the Athlon 3DNow! extensions) -- confirm against original.  */
1790 target_flags
&= ~MASK_3DNOW_A
;
1791 target_flags_explicit
|= MASK_3DNOW_A
;
/* Fragment 2: presumably the -mno-mmx case; drops both 3DNow! masks.  */
1798 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1799 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
/* Fragment 3: disabling SSE drops every later SSE generation.  */
1806 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1807 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
/* Fragment 4: disabling SSE2 drops SSE3 and SSE4A.  */
1814 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1815 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
/* Fragment 5: disabling SSE3 drops SSE4A.  */
1822 target_flags
&= ~MASK_SSE4A
;
1823 target_flags_explicit
|= MASK_SSE4A
;
1832 /* Sometimes certain combinations of command options do not make
1833 sense on a particular target machine. You can define a macro
1834 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1835 defined, is executed once just after all the command options have been parsed.
1838 Don't use this macro to turn on various extra optimizations for
1839 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1842 override_options (void)
1845 int ix86_tune_defaulted
= 0;
1846 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1848 /* Comes from final.c -- no real reason to change it. */
1849 #define MAX_CODE_ALIGN 16
1853 const struct processor_costs
*cost
; /* Processor costs */
1854 const int target_enable
; /* Target flags to enable. */
1855 const int target_disable
; /* Target flags to disable. */
1856 const int align_loop
; /* Default alignments. */
1857 const int align_loop_max_skip
;
1858 const int align_jump
;
1859 const int align_jump_max_skip
;
1860 const int align_func
;
1862 const processor_target_table
[PROCESSOR_max
] =
1864 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1865 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1866 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1867 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1868 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1869 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1870 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1871 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1872 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1873 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1874 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1875 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1876 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1877 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1880 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1883 const char *const name
; /* processor name or nickname. */
1884 const enum processor_type processor
;
1885 const enum pta_flags
1891 PTA_PREFETCH_SSE
= 16,
1902 const processor_alias_table
[] =
1904 {"i386", PROCESSOR_I386
, 0},
1905 {"i486", PROCESSOR_I486
, 0},
1906 {"i586", PROCESSOR_PENTIUM
, 0},
1907 {"pentium", PROCESSOR_PENTIUM
, 0},
1908 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1909 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1910 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1911 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1912 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1913 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1914 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1915 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1916 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1917 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1918 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1919 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1920 | PTA_MMX
| PTA_PREFETCH_SSE
},
1921 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1922 | PTA_MMX
| PTA_PREFETCH_SSE
},
1923 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1924 | PTA_MMX
| PTA_PREFETCH_SSE
},
1925 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1926 | PTA_MMX
| PTA_PREFETCH_SSE
| PTA_CX16
},
1927 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1928 | PTA_64BIT
| PTA_MMX
1929 | PTA_PREFETCH_SSE
| PTA_CX16
},
1930 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1932 {"k6", PROCESSOR_K6
, PTA_MMX
},
1933 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1934 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1935 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1937 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1938 | PTA_3DNOW
| PTA_3DNOW_A
},
1939 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1940 | PTA_3DNOW_A
| PTA_SSE
},
1941 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1942 | PTA_3DNOW_A
| PTA_SSE
},
1943 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1944 | PTA_3DNOW_A
| PTA_SSE
},
1945 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1946 | PTA_SSE
| PTA_SSE2
},
1947 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1948 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1949 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1950 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1951 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1952 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1953 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1954 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1955 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1956 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1957 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1958 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1959 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1960 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1963 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1965 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1966 SUBTARGET_OVERRIDE_OPTIONS
;
1969 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1970 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1973 /* -fPIC is the default for x86_64. */
1974 if (TARGET_MACHO
&& TARGET_64BIT
)
1977 /* Set the default values for switches whose default depends on TARGET_64BIT
1978 in case they weren't overwritten by command line options. */
1981 /* Mach-O doesn't support omitting the frame pointer for now. */
1982 if (flag_omit_frame_pointer
== 2)
1983 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1984 if (flag_asynchronous_unwind_tables
== 2)
1985 flag_asynchronous_unwind_tables
= 1;
1986 if (flag_pcc_struct_return
== 2)
1987 flag_pcc_struct_return
= 0;
1991 if (flag_omit_frame_pointer
== 2)
1992 flag_omit_frame_pointer
= 0;
1993 if (flag_asynchronous_unwind_tables
== 2)
1994 flag_asynchronous_unwind_tables
= 0;
1995 if (flag_pcc_struct_return
== 2)
1996 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1999 /* Need to check -mtune=generic first. */
2000 if (ix86_tune_string
)
2002 if (!strcmp (ix86_tune_string
, "generic")
2003 || !strcmp (ix86_tune_string
, "i686")
2004 /* As special support for cross compilers we read -mtune=native
2005 as -mtune=generic. With native compilers we won't see the
2006 -mtune=native, as it was changed by the driver. */
2007 || !strcmp (ix86_tune_string
, "native"))
2010 ix86_tune_string
= "generic64";
2012 ix86_tune_string
= "generic32";
2014 else if (!strncmp (ix86_tune_string
, "generic", 7))
2015 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2019 if (ix86_arch_string
)
2020 ix86_tune_string
= ix86_arch_string
;
2021 if (!ix86_tune_string
)
2023 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2024 ix86_tune_defaulted
= 1;
2027 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2028 need to use a sensible tune option. */
2029 if (!strcmp (ix86_tune_string
, "generic")
2030 || !strcmp (ix86_tune_string
, "x86-64")
2031 || !strcmp (ix86_tune_string
, "i686"))
2034 ix86_tune_string
= "generic64";
2036 ix86_tune_string
= "generic32";
2039 if (ix86_stringop_string
)
2041 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2042 stringop_alg
= rep_prefix_1_byte
;
2043 else if (!strcmp (ix86_stringop_string
, "libcall"))
2044 stringop_alg
= libcall
;
2045 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2046 stringop_alg
= rep_prefix_4_byte
;
2047 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2048 stringop_alg
= rep_prefix_8_byte
;
2049 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2050 stringop_alg
= loop_1_byte
;
2051 else if (!strcmp (ix86_stringop_string
, "loop"))
2052 stringop_alg
= loop
;
2053 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2054 stringop_alg
= unrolled_loop
;
2056 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2058 if (!strcmp (ix86_tune_string
, "x86-64"))
2059 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2060 "-mtune=generic instead as appropriate.");
2062 if (!ix86_arch_string
)
2063 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2064 if (!strcmp (ix86_arch_string
, "generic"))
2065 error ("generic CPU can be used only for -mtune= switch");
2066 if (!strncmp (ix86_arch_string
, "generic", 7))
2067 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2069 if (ix86_cmodel_string
!= 0)
2071 if (!strcmp (ix86_cmodel_string
, "small"))
2072 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2073 else if (!strcmp (ix86_cmodel_string
, "medium"))
2074 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2076 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
2077 else if (!strcmp (ix86_cmodel_string
, "32"))
2078 ix86_cmodel
= CM_32
;
2079 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2080 ix86_cmodel
= CM_KERNEL
;
2081 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
2082 ix86_cmodel
= CM_LARGE
;
2084 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2088 ix86_cmodel
= CM_32
;
2090 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2092 if (ix86_asm_string
!= 0)
2095 && !strcmp (ix86_asm_string
, "intel"))
2096 ix86_asm_dialect
= ASM_INTEL
;
2097 else if (!strcmp (ix86_asm_string
, "att"))
2098 ix86_asm_dialect
= ASM_ATT
;
2100 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2102 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2103 error ("code model %qs not supported in the %s bit mode",
2104 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2105 if (ix86_cmodel
== CM_LARGE
)
2106 sorry ("code model %<large%> not supported yet");
2107 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2108 sorry ("%i-bit mode not compiled in",
2109 (target_flags
& MASK_64BIT
) ? 64 : 32);
2111 for (i
= 0; i
< pta_size
; i
++)
2112 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2114 ix86_arch
= processor_alias_table
[i
].processor
;
2115 /* Default cpu tuning to the architecture. */
2116 ix86_tune
= ix86_arch
;
2117 if (processor_alias_table
[i
].flags
& PTA_MMX
2118 && !(target_flags_explicit
& MASK_MMX
))
2119 target_flags
|= MASK_MMX
;
2120 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2121 && !(target_flags_explicit
& MASK_3DNOW
))
2122 target_flags
|= MASK_3DNOW
;
2123 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2124 && !(target_flags_explicit
& MASK_3DNOW_A
))
2125 target_flags
|= MASK_3DNOW_A
;
2126 if (processor_alias_table
[i
].flags
& PTA_SSE
2127 && !(target_flags_explicit
& MASK_SSE
))
2128 target_flags
|= MASK_SSE
;
2129 if (processor_alias_table
[i
].flags
& PTA_SSE2
2130 && !(target_flags_explicit
& MASK_SSE2
))
2131 target_flags
|= MASK_SSE2
;
2132 if (processor_alias_table
[i
].flags
& PTA_SSE3
2133 && !(target_flags_explicit
& MASK_SSE3
))
2134 target_flags
|= MASK_SSE3
;
2135 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2136 && !(target_flags_explicit
& MASK_SSSE3
))
2137 target_flags
|= MASK_SSSE3
;
2138 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2139 x86_prefetch_sse
= true;
2140 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2141 x86_cmpxchg16b
= true;
2142 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2143 && !(target_flags_explicit
& MASK_POPCNT
))
2144 target_flags
|= MASK_POPCNT
;
2145 if (processor_alias_table
[i
].flags
& PTA_ABM
2146 && !(target_flags_explicit
& MASK_ABM
))
2147 target_flags
|= MASK_ABM
;
2148 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2149 && !(target_flags_explicit
& MASK_SSE4A
))
2150 target_flags
|= MASK_SSE4A
;
2151 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2152 error ("CPU you selected does not support x86-64 "
2158 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2160 ix86_arch_mask
= 1u << ix86_arch
;
2161 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2162 ix86_arch_features
[i
] &= ix86_arch_mask
;
2164 for (i
= 0; i
< pta_size
; i
++)
2165 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2167 ix86_tune
= processor_alias_table
[i
].processor
;
2168 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2170 if (ix86_tune_defaulted
)
2172 ix86_tune_string
= "x86-64";
2173 for (i
= 0; i
< pta_size
; i
++)
2174 if (! strcmp (ix86_tune_string
,
2175 processor_alias_table
[i
].name
))
2177 ix86_tune
= processor_alias_table
[i
].processor
;
2180 error ("CPU you selected does not support x86-64 "
2183 /* Intel CPUs have always interpreted SSE prefetch instructions as
2184 NOPs; so, we can enable SSE prefetch instructions even when
2185 -mtune (rather than -march) points us to a processor that has them.
2186 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2187 higher processors. */
2188 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2189 x86_prefetch_sse
= true;
2193 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2195 ix86_tune_mask
= 1u << ix86_tune
;
2196 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2197 ix86_tune_features
[i
] &= ix86_tune_mask
;
2200 ix86_cost
= &size_cost
;
2202 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2203 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2204 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2206 /* Arrange to set up i386_stack_locals for all functions. */
2207 init_machine_status
= ix86_init_machine_status
;
2209 /* Validate -mregparm= value. */
2210 if (ix86_regparm_string
)
2212 i
= atoi (ix86_regparm_string
);
2213 if (i
< 0 || i
> REGPARM_MAX
)
2214 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2220 ix86_regparm
= REGPARM_MAX
;
2222 /* If the user has provided any of the -malign-* options,
2223 warn and use that value only if -falign-* is not set.
2224 Remove this code in GCC 3.2 or later. */
2225 if (ix86_align_loops_string
)
2227 warning (0, "-malign-loops is obsolete, use -falign-loops");
2228 if (align_loops
== 0)
2230 i
= atoi (ix86_align_loops_string
);
2231 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2232 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2234 align_loops
= 1 << i
;
2238 if (ix86_align_jumps_string
)
2240 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2241 if (align_jumps
== 0)
2243 i
= atoi (ix86_align_jumps_string
);
2244 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2245 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2247 align_jumps
= 1 << i
;
2251 if (ix86_align_funcs_string
)
2253 warning (0, "-malign-functions is obsolete, use -falign-functions");
2254 if (align_functions
== 0)
2256 i
= atoi (ix86_align_funcs_string
);
2257 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2258 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2260 align_functions
= 1 << i
;
2264 /* Default align_* from the processor table. */
2265 if (align_loops
== 0)
2267 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2268 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2270 if (align_jumps
== 0)
2272 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2273 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2275 if (align_functions
== 0)
2277 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2280 /* Validate -mbranch-cost= value, or provide default. */
2281 ix86_branch_cost
= ix86_cost
->branch_cost
;
2282 if (ix86_branch_cost_string
)
2284 i
= atoi (ix86_branch_cost_string
);
2286 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2288 ix86_branch_cost
= i
;
2290 if (ix86_section_threshold_string
)
2292 i
= atoi (ix86_section_threshold_string
);
2294 error ("-mlarge-data-threshold=%d is negative", i
);
2296 ix86_section_threshold
= i
;
2299 if (ix86_tls_dialect_string
)
2301 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2302 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2303 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2304 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2305 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2306 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2308 error ("bad value (%s) for -mtls-dialect= switch",
2309 ix86_tls_dialect_string
);
2312 /* Keep nonleaf frame pointers. */
2313 if (flag_omit_frame_pointer
)
2314 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2315 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2316 flag_omit_frame_pointer
= 1;
2318 /* If we're doing fast math, we don't care about comparison order
2319 wrt NaNs. This lets us use a shorter comparison sequence. */
2320 if (flag_finite_math_only
)
2321 target_flags
&= ~MASK_IEEE_FP
;
2323 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2324 since the insns won't need emulation. */
2325 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2326 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2328 /* Likewise, if the target doesn't have a 387, or we've specified
2329 software floating point, don't use 387 inline intrinsics. */
2331 target_flags
|= MASK_NO_FANCY_MATH_387
;
2333 /* Turn on SSE3 builtins for -mssse3. */
2335 target_flags
|= MASK_SSE3
;
2337 /* Turn on SSE3 builtins for -msse4a. */
2339 target_flags
|= MASK_SSE3
;
2341 /* Turn on SSE2 builtins for -msse3. */
2343 target_flags
|= MASK_SSE2
;
2345 /* Turn on SSE builtins for -msse2. */
2347 target_flags
|= MASK_SSE
;
2349 /* Turn on MMX builtins for -msse. */
2352 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2353 x86_prefetch_sse
= true;
2356 /* Turn on MMX builtins for 3Dnow. */
2358 target_flags
|= MASK_MMX
;
2360 /* Turn on POPCNT builtins for -mabm. */
2362 target_flags
|= MASK_POPCNT
;
2366 if (TARGET_ALIGN_DOUBLE
)
2367 error ("-malign-double makes no sense in the 64bit mode");
2369 error ("-mrtd calling convention not supported in the 64bit mode");
2371 /* Enable by default the SSE and MMX builtins. Do allow the user to
2372 explicitly disable any of these. In particular, disabling SSE and
2373 MMX for kernel code is extremely useful. */
2375 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2376 & ~target_flags_explicit
);
2380 /* i386 ABI does not specify red zone. It still makes sense to use it
2381 when programmer takes care to stack from being destroyed. */
2382 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2383 target_flags
|= MASK_NO_RED_ZONE
;
2386 /* Validate -mpreferred-stack-boundary= value, or provide default.
2387 The default of 128 bits is for Pentium III's SSE __m128. We can't
2388 change it because of optimize_size. Otherwise, we can't mix object
2389 files compiled with -Os and -On. */
2390 ix86_preferred_stack_boundary
= 128;
2391 if (ix86_preferred_stack_boundary_string
)
2393 i
= atoi (ix86_preferred_stack_boundary_string
);
2394 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2395 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2396 TARGET_64BIT
? 4 : 2);
2398 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2401 /* Accept -msseregparm only if at least SSE support is enabled. */
2402 if (TARGET_SSEREGPARM
2404 error ("-msseregparm used without SSE enabled");
2406 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2407 if (ix86_fpmath_string
!= 0)
2409 if (! strcmp (ix86_fpmath_string
, "387"))
2410 ix86_fpmath
= FPMATH_387
;
2411 else if (! strcmp (ix86_fpmath_string
, "sse"))
2415 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2416 ix86_fpmath
= FPMATH_387
;
2419 ix86_fpmath
= FPMATH_SSE
;
2421 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2422 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2426 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2427 ix86_fpmath
= FPMATH_387
;
2429 else if (!TARGET_80387
)
2431 warning (0, "387 instruction set disabled, using SSE arithmetics");
2432 ix86_fpmath
= FPMATH_SSE
;
2435 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2438 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2441 /* If the i387 is disabled, then do not return values in it. */
2443 target_flags
&= ~MASK_FLOAT_RETURNS
;
2445 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2446 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2448 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2450 /* ??? Unwind info is not correct around the CFG unless either a frame
2451 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2452 unwind info generation to be aware of the CFG and propagating states
2454 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2455 || flag_exceptions
|| flag_non_call_exceptions
)
2456 && flag_omit_frame_pointer
2457 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2459 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2460 warning (0, "unwind tables currently require either a frame pointer "
2461 "or -maccumulate-outgoing-args for correctness");
2462 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2465 /* For sane SSE instruction set generation we need fcomi instruction.
2466 It is safe to enable all CMOVE instructions. */
2470 /* ??? Any idea why this is unconditionally disabled for 64-bit? */
2472 TARGET_USE_SAHF
= 0;
2474 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2477 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2478 p
= strchr (internal_label_prefix
, 'X');
2479 internal_label_prefix_len
= p
- internal_label_prefix
;
2483 /* When scheduling description is not available, disable scheduler pass
2484 so it won't slow down the compilation and make x87 code slower. */
2485 if (!TARGET_SCHEDULE
)
2486 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2488 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2489 set_param_value ("simultaneous-prefetches",
2490 ix86_cost
->simultaneous_prefetches
);
2491 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2492 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2495 /* Switch to the appropriate section for output of DECL.
2496 DECL is either a `VAR_DECL' node or a constant of some sort.
2497 RELOC indicates whether forming the initial value of DECL requires
2498 link-time relocations. */
/* NOTE(review): extraction-damaged -- the return type line, braces,
   break statements and several case bodies of this function are
   missing from this view; tokens are preserved byte-for-byte.  */
2501 x86_64_elf_select_section (tree decl
, int reloc
,
2502 unsigned HOST_WIDE_INT align
)
/* For the medium code models, large data objects are placed in the
   special .ldata.* sections rather than the default ELF ones.  */
2504 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2505 && ix86_in_large_data_p (decl
))
2507 const char *sname
= NULL
;
2508 unsigned int flags
= SECTION_WRITE
;
/* Choose the .ldata variant matching the decl's categorization.  */
2509 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2514 case SECCAT_DATA_REL
:
2515 sname
= ".ldata.rel";
2517 case SECCAT_DATA_REL_LOCAL
:
2518 sname
= ".ldata.rel.local";
2520 case SECCAT_DATA_REL_RO
:
2521 sname
= ".ldata.rel.ro";
2523 case SECCAT_DATA_REL_RO_LOCAL
:
2524 sname
= ".ldata.rel.ro.local";
/* Presumably part of the BSS case -- uninitialized data additionally
   gets SECTION_BSS (the case label itself is missing here).  */
2528 flags
|= SECTION_BSS
;
2531 case SECCAT_RODATA_MERGE_STR
:
2532 case SECCAT_RODATA_MERGE_STR_INIT
:
2533 case SECCAT_RODATA_MERGE_CONST
:
2537 case SECCAT_SRODATA
:
2544 /* We don't split these for medium model. Place them into
2545 default sections and hope for best. */
2550 /* We might get called with string constants, but get_named_section
2551 doesn't like them as they are not DECLs. Also, we need to set
2552 flags in that case. */
2554 return get_section (sname
, flags
, NULL
);
2555 return get_named_section (decl
, sname
, reloc
);
/* Everything else: defer to the generic ELF section selector.  */
2558 return default_elf_select_section (decl
, reloc
, align
);
2561 /* Build up a unique section name, expressed as a
2562 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2563 RELOC indicates whether the initial value of EXP requires
2564 link-time relocations. */
/* NOTE(review): extraction-damaged -- return type, braces, break
   statements and the declarations of plen/nlen/name/string are missing
   from this view; tokens are preserved byte-for-byte.  */
2567 x86_64_elf_unique_section (tree decl
, int reloc
)
/* Medium-model large data gets an .ldata/.lbss/.lrodata-prefixed
   (or .gnu.linkonce.l*) unique section name.  */
2569 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2570 && ix86_in_large_data_p (decl
))
2572 const char *prefix
= NULL
;
2573 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2574 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2576 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2579 case SECCAT_DATA_REL
:
2580 case SECCAT_DATA_REL_LOCAL
:
2581 case SECCAT_DATA_REL_RO
:
2582 case SECCAT_DATA_REL_RO_LOCAL
:
2583 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
/* Presumably the BSS case (label missing from this view).  */
2586 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2589 case SECCAT_RODATA_MERGE_STR
:
2590 case SECCAT_RODATA_MERGE_STR_INIT
:
2591 case SECCAT_RODATA_MERGE_CONST
:
2592 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2594 case SECCAT_SRODATA
:
2601 /* We don't split these for medium model. Place them into
2602 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a fresh buffer
   and install it as the decl's section name.  */
2610 plen
= strlen (prefix
);
2612 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2613 name
= targetm
.strip_name_encoding (name
);
2614 nlen
= strlen (name
);
/* alloca: buffer lives only until build_string copies it.  */
2616 string
= alloca (nlen
+ plen
+ 1);
2617 memcpy (string
, prefix
, plen
);
2618 memcpy (string
+ plen
, name
, nlen
+ 1);
2620 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
/* Non-large data: fall back to the generic unique-section logic.  */
2624 default_unique_section (decl
, reloc
);
2627 #ifdef COMMON_ASM_OP
2628 /* This says how to output assembler code to declare an
2629 uninitialized external linkage data object.
2631 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): extraction-damaged -- the return type, the final
   `unsigned int align' parameter line, braces and the `else' between
   the two fprintf branches are missing from this view.  */
2634 x86_elf_aligned_common (FILE *file
,
2635 const char *name
, unsigned HOST_WIDE_INT size
,
/* Objects above the -mlarge-data-threshold in medium-model code use
   the .largecomm directive instead of the normal COMMON_ASM_OP.  */
2638 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2639 && size
> (unsigned int)ix86_section_threshold
)
2640 fprintf (file
, ".largecomm\t");
2642 fprintf (file
, "%s", COMMON_ASM_OP
);
2643 assemble_name (file
, name
);
/* Emit ",<size>,<alignment in bytes>" after the symbol name.  */
2644 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2645 size
, align
/ BITS_PER_UNIT
);
2648 /* Utility function for targets to use in implementing
2649 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): extraction-damaged -- the return type, the `unsigned
   int align' parameter line, braces and the `else' before the
   bss_section branch are missing from this view.  */
2652 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2653 const char *name
, unsigned HOST_WIDE_INT size
,
/* Medium-model large BSS objects go to .lbss; everything else to the
   regular bss section.  */
2656 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2657 && size
> (unsigned int)ix86_section_threshold
)
2658 switch_to_section (get_named_section (decl
, ".lbss", 0));
2660 switch_to_section (bss_section
);
2661 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
/* Prefer the target's object-name directive when available.  */
2662 #ifdef ASM_DECLARE_OBJECT_NAME
2663 last_assemble_variable_decl
= decl
;
2664 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2666 /* Standard thing is just output label for the object. */
2667 ASM_OUTPUT_LABEL (file
, name
);
2668 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve the space; a zero-sized object still reserves one byte.  */
2669 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* Implement OPTIMIZATION_OPTIONS: set -O-level-dependent defaults.
   NOTE(review): extraction-damaged -- the return type, braces, the
   `if (level > 1)' guard for the scheduling tweak and the #endif
   directives are missing from this view.  */
2673 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2675 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2676 make the problem with not enough registers even worse. */
2677 #ifdef INSN_SCHEDULING
2679 flag_schedule_insns
= 0;
2683 /* The Darwin libraries never set errno, so we might as well
2684 avoid calling them when that's the only reason we would. */
2685 flag_errno_math
= 0;
2687 /* The default values of these switches depend on the TARGET_64BIT
2688 that is not known at this moment. Mark these values with 2 and
2689 let the user override these. In case there is no command line option
2690 specifying them, we will set the defaults in override_options. */
2692 flag_omit_frame_pointer
= 2;
2693 flag_pcc_struct_return
= 2;
2694 flag_asynchronous_unwind_tables
= 2;
/* Give subtargets a chance to adjust the defaults too.  */
2695 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2696 SUBTARGET_OPTIMIZATION_OPTIONS
;
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Entry layout is documented by the
   { name, min_len, max_len, decl_req, type_req, fn_type_req, handler }
   comment below; the table is NULL-terminated.  */
2700 /* Table of valid machine attributes. */
2701 const struct attribute_spec ix86_attribute_table
[] =
2703 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2704 /* Stdcall attribute says callee is responsible for popping arguments
2705 if they are not variable. */
2706 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2707 /* Fastcall attribute says callee is responsible for popping arguments
2708 if they are not variable. */
2709 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2710 /* Cdecl attribute says the callee is a normal C declaration */
2711 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2712 /* Regparm attribute specifies how many integer arguments are to be
2713 passed in registers. */
2714 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2715 /* Sseregparm attribute says we are using x86_64 calling conventions
2716 for FP arguments. */
2717 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2718 /* force_align_arg_pointer says this function realigns the stack at entry. */
2719 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2720 false, true, true, ix86_handle_cconv_attribute
},
2721 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2722 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2723 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2724 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2726 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2727 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2728 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2729 SUBTARGET_ATTRIBUTE_TABLE
,
/* Sentinel entry terminating the table.  */
2731 { NULL
, 0, 0, false, false, false, NULL
}
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Several `return false/true` lines appear to be
   missing from this rendering — confirm against the original file.  */
2734 /* Decide whether we can make a sibling call to a function. DECL is the
2735 declaration of the function being targeted by the call and EXP is the
2736 CALL_EXPR representing the call. */
2739 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2744 /* If we are generating position-independent code, we cannot sibcall
2745 optimize any indirect call, or a direct call to a global function,
2746 as the PLT requires %ebx be live. */
2747 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2754 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2755 if (POINTER_TYPE_P (func
))
2756 func
= TREE_TYPE (func
);
2759 /* Check that the return value locations are the same. Like
2760 if we are returning floats on the 80387 register stack, we cannot
2761 make a sibcall from a function that doesn't return a float to a
2762 function that does or, conversely, from a function that does return
2763 a float to a function that doesn't; the necessary stack adjustment
2764 would not be executed. This is also the place we notice
2765 differences in the return value ABI. Note that it is ok for one
2766 of the functions to have void return type as long as the return
2767 value of the other is passed in a register. */
2768 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2769 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2771 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2773 if (!rtx_equal_p (a
, b
))
2776 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2778 else if (!rtx_equal_p (a
, b
))
2781 /* If this call is indirect, we'll need to be able to use a call-clobbered
2782 register for the address of the target function. Make sure that all
2783 such registers are not used for passing parameters. */
2784 if (!decl
&& !TARGET_64BIT
)
2788 /* We're looking at the CALL_EXPR, we need the type of the function. */
2789 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2790 type
= TREE_TYPE (type
); /* pointer type */
2791 type
= TREE_TYPE (type
); /* function type */
/* With regparm >= 3 all call-clobbered registers may carry arguments,
   leaving none free to hold the indirect call target.  */
2793 if (ix86_function_regparm (type
, NULL
) >= 3)
2795 /* ??? Need to count the actual number of registers to be used,
2796 not the possible number of registers. Fix later. */
2801 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2802 /* Dllimport'd functions are also called indirectly. */
2803 if (decl
&& DECL_DLLIMPORT_P (decl
)
2804 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2808 /* If we forced aligned the stack, then sibcalling would unalign the
2809 stack, which may break the called function. */
2810 if (cfun
->machine
->force_align_arg_pointer
)
2813 /* Otherwise okay. That also includes certain types of indirect calls. */
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Return statements and several braces appear
   missing from this rendering — confirm against the original file.  */
2817 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2818 calling convention attributes;
2819 arguments as in struct attribute_spec.handler. */
2822 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2824 int flags ATTRIBUTE_UNUSED
,
/* These attributes are only meaningful on function types (or decls
   that carry one); warn and drop them everywhere else.  */
2827 if (TREE_CODE (*node
) != FUNCTION_TYPE
2828 && TREE_CODE (*node
) != METHOD_TYPE
2829 && TREE_CODE (*node
) != FIELD_DECL
2830 && TREE_CODE (*node
) != TYPE_DECL
)
2832 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2833 IDENTIFIER_POINTER (name
));
2834 *no_add_attrs
= true;
2838 /* Can combine regparm with all attributes but fastcall. */
2839 if (is_attribute_p ("regparm", name
))
2843 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2845 error ("fastcall and regparm attributes are not compatible");
/* Validate the single regparm argument: must be an integer constant
   no larger than REGPARM_MAX.  */
2848 cst
= TREE_VALUE (args
);
2849 if (TREE_CODE (cst
) != INTEGER_CST
)
2851 warning (OPT_Wattributes
,
2852 "%qs attribute requires an integer constant argument",
2853 IDENTIFIER_POINTER (name
));
2854 *no_add_attrs
= true;
2856 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2858 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2859 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2860 *no_add_attrs
= true;
/* force_align_arg_pointer functions need %ecx in the prologue, so they
   cannot use the last regparm register.  */
2864 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2865 TYPE_ATTRIBUTES (*node
))
2866 && compare_tree_int (cst
, REGPARM_MAX
-1))
2868 error ("%s functions limited to %d register parameters",
2869 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
/* Presumably the TARGET_64BIT path: these attributes are ignored on
   x86-64 — TODO confirm against the original file.  */
2877 warning (OPT_Wattributes
, "%qs attribute ignored",
2878 IDENTIFIER_POINTER (name
));
2879 *no_add_attrs
= true;
2883 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2884 if (is_attribute_p ("fastcall", name
))
2886 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2888 error ("fastcall and cdecl attributes are not compatible");
2890 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2892 error ("fastcall and stdcall attributes are not compatible");
2894 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2896 error ("fastcall and regparm attributes are not compatible");
2900 /* Can combine stdcall with fastcall (redundant), regparm and
2902 else if (is_attribute_p ("stdcall", name
))
2904 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2906 error ("stdcall and cdecl attributes are not compatible");
2908 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2910 error ("stdcall and fastcall attributes are not compatible");
2914 /* Can combine cdecl with regparm and sseregparm. */
2915 else if (is_attribute_p ("cdecl", name
))
2917 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2919 error ("stdcall and cdecl attributes are not compatible");
2921 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2923 error ("fastcall and cdecl attributes are not compatible");
2927 /* Can combine sseregparm with all attributes. */
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  The `return 0;`/`return 1;` lines appear to
   be missing from this rendering.  */
2932 /* Return 0 if the attributes for two types are incompatible, 1 if they
2933 are compatible, and 2 if they are nearly compatible (which causes a
2934 warning to be generated). */
2937 ix86_comp_type_attributes (tree type1
, tree type2
)
2939 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default is stdcall, so the *non-default* convention
   to compare is cdecl; otherwise it is stdcall.  */
2940 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2942 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2945 /* Check for mismatched fastcall/regparm types. */
/* The double negation (!lookup != !lookup) compares attribute
   presence as booleans.  */
2946 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2947 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2948 || (ix86_function_regparm (type1
, NULL
)
2949 != ix86_function_regparm (type2
, NULL
)))
2952 /* Check for mismatched sseregparm types. */
2953 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2954 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2957 /* Check for mismatched return types (cdecl vs stdcall). */
2958 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2959 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Braces, the `return regparm;` tail, and some
   conditions appear missing from this rendering.  */
2965 /* Return the regparm value for a function with the indicated TYPE and DECL.
2966 DECL may be NULL when calling function indirectly
2967 or considering a libcall. */
2970 ix86_function_regparm (tree type
, tree decl
)
2973 int regparm
= ix86_regparm
;
2974 bool user_convention
= false;
/* An explicit regparm attribute overrides the -mregparm default.  */
2978 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2981 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2982 user_convention
= true;
/* fastcall is also a user-specified convention (uses %ecx/%edx).  */
2985 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2988 user_convention
= true;
2991 /* Use register calling convention for local functions when possible. */
2992 if (!TARGET_64BIT
&& !user_convention
&& decl
2993 && flag_unit_at_a_time
&& !profile_flag
)
2995 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2998 int local_regparm
, globals
= 0, regno
;
3000 /* Make sure no regparm register is taken by a global register
3002 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3003 if (global_regs
[local_regparm
])
3005 /* We can't use regparm(3) for nested functions as these use
3006 static chain pointer in third argument. */
3007 if (local_regparm
== 3
3008 && decl_function_context (decl
)
3009 && !DECL_NO_STATIC_CHAIN (decl
))
3011 /* If the function realigns its stackpointer, the
3012 prologue will clobber %ecx. If we've already
3013 generated code for the callee, the callee
3014 DECL_STRUCT_FUNCTION is gone, so we fall back to
3015 scanning the attributes for the self-realigning
3017 if ((DECL_STRUCT_FUNCTION (decl
)
3018 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
3019 || (!DECL_STRUCT_FUNCTION (decl
)
3020 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3021 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3023 /* Each global register variable increases register preassure,
3024 so the more global reg vars there are, the smaller regparm
3025 optimization use, unless requested by the user explicitly. */
3026 for (regno
= 0; regno
< 6; regno
++)
3027 if (global_regs
[regno
])
3030 = globals
< local_regparm
? local_regparm
- globals
: 0;
3032 if (local_regparm
> regparm
)
3033 regparm
= local_regparm
;
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Several conditions and returns appear missing
   from this rendering.  */
3040 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3041 DFmode (2) arguments in SSE registers for a function with the
3042 indicated TYPE and DECL. DECL may be NULL when calling function
3043 indirectly or considering a libcall. Otherwise return 0. */
3046 ix86_function_sseregparm (tree type
, tree decl
)
3048 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3049 by the sseregparm attribute. */
3050 if (TARGET_SSEREGPARM
3052 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
/* sseregparm without SSE hardware support is a hard error; prefer the
   decl in the diagnostic when one is available.  */
3057 error ("Calling %qD with attribute sseregparm without "
3058 "SSE/SSE2 enabled", decl
);
3060 error ("Calling %qT with attribute sseregparm without "
3061 "SSE/SSE2 enabled", type
);
3068 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3069 (and DFmode for SSE2) arguments in SSE registers,
3070 even for 32-bit targets. */
3071 if (!TARGET_64BIT
&& decl
3072 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3074 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3076 return TARGET_SSE2
? 2 : 1;
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Checks liveness of hard register 0 (%eax) at
   the entry block via CFG dataflow info.  */
3082 /* Return true if EAX is live at the start of the function. Used by
3083 ix86_expand_prologue to determine if we need special help before
3084 calling allocate_stack_worker. */
3087 ix86_eax_live_at_start_p (void)
3089 /* Cheat. Don't bother working forward from ix86_function_regparm
3090 to the function type to whether an actual argument is located in
3091 eax. Instead just look at cfg info, which is still close enough
3092 to correct at this point. This gives false positives for broken
3093 functions that might use uninitialized data that happens to be
3094 allocated in eax, but who cares? */
3095 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Several `return size;`/`return 0;` lines and
   closing braces appear missing from this rendering.  */
3098 /* Value is the number of bytes of arguments automatically
3099 popped when returning from a subroutine call.
3100 FUNDECL is the declaration node of the function (as a tree),
3101 FUNTYPE is the data type of the function (as a tree),
3102 or for a library call it is an identifier node for the subroutine name.
3103 SIZE is the number of bytes of arguments passed on the stack.
3105 On the 80386, the RTD insn may be used to pop them if the number
3106 of args is fixed, but if the number is variable then the caller
3107 must pop them all. RTD can't be used for library calls now
3108 because the library is compiled with the Unix compiler.
3109 Use of RTD is a selectable option, since it is incompatible with
3110 standard Unix calling sequences. If the option is not selected,
3111 the caller must always pop the args.
3113 The attribute stdcall is equivalent to RTD on a per module basis. */
3116 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
/* -mrtd applies only to real function decls, never to library-call
   identifier nodes.  */
3118 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3120 /* Cdecl functions override -mrtd, and never pop the stack. */
3121 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3123 /* Stdcall and fastcall functions will pop the stack if not
3125 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3126 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
/* Callee pops only when the argument list is fixed (ends in void).  */
3130 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3131 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3132 == void_type_node
)))
3136 /* Lose any fake structure return argument if it is passed on the stack. */
3137 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3139 && !KEEP_AGGREGATE_RETURN_POINTER
)
3141 int nregs
= ix86_function_regparm (funtype
, fundecl
);
/* Pop the hidden aggregate-return pointer (one word).  */
3144 return GET_MODE_SIZE (Pmode
);
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  The branch structure (32-bit vs 64-bit paths)
   is partly missing from this rendering — confirm against original.  */
3150 /* Argument support functions. */
3152 /* Return true when register may be used to pass function parameters. */
3154 ix86_function_arg_regno_p (int regno
)
3160 return (regno
< REGPARM_MAX
3161 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3163 return (regno
< REGPARM_MAX
3164 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3165 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3166 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3167 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3172 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3177 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3178 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3181 /* RAX is used as hidden argument to va_arg functions. */
/* x86-64: scan the integer parameter register table for REGNO.  */
3184 for (i
= 0; i
< REGPARM_MAX
; i
++)
3185 if (regno
== x86_64_int_parameter_registers
[i
])
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  */
3190 /* Return if we do not know how to pass TYPE solely in registers. */
3193 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
/* First defer to the generic variable-size / padding check.  */
3195 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3198 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3199 The layout_type routine is crafty and tries to trick us into passing
3200 currently unsupported vector types on the stack by using TImode. */
3201 return (!TARGET_64BIT
&& mode
== TImode
3202 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  The `*cum = zero_cum;` initialization, the
   fndecl parameter line, and several branch structures appear missing
   from this rendering — confirm against the original file.  */
3205 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3206 for a call to a function whose data type is FNTYPE.
3207 For a library call, FNTYPE is 0. */
3210 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3211 tree fntype
, /* tree ptr for function decl */
3212 rtx libname
, /* SYMBOL_REF of library name or 0 */
3215 static CUMULATIVE_ARGS zero_cum
;
3216 tree param
, next_param
;
/* Optional debug trace of the incoming fntype/libname.  */
3218 if (TARGET_DEBUG_ARG
)
3220 fprintf (stderr
, "\ninit_cumulative_args (");
3222 fprintf (stderr
, "fntype code = %s, ret code = %s",
3223 tree_code_name
[(int) TREE_CODE (fntype
)],
3224 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3226 fprintf (stderr
, "no fntype");
3229 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3234 /* Set up the number of registers to use for passing arguments. */
3235 cum
->nregs
= ix86_regparm
;
3237 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3239 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3240 cum
->warn_sse
= true;
3241 cum
->warn_mmx
= true;
3242 cum
->maybe_vaarg
= false;
3244 /* Use ecx and edx registers if function has fastcall attribute,
3245 else look for regparm information. */
3246 if (fntype
&& !TARGET_64BIT
)
3248 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3254 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3257 /* Set up the number of SSE registers used for passing SFmode
3258 and DFmode arguments. Warn for mismatching ABI. */
3259 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3261 /* Determine if this function has variable arguments. This is
3262 indicated by the last argument being 'void_type_mode' if there
3263 are no variable arguments. If there are variable arguments, then
3264 we won't pass anything in registers in 32-bit mode. */
3266 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
/* Walk the arg-type list; a list not terminated by void_type_node
   means the function is varargs.  */
3268 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3269 param
!= 0; param
= next_param
)
3271 next_param
= TREE_CHAIN (param
);
3272 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3282 cum
->float_in_sse
= 0;
3284 cum
->maybe_vaarg
= true;
/* No prototype information at all also implies possible varargs.  */
3288 if ((!fntype
&& !libname
)
3289 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3290 cum
->maybe_vaarg
= true;
3292 if (TARGET_DEBUG_ARG
)
3293 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  The final `return mode;` lines appear missing
   from this rendering.  */
3298 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3299 But in the case of vector types, it is some vector mode.
3301 When we have only some of our vector isa extensions enabled, then there
3302 are some modes for which vector_mode_supported_p is false. For these
3303 modes, the generic vector support in gcc will choose some non-vector mode
3304 in order to implement the type. By computing the natural mode, we'll
3305 select the proper ABI location for the operand and not depend on whatever
3306 the middle-end decides to do with these vector types. */
3308 static enum machine_mode
3309 type_natural_mode (tree type
)
3311 enum machine_mode mode
= TYPE_MODE (type
);
/* Only reconstruct a vector mode when the middle-end picked a
   non-vector mode for a vector type of ABI-relevant size (8/16B).  */
3313 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3315 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3316 if ((size
== 8 || size
== 16)
3317 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3318 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3320 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3322 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3323 mode
= MIN_MODE_VECTOR_FLOAT
;
3325 mode
= MIN_MODE_VECTOR_INT
;
3327 /* Get the mode which has this inner mode and number of units. */
3328 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3329 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3330 && GET_MODE_INNER (mode
) == innermode
)
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  The `return tmp;` tail appears missing from
   this rendering.  */
3340 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3341 this may not agree with the mode that the type system has chosen for the
3342 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3343 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3346 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3351 if (orig_mode
!= BLKmode
)
3352 tmp
= gen_rtx_REG (orig_mode
, regno
);
/* BLKmode: wrap the register in a single-entry PARALLEL at offset 0.  */
3355 tmp
= gen_rtx_REG (mode
, regno
);
3356 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3357 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Implements the class-merge rules of the
   System V x86-64 psABI argument classification.  */
3363 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3364 of this code is to classify each 8bytes of incoming argument by the register
3365 class and assign registers accordingly. */
3367 /* Return the union class of CLASS1 and CLASS2.
3368 See the x86-64 PS ABI for details. */
3370 static enum x86_64_reg_class
3371 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3373 /* Rule #1: If both classes are equal, this is the resulting class. */
3374 if (class1
== class2
)
3377 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3379 if (class1
== X86_64_NO_CLASS
)
3381 if (class2
== X86_64_NO_CLASS
)
3384 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3385 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3386 return X86_64_MEMORY_CLASS
;
3388 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI merged with SSESF stays INTEGERSI (both 32-bit wide).  */
3389 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3390 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3391 return X86_64_INTEGERSI_CLASS
;
3392 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3393 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3394 return X86_64_INTEGER_CLASS
;
3396 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3398 if (class1
== X86_64_X87_CLASS
3399 || class1
== X86_64_X87UP_CLASS
3400 || class1
== X86_64_COMPLEX_X87_CLASS
3401 || class2
== X86_64_X87_CLASS
3402 || class2
== X86_64_X87UP_CLASS
3403 || class2
== X86_64_COMPLEX_X87_CLASS
)
3404 return X86_64_MEMORY_CLASS
;
3406 /* Rule #6: Otherwise class SSE is used. */
3407 return X86_64_SSE_CLASS
;
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Many structural lines (braces, case labels,
   returns, local declarations) appear missing from this rendering —
   confirm against the original file before relying on control flow.  */
3410 /* Classify the argument of type TYPE and mode MODE.
3411 CLASSES will be filled by the register class used to pass each word
3412 of the operand. The number of words is returned. In case the parameter
3413 should be passed in memory, 0 is returned. As a special case for zero
3414 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3416 BIT_OFFSET is used internally for handling records and specifies offset
3417 of the offset in bits modulo 256 to avoid overflow cases.
3419 See the x86-64 PS ABI for details.
3423 classify_argument (enum machine_mode mode
, tree type
,
3424 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3426 HOST_WIDE_INT bytes
=
3427 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3428 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3430 /* Variable sized entities are always passed/returned in memory. */
3434 if (mode
!= VOIDmode
3435 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3438 if (type
&& AGGREGATE_TYPE_P (type
))
3442 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3444 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3448 for (i
= 0; i
< words
; i
++)
3449 classes
[i
] = X86_64_NO_CLASS
;
3451 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3452 signalize memory class, so handle it as special case. */
3455 classes
[0] = X86_64_NO_CLASS
;
3459 /* Classify each field of record and merge classes. */
3460 switch (TREE_CODE (type
))
3463 /* And now merge the fields of structure. */
3464 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3466 if (TREE_CODE (field
) == FIELD_DECL
)
3470 if (TREE_TYPE (field
) == error_mark_node
)
3473 /* Bitfields are always classified as integer. Handle them
3474 early, since later code would consider them to be
3475 misaligned integers. */
3476 if (DECL_BIT_FIELD (field
))
3478 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3479 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3480 + tree_low_cst (DECL_SIZE (field
), 0)
3483 merge_classes (X86_64_INTEGER_CLASS
,
/* Non-bitfield: recursively classify the field type at its
   bit position, then merge into the word classes.  */
3488 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3489 TREE_TYPE (field
), subclasses
,
3490 (int_bit_position (field
)
3491 + bit_offset
) % 256);
3494 for (i
= 0; i
< num
; i
++)
3497 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3499 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3507 /* Arrays are handled as small records. */
3510 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3511 TREE_TYPE (type
), subclasses
, bit_offset
);
3515 /* The partial classes are now full classes. */
3516 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3517 subclasses
[0] = X86_64_SSE_CLASS
;
3518 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3519 subclasses
[0] = X86_64_INTEGER_CLASS
;
/* Replicate the element classification across all words.  */
3521 for (i
= 0; i
< words
; i
++)
3522 classes
[i
] = subclasses
[i
% num
];
3527 case QUAL_UNION_TYPE
:
3528 /* Unions are similar to RECORD_TYPE but offset is always 0.
3530 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3532 if (TREE_CODE (field
) == FIELD_DECL
)
3536 if (TREE_TYPE (field
) == error_mark_node
)
3539 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3540 TREE_TYPE (field
), subclasses
,
3544 for (i
= 0; i
< num
; i
++)
3545 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3554 /* Final merger cleanup. */
3555 for (i
= 0; i
< words
; i
++)
3557 /* If one class is MEMORY, everything should be passed in
3559 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3562 /* The X86_64_SSEUP_CLASS should be always preceded by
3563 X86_64_SSE_CLASS. */
3564 if (classes
[i
] == X86_64_SSEUP_CLASS
3565 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3566 classes
[i
] = X86_64_SSE_CLASS
;
3568 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3569 if (classes
[i
] == X86_64_X87UP_CLASS
3570 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3571 classes
[i
] = X86_64_SSE_CLASS
;
3576 /* Compute alignment needed. We align all types to natural boundaries with
3577 exception of XFmode that is aligned to 64bits. */
3578 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3580 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3583 mode_alignment
= 128;
3584 else if (mode
== XCmode
)
3585 mode_alignment
= 256;
3586 if (COMPLEX_MODE_P (mode
))
3587 mode_alignment
/= 2;
3588 /* Misaligned fields are always returned in memory. */
3589 if (bit_offset
% mode_alignment
)
3593 /* for V1xx modes, just use the base mode */
3594 if (VECTOR_MODE_P (mode
)
3595 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3596 mode
= GET_MODE_INNER (mode
);
3598 /* Classification of atomic types. */
/* NOTE(review): the case labels of the scalar-mode switch are missing
   from this rendering; the assignments below belong to per-mode cases
   (SF/DF/TF/XF/SC/DC/XC/TC, integer modes, vectors) — confirm.  */
3603 classes
[0] = X86_64_SSE_CLASS
;
3606 classes
[0] = X86_64_SSE_CLASS
;
3607 classes
[1] = X86_64_SSEUP_CLASS
;
3616 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3617 classes
[0] = X86_64_INTEGERSI_CLASS
;
3619 classes
[0] = X86_64_INTEGER_CLASS
;
3623 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3628 if (!(bit_offset
% 64))
3629 classes
[0] = X86_64_SSESF_CLASS
;
3631 classes
[0] = X86_64_SSE_CLASS
;
3634 classes
[0] = X86_64_SSEDF_CLASS
;
3637 classes
[0] = X86_64_X87_CLASS
;
3638 classes
[1] = X86_64_X87UP_CLASS
;
3641 classes
[0] = X86_64_SSE_CLASS
;
3642 classes
[1] = X86_64_SSEUP_CLASS
;
3645 classes
[0] = X86_64_SSE_CLASS
;
3648 classes
[0] = X86_64_SSEDF_CLASS
;
3649 classes
[1] = X86_64_SSEDF_CLASS
;
3652 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3655 /* This modes is larger than 16 bytes. */
3663 classes
[0] = X86_64_SSE_CLASS
;
3664 classes
[1] = X86_64_SSEUP_CLASS
;
3670 classes
[0] = X86_64_SSE_CLASS
;
3676 gcc_assert (VECTOR_MODE_P (mode
));
3681 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3683 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3684 classes
[0] = X86_64_INTEGERSI_CLASS
;
3686 classes
[0] = X86_64_INTEGER_CLASS
;
3687 classes
[1] = X86_64_INTEGER_CLASS
;
3688 return 1 + (bytes
> 8);
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  The increments of *int_nregs/*sse_nregs in the
   switch bodies appear missing from this rendering.  */
3692 /* Examine the argument and return set number of register required in each
3693 class. Return 0 iff parameter should be passed in memory. */
3695 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3696 int *int_nregs
, int *sse_nregs
)
3698 enum x86_64_reg_class
class[MAX_CLASSES
];
3699 int n
= classify_argument (mode
, type
, class, 0);
/* Tally register demand per word class, walking backwards.  */
3705 for (n
--; n
>= 0; n
--)
3708 case X86_64_INTEGER_CLASS
:
3709 case X86_64_INTEGERSI_CLASS
:
3712 case X86_64_SSE_CLASS
:
3713 case X86_64_SSESF_CLASS
:
3714 case X86_64_SSEDF_CLASS
:
3717 case X86_64_NO_CLASS
:
3718 case X86_64_SSEUP_CLASS
:
3720 case X86_64_X87_CLASS
:
3721 case X86_64_X87UP_CLASS
:
/* x87 classes are register-returnable but never register-passable.  */
3725 case X86_64_COMPLEX_X87_CLASS
:
3726 return in_return
? 2 : 0;
3727 case X86_64_MEMORY_CLASS
:
/* NOTE(review): line-mangled extraction; code tokens kept verbatim,
   comments only added.  Multiple `return NULL;`/`return ret;` lines,
   register bookkeeping statements and braces appear missing from this
   rendering — confirm against the original file.  */
3733 /* Construct container for the argument used by GCC interface. See
3734 FUNCTION_ARG for the detailed description. */
3737 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3738 tree type
, int in_return
, int nintregs
, int nsseregs
,
3739 const int *intreg
, int sse_regno
)
3741 /* The following variables hold the static issued_error state. */
/* Static flags ensure each ABI-mismatch diagnostic is emitted once
   per compilation, not once per call site.  */
3742 static bool issued_sse_arg_error
;
3743 static bool issued_sse_ret_error
;
3744 static bool issued_x87_ret_error
;
3746 enum machine_mode tmpmode
;
3748 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3749 enum x86_64_reg_class
class[MAX_CLASSES
];
3753 int needed_sseregs
, needed_intregs
;
3754 rtx exp
[MAX_CLASSES
];
3757 n
= classify_argument (mode
, type
, class, 0);
3758 if (TARGET_DEBUG_ARG
)
3761 fprintf (stderr
, "Memory class\n");
3764 fprintf (stderr
, "Classes:");
3765 for (i
= 0; i
< n
; i
++)
3767 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3769 fprintf (stderr
, "\n");
3774 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3777 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3780 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3781 some less clueful developer tries to use floating-point anyway. */
3782 if (needed_sseregs
&& !TARGET_SSE
)
3786 if (!issued_sse_ret_error
)
3788 error ("SSE register return with SSE disabled");
3789 issued_sse_ret_error
= true;
3792 else if (!issued_sse_arg_error
)
3794 error ("SSE register argument with SSE disabled");
3795 issued_sse_arg_error
= true;
3800 /* Likewise, error if the ABI requires us to return values in the
3801 x87 registers and the user specified -mno-80387. */
3802 if (!TARGET_80387
&& in_return
)
3803 for (i
= 0; i
< n
; i
++)
3804 if (class[i
] == X86_64_X87_CLASS
3805 || class[i
] == X86_64_X87UP_CLASS
3806 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3808 if (!issued_x87_ret_error
)
3810 error ("x87 register return with x87 disabled");
3811 issued_x87_ret_error
= true;
3816 /* First construct simple cases. Avoid SCmode, since we want to use
3817 single register to pass this type. */
3818 if (n
== 1 && mode
!= SCmode
)
3821 case X86_64_INTEGER_CLASS
:
3822 case X86_64_INTEGERSI_CLASS
:
3823 return gen_rtx_REG (mode
, intreg
[0]);
3824 case X86_64_SSE_CLASS
:
3825 case X86_64_SSESF_CLASS
:
3826 case X86_64_SSEDF_CLASS
:
3827 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3828 case X86_64_X87_CLASS
:
3829 case X86_64_COMPLEX_X87_CLASS
:
3830 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3831 case X86_64_NO_CLASS
:
3832 /* Zero sized array, struct or class. */
/* Two-word fast paths: a full SSE pair, an x87 long double, and an
   aligned integer-register pair.  */
3837 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3839 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3841 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3842 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3843 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3844 && class[1] == X86_64_INTEGER_CLASS
3845 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3846 && intreg
[0] + 1 == intreg
[1])
3847 return gen_rtx_REG (mode
, intreg
[0]);
3849 /* Otherwise figure out the entries of the PARALLEL. */
3850 for (i
= 0; i
< n
; i
++)
3854 case X86_64_NO_CLASS
:
3856 case X86_64_INTEGER_CLASS
:
3857 case X86_64_INTEGERSI_CLASS
:
3858 /* Merge TImodes on aligned occasions here too. */
3859 if (i
* 8 + 8 > bytes
)
3860 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3861 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3865 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3866 if (tmpmode
== BLKmode
)
3868 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3869 gen_rtx_REG (tmpmode
, *intreg
),
3873 case X86_64_SSESF_CLASS
:
3874 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3875 gen_rtx_REG (SFmode
,
3876 SSE_REGNO (sse_regno
)),
3880 case X86_64_SSEDF_CLASS
:
3881 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3882 gen_rtx_REG (DFmode
,
3883 SSE_REGNO (sse_regno
)),
3887 case X86_64_SSE_CLASS
:
3888 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3892 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3893 gen_rtx_REG (tmpmode
,
3894 SSE_REGNO (sse_regno
)),
3896 if (tmpmode
== TImode
)
3905 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the PARALLEL.  */
3909 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3910 for (i
= 0; i
< nexps
; i
++)
3911 XVECEXP (ret
, 0, i
) = exp
[i
];
/* NOTE(review): mangled extraction -- logical lines are split and structural
   lines (braces, cases, return type) are missing; restore from pristine
   i386.c before compiling.  Comments document only what the visible code shows.  */
3915 /* Update the data in CUM to advance over an argument
3916 of mode MODE and data type TYPE.
3917 (TYPE is null for libcalls where that information may not be available.) */
3920 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3921 tree type
, int named
)
/* Argument size in bytes; BLKmode arguments take their size from the type.  */
3924 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3925 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3928 mode
= type_natural_mode (type
);
3930 if (TARGET_DEBUG_ARG
)
3931 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3932 "mode=%s, named=%d)\n\n",
3933 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3934 GET_MODE_NAME (mode
), named
);
/* 64-bit path (presumably guarded by a missing TARGET_64BIT test -- TODO
   confirm): classify the argument; if it fits the remaining integer and SSE
   register budget, consume registers, otherwise account stack words.  */
3938 int int_nregs
, sse_nregs
;
3939 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3940 cum
->words
+= words
;
3941 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3943 cum
->nregs
-= int_nregs
;
3944 cum
->sse_nregs
-= sse_nregs
;
3945 cum
->regno
+= int_nregs
;
3946 cum
->sse_regno
+= sse_nregs
;
3949 cum
->words
+= words
;
/* 32-bit integer-register path: charge whole words against the regparm pool.  */
3967 cum
->words
+= words
;
3968 cum
->nregs
-= words
;
3969 cum
->regno
+= words
;
3971 if (cum
->nregs
<= 0)
3979 if (cum
->float_in_sse
< 2)
3982 if (cum
->float_in_sse
< 1)
/* SSE vector argument (non-aggregate): consume one SSE register.  */
3993 if (!type
|| !AGGREGATE_TYPE_P (type
))
3995 cum
->sse_words
+= words
;
3996 cum
->sse_nregs
-= 1;
3997 cum
->sse_regno
+= 1;
3998 if (cum
->sse_nregs
<= 0)
/* MMX vector argument (non-aggregate): consume one MMX register.  */
4010 if (!type
|| !AGGREGATE_TYPE_P (type
))
4012 cum
->mmx_words
+= words
;
4013 cum
->mmx_nregs
-= 1;
4014 cum
->mmx_regno
+= 1;
4015 if (cum
->mmx_nregs
<= 0)
/* NOTE(review): mangled extraction -- statements are split across lines and
   structural lines are missing; restore from pristine i386.c before compiling.  */
4026 /* Define where to put the arguments to a function.
4027 Value is zero to push the argument on the stack,
4028 or a hard register in which to store the argument.
4030 MODE is the argument's machine mode.
4031 TYPE is the data type of the argument (as a tree).
4032 This is null for libcalls where that information may
4034 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4035 the preceding args and about the function being called.
4036 NAMED is nonzero if this argument is a named parameter
4037 (otherwise it is an extra parameter matching an ellipsis). */
4040 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
4041 tree type
, int named
)
4043 enum machine_mode mode
= orig_mode
;
4046 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
4047 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
/* Warn-once latches for passing SSE/MMX vectors without the ISA enabled.  */
4048 static bool warnedsse
, warnedmmx
;
4050 /* To simplify the code below, represent vector types with a vector mode
4051 even if MMX/SSE are not active. */
4052 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4053 mode
= type_natural_mode (type
);
4055 /* Handle a hidden AL argument containing number of registers for varargs
4056 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4058 if (mode
== VOIDmode
)
4061 return GEN_INT (cum
->maybe_vaarg
4062 ? (cum
->sse_nregs
< 0
/* 64-bit: delegate register assignment to construct_container.  */
4070 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4072 &x86_64_int_parameter_registers
[cum
->regno
],
4077 /* For now, pass fp/complex values on the stack. */
4089 if (words
<= cum
->nregs
)
4091 int regno
= cum
->regno
;
4093 /* Fastcall allocates the first two DWORD (SImode) or
4094 smaller arguments to ECX and EDX. */
4097 if (mode
== BLKmode
|| mode
== DImode
)
4100 /* ECX not EAX is the first allocated register. */
4104 ret
= gen_rtx_REG (mode
, regno
);
4108 if (cum
->float_in_sse
< 2)
4111 if (cum
->float_in_sse
< 1)
/* SSE vector argument in an XMM register (warn once if SSE is off).  */
4121 if (!type
|| !AGGREGATE_TYPE_P (type
))
4123 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4126 warning (0, "SSE vector argument without SSE enabled "
4130 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4131 cum
->sse_regno
+ FIRST_SSE_REG
);
/* MMX vector argument in an MMX register (warn once if MMX is off).  */
4138 if (!type
|| !AGGREGATE_TYPE_P (type
))
4140 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4143 warning (0, "MMX vector argument without MMX enabled "
4147 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4148 cum
->mmx_regno
+ FIRST_MMX_REG
);
4153 if (TARGET_DEBUG_ARG
)
4156 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4157 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4160 print_simple_rtl (stderr
, ret
);
4162 fprintf (stderr
, ", stack");
4164 fprintf (stderr
, " )\n");
/* NOTE(review): mangled extraction -- lines are split and some original lines
   are missing; restore from pristine i386.c before compiling.  */
4170 /* A C expression that indicates when an argument must be passed by
4171 reference. If nonzero for an argument, a copy of that argument is
4172 made in memory and a pointer to the argument is passed instead of
4173 the argument itself. The pointer is passed in whatever way is
4174 appropriate for passing a pointer to that type. */
4177 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4178 enum machine_mode mode ATTRIBUTE_UNUSED
,
4179 tree type
, bool named ATTRIBUTE_UNUSED
)
/* Variable-sized types (int_size_in_bytes == -1) must go by reference.  */
4184 if (type
&& int_size_in_bytes (type
) == -1)
4186 if (TARGET_DEBUG_ARG
)
4187 fprintf (stderr
, "function_arg_pass_by_reference\n");
/* NOTE(review): mangled extraction -- case labels and braces are missing;
   restore from pristine i386.c before compiling.  */
4194 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4195 ABI. Only called if TARGET_SSE. */
4197 contains_128bit_aligned_vector_p (tree type
)
4199 enum machine_mode mode
= TYPE_MODE (type
);
/* An SSE-register mode without a smaller user-specified alignment answers
   directly; types aligned below 128 bits cannot contain such a vector.  */
4200 if (SSE_REG_MODE_P (mode
)
4201 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4203 if (TYPE_ALIGN (type
) < 128)
4206 if (AGGREGATE_TYPE_P (type
))
4208 /* Walk the aggregates recursively. */
4209 switch (TREE_CODE (type
))
4213 case QUAL_UNION_TYPE
:
4217 /* Walk all the structure fields. */
4218 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4220 if (TREE_CODE (field
) == FIELD_DECL
4221 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4228 /* Just for use if some languages passes arrays by value. */
4229 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
/* NOTE(review): mangled extraction -- conditional heads around several
   assignments are missing; restore from pristine i386.c before compiling.  */
4240 /* Gives the alignment boundary, in bits, of an argument with the
4241 specified mode and type. */
4244 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
/* Alignment comes from the type when available, else from the mode,
   and is clamped up to at least PARM_BOUNDARY.  */
4248 align
= TYPE_ALIGN (type
);
4250 align
= GET_MODE_ALIGNMENT (mode
);
4251 if (align
< PARM_BOUNDARY
)
4252 align
= PARM_BOUNDARY
;
4255 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4256 make an exception for SSE modes since these require 128bit
4259 The handling here differs from field_alignment. ICC aligns MMX
4260 arguments to 4 byte boundaries, while structure fields are aligned
4261 to 8 byte boundaries. */
4263 align
= PARM_BOUNDARY
;
4266 if (!SSE_REG_MODE_P (mode
))
4267 align
= PARM_BOUNDARY
;
4271 if (!contains_128bit_aligned_vector_p (type
))
4272 align
= PARM_BOUNDARY
;
/* NOTE(review): mangled extraction -- the branch structure selecting between
   these alternative return expressions (presumably per-ABI) is missing;
   restore from pristine i386.c before compiling.  */
4280 /* Return true if N is a possible register number of function value. */
4282 ix86_function_value_regno_p (int regno
)
/* Accept %eax/%rax (0), %st(0) when FP values return on the 387 stack,
   %xmm0 when SSE is enabled, and (in one variant) %mm0 when MMX is enabled.  */
4288 return ((regno
) == 0
4289 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4290 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4292 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4293 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4294 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4299 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4300 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4304 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
/* NOTE(review): mangled extraction -- the TARGET_64BIT branch structure and
   some condition lines are missing; restore from pristine i386.c.  */
4311 /* Define how to find the value returned by a function.
4312 VALTYPE is the data type of the value (as a tree).
4313 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4314 otherwise, FUNC is 0. */
4316 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4317 bool outgoing ATTRIBUTE_UNUSED
)
4319 enum machine_mode natmode
= type_natural_mode (valtype
);
/* 64-bit: classify the return value into registers via construct_container.  */
4323 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4324 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4325 x86_64_int_return_registers
, 0);
4326 /* For zero sized structures, construct_container return NULL, but we
4327 need to keep rest of compiler happy by returning meaningful value. */
4329 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
/* 32-bit: pick the return register from the callee's decl/type when known.  */
4334 tree fn
= NULL_TREE
, fntype
;
4336 && DECL_P (fntype_or_decl
))
4337 fn
= fntype_or_decl
;
4338 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4339 return gen_rtx_REG (TYPE_MODE (valtype
),
4340 ix86_value_regno (natmode
, fn
, fntype
));
/* NOTE(review): mangled extraction -- several guard conditions and braces are
   missing; restore from pristine i386.c before compiling.  */
4344 /* Return true iff type is returned in memory. */
4346 ix86_return_in_memory (tree type
)
4348 int needed_intregs
, needed_sseregs
, size
;
4349 enum machine_mode mode
= type_natural_mode (type
);
/* 64-bit: in memory exactly when classification into registers fails.  */
4352 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4354 if (mode
== BLKmode
)
4357 size
= int_size_in_bytes (type
);
4359 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4362 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4364 /* User-created vectors small enough to fit in EAX. */
4368 /* MMX/3dNow values are returned in MM0,
4369 except when it doesn't exits. */
4371 return (TARGET_MMX
? 0 : 1);
4373 /* SSE values are returned in XMM0, except when it doesn't exist. */
4375 return (TARGET_SSE
? 0 : 1);
/* NOTE(review): mangled extraction -- the return statement(s) and some
   condition lines are missing; restore from pristine i386.c.  */
4389 /* When returning SSE vector types, we have a choice of either
4390 (1) being abi incompatible with a -march switch, or
4391 (2) generating an error.
4392 Given no good solution, I think the safest thing is one warning.
4393 The user won't be able to use -Werror, but....
4395 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4396 called in response to actually generating a caller or callee that
4397 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4398 via aggregate_value_p for general type probing from tree-ssa. */
4401 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
/* One-shot warning latches, mirroring the ones in function_arg.  */
4403 static bool warnedsse
, warnedmmx
;
4407 /* Look at the return type of the function, not the function type. */
4408 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4410 if (!TARGET_SSE
&& !warnedsse
)
4413 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4416 warning (0, "SSE vector return without SSE enabled "
4421 if (!TARGET_MMX
&& !warnedmmx
)
4423 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4426 warning (0, "MMX vector return without MMX enabled "
/* NOTE(review): mangled extraction -- the switch/if structure selecting among
   these returns (presumably on MODE and target flags) is missing; restore
   from pristine i386.c before compiling.  */
4435 /* Define how to find the value returned by a library function
4436 assuming the value has mode MODE. */
4438 ix86_libcall_value (enum machine_mode mode
)
/* Candidate return registers: %xmm0, %st(0), %eax/%rax, or whatever
   ix86_value_regno picks for the 32-bit case.  */
4452 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4455 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4459 return gen_rtx_REG (mode
, 0);
4463 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
/* NOTE(review): mangled extraction -- some return statements (e.g. the %eax
   fallthroughs) are missing; restore from pristine i386.c before compiling.  */
4466 /* Given a mode, return the register to use for a return value. */
4469 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
/* 32-bit only: the 64-bit ABI is handled elsewhere.  */
4471 gcc_assert (!TARGET_64BIT
);
4473 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4474 we normally prevent this case when mmx is not available. However
4475 some ABIs may require the result to be returned like DImode. */
4476 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4477 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4479 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4480 we prevent this case when sse is not available. However some ABIs
4481 may require the result to be returned like integer TImode. */
4482 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4483 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4485 /* Decimal floating point values can go in %eax, unlike other float modes. */
4486 if (DECIMAL_FLOAT_MODE_P (mode
))
4489 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4490 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4493 /* Floating point return values in %st(0), except for local functions when
4494 SSE math is enabled or for functions with sseregparm attribute. */
4495 if ((func
|| fntype
)
4496 && (mode
== SFmode
|| mode
== DFmode
))
4498 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4499 if ((sse_level
>= 1 && mode
== SFmode
)
4500 || (sse_level
== 2 && mode
== DFmode
))
4501 return FIRST_SSE_REG
;
4504 return FIRST_FLOAT_REG
;
/* NOTE(review): mangled extraction -- the TARGET_64BIT guard and the second
   argument of two build_decl calls are missing; restore from pristine i386.c.  */
4507 /* Create the va_list data type. */
4510 ix86_build_builtin_va_list (void)
4512 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4514 /* For i386 we use plain pointer to argument area. */
4516 return build_pointer_type (char_type_node
);
/* x86-64: build the four-field __va_list_tag record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
4518 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4519 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4521 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4522 unsigned_type_node
);
4523 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4524 unsigned_type_node
);
4525 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4527 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va-list pass can track their use.  */
4530 va_list_gpr_counter_field
= f_gpr
;
4531 va_list_fpr_counter_field
= f_fpr
;
4533 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4534 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4535 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4536 DECL_FIELD_CONTEXT (f_sav
) = record
;
/* Chain the fields into the record and lay it out.  */
4538 TREE_CHAIN (record
) = type_decl
;
4539 TYPE_NAME (record
) = type_decl
;
4540 TYPE_FIELDS (record
) = f_gpr
;
4541 TREE_CHAIN (f_gpr
) = f_fpr
;
4542 TREE_CHAIN (f_fpr
) = f_ovf
;
4543 TREE_CHAIN (f_ovf
) = f_sav
;
4545 layout_type (record
);
4547 /* The correct type is an array type of one element. */
4548 return build_array_type (record
, build_index_type (size_zero_node
));
/* NOTE(review): mangled extraction -- declarations (fntype, stdarg_p, set, i,
   label, nsse_reg, tmp_reg, ...), loop increments and several guards are
   missing; restore from pristine i386.c before compiling.  */
4551 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4554 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4555 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4558 CUMULATIVE_ARGS next_cum
;
4559 rtx save_area
= NULL_RTX
, mem
;
/* Nothing to do if the function never reads its va_list counters.  */
4572 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4575 /* Indicate to allocate space on the stack for varargs save area. */
4576 ix86_save_varrargs_registers
= 1;
4578 cfun
->stack_alignment_needed
= 128;
4580 fntype
= TREE_TYPE (current_function_decl
);
4581 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4582 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4583 != void_type_node
));
4585 /* For varargs, we do not want to skip the dummy va_dcl argument.
4586 For stdargs, we do want to skip the last named argument. */
4589 function_arg_advance (&next_cum
, mode
, type
, 1);
4592 save_area
= frame_pointer_rtx
;
4594 set
= get_varargs_alias_set ();
/* Spill the remaining integer parameter registers into the save area.  */
4596 for (i
= next_cum
.regno
;
4598 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4601 mem
= gen_rtx_MEM (Pmode
,
4602 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4603 MEM_NOTRAP_P (mem
) = 1;
4604 set_mem_alias_set (mem
, set
);
4605 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4606 x86_64_int_parameter_registers
[i
]));
4609 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4611 /* Now emit code to save SSE registers. The AX parameter contains number
4612 of SSE parameter registers used to call this function. We use
4613 sse_prologue_save insn template that produces computed jump across
4614 SSE saves. We need some preparation work to get this working. */
4616 label
= gen_label_rtx ();
4617 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4619 /* Compute address to jump to :
4620 label - 5*eax + nnamed_sse_arguments*5 */
4621 tmp_reg
= gen_reg_rtx (Pmode
);
4622 nsse_reg
= gen_reg_rtx (Pmode
);
4623 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4624 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4625 gen_rtx_MULT (Pmode
, nsse_reg
,
4627 if (next_cum
.sse_regno
)
4630 gen_rtx_CONST (DImode
,
4631 gen_rtx_PLUS (DImode
,
4633 GEN_INT (next_cum
.sse_regno
* 4))));
4635 emit_move_insn (nsse_reg
, label_ref
);
4636 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4638 /* Compute address of memory block we save into. We always use pointer
4639 pointing 127 bytes after first byte to store - this is needed to keep
4640 instruction size limited by 4 bytes. */
4641 tmp_reg
= gen_reg_rtx (Pmode
);
4642 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4643 plus_constant (save_area
,
4644 8 * REGPARM_MAX
+ 127)));
4645 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4646 MEM_NOTRAP_P (mem
) = 1;
4647 set_mem_alias_set (mem
, set
);
4648 set_mem_align (mem
, BITS_PER_WORD
);
4650 /* And finally do the dirty job! */
4651 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4652 GEN_INT (next_cum
.sse_regno
), label
));
/* NOTE(review): mangled extraction -- the `tree type` declaration, the
   TARGET_64BIT guard and closing braces are missing; restore from pristine
   i386.c before compiling.  */
4657 /* Implement va_start. */
4660 ix86_va_start (tree valist
, rtx nextarg
)
4662 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4663 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4664 tree gpr
, fpr
, ovf
, sav
, t
;
4667 /* Only 64bit target needs something special. */
4670 std_expand_builtin_va_start (valist
, nextarg
);
/* Locate the four __va_list_tag fields built by ix86_build_builtin_va_list.  */
4674 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4675 f_fpr
= TREE_CHAIN (f_gpr
);
4676 f_ovf
= TREE_CHAIN (f_fpr
);
4677 f_sav
= TREE_CHAIN (f_ovf
);
4679 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4680 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4681 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4682 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4683 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4685 /* Count number of gp and fp argument registers used. */
4686 words
= current_function_args_info
.words
;
4687 n_gpr
= current_function_args_info
.regno
;
4688 n_fpr
= current_function_args_info
.sse_regno
;
4690 if (TARGET_DEBUG_ARG
)
4691 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4692 (int) words
, (int) n_gpr
, (int) n_fpr
);
/* gp_offset = n_gpr * 8 (each GP register slot is 8 bytes).  */
4694 if (cfun
->va_list_gpr_size
)
4696 type
= TREE_TYPE (gpr
);
4697 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4698 build_int_cst (type
, n_gpr
* 8));
4699 TREE_SIDE_EFFECTS (t
) = 1;
4700 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
/* fp_offset = n_fpr * 16 + 8*REGPARM_MAX (SSE slots follow the GP slots).  */
4703 if (cfun
->va_list_fpr_size
)
4705 type
= TREE_TYPE (fpr
);
4706 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4707 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4708 TREE_SIDE_EFFECTS (t
) = 1;
4709 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4712 /* Find the overflow area. */
4713 type
= TREE_TYPE (ovf
);
4714 t
= make_tree (type
, virtual_incoming_args_rtx
);
4716 t
= build2 (PLUS_EXPR
, type
, t
,
4717 build_int_cst (type
, words
* UNITS_PER_WORD
));
4718 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4719 TREE_SIDE_EFFECTS (t
) = 1;
4720 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4722 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4724 /* Find the register save area.
4725 Prologue of the function save it right above stack frame. */
4726 type
= TREE_TYPE (sav
);
4727 t
= make_tree (type
, frame_pointer_rtx
);
4728 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4729 TREE_SIDE_EFFECTS (t
) = 1;
4730 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
/* NOTE(review): mangled extraction -- many declarations (size, rsize,
   container, indirect_p, need_temp, addr, src, src_addr, src_offset, t2,
   ptrtype, i), braces and several guards are missing; restore from pristine
   i386.c before compiling.  Comments document only what the visible code shows.  */
4734 /* Implement va_arg. */
4737 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4739 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4740 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4741 tree gpr
, fpr
, ovf
, sav
, t
;
4743 tree lab_false
, lab_over
= NULL_TREE
;
4748 enum machine_mode nat_mode
;
4750 /* Only 64bit target needs something special. */
4752 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
/* Locate the __va_list_tag fields and build COMPONENT_REFs for them.  */
4754 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4755 f_fpr
= TREE_CHAIN (f_gpr
);
4756 f_ovf
= TREE_CHAIN (f_fpr
);
4757 f_sav
= TREE_CHAIN (f_ovf
);
4759 valist
= build_va_arg_indirect_ref (valist
);
4760 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4761 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4762 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4763 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
/* Pass-by-reference arguments are fetched as pointers.  */
4765 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4767 type
= build_pointer_type (type
);
4768 size
= int_size_in_bytes (type
);
4769 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4771 nat_mode
= type_natural_mode (type
);
4772 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4773 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4775 /* Pull the value out of the saved registers. */
4777 addr
= create_tmp_var (ptr_type_node
, "addr");
4778 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4782 int needed_intregs
, needed_sseregs
;
4784 tree int_addr
, sse_addr
;
4786 lab_false
= create_artificial_label ();
4787 lab_over
= create_artificial_label ();
4789 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
/* A temporary is needed when an over-aligned aggregate cannot be read
   directly from the register save area.  */
4791 need_temp
= (!REG_P (container
)
4792 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4793 || TYPE_ALIGN (type
) > 128));
4795 /* In case we are passing structure, verify that it is consecutive block
4796 on the register save area. If not we need to do moves. */
4797 if (!need_temp
&& !REG_P (container
))
4799 /* Verify that all registers are strictly consecutive */
4800 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4804 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4806 rtx slot
= XVECEXP (container
, 0, i
);
4807 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4808 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4816 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4818 rtx slot
= XVECEXP (container
, 0, i
);
4819 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4820 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4832 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4833 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4834 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4835 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4838 /* First ensure that we fit completely in registers. */
/* Emit: if (gp_offset >= limit) goto lab_false;  (spill to overflow area).  */
4841 t
= build_int_cst (TREE_TYPE (gpr
),
4842 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4843 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4844 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4845 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4846 gimplify_and_add (t
, pre_p
);
/* Same bounds test for the SSE-register portion.  */
4850 t
= build_int_cst (TREE_TYPE (fpr
),
4851 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4853 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4854 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4855 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4856 gimplify_and_add (t
, pre_p
);
4859 /* Compute index to start of area used for integer regs. */
4862 /* int_addr = gpr + sav; */
4863 t
= fold_convert (ptr_type_node
, gpr
);
4864 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4865 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4866 gimplify_and_add (t
, pre_p
);
4870 /* sse_addr = fpr + sav; */
4871 t
= fold_convert (ptr_type_node
, fpr
);
4872 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4873 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4874 gimplify_and_add (t
, pre_p
);
/* need_temp path: assemble the scattered register pieces into a stack
   temporary, piece by piece.  */
4879 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4882 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4883 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4884 gimplify_and_add (t
, pre_p
);
4886 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4888 rtx slot
= XVECEXP (container
, 0, i
);
4889 rtx reg
= XEXP (slot
, 0);
4890 enum machine_mode mode
= GET_MODE (reg
);
4891 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4892 tree addr_type
= build_pointer_type (piece_type
);
4895 tree dest_addr
, dest
;
/* SSE registers are saved in 16-byte slots, integer registers in 8-byte
   slots (offsets relative to sse_addr / int_addr respectively).  */
4897 if (SSE_REGNO_P (REGNO (reg
)))
4899 src_addr
= sse_addr
;
4900 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4904 src_addr
= int_addr
;
4905 src_offset
= REGNO (reg
) * 8;
4907 src_addr
= fold_convert (addr_type
, src_addr
);
4908 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4909 size_int (src_offset
));
4910 src
= build_va_arg_indirect_ref (src_addr
);
4912 dest_addr
= fold_convert (addr_type
, addr
);
4913 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4914 size_int (INTVAL (XEXP (slot
, 1))));
4915 dest
= build_va_arg_indirect_ref (dest_addr
);
4917 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4918 gimplify_and_add (t
, pre_p
);
/* Advance the va_list counters past the registers just consumed.  */
4924 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4925 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4926 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4927 gimplify_and_add (t
, pre_p
);
4931 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4932 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4933 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4934 gimplify_and_add (t
, pre_p
);
4937 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4938 gimplify_and_add (t
, pre_p
);
4940 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4941 append_to_statement_list (t
, pre_p
);
4944 /* ... otherwise out of the overflow area. */
4946 /* Care for on-stack alignment if needed. */
4947 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4948 || integer_zerop (TYPE_SIZE (type
)))
/* Round ovf up to the argument's boundary: t = (ovf + align-1) & -align.  */
4952 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4953 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4954 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4955 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4956 build_int_cst (TREE_TYPE (t
), -align
));
4958 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4960 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4961 gimplify_and_add (t2
, pre_p
);
/* Bump the overflow pointer past the argument.  */
4963 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4964 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4965 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4966 gimplify_and_add (t
, pre_p
);
4970 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4971 append_to_statement_list (t
, pre_p
);
4974 ptrtype
= build_pointer_type (type
);
4975 addr
= fold_convert (ptrtype
, addr
);
/* Extra dereference for pass-by-reference arguments.  */
4978 addr
= build_va_arg_indirect_ref (addr
);
4979 return build_va_arg_indirect_ref (addr
);
/* NOTE(review): mangled extraction -- the return type and the `rtx set, mem;`
   declarations are missing; restore from pristine i386.c before compiling.  */
4982 /* Return nonzero if OPNUM's MEM should be matched
4983 in movabs* patterns. */
4986 ix86_check_movabs (rtx insn
, int opnum
)
/* Dig the SET out of the pattern (first element of a PARALLEL, if any).  */
4990 set
= PATTERN (insn
);
4991 if (GET_CODE (set
) == PARALLEL
)
4992 set
= XVECEXP (set
, 0, 0);
4993 gcc_assert (GET_CODE (set
) == SET
);
4994 mem
= XEXP (set
, opnum
);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
4995 while (GET_CODE (mem
) == SUBREG
)
4996 mem
= SUBREG_REG (mem
);
4997 gcc_assert (MEM_P (mem
));
/* Volatile memory is only acceptable when volatile_ok is set.  */
4998 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
/* NOTE(review): mangled extraction -- braces and the `int i;` declaration are
   missing; restore from pristine i386.c before compiling.  */
5001 /* Initialize the table of extra 80387 mathematical constants. */
5004 init_ext_80387_constants (void)
/* Decimal strings for the five constants the x87 can load directly.  */
5006 static const char * cst
[5] =
5008 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5009 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5010 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5011 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5012 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5016 for (i
= 0; i
< 5; i
++)
5018 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5019 /* Ensure each constant is rounded to XFmode precision. */
5020 real_convert (&ext_80387_constants_table
[i
],
5021 XFmode
, &ext_80387_constants_table
[i
]);
/* Mark the table initialized so callers build it only once.  */
5024 ext_80387_constants_init
= 1;
/* NOTE(review): mangled extraction -- the REAL_VALUE_TYPE r declaration and
   the numeric return values of each branch are missing; restore from
   pristine i386.c before compiling.  */
5027 /* Return true if the constant is something that can be loaded with
5028 a special instruction. */
5031 standard_80387_constant_p (rtx x
)
/* Only floating-point CONST_DOUBLEs are candidates.  */
5035 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
/* 0.0 -> fldz, 1.0 -> fld1 (return codes elided by the extraction).  */
5038 if (x
== CONST0_RTX (GET_MODE (x
)))
5040 if (x
== CONST1_RTX (GET_MODE (x
)))
5043 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5045 /* For XFmode constants, try to find a special 80387 instruction when
5046 optimizing for size or on those CPUs that benefit from them. */
5047 if (GET_MODE (x
) == XFmode
5048 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5052 if (! ext_80387_constants_init
)
5053 init_ext_80387_constants ();
5055 for (i
= 0; i
< 5; i
++)
5056 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5060 /* Load of the constant -0.0 or -1.0 will be split as
5061 fldz;fchs or fld1;fchs sequence. */
5062 if (real_isnegzero (&r
))
5064 if (real_identical (&r
, &dconstm1
))
/* NOTE(review): mangled extraction -- the switch cases mapping each
   standard_80387_constant_p code to its mnemonic (fldz, fld1, fldlg2, ...)
   are missing; restore from pristine i386.c before compiling.  */
5070 /* Return the opcode of the special instruction to be used to load
5074 standard_80387_constant_opcode (rtx x
)
5076 switch (standard_80387_constant_p (x
))
/* NOTE(review): mangled extraction -- the mapping from IDX to table index `i`
   and the mode argument of the final call are missing; restore from pristine
   i386.c before compiling.  */
5100 /* Return the CONST_DOUBLE representing the 80387 constant that is
5101 loaded by the specified special instruction. The argument IDX
5102 matches the return value from standard_80387_constant_p. */
5105 standard_80387_constant_rtx (int idx
)
/* Lazily build the constants table on first use.  */
5109 if (! ext_80387_constants_init
)
5110 init_ext_80387_constants ();
5126 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
/* NOTE(review): mangled extraction -- the entire function body (presumably a
   switch over the 128-bit vector modes) is missing; restore from pristine
   i386.c before compiling.  */
5130 /* Return 1 if mode is a valid mode for sse. */
5132 standard_sse_mode_p (enum machine_mode mode
)
/* NOTE(review): mangled extraction -- the return for the all-zeros case and
   the final fallthrough return are missing; restore from pristine i386.c.  */
5149 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5152 standard_sse_constant_p (rtx x
)
5154 enum machine_mode mode
= GET_MODE (x
);
/* All-zeros constants load with a register xor.  */
5156 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
/* All-ones vectors load with pcmpeqd, which needs SSE2 (hence the -1
   "invalid without SSE2" code).  */
5158 if (vector_all_ones_operand (x
, mode
)
5159 && standard_sse_mode_p (mode
))
5160 return TARGET_SSE2
? 2 : -1;
/* NOTE(review): mangled extraction -- the case labels (codes 1 and 2 from
   standard_sse_constant_p) and the default/gcc_unreachable lines are
   missing; restore from pristine i386.c before compiling.  */
5165 /* Return the opcode of the special instruction to be used to load
5169 standard_sse_constant_opcode (rtx insn
, rtx x
)
5171 switch (standard_sse_constant_p (x
))
/* All-zeros: pick the xor flavor matching the insn's attribute mode.  */
5174 if (get_attr_mode (insn
) == MODE_V4SF
)
5175 return "xorps\t%0, %0";
5176 else if (get_attr_mode (insn
) == MODE_V2DF
)
5177 return "xorpd\t%0, %0";
5179 return "pxor\t%0, %0";
/* All-ones: compare a register against itself.  */
5181 return "pcmpeqd\t%0, %0";
/* NOTE(review): mangled extraction -- declarations (`const char *fmt;`,
   `int i;`, `int j;`), the 'E' format test and the return statements are
   missing; restore from pristine i386.c before compiling.  */
5186 /* Returns 1 if OP contains a symbol reference */
5189 symbolic_reference_mentioned_p (rtx op
)
/* Direct hit: the rtx itself is a symbol or label reference.  */
5194 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
/* Otherwise recurse over every sub-rtx ('e') and rtx vector ('E').  */
5197 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5198 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5204 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5205 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5209 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
/* NOTE(review): mangled extraction -- the `return 0;` bodies of the early
   exits and the braces are missing; restore from pristine i386.c.  */
5216 /* Return 1 if it is appropriate to emit `ret' instructions in the
5217 body of a function. Do this only if the epilogue is simple, needing a
5218 couple of insns. Prior to reloading, we can't tell how many registers
5219 must be saved, so return 0 then. Return 0 if there is no frame
5220 marker to de-allocate. */
5223 ix86_can_use_return_insn_p (void)
5225 struct ix86_frame frame
;
5227 if (! reload_completed
|| frame_pointer_needed
)
5230 /* Don't allow more than 32 pop, since that's all we can do
5231 with one instruction. */
5232 if (current_function_pops_args
5233 && current_function_args_size
>= 32768)
/* A bare `ret` is usable only when nothing remains to deallocate or restore.  */
5236 ix86_compute_frame_layout (&frame
);
5237 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
/* NOTE(review): mangled extraction -- the `return 1;` bodies and the final
   `return 0;` are missing; restore from pristine i386.c before compiling.  */
5240 /* Value should be nonzero if functions must have frame pointers.
5241 Zero means the frame pointer need not be set up (and parms may
5242 be accessed via the stack pointer) in functions that seem suitable. */
5245 ix86_frame_pointer_required (void)
5247 /* If we accessed previous frames, then the generated code expects
5248 to be able to access the saved ebp value in our frame. */
5249 if (cfun
->machine
->accesses_prev_frame
)
5252 /* Several x86 os'es need a frame pointer for other reasons,
5253 usually pertaining to setjmp. */
5254 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5257 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5258 the frame pointer by default. Turn it back on now if we've not
5259 got a leaf function. */
5260 if (TARGET_OMIT_LEAF_FRAME_POINTER
5261 && (!current_function_is_leaf
5262 || ix86_current_function_calls_tls_descriptor
))
/* Profiling also forces a frame pointer.  */
5265 if (current_function_profile
)
5271 /* Record that the current function accesses previous call frames. */
5274 ix86_setup_frame_addresses (void)
5276 cfun
->machine
->accesses_prev_frame
= 1;
5279 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5280 # define USE_HIDDEN_LINKONCE 1
5282 # define USE_HIDDEN_LINKONCE 0
5285 static int pic_labels_used
;
5287 /* Fills in the label name that should be used for a pc thunk for
5288 the given register. */
5291 get_pc_thunk_name (char name
[32], unsigned int regno
)
5293 gcc_assert (!TARGET_64BIT
);
5295 if (USE_HIDDEN_LINKONCE
)
5296 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5298 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5302 /* This function generates code for -fpic that loads %ebx with
5303 the return address of the caller and then returns. */
5306 ix86_file_end (void)
5311 for (regno
= 0; regno
< 8; ++regno
)
5315 if (! ((pic_labels_used
>> regno
) & 1))
5318 get_pc_thunk_name (name
, regno
);
5323 switch_to_section (darwin_sections
[text_coal_section
]);
5324 fputs ("\t.weak_definition\t", asm_out_file
);
5325 assemble_name (asm_out_file
, name
);
5326 fputs ("\n\t.private_extern\t", asm_out_file
);
5327 assemble_name (asm_out_file
, name
);
5328 fputs ("\n", asm_out_file
);
5329 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5333 if (USE_HIDDEN_LINKONCE
)
5337 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5339 TREE_PUBLIC (decl
) = 1;
5340 TREE_STATIC (decl
) = 1;
5341 DECL_ONE_ONLY (decl
) = 1;
5343 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5344 switch_to_section (get_named_section (decl
, NULL
, 0));
5346 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5347 fputs ("\t.hidden\t", asm_out_file
);
5348 assemble_name (asm_out_file
, name
);
5349 fputc ('\n', asm_out_file
);
5350 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5354 switch_to_section (text_section
);
5355 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5358 xops
[0] = gen_rtx_REG (SImode
, regno
);
5359 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5360 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5361 output_asm_insn ("ret", xops
);
5364 if (NEED_INDICATE_EXEC_STACK
)
5365 file_end_indicate_exec_stack ();
5368 /* Emit code for the SET_GOT patterns. */
5371 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5376 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5378 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5380 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5383 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5385 output_asm_insn ("call\t%a2", xops
);
5388 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5389 is what will be referenced by the Mach-O PIC subsystem. */
5391 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5394 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5395 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5398 output_asm_insn ("pop{l}\t%0", xops
);
5403 get_pc_thunk_name (name
, REGNO (dest
));
5404 pic_labels_used
|= 1 << REGNO (dest
);
5406 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5407 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5408 output_asm_insn ("call\t%X2", xops
);
5409 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5410 is what will be referenced by the Mach-O PIC subsystem. */
5413 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5415 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5416 CODE_LABEL_NUMBER (label
));
5423 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5424 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5426 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5431 /* Generate an "push" pattern for input ARG. */
5436 return gen_rtx_SET (VOIDmode
,
5438 gen_rtx_PRE_DEC (Pmode
,
5439 stack_pointer_rtx
)),
5443 /* Return >= 0 if there is an unused call-clobbered register available
5444 for the entire function. */
5447 ix86_select_alt_pic_regnum (void)
5449 if (current_function_is_leaf
&& !current_function_profile
5450 && !ix86_current_function_calls_tls_descriptor
)
5453 for (i
= 2; i
>= 0; --i
)
5454 if (!regs_ever_live
[i
])
5458 return INVALID_REGNUM
;
5461 /* Return 1 if we need to save REGNO. */
5463 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5465 if (pic_offset_table_rtx
5466 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5467 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5468 || current_function_profile
5469 || current_function_calls_eh_return
5470 || current_function_uses_const_pool
))
5472 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5477 if (current_function_calls_eh_return
&& maybe_eh_return
)
5482 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5483 if (test
== INVALID_REGNUM
)
5490 if (cfun
->machine
->force_align_arg_pointer
5491 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5494 return (regs_ever_live
[regno
]
5495 && !call_used_regs
[regno
]
5496 && !fixed_regs
[regno
]
5497 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5500 /* Return number of registers to be saved on the stack. */
5503 ix86_nsaved_regs (void)
5508 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5509 if (ix86_save_reg (regno
, true))
5514 /* Return the offset between two registers, one to be eliminated, and the other
5515 its replacement, at the start of a routine. */
5518 ix86_initial_elimination_offset (int from
, int to
)
5520 struct ix86_frame frame
;
5521 ix86_compute_frame_layout (&frame
);
5523 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5524 return frame
.hard_frame_pointer_offset
;
5525 else if (from
== FRAME_POINTER_REGNUM
5526 && to
== HARD_FRAME_POINTER_REGNUM
)
5527 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5530 gcc_assert (to
== STACK_POINTER_REGNUM
);
5532 if (from
== ARG_POINTER_REGNUM
)
5533 return frame
.stack_pointer_offset
;
5535 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5536 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5540 /* Fill structure ix86_frame about frame of currently computed function. */
5543 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5545 HOST_WIDE_INT total_size
;
5546 unsigned int stack_alignment_needed
;
5547 HOST_WIDE_INT offset
;
5548 unsigned int preferred_alignment
;
5549 HOST_WIDE_INT size
= get_frame_size ();
5551 frame
->nregs
= ix86_nsaved_regs ();
5554 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5555 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5557 /* During reload iteration the amount of registers saved can change.
5558 Recompute the value as needed. Do not recompute when amount of registers
5559 didn't change as reload does multiple calls to the function and does not
5560 expect the decision to change within single iteration. */
5562 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5564 int count
= frame
->nregs
;
5566 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5567 /* The fast prologue uses move instead of push to save registers. This
5568 is significantly longer, but also executes faster as modern hardware
5569 can execute the moves in parallel, but can't do that for push/pop.
5571 Be careful about choosing what prologue to emit: When function takes
5572 many instructions to execute we may use slow version as well as in
5573 case function is known to be outside hot spot (this is known with
5574 feedback only). Weight the size of function by number of registers
5575 to save as it is cheap to use one or two push instructions but very
5576 slow to use many of them. */
5578 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5579 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5580 || (flag_branch_probabilities
5581 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5582 cfun
->machine
->use_fast_prologue_epilogue
= false;
5584 cfun
->machine
->use_fast_prologue_epilogue
5585 = !expensive_function_p (count
);
5587 if (TARGET_PROLOGUE_USING_MOVE
5588 && cfun
->machine
->use_fast_prologue_epilogue
)
5589 frame
->save_regs_using_mov
= true;
5591 frame
->save_regs_using_mov
= false;
5594 /* Skip return address and saved base pointer. */
5595 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5597 frame
->hard_frame_pointer_offset
= offset
;
5599 /* Do some sanity checking of stack_alignment_needed and
5600 preferred_alignment, since i386 port is the only using those features
5601 that may break easily. */
5603 gcc_assert (!size
|| stack_alignment_needed
);
5604 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5605 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5606 gcc_assert (stack_alignment_needed
5607 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5609 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5610 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5612 /* Register save area */
5613 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5616 if (ix86_save_varrargs_registers
)
5618 offset
+= X86_64_VARARGS_SIZE
;
5619 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5622 frame
->va_arg_size
= 0;
5624 /* Align start of frame for local function. */
5625 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5626 & -stack_alignment_needed
) - offset
;
5628 offset
+= frame
->padding1
;
5630 /* Frame pointer points here. */
5631 frame
->frame_pointer_offset
= offset
;
5635 /* Add outgoing arguments area. Can be skipped if we eliminated
5636 all the function calls as dead code.
5637 Skipping is however impossible when function calls alloca. Alloca
5638 expander assumes that last current_function_outgoing_args_size
5639 of stack frame are unused. */
5640 if (ACCUMULATE_OUTGOING_ARGS
5641 && (!current_function_is_leaf
|| current_function_calls_alloca
5642 || ix86_current_function_calls_tls_descriptor
))
5644 offset
+= current_function_outgoing_args_size
;
5645 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5648 frame
->outgoing_arguments_size
= 0;
5650 /* Align stack boundary. Only needed if we're calling another function
5652 if (!current_function_is_leaf
|| current_function_calls_alloca
5653 || ix86_current_function_calls_tls_descriptor
)
5654 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5655 & -preferred_alignment
) - offset
;
5657 frame
->padding2
= 0;
5659 offset
+= frame
->padding2
;
5661 /* We've reached end of stack frame. */
5662 frame
->stack_pointer_offset
= offset
;
5664 /* Size prologue needs to allocate. */
5665 frame
->to_allocate
=
5666 (size
+ frame
->padding1
+ frame
->padding2
5667 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5669 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5670 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5671 frame
->save_regs_using_mov
= false;
5673 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5674 && current_function_is_leaf
5675 && !ix86_current_function_calls_tls_descriptor
)
5677 frame
->red_zone_size
= frame
->to_allocate
;
5678 if (frame
->save_regs_using_mov
)
5679 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5680 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5681 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5684 frame
->red_zone_size
= 0;
5685 frame
->to_allocate
-= frame
->red_zone_size
;
5686 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5688 fprintf (stderr
, "\n");
5689 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5690 fprintf (stderr
, "size: %ld\n", (long)size
);
5691 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5692 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5693 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5694 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5695 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5696 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5697 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5698 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5699 (long)frame
->hard_frame_pointer_offset
);
5700 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5701 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5702 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5703 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5707 /* Emit code to save registers in the prologue. */
5710 ix86_emit_save_regs (void)
5715 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5716 if (ix86_save_reg (regno
, true))
5718 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5719 RTX_FRAME_RELATED_P (insn
) = 1;
5723 /* Emit code to save registers using MOV insns. First register
5724 is restored from POINTER + OFFSET. */
5726 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5731 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5732 if (ix86_save_reg (regno
, true))
5734 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5736 gen_rtx_REG (Pmode
, regno
));
5737 RTX_FRAME_RELATED_P (insn
) = 1;
5738 offset
+= UNITS_PER_WORD
;
5742 /* Expand prologue or epilogue stack adjustment.
5743 The pattern exist to put a dependency on all ebp-based memory accesses.
5744 STYLE should be negative if instructions should be marked as frame related,
5745 zero if %r11 register is live and cannot be freely used and positive
5749 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5754 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5755 else if (x86_64_immediate_operand (offset
, DImode
))
5756 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5760 /* r11 is used by indirect sibcall return as well, set before the
5761 epilogue and used after the epilogue. ATM indirect sibcall
5762 shouldn't be used together with huge frame sizes in one
5763 function because of the frame_size check in sibcall.c. */
5765 r11
= gen_rtx_REG (DImode
, R11_REG
);
5766 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5768 RTX_FRAME_RELATED_P (insn
) = 1;
5769 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5773 RTX_FRAME_RELATED_P (insn
) = 1;
5776 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5779 ix86_internal_arg_pointer (void)
5781 bool has_force_align_arg_pointer
=
5782 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5783 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5784 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5785 && DECL_NAME (current_function_decl
)
5786 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5787 && DECL_FILE_SCOPE_P (current_function_decl
))
5788 || ix86_force_align_arg_pointer
5789 || has_force_align_arg_pointer
)
5791 /* Nested functions can't realign the stack due to a register
5793 if (DECL_CONTEXT (current_function_decl
)
5794 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5796 if (ix86_force_align_arg_pointer
)
5797 warning (0, "-mstackrealign ignored for nested functions");
5798 if (has_force_align_arg_pointer
)
5799 error ("%s not supported for nested functions",
5800 ix86_force_align_arg_pointer_string
);
5801 return virtual_incoming_args_rtx
;
5803 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5804 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5807 return virtual_incoming_args_rtx
;
5810 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5811 This is called from dwarf2out.c to emit call frame instructions
5812 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5814 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5816 rtx unspec
= SET_SRC (pattern
);
5817 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5821 case UNSPEC_REG_SAVE
:
5822 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5823 SET_DEST (pattern
));
5825 case UNSPEC_DEF_CFA
:
5826 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5827 INTVAL (XVECEXP (unspec
, 0, 0)));
5834 /* Expand the prologue into a bunch of separate insns. */
5837 ix86_expand_prologue (void)
5841 struct ix86_frame frame
;
5842 HOST_WIDE_INT allocate
;
5844 ix86_compute_frame_layout (&frame
);
5846 if (cfun
->machine
->force_align_arg_pointer
)
5850 /* Grab the argument pointer. */
5851 x
= plus_constant (stack_pointer_rtx
, 4);
5852 y
= cfun
->machine
->force_align_arg_pointer
;
5853 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5854 RTX_FRAME_RELATED_P (insn
) = 1;
5856 /* The unwind info consists of two parts: install the fafp as the cfa,
5857 and record the fafp as the "save register" of the stack pointer.
5858 The later is there in order that the unwinder can see where it
5859 should restore the stack pointer across the and insn. */
5860 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5861 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5862 RTX_FRAME_RELATED_P (x
) = 1;
5863 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5865 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5866 RTX_FRAME_RELATED_P (y
) = 1;
5867 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5868 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5869 REG_NOTES (insn
) = x
;
5871 /* Align the stack. */
5872 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5875 /* And here we cheat like madmen with the unwind info. We force the
5876 cfa register back to sp+4, which is exactly what it was at the
5877 start of the function. Re-pushing the return address results in
5878 the return at the same spot relative to the cfa, and thus is
5879 correct wrt the unwind info. */
5880 x
= cfun
->machine
->force_align_arg_pointer
;
5881 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5882 insn
= emit_insn (gen_push (x
));
5883 RTX_FRAME_RELATED_P (insn
) = 1;
5886 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5887 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5888 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5889 REG_NOTES (insn
) = x
;
5892 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5893 slower on all targets. Also sdb doesn't like it. */
5895 if (frame_pointer_needed
)
5897 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5898 RTX_FRAME_RELATED_P (insn
) = 1;
5900 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5901 RTX_FRAME_RELATED_P (insn
) = 1;
5904 allocate
= frame
.to_allocate
;
5906 if (!frame
.save_regs_using_mov
)
5907 ix86_emit_save_regs ();
5909 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5911 /* When using red zone we may start register saving before allocating
5912 the stack frame saving one cycle of the prologue. */
5913 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5914 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5915 : stack_pointer_rtx
,
5916 -frame
.nregs
* UNITS_PER_WORD
);
5920 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5921 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5922 GEN_INT (-allocate
), -1);
5925 /* Only valid for Win32. */
5926 rtx eax
= gen_rtx_REG (SImode
, 0);
5927 bool eax_live
= ix86_eax_live_at_start_p ();
5930 gcc_assert (!TARGET_64BIT
);
5934 emit_insn (gen_push (eax
));
5938 emit_move_insn (eax
, GEN_INT (allocate
));
5940 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5941 RTX_FRAME_RELATED_P (insn
) = 1;
5942 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5943 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5944 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5945 t
, REG_NOTES (insn
));
5949 if (frame_pointer_needed
)
5950 t
= plus_constant (hard_frame_pointer_rtx
,
5953 - frame
.nregs
* UNITS_PER_WORD
);
5955 t
= plus_constant (stack_pointer_rtx
, allocate
);
5956 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5960 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5962 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5963 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5965 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5966 -frame
.nregs
* UNITS_PER_WORD
);
5969 pic_reg_used
= false;
5970 if (pic_offset_table_rtx
5971 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5972 || current_function_profile
))
5974 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5976 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5977 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5979 pic_reg_used
= true;
5985 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5987 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5989 /* Even with accurate pre-reload life analysis, we can wind up
5990 deleting all references to the pic register after reload.
5991 Consider if cross-jumping unifies two sides of a branch
5992 controlled by a comparison vs the only read from a global.
5993 In which case, allow the set_got to be deleted, though we're
5994 too late to do anything about the ebx save in the prologue. */
5995 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5998 /* Prevent function calls from be scheduled before the call to mcount.
5999 In the pic_reg_used case, make sure that the got load isn't deleted. */
6000 if (current_function_profile
)
6001 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
6004 /* Emit code to restore saved registers using MOV insns. First register
6005 is restored from POINTER + OFFSET. */
6007 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6008 int maybe_eh_return
)
6011 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6013 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6014 if (ix86_save_reg (regno
, maybe_eh_return
))
6016 /* Ensure that adjust_address won't be forced to produce pointer
6017 out of range allowed by x86-64 instruction set. */
6018 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6022 r11
= gen_rtx_REG (DImode
, R11_REG
);
6023 emit_move_insn (r11
, GEN_INT (offset
));
6024 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6025 base_address
= gen_rtx_MEM (Pmode
, r11
);
6028 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6029 adjust_address (base_address
, Pmode
, offset
));
6030 offset
+= UNITS_PER_WORD
;
6034 /* Restore function stack, frame, and registers. */
6037 ix86_expand_epilogue (int style
)
6040 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6041 struct ix86_frame frame
;
6042 HOST_WIDE_INT offset
;
6044 ix86_compute_frame_layout (&frame
);
6046 /* Calculate start of saved registers relative to ebp. Special care
6047 must be taken for the normal return case of a function using
6048 eh_return: the eax and edx registers are marked as saved, but not
6049 restored along this path. */
6050 offset
= frame
.nregs
;
6051 if (current_function_calls_eh_return
&& style
!= 2)
6053 offset
*= -UNITS_PER_WORD
;
6055 /* If we're only restoring one register and sp is not valid then
6056 using a move instruction to restore the register since it's
6057 less work than reloading sp and popping the register.
6059 The default code result in stack adjustment using add/lea instruction,
6060 while this code results in LEAVE instruction (or discrete equivalent),
6061 so it is profitable in some other cases as well. Especially when there
6062 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6063 and there is exactly one register to pop. This heuristic may need some
6064 tuning in future. */
6065 if ((!sp_valid
&& frame
.nregs
<= 1)
6066 || (TARGET_EPILOGUE_USING_MOVE
6067 && cfun
->machine
->use_fast_prologue_epilogue
6068 && (frame
.nregs
> 1 || frame
.to_allocate
))
6069 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6070 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6071 && cfun
->machine
->use_fast_prologue_epilogue
6072 && frame
.nregs
== 1)
6073 || current_function_calls_eh_return
)
6075 /* Restore registers. We can use ebp or esp to address the memory
6076 locations. If both are available, default to ebp, since offsets
6077 are known to be small. Only exception is esp pointing directly to the
6078 end of block of saved registers, where we may simplify addressing
6081 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6082 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6083 frame
.to_allocate
, style
== 2);
6085 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6086 offset
, style
== 2);
6088 /* eh_return epilogues need %ecx added to the stack pointer. */
6091 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6093 if (frame_pointer_needed
)
6095 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6096 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6097 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6099 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6100 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6102 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6107 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6108 tmp
= plus_constant (tmp
, (frame
.to_allocate
6109 + frame
.nregs
* UNITS_PER_WORD
));
6110 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6113 else if (!frame_pointer_needed
)
6114 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6115 GEN_INT (frame
.to_allocate
6116 + frame
.nregs
* UNITS_PER_WORD
),
6118 /* If not an i386, mov & pop is faster than "leave". */
6119 else if (TARGET_USE_LEAVE
|| optimize_size
6120 || !cfun
->machine
->use_fast_prologue_epilogue
)
6121 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6124 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6125 hard_frame_pointer_rtx
,
6128 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6130 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6135 /* First step is to deallocate the stack frame so that we can
6136 pop the registers. */
6139 gcc_assert (frame_pointer_needed
);
6140 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6141 hard_frame_pointer_rtx
,
6142 GEN_INT (offset
), style
);
6144 else if (frame
.to_allocate
)
6145 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6146 GEN_INT (frame
.to_allocate
), style
);
6148 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6149 if (ix86_save_reg (regno
, false))
6152 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6154 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6156 if (frame_pointer_needed
)
6158 /* Leave results in shorter dependency chains on CPUs that are
6159 able to grok it fast. */
6160 if (TARGET_USE_LEAVE
)
6161 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6162 else if (TARGET_64BIT
)
6163 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6165 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6169 if (cfun
->machine
->force_align_arg_pointer
)
6171 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6172 cfun
->machine
->force_align_arg_pointer
,
6176 /* Sibcall epilogues don't want a return instruction. */
6180 if (current_function_pops_args
&& current_function_args_size
)
6182 rtx popc
= GEN_INT (current_function_pops_args
);
6184 /* i386 can only pop 64K bytes. If asked to pop more, pop
6185 return address, do explicit add, and jump indirectly to the
6188 if (current_function_pops_args
>= 65536)
6190 rtx ecx
= gen_rtx_REG (SImode
, 2);
6192 /* There is no "pascal" calling convention in 64bit ABI. */
6193 gcc_assert (!TARGET_64BIT
);
6195 emit_insn (gen_popsi1 (ecx
));
6196 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6197 emit_jump_insn (gen_return_indirect_internal (ecx
));
6200 emit_jump_insn (gen_return_pop_internal (popc
));
6203 emit_jump_insn (gen_return_internal ());
6206 /* Reset from the function's potential modifications. */
6209 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6210 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6212 if (pic_offset_table_rtx
)
6213 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6215 /* Mach-O doesn't support labels at the end of objects, so if
6216 it looks like we might want one, insert a NOP. */
6218 rtx insn
= get_last_insn ();
6221 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6222 insn
= PREV_INSN (insn
);
6226 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6227 fputs ("\tnop\n", file
);
6233 /* Extract the parts of an RTL expression that is a valid memory address
6234 for an instruction. Return 0 if the structure of the address is
6235 grossly off. Return -1 if the address contains ASHIFT, so it is not
6236 strictly valid, but still used for computing length of lea instruction. */
6239 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6241 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6242 rtx base_reg
, index_reg
;
6243 HOST_WIDE_INT scale
= 1;
6244 rtx scale_rtx
= NULL_RTX
;
6246 enum ix86_address_seg seg
= SEG_DEFAULT
;
6248 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6250 else if (GET_CODE (addr
) == PLUS
)
6260 addends
[n
++] = XEXP (op
, 1);
6263 while (GET_CODE (op
) == PLUS
);
6268 for (i
= n
; i
>= 0; --i
)
6271 switch (GET_CODE (op
))
6276 index
= XEXP (op
, 0);
6277 scale_rtx
= XEXP (op
, 1);
6281 if (XINT (op
, 1) == UNSPEC_TP
6282 && TARGET_TLS_DIRECT_SEG_REFS
6283 && seg
== SEG_DEFAULT
)
6284 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6313 else if (GET_CODE (addr
) == MULT
)
6315 index
= XEXP (addr
, 0); /* index*scale */
6316 scale_rtx
= XEXP (addr
, 1);
6318 else if (GET_CODE (addr
) == ASHIFT
)
6322 /* We're called for lea too, which implements ashift on occasion. */
6323 index
= XEXP (addr
, 0);
6324 tmp
= XEXP (addr
, 1);
6325 if (!CONST_INT_P (tmp
))
6327 scale
= INTVAL (tmp
);
6328 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6334 disp
= addr
; /* displacement */
6336 /* Extract the integral value of scale. */
6339 if (!CONST_INT_P (scale_rtx
))
6341 scale
= INTVAL (scale_rtx
);
6344 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6345 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6347 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6348 if (base_reg
&& index_reg
&& scale
== 1
6349 && (index_reg
== arg_pointer_rtx
6350 || index_reg
== frame_pointer_rtx
6351 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6354 tmp
= base
, base
= index
, index
= tmp
;
6355 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6358 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6359 if ((base_reg
== hard_frame_pointer_rtx
6360 || base_reg
== frame_pointer_rtx
6361 || base_reg
== arg_pointer_rtx
) && !disp
)
6364 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6365 Avoid this by transforming to [%esi+0]. */
6366 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6367 && base_reg
&& !index_reg
&& !disp
6369 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6372 /* Special case: encode reg+reg instead of reg*2. */
6373 if (!base
&& index
&& scale
&& scale
== 2)
6374 base
= index
, base_reg
= index_reg
, scale
= 1;
6376 /* Special case: scaling cannot be encoded without base or displacement. */
6377 if (!base
&& !disp
&& index
&& scale
!= 1)
6389 /* Return cost of the memory address x.
6390 For i386, it is better to use a complex address than let gcc copy
6391 the address into a reg and make a new pseudo. But not if the address
6392 requires to two regs - that would mean more pseudos with longer
6395 ix86_address_cost (rtx x
)
6397 struct ix86_address parts
;
6399 int ok
= ix86_decompose_address (x
, &parts
);
6403 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6404 parts
.base
= SUBREG_REG (parts
.base
);
6405 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6406 parts
.index
= SUBREG_REG (parts
.index
);
6408 /* More complex memory references are better. */
6409 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6411 if (parts
.seg
!= SEG_DEFAULT
)
6414 /* Attempt to minimize number of registers in the address. */
6416 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6418 && (!REG_P (parts
.index
)
6419 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6423 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6425 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6426 && parts
.base
!= parts
.index
)
6429 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6430 since it's predecode logic can't detect the length of instructions
6431 and it degenerates to vector decoded. Increase cost of such
6432 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6433 to split such addresses or even refuse such addresses at all.
6435 Following addressing modes are affected:
6440 The first and last case may be avoidable by explicitly coding the zero in
6441 memory address, but I don't have AMD-K6 machine handy to check this
6445 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6446 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6447 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6453 /* If X is a machine specific address (i.e. a symbol or label being
6454 referenced as a displacement from the GOT implemented using an
6455 UNSPEC), then return the base term. Otherwise return X. */
6458 ix86_find_base_term (rtx x
)
6464 if (GET_CODE (x
) != CONST
)
6467 if (GET_CODE (term
) == PLUS
6468 && (CONST_INT_P (XEXP (term
, 1))
6469 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6470 term
= XEXP (term
, 0);
6471 if (GET_CODE (term
) != UNSPEC
6472 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6475 term
= XVECEXP (term
, 0, 0);
6477 if (GET_CODE (term
) != SYMBOL_REF
6478 && GET_CODE (term
) != LABEL_REF
)
6484 term
= ix86_delegitimize_address (x
);
6486 if (GET_CODE (term
) != SYMBOL_REF
6487 && GET_CODE (term
) != LABEL_REF
)
6493 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6494 this is used for to form addresses to local data when -fPIC is in
6498 darwin_local_data_pic (rtx disp
)
6500 if (GET_CODE (disp
) == MINUS
)
6502 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6503 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6504 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6506 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6507 if (! strcmp (sym_name
, "<pic base>"))
6515 /* Determine if a given RTX is a valid constant. We already know this
6516 satisfies CONSTANT_P. */
6519 legitimate_constant_p (rtx x
)
6521 switch (GET_CODE (x
))
6526 if (GET_CODE (x
) == PLUS
)
6528 if (!CONST_INT_P (XEXP (x
, 1)))
6533 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6536 /* Only some unspecs are valid as "constants". */
6537 if (GET_CODE (x
) == UNSPEC
)
6538 switch (XINT (x
, 1))
6541 return TARGET_64BIT
;
6544 x
= XVECEXP (x
, 0, 0);
6545 return (GET_CODE (x
) == SYMBOL_REF
6546 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6548 x
= XVECEXP (x
, 0, 0);
6549 return (GET_CODE (x
) == SYMBOL_REF
6550 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6555 /* We must have drilled down to a symbol. */
6556 if (GET_CODE (x
) == LABEL_REF
)
6558 if (GET_CODE (x
) != SYMBOL_REF
)
6563 /* TLS symbols are never valid. */
6564 if (SYMBOL_REF_TLS_MODEL (x
))
6569 if (GET_MODE (x
) == TImode
6570 && x
!= CONST0_RTX (TImode
)
6576 if (x
== CONST0_RTX (GET_MODE (x
)))
6584 /* Otherwise we handle everything else in the move patterns. */
6588 /* Determine if it's legal to put X into the constant pool. This
6589 is not possible for the address of thread-local symbols, which
6590 is checked above. */
6593 ix86_cannot_force_const_mem (rtx x
)
6595 /* We can always put integral constants and vectors in memory. */
6596 switch (GET_CODE (x
))
6606 return !legitimate_constant_p (x
);
6609 /* Determine if a given RTX is a valid constant address. */
6612 constant_address_p (rtx x
)
6614 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6617 /* Nonzero if the constant value X is a legitimate general operand
6618 when generating PIC code. It is given that flag_pic is on and
6619 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6622 legitimate_pic_operand_p (rtx x
)
6626 switch (GET_CODE (x
))
6629 inner
= XEXP (x
, 0);
6630 if (GET_CODE (inner
) == PLUS
6631 && CONST_INT_P (XEXP (inner
, 1)))
6632 inner
= XEXP (inner
, 0);
6634 /* Only some unspecs are valid as "constants". */
6635 if (GET_CODE (inner
) == UNSPEC
)
6636 switch (XINT (inner
, 1))
6639 return TARGET_64BIT
;
6641 x
= XVECEXP (inner
, 0, 0);
6642 return (GET_CODE (x
) == SYMBOL_REF
6643 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6651 return legitimate_pic_address_disp_p (x
);
6658 /* Determine if a given CONST RTX is a valid memory displacement
6662 legitimate_pic_address_disp_p (rtx disp
)
6666 /* In 64bit mode we can allow direct addresses of symbols and labels
6667 when they are not dynamic symbols. */
6670 rtx op0
= disp
, op1
;
6672 switch (GET_CODE (disp
))
6678 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6680 op0
= XEXP (XEXP (disp
, 0), 0);
6681 op1
= XEXP (XEXP (disp
, 0), 1);
6682 if (!CONST_INT_P (op1
)
6683 || INTVAL (op1
) >= 16*1024*1024
6684 || INTVAL (op1
) < -16*1024*1024)
6686 if (GET_CODE (op0
) == LABEL_REF
)
6688 if (GET_CODE (op0
) != SYMBOL_REF
)
6693 /* TLS references should always be enclosed in UNSPEC. */
6694 if (SYMBOL_REF_TLS_MODEL (op0
))
6696 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6704 if (GET_CODE (disp
) != CONST
)
6706 disp
= XEXP (disp
, 0);
6710 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6711 of GOT tables. We should not need these anyway. */
6712 if (GET_CODE (disp
) != UNSPEC
6713 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6714 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6717 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6718 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6724 if (GET_CODE (disp
) == PLUS
)
6726 if (!CONST_INT_P (XEXP (disp
, 1)))
6728 disp
= XEXP (disp
, 0);
6732 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6735 if (GET_CODE (disp
) != UNSPEC
)
6738 switch (XINT (disp
, 1))
6743 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6745 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6746 While ABI specify also 32bit relocation but we don't produce it in
6747 small PIC model at all. */
6748 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6749 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6751 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6753 case UNSPEC_GOTTPOFF
:
6754 case UNSPEC_GOTNTPOFF
:
6755 case UNSPEC_INDNTPOFF
:
6758 disp
= XVECEXP (disp
, 0, 0);
6759 return (GET_CODE (disp
) == SYMBOL_REF
6760 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6762 disp
= XVECEXP (disp
, 0, 0);
6763 return (GET_CODE (disp
) == SYMBOL_REF
6764 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6766 disp
= XVECEXP (disp
, 0, 0);
6767 return (GET_CODE (disp
) == SYMBOL_REF
6768 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6774 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6775 memory address for an instruction. The MODE argument is the machine mode
6776 for the MEM expression that wants to use this address.
6778 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6779 convert common non-canonical forms to canonical form so that they will
6783 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6785 struct ix86_address parts
;
6786 rtx base
, index
, disp
;
6787 HOST_WIDE_INT scale
;
6788 const char *reason
= NULL
;
6789 rtx reason_rtx
= NULL_RTX
;
6791 if (TARGET_DEBUG_ADDR
)
6794 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6795 GET_MODE_NAME (mode
), strict
);
6799 if (ix86_decompose_address (addr
, &parts
) <= 0)
6801 reason
= "decomposition failed";
6806 index
= parts
.index
;
6808 scale
= parts
.scale
;
6810 /* Validate base register.
6812 Don't allow SUBREG's that span more than a word here. It can lead to spill
6813 failures when the base is one word out of a two word structure, which is
6814 represented internally as a DImode int. */
6823 else if (GET_CODE (base
) == SUBREG
6824 && REG_P (SUBREG_REG (base
))
6825 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6827 reg
= SUBREG_REG (base
);
6830 reason
= "base is not a register";
6834 if (GET_MODE (base
) != Pmode
)
6836 reason
= "base is not in Pmode";
6840 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6841 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6843 reason
= "base is not valid";
6848 /* Validate index register.
6850 Don't allow SUBREG's that span more than a word here -- same as above. */
6859 else if (GET_CODE (index
) == SUBREG
6860 && REG_P (SUBREG_REG (index
))
6861 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6863 reg
= SUBREG_REG (index
);
6866 reason
= "index is not a register";
6870 if (GET_MODE (index
) != Pmode
)
6872 reason
= "index is not in Pmode";
6876 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6877 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6879 reason
= "index is not valid";
6884 /* Validate scale factor. */
6887 reason_rtx
= GEN_INT (scale
);
6890 reason
= "scale without index";
6894 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6896 reason
= "scale is not a valid multiplier";
6901 /* Validate displacement. */
6906 if (GET_CODE (disp
) == CONST
6907 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6908 switch (XINT (XEXP (disp
, 0), 1))
6910 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6911 used. While ABI specify also 32bit relocations, we don't produce
6912 them at all and use IP relative instead. */
6915 gcc_assert (flag_pic
);
6917 goto is_legitimate_pic
;
6918 reason
= "64bit address unspec";
6921 case UNSPEC_GOTPCREL
:
6922 gcc_assert (flag_pic
);
6923 goto is_legitimate_pic
;
6925 case UNSPEC_GOTTPOFF
:
6926 case UNSPEC_GOTNTPOFF
:
6927 case UNSPEC_INDNTPOFF
:
6933 reason
= "invalid address unspec";
6937 else if (SYMBOLIC_CONST (disp
)
6941 && MACHOPIC_INDIRECT
6942 && !machopic_operand_p (disp
)
6948 if (TARGET_64BIT
&& (index
|| base
))
6950 /* foo@dtpoff(%rX) is ok. */
6951 if (GET_CODE (disp
) != CONST
6952 || GET_CODE (XEXP (disp
, 0)) != PLUS
6953 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6954 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6955 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6956 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6958 reason
= "non-constant pic memory reference";
6962 else if (! legitimate_pic_address_disp_p (disp
))
6964 reason
= "displacement is an invalid pic construct";
6968 /* This code used to verify that a symbolic pic displacement
6969 includes the pic_offset_table_rtx register.
6971 While this is good idea, unfortunately these constructs may
6972 be created by "adds using lea" optimization for incorrect
6981 This code is nonsensical, but results in addressing
6982 GOT table with pic_offset_table_rtx base. We can't
6983 just refuse it easily, since it gets matched by
6984 "addsi3" pattern, that later gets split to lea in the
6985 case output register differs from input. While this
6986 can be handled by separate addsi pattern for this case
6987 that never results in lea, this seems to be easier and
6988 correct fix for crash to disable this test. */
6990 else if (GET_CODE (disp
) != LABEL_REF
6991 && !CONST_INT_P (disp
)
6992 && (GET_CODE (disp
) != CONST
6993 || !legitimate_constant_p (disp
))
6994 && (GET_CODE (disp
) != SYMBOL_REF
6995 || !legitimate_constant_p (disp
)))
6997 reason
= "displacement is not constant";
7000 else if (TARGET_64BIT
7001 && !x86_64_immediate_operand (disp
, VOIDmode
))
7003 reason
= "displacement is out of range";
7008 /* Everything looks valid. */
7009 if (TARGET_DEBUG_ADDR
)
7010 fprintf (stderr
, "Success.\n");
7014 if (TARGET_DEBUG_ADDR
)
7016 fprintf (stderr
, "Error: %s\n", reason
);
7017 debug_rtx (reason_rtx
);
7022 /* Return a unique alias set for the GOT. */
7024 static HOST_WIDE_INT
7025 ix86_GOT_alias_set (void)
7027 static HOST_WIDE_INT set
= -1;
7029 set
= new_alias_set ();
7033 /* Return a legitimate reference for ORIG (an address) using the
7034 register REG. If REG is 0, a new pseudo is generated.
7036 There are two types of references that must be handled:
7038 1. Global data references must load the address from the GOT, via
7039 the PIC reg. An insn is emitted to do this load, and the reg is
7042 2. Static data references, constant pool addresses, and code labels
7043 compute the address as an offset from the GOT, whose base is in
7044 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7045 differentiate them from global data objects. The returned
7046 address is the PIC reg + an unspec constant.
7048 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7049 reg also appears in the address. */
7052 legitimize_pic_address (rtx orig
, rtx reg
)
7059 if (TARGET_MACHO
&& !TARGET_64BIT
)
7062 reg
= gen_reg_rtx (Pmode
);
7063 /* Use the generic Mach-O PIC machinery. */
7064 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7068 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7070 else if (TARGET_64BIT
7071 && ix86_cmodel
!= CM_SMALL_PIC
7072 && local_symbolic_operand (addr
, Pmode
))
7075 /* This symbol may be referenced via a displacement from the PIC
7076 base address (@GOTOFF). */
7078 if (reload_in_progress
)
7079 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7080 if (GET_CODE (addr
) == CONST
)
7081 addr
= XEXP (addr
, 0);
7082 if (GET_CODE (addr
) == PLUS
)
7084 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7085 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7088 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7089 new = gen_rtx_CONST (Pmode
, new);
7091 tmpreg
= gen_reg_rtx (Pmode
);
7094 emit_move_insn (tmpreg
, new);
7098 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7099 tmpreg
, 1, OPTAB_DIRECT
);
7102 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7104 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
7106 /* This symbol may be referenced via a displacement from the PIC
7107 base address (@GOTOFF). */
7109 if (reload_in_progress
)
7110 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7111 if (GET_CODE (addr
) == CONST
)
7112 addr
= XEXP (addr
, 0);
7113 if (GET_CODE (addr
) == PLUS
)
7115 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7116 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7119 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7120 new = gen_rtx_CONST (Pmode
, new);
7121 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7125 emit_move_insn (reg
, new);
7129 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7133 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7134 new = gen_rtx_CONST (Pmode
, new);
7135 new = gen_const_mem (Pmode
, new);
7136 set_mem_alias_set (new, ix86_GOT_alias_set ());
7139 reg
= gen_reg_rtx (Pmode
);
7140 /* Use directly gen_movsi, otherwise the address is loaded
7141 into register for CSE. We don't want to CSE this addresses,
7142 instead we CSE addresses from the GOT table, so skip this. */
7143 emit_insn (gen_movsi (reg
, new));
7148 /* This symbol must be referenced via a load from the
7149 Global Offset Table (@GOT). */
7151 if (reload_in_progress
)
7152 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7153 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7154 new = gen_rtx_CONST (Pmode
, new);
7155 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7156 new = gen_const_mem (Pmode
, new);
7157 set_mem_alias_set (new, ix86_GOT_alias_set ());
7160 reg
= gen_reg_rtx (Pmode
);
7161 emit_move_insn (reg
, new);
7167 if (CONST_INT_P (addr
)
7168 && !x86_64_immediate_operand (addr
, VOIDmode
))
7172 emit_move_insn (reg
, addr
);
7176 new = force_reg (Pmode
, addr
);
7178 else if (GET_CODE (addr
) == CONST
)
7180 addr
= XEXP (addr
, 0);
7182 /* We must match stuff we generate before. Assume the only
7183 unspecs that can get here are ours. Not that we could do
7184 anything with them anyway.... */
7185 if (GET_CODE (addr
) == UNSPEC
7186 || (GET_CODE (addr
) == PLUS
7187 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7189 gcc_assert (GET_CODE (addr
) == PLUS
);
7191 if (GET_CODE (addr
) == PLUS
)
7193 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7195 /* Check first to see if this is a constant offset from a @GOTOFF
7196 symbol reference. */
7197 if (local_symbolic_operand (op0
, Pmode
)
7198 && CONST_INT_P (op1
))
7202 if (reload_in_progress
)
7203 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7204 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7206 new = gen_rtx_PLUS (Pmode
, new, op1
);
7207 new = gen_rtx_CONST (Pmode
, new);
7208 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7212 emit_move_insn (reg
, new);
7218 if (INTVAL (op1
) < -16*1024*1024
7219 || INTVAL (op1
) >= 16*1024*1024)
7221 if (!x86_64_immediate_operand (op1
, Pmode
))
7222 op1
= force_reg (Pmode
, op1
);
7223 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7229 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7230 new = legitimize_pic_address (XEXP (addr
, 1),
7231 base
== reg
? NULL_RTX
: reg
);
7233 if (CONST_INT_P (new))
7234 new = plus_constant (base
, INTVAL (new));
7237 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7239 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7240 new = XEXP (new, 1);
7242 new = gen_rtx_PLUS (Pmode
, base
, new);
7250 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7253 get_thread_pointer (int to_reg
)
7257 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7261 reg
= gen_reg_rtx (Pmode
);
7262 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7263 insn
= emit_insn (insn
);
7268 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7269 false if we expect this to be used for a memory address and true if
7270 we expect to load the address into a register. */
7273 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7275 rtx dest
, base
, off
, pic
, tp
;
7280 case TLS_MODEL_GLOBAL_DYNAMIC
:
7281 dest
= gen_reg_rtx (Pmode
);
7282 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7284 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7286 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7289 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7290 insns
= get_insns ();
7293 emit_libcall_block (insns
, dest
, rax
, x
);
7295 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7296 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7298 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7300 if (TARGET_GNU2_TLS
)
7302 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7304 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7308 case TLS_MODEL_LOCAL_DYNAMIC
:
7309 base
= gen_reg_rtx (Pmode
);
7310 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7312 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7314 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7317 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7318 insns
= get_insns ();
7321 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7322 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7323 emit_libcall_block (insns
, base
, rax
, note
);
7325 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7326 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7328 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7330 if (TARGET_GNU2_TLS
)
7332 rtx x
= ix86_tls_module_base ();
7334 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7335 gen_rtx_MINUS (Pmode
, x
, tp
));
7338 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7339 off
= gen_rtx_CONST (Pmode
, off
);
7341 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7343 if (TARGET_GNU2_TLS
)
7345 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7347 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7352 case TLS_MODEL_INITIAL_EXEC
:
7356 type
= UNSPEC_GOTNTPOFF
;
7360 if (reload_in_progress
)
7361 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7362 pic
= pic_offset_table_rtx
;
7363 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7365 else if (!TARGET_ANY_GNU_TLS
)
7367 pic
= gen_reg_rtx (Pmode
);
7368 emit_insn (gen_set_got (pic
));
7369 type
= UNSPEC_GOTTPOFF
;
7374 type
= UNSPEC_INDNTPOFF
;
7377 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7378 off
= gen_rtx_CONST (Pmode
, off
);
7380 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7381 off
= gen_const_mem (Pmode
, off
);
7382 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7384 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7386 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7387 off
= force_reg (Pmode
, off
);
7388 return gen_rtx_PLUS (Pmode
, base
, off
);
7392 base
= get_thread_pointer (true);
7393 dest
= gen_reg_rtx (Pmode
);
7394 emit_insn (gen_subsi3 (dest
, base
, off
));
7398 case TLS_MODEL_LOCAL_EXEC
:
7399 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7400 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7401 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7402 off
= gen_rtx_CONST (Pmode
, off
);
7404 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7406 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7407 return gen_rtx_PLUS (Pmode
, base
, off
);
7411 base
= get_thread_pointer (true);
7412 dest
= gen_reg_rtx (Pmode
);
7413 emit_insn (gen_subsi3 (dest
, base
, off
));
7424 /* Try machine-dependent ways of modifying an illegitimate address
7425 to be legitimate. If we find one, return the new, valid address.
7426 This macro is used in only one place: `memory_address' in explow.c.
7428 OLDX is the address as it was before break_out_memory_refs was called.
7429 In some cases it is useful to look at this to decide what needs to be done.
7431 MODE and WIN are passed so that this macro can use
7432 GO_IF_LEGITIMATE_ADDRESS.
7434 It is always safe for this macro to do nothing. It exists to recognize
7435 opportunities to optimize the output.
7437 For the 80386, we handle X+REG by loading X into a register R and
7438 using R+REG. R will go in a general reg and indexing will be used.
7439 However, if REG is a broken-out memory address or multiplication,
7440 nothing needs to be done because REG can certainly go in a general reg.
7442 When -fpic is used, special handling is needed for symbolic references.
7443 See comments by legitimize_pic_address in i386.c for details. */
7446 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7451 if (TARGET_DEBUG_ADDR
)
7453 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7454 GET_MODE_NAME (mode
));
7458 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7460 return legitimize_tls_address (x
, log
, false);
7461 if (GET_CODE (x
) == CONST
7462 && GET_CODE (XEXP (x
, 0)) == PLUS
7463 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7464 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7466 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7467 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7470 if (flag_pic
&& SYMBOLIC_CONST (x
))
7471 return legitimize_pic_address (x
, 0);
7473 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7474 if (GET_CODE (x
) == ASHIFT
7475 && CONST_INT_P (XEXP (x
, 1))
7476 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7479 log
= INTVAL (XEXP (x
, 1));
7480 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7481 GEN_INT (1 << log
));
7484 if (GET_CODE (x
) == PLUS
)
7486 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7488 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7489 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7490 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7493 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7494 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7495 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7496 GEN_INT (1 << log
));
7499 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7500 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7501 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7504 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7505 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7506 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7507 GEN_INT (1 << log
));
7510 /* Put multiply first if it isn't already. */
7511 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7513 rtx tmp
= XEXP (x
, 0);
7514 XEXP (x
, 0) = XEXP (x
, 1);
7519 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7520 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7521 created by virtual register instantiation, register elimination, and
7522 similar optimizations. */
7523 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7526 x
= gen_rtx_PLUS (Pmode
,
7527 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7528 XEXP (XEXP (x
, 1), 0)),
7529 XEXP (XEXP (x
, 1), 1));
7533 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7534 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7535 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7536 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7537 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7538 && CONSTANT_P (XEXP (x
, 1)))
7541 rtx other
= NULL_RTX
;
7543 if (CONST_INT_P (XEXP (x
, 1)))
7545 constant
= XEXP (x
, 1);
7546 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7548 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7550 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7551 other
= XEXP (x
, 1);
7559 x
= gen_rtx_PLUS (Pmode
,
7560 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7561 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7562 plus_constant (other
, INTVAL (constant
)));
7566 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7569 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7572 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7575 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7578 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7582 && REG_P (XEXP (x
, 1))
7583 && REG_P (XEXP (x
, 0)))
7586 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7589 x
= legitimize_pic_address (x
, 0);
7592 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7595 if (REG_P (XEXP (x
, 0)))
7597 rtx temp
= gen_reg_rtx (Pmode
);
7598 rtx val
= force_operand (XEXP (x
, 1), temp
);
7600 emit_move_insn (temp
, val
);
7606 else if (REG_P (XEXP (x
, 1)))
7608 rtx temp
= gen_reg_rtx (Pmode
);
7609 rtx val
= force_operand (XEXP (x
, 0), temp
);
7611 emit_move_insn (temp
, val
);
7621 /* Print an integer constant expression in assembler syntax. Addition
7622 and subtraction are the only arithmetic that may appear in these
7623 expressions. FILE is the stdio stream to write to, X is the rtx, and
7624 CODE is the operand print code from the output string. */
7627 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7631 switch (GET_CODE (x
))
7634 gcc_assert (flag_pic
);
7639 if (! TARGET_MACHO
|| TARGET_64BIT
)
7640 output_addr_const (file
, x
);
7643 const char *name
= XSTR (x
, 0);
7645 /* Mark the decl as referenced so that cgraph will output the function. */
7646 if (SYMBOL_REF_DECL (x
))
7647 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7650 if (MACHOPIC_INDIRECT
7651 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7652 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7654 assemble_name (file
, name
);
7656 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7657 fputs ("@PLT", file
);
7664 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7665 assemble_name (asm_out_file
, buf
);
7669 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7673 /* This used to output parentheses around the expression,
7674 but that does not work on the 386 (either ATT or BSD assembler). */
7675 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7679 if (GET_MODE (x
) == VOIDmode
)
7681 /* We can use %d if the number is <32 bits and positive. */
7682 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7683 fprintf (file
, "0x%lx%08lx",
7684 (unsigned long) CONST_DOUBLE_HIGH (x
),
7685 (unsigned long) CONST_DOUBLE_LOW (x
));
7687 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7690 /* We can't handle floating point constants;
7691 PRINT_OPERAND must handle them. */
7692 output_operand_lossage ("floating constant misused");
7696 /* Some assemblers need integer constants to appear first. */
7697 if (CONST_INT_P (XEXP (x
, 0)))
7699 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7701 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7705 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7706 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7708 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7714 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7715 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7717 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7719 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7723 gcc_assert (XVECLEN (x
, 0) == 1);
7724 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7725 switch (XINT (x
, 1))
7728 fputs ("@GOT", file
);
7731 fputs ("@GOTOFF", file
);
7733 case UNSPEC_GOTPCREL
:
7734 fputs ("@GOTPCREL(%rip)", file
);
7736 case UNSPEC_GOTTPOFF
:
7737 /* FIXME: This might be @TPOFF in Sun ld too. */
7738 fputs ("@GOTTPOFF", file
);
7741 fputs ("@TPOFF", file
);
7745 fputs ("@TPOFF", file
);
7747 fputs ("@NTPOFF", file
);
7750 fputs ("@DTPOFF", file
);
7752 case UNSPEC_GOTNTPOFF
:
7754 fputs ("@GOTTPOFF(%rip)", file
);
7756 fputs ("@GOTNTPOFF", file
);
7758 case UNSPEC_INDNTPOFF
:
7759 fputs ("@INDNTPOFF", file
);
7762 output_operand_lossage ("invalid UNSPEC as operand");
7768 output_operand_lossage ("invalid expression as operand");
7772 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7773 We need to emit DTP-relative relocations. */
7776 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7778 fputs (ASM_LONG
, file
);
7779 output_addr_const (file
, x
);
7780 fputs ("@DTPOFF", file
);
7786 fputs (", 0", file
);
7793 /* In the name of slightly smaller debug output, and to cater to
7794 general assembler lossage, recognize PIC+GOTOFF and turn it back
7795 into a direct symbol reference.
7797 On Darwin, this is necessary to avoid a crash, because Darwin
7798 has a different PIC label for each routine but the DWARF debugging
7799 information is not associated with any particular routine, so it's
7800 necessary to remove references to the PIC label from RTL stored by
7801 the DWARF output code. */
7804 ix86_delegitimize_address (rtx orig_x
)
7807 /* reg_addend is NULL or a multiple of some register. */
7808 rtx reg_addend
= NULL_RTX
;
7809 /* const_addend is NULL or a const_int. */
7810 rtx const_addend
= NULL_RTX
;
7811 /* This is the result, or NULL. */
7812 rtx result
= NULL_RTX
;
7819 if (GET_CODE (x
) != CONST
7820 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7821 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7824 return XVECEXP (XEXP (x
, 0), 0, 0);
7827 if (GET_CODE (x
) != PLUS
7828 || GET_CODE (XEXP (x
, 1)) != CONST
)
7831 if (REG_P (XEXP (x
, 0))
7832 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7833 /* %ebx + GOT/GOTOFF */
7835 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7837 /* %ebx + %reg * scale + GOT/GOTOFF */
7838 reg_addend
= XEXP (x
, 0);
7839 if (REG_P (XEXP (reg_addend
, 0))
7840 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7841 reg_addend
= XEXP (reg_addend
, 1);
7842 else if (REG_P (XEXP (reg_addend
, 1))
7843 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7844 reg_addend
= XEXP (reg_addend
, 0);
7847 if (!REG_P (reg_addend
)
7848 && GET_CODE (reg_addend
) != MULT
7849 && GET_CODE (reg_addend
) != ASHIFT
)
7855 x
= XEXP (XEXP (x
, 1), 0);
7856 if (GET_CODE (x
) == PLUS
7857 && CONST_INT_P (XEXP (x
, 1)))
7859 const_addend
= XEXP (x
, 1);
7863 if (GET_CODE (x
) == UNSPEC
7864 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7865 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7866 result
= XVECEXP (x
, 0, 0);
7868 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7870 result
= XEXP (x
, 0);
7876 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7878 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7883 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7888 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7890 enum rtx_code second_code
, bypass_code
;
7891 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7892 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7893 code
= ix86_fp_compare_code_to_integer (code
);
7897 code
= reverse_condition (code
);
7908 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7912 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7913 Those same assemblers have the same but opposite lossage on cmov. */
7914 gcc_assert (mode
== CCmode
);
7915 suffix
= fp
? "nbe" : "a";
7935 gcc_assert (mode
== CCmode
);
7957 gcc_assert (mode
== CCmode
);
7958 suffix
= fp
? "nb" : "ae";
7961 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7965 gcc_assert (mode
== CCmode
);
7969 suffix
= fp
? "u" : "p";
7972 suffix
= fp
? "nu" : "np";
7977 fputs (suffix
, file
);
7980 /* Print the name of register X to FILE based on its machine mode and number.
7981 If CODE is 'w', pretend the mode is HImode.
7982 If CODE is 'b', pretend the mode is QImode.
7983 If CODE is 'k', pretend the mode is SImode.
7984 If CODE is 'q', pretend the mode is DImode.
7985 If CODE is 'h', pretend the reg is the 'high' byte register.
7986 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7989 print_reg (rtx x
, int code
, FILE *file
)
7991 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7992 && REGNO (x
) != FRAME_POINTER_REGNUM
7993 && REGNO (x
) != FLAGS_REG
7994 && REGNO (x
) != FPSR_REG
7995 && REGNO (x
) != FPCR_REG
);
7997 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8000 if (code
== 'w' || MMX_REG_P (x
))
8002 else if (code
== 'b')
8004 else if (code
== 'k')
8006 else if (code
== 'q')
8008 else if (code
== 'y')
8010 else if (code
== 'h')
8013 code
= GET_MODE_SIZE (GET_MODE (x
));
8015 /* Irritatingly, AMD extended registers use different naming convention
8016 from the normal registers. */
8017 if (REX_INT_REG_P (x
))
8019 gcc_assert (TARGET_64BIT
);
8023 error ("extended registers have no high halves");
8026 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8029 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8032 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8035 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8038 error ("unsupported operand size for extended register");
8046 if (STACK_TOP_P (x
))
8048 fputs ("st(0)", file
);
8055 if (! ANY_FP_REG_P (x
))
8056 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8061 fputs (hi_reg_name
[REGNO (x
)], file
);
8064 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8066 fputs (qi_reg_name
[REGNO (x
)], file
);
8069 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8071 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8078 /* Locate some local-dynamic symbol still in use by this function
8079 so that we can print its name in some tls_local_dynamic_base
8083 get_some_local_dynamic_name (void)
8087 if (cfun
->machine
->some_ld_name
)
8088 return cfun
->machine
->some_ld_name
;
8090 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8092 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8093 return cfun
->machine
->some_ld_name
;
8099 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8103 if (GET_CODE (x
) == SYMBOL_REF
8104 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8106 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8114 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8115 C -- print opcode suffix for set/cmov insn.
8116 c -- like C, but print reversed condition
8117 F,f -- likewise, but for floating-point.
8118 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8120 R -- print the prefix for register names.
8121 z -- print the opcode suffix for the size of the current operand.
8122 * -- print a star (in certain assembler syntax)
8123 A -- print an absolute memory reference.
8124 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8125 s -- print a shift double count, followed by the assemblers argument
8127 b -- print the QImode name of the register for the indicated operand.
8128 %b0 would print %al if operands[0] is reg 0.
8129 w -- likewise, print the HImode name of the register.
8130 k -- likewise, print the SImode name of the register.
8131 q -- likewise, print the DImode name of the register.
8132 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8133 y -- print "st(0)" instead of "st" as a register.
8134 D -- print condition for SSE cmp instruction.
8135 P -- if PIC, print an @PLT suffix.
8136 X -- don't print any sort of PIC '@' suffix for a symbol.
8137 & -- print some in-use local-dynamic symbol name.
8138 H -- print a memory address offset by 8; used for sse high-parts
8142 print_operand (FILE *file
, rtx x
, int code
)
8149 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8154 assemble_name (file
, get_some_local_dynamic_name ());
8158 switch (ASSEMBLER_DIALECT
)
8165 /* Intel syntax. For absolute addresses, registers should not
8166 be surrounded by braces. */
8170 PRINT_OPERAND (file
, x
, 0);
8180 PRINT_OPERAND (file
, x
, 0);
8185 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8190 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8195 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8200 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8205 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8210 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8215 /* 387 opcodes don't get size suffixes if the operands are
8217 if (STACK_REG_P (x
))
8220 /* Likewise if using Intel opcodes. */
8221 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8224 /* This is the size of op from size of operand. */
8225 switch (GET_MODE_SIZE (GET_MODE (x
)))
8232 #ifdef HAVE_GAS_FILDS_FISTS
8238 if (GET_MODE (x
) == SFmode
)
8253 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8255 #ifdef GAS_MNEMONICS
8281 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8283 PRINT_OPERAND (file
, x
, 0);
8289 /* Little bit of braindamage here. The SSE compare instructions
8290 does use completely different names for the comparisons that the
8291 fp conditional moves. */
8292 switch (GET_CODE (x
))
8307 fputs ("unord", file
);
8311 fputs ("neq", file
);
8315 fputs ("nlt", file
);
8319 fputs ("nle", file
);
8322 fputs ("ord", file
);
8329 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8330 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8332 switch (GET_MODE (x
))
8334 case HImode
: putc ('w', file
); break;
8336 case SFmode
: putc ('l', file
); break;
8338 case DFmode
: putc ('q', file
); break;
8339 default: gcc_unreachable ();
8346 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8349 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8350 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8353 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8356 /* Like above, but reverse condition */
8358 /* Check to see if argument to %c is really a constant
8359 and not a condition code which needs to be reversed. */
8360 if (!COMPARISON_P (x
))
8362 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8365 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8368 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8369 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8372 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8376 /* It doesn't actually matter what mode we use here, as we're
8377 only going to use this for printing. */
8378 x
= adjust_address_nv (x
, DImode
, 8);
8385 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8388 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8391 int pred_val
= INTVAL (XEXP (x
, 0));
8393 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8394 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8396 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8397 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8399 /* Emit hints only in the case default branch prediction
8400 heuristics would fail. */
8401 if (taken
!= cputaken
)
8403 /* We use 3e (DS) prefix for taken branches and
8404 2e (CS) prefix for not taken branches. */
8406 fputs ("ds ; ", file
);
8408 fputs ("cs ; ", file
);
8415 output_operand_lossage ("invalid operand code '%c'", code
);
8420 print_reg (x
, code
, file
);
8424 /* No `byte ptr' prefix for call instructions. */
8425 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8428 switch (GET_MODE_SIZE (GET_MODE (x
)))
8430 case 1: size
= "BYTE"; break;
8431 case 2: size
= "WORD"; break;
8432 case 4: size
= "DWORD"; break;
8433 case 8: size
= "QWORD"; break;
8434 case 12: size
= "XWORD"; break;
8435 case 16: size
= "XMMWORD"; break;
8440 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8443 else if (code
== 'w')
8445 else if (code
== 'k')
8449 fputs (" PTR ", file
);
8453 /* Avoid (%rip) for call operands. */
8454 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8455 && !CONST_INT_P (x
))
8456 output_addr_const (file
, x
);
8457 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8458 output_operand_lossage ("invalid constraints for operand");
8463 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8468 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8469 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8471 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8473 fprintf (file
, "0x%08lx", l
);
8476 /* These float cases don't actually occur as immediate operands. */
8477 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8481 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8482 fprintf (file
, "%s", dstr
);
8485 else if (GET_CODE (x
) == CONST_DOUBLE
8486 && GET_MODE (x
) == XFmode
)
8490 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8491 fprintf (file
, "%s", dstr
);
8496 /* We have patterns that allow zero sets of memory, for instance.
8497 In 64-bit mode, we should probably support all 8-byte vectors,
8498 since we can in fact encode that into an immediate. */
8499 if (GET_CODE (x
) == CONST_VECTOR
)
8501 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8507 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8509 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8512 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8513 || GET_CODE (x
) == LABEL_REF
)
8515 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8518 fputs ("OFFSET FLAT:", file
);
8521 if (CONST_INT_P (x
))
8522 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8524 output_pic_addr_const (file
, x
, code
);
8526 output_addr_const (file
, x
);
8530 /* Print a memory operand whose address is ADDR. */
8533 print_operand_address (FILE *file
, rtx addr
)
8535 struct ix86_address parts
;
8536 rtx base
, index
, disp
;
8538 int ok
= ix86_decompose_address (addr
, &parts
);
8543 index
= parts
.index
;
8545 scale
= parts
.scale
;
8553 if (USER_LABEL_PREFIX
[0] == 0)
8555 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8561 if (!base
&& !index
)
8563 /* Displacement only requires special attention. */
8565 if (CONST_INT_P (disp
))
8567 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8569 if (USER_LABEL_PREFIX
[0] == 0)
8571 fputs ("ds:", file
);
8573 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8576 output_pic_addr_const (file
, disp
, 0);
8578 output_addr_const (file
, disp
);
8580 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8583 if (GET_CODE (disp
) == CONST
8584 && GET_CODE (XEXP (disp
, 0)) == PLUS
8585 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8586 disp
= XEXP (XEXP (disp
, 0), 0);
8587 if (GET_CODE (disp
) == LABEL_REF
8588 || (GET_CODE (disp
) == SYMBOL_REF
8589 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8590 fputs ("(%rip)", file
);
8595 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8600 output_pic_addr_const (file
, disp
, 0);
8601 else if (GET_CODE (disp
) == LABEL_REF
)
8602 output_asm_label (disp
);
8604 output_addr_const (file
, disp
);
8609 print_reg (base
, 0, file
);
8613 print_reg (index
, 0, file
);
8615 fprintf (file
, ",%d", scale
);
8621 rtx offset
= NULL_RTX
;
8625 /* Pull out the offset of a symbol; print any symbol itself. */
8626 if (GET_CODE (disp
) == CONST
8627 && GET_CODE (XEXP (disp
, 0)) == PLUS
8628 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8630 offset
= XEXP (XEXP (disp
, 0), 1);
8631 disp
= gen_rtx_CONST (VOIDmode
,
8632 XEXP (XEXP (disp
, 0), 0));
8636 output_pic_addr_const (file
, disp
, 0);
8637 else if (GET_CODE (disp
) == LABEL_REF
)
8638 output_asm_label (disp
);
8639 else if (CONST_INT_P (disp
))
8642 output_addr_const (file
, disp
);
8648 print_reg (base
, 0, file
);
8651 if (INTVAL (offset
) >= 0)
8653 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8657 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8664 print_reg (index
, 0, file
);
8666 fprintf (file
, "*%d", scale
);
8674 output_addr_const_extra (FILE *file
, rtx x
)
8678 if (GET_CODE (x
) != UNSPEC
)
8681 op
= XVECEXP (x
, 0, 0);
8682 switch (XINT (x
, 1))
8684 case UNSPEC_GOTTPOFF
:
8685 output_addr_const (file
, op
);
8686 /* FIXME: This might be @TPOFF in Sun ld. */
8687 fputs ("@GOTTPOFF", file
);
8690 output_addr_const (file
, op
);
8691 fputs ("@TPOFF", file
);
8694 output_addr_const (file
, op
);
8696 fputs ("@TPOFF", file
);
8698 fputs ("@NTPOFF", file
);
8701 output_addr_const (file
, op
);
8702 fputs ("@DTPOFF", file
);
8704 case UNSPEC_GOTNTPOFF
:
8705 output_addr_const (file
, op
);
8707 fputs ("@GOTTPOFF(%rip)", file
);
8709 fputs ("@GOTNTPOFF", file
);
8711 case UNSPEC_INDNTPOFF
:
8712 output_addr_const (file
, op
);
8713 fputs ("@INDNTPOFF", file
);
8723 /* Split one or more DImode RTL references into pairs of SImode
8724 references. The RTL can be REG, offsettable MEM, integer constant, or
8725 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8726 split and "num" is its length. lo_half and hi_half are output arrays
8727 that parallel "operands". */
8730 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8734 rtx op
= operands
[num
];
8736 /* simplify_subreg refuse to split volatile memory addresses,
8737 but we still have to handle it. */
8740 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8741 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8745 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8746 GET_MODE (op
) == VOIDmode
8747 ? DImode
: GET_MODE (op
), 0);
8748 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8749 GET_MODE (op
) == VOIDmode
8750 ? DImode
: GET_MODE (op
), 4);
8754 /* Split one or more TImode RTL references into pairs of DImode
8755 references. The RTL can be REG, offsettable MEM, integer constant, or
8756 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8757 split and "num" is its length. lo_half and hi_half are output arrays
8758 that parallel "operands". */
8761 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8765 rtx op
= operands
[num
];
8767 /* simplify_subreg refuse to split volatile memory addresses, but we
8768 still have to handle it. */
8771 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8772 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8776 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8777 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8782 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8783 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8784 is the expression of the binary operation. The output may either be
8785 emitted here, or returned to the caller, like all output_* functions.
8787 There is no guarantee that the operands are the same mode, as they
8788 might be within FLOAT or FLOAT_EXTEND expressions. */
8790 #ifndef SYSV386_COMPAT
8791 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8792 wants to fix the assemblers because that causes incompatibility
8793 with gcc. No-one wants to fix gcc because that causes
8794 incompatibility with assemblers... You can use the option of
8795 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8796 #define SYSV386_COMPAT 1
8800 output_387_binary_op (rtx insn
, rtx
*operands
)
8802 static char buf
[30];
8805 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8807 #ifdef ENABLE_CHECKING
8808 /* Even if we do not want to check the inputs, this documents input
8809 constraints. Which helps in understanding the following code. */
8810 if (STACK_REG_P (operands
[0])
8811 && ((REG_P (operands
[1])
8812 && REGNO (operands
[0]) == REGNO (operands
[1])
8813 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8814 || (REG_P (operands
[2])
8815 && REGNO (operands
[0]) == REGNO (operands
[2])
8816 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8817 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8820 gcc_assert (is_sse
);
8823 switch (GET_CODE (operands
[3]))
8826 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8827 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8835 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8836 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8844 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8845 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8853 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8854 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8868 if (GET_MODE (operands
[0]) == SFmode
)
8869 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8871 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8876 switch (GET_CODE (operands
[3]))
8880 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8882 rtx temp
= operands
[2];
8883 operands
[2] = operands
[1];
8887 /* know operands[0] == operands[1]. */
8889 if (MEM_P (operands
[2]))
8895 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8897 if (STACK_TOP_P (operands
[0]))
8898 /* How is it that we are storing to a dead operand[2]?
8899 Well, presumably operands[1] is dead too. We can't
8900 store the result to st(0) as st(0) gets popped on this
8901 instruction. Instead store to operands[2] (which I
8902 think has to be st(1)). st(1) will be popped later.
8903 gcc <= 2.8.1 didn't have this check and generated
8904 assembly code that the Unixware assembler rejected. */
8905 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8907 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8911 if (STACK_TOP_P (operands
[0]))
8912 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8914 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8919 if (MEM_P (operands
[1]))
8925 if (MEM_P (operands
[2]))
8931 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8934 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8935 derived assemblers, confusingly reverse the direction of
8936 the operation for fsub{r} and fdiv{r} when the
8937 destination register is not st(0). The Intel assembler
8938 doesn't have this brain damage. Read !SYSV386_COMPAT to
8939 figure out what the hardware really does. */
8940 if (STACK_TOP_P (operands
[0]))
8941 p
= "{p\t%0, %2|rp\t%2, %0}";
8943 p
= "{rp\t%2, %0|p\t%0, %2}";
8945 if (STACK_TOP_P (operands
[0]))
8946 /* As above for fmul/fadd, we can't store to st(0). */
8947 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8949 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8954 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8957 if (STACK_TOP_P (operands
[0]))
8958 p
= "{rp\t%0, %1|p\t%1, %0}";
8960 p
= "{p\t%1, %0|rp\t%0, %1}";
8962 if (STACK_TOP_P (operands
[0]))
8963 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8965 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8970 if (STACK_TOP_P (operands
[0]))
8972 if (STACK_TOP_P (operands
[1]))
8973 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8975 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8978 else if (STACK_TOP_P (operands
[1]))
8981 p
= "{\t%1, %0|r\t%0, %1}";
8983 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8989 p
= "{r\t%2, %0|\t%0, %2}";
8991 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9004 /* Return needed mode for entity in optimize_mode_switching pass. */
9007 ix86_mode_needed (int entity
, rtx insn
)
9009 enum attr_i387_cw mode
;
9011 /* The mode UNINITIALIZED is used to store control word after a
9012 function call or ASM pattern. The mode ANY specify that function
9013 has no requirements on the control word and make no changes in the
9014 bits we are interested in. */
9017 || (NONJUMP_INSN_P (insn
)
9018 && (asm_noperands (PATTERN (insn
)) >= 0
9019 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9020 return I387_CW_UNINITIALIZED
;
9022 if (recog_memoized (insn
) < 0)
9025 mode
= get_attr_i387_cw (insn
);
9030 if (mode
== I387_CW_TRUNC
)
9035 if (mode
== I387_CW_FLOOR
)
9040 if (mode
== I387_CW_CEIL
)
9045 if (mode
== I387_CW_MASK_PM
)
9056 /* Output code to initialize control word copies used by trunc?f?i and
9057 rounding patterns. CURRENT_MODE is set to current control word,
9058 while NEW_MODE is set to new control word. */
9061 emit_i387_cw_initialization (int mode
)
9063 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9068 rtx reg
= gen_reg_rtx (HImode
);
9070 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9071 emit_move_insn (reg
, copy_rtx (stored_mode
));
9073 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9078 /* round toward zero (truncate) */
9079 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9080 slot
= SLOT_CW_TRUNC
;
9084 /* round down toward -oo */
9085 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9086 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9087 slot
= SLOT_CW_FLOOR
;
9091 /* round up toward +oo */
9092 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9093 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9094 slot
= SLOT_CW_CEIL
;
9097 case I387_CW_MASK_PM
:
9098 /* mask precision exception for nearbyint() */
9099 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9100 slot
= SLOT_CW_MASK_PM
;
9112 /* round toward zero (truncate) */
9113 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9114 slot
= SLOT_CW_TRUNC
;
9118 /* round down toward -oo */
9119 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9120 slot
= SLOT_CW_FLOOR
;
9124 /* round up toward +oo */
9125 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9126 slot
= SLOT_CW_CEIL
;
9129 case I387_CW_MASK_PM
:
9130 /* mask precision exception for nearbyint() */
9131 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9132 slot
= SLOT_CW_MASK_PM
;
9140 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9142 new_mode
= assign_386_stack_local (HImode
, slot
);
9143 emit_move_insn (new_mode
, reg
);
9146 /* Output code for INSN to convert a float to a signed int. OPERANDS
9147 are the insn operands. The output may be [HSD]Imode and the input
9148 operand may be [SDX]Fmode. */
9151 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9153 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9154 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9155 int round_mode
= get_attr_i387_cw (insn
);
9157 /* Jump through a hoop or two for DImode, since the hardware has no
9158 non-popping instruction. We used to do this a different way, but
9159 that was somewhat fragile and broke with post-reload splitters. */
9160 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9161 output_asm_insn ("fld\t%y1", operands
);
9163 gcc_assert (STACK_TOP_P (operands
[1]));
9164 gcc_assert (MEM_P (operands
[0]));
9167 output_asm_insn ("fisttp%z0\t%0", operands
);
9170 if (round_mode
!= I387_CW_ANY
)
9171 output_asm_insn ("fldcw\t%3", operands
);
9172 if (stack_top_dies
|| dimode_p
)
9173 output_asm_insn ("fistp%z0\t%0", operands
);
9175 output_asm_insn ("fist%z0\t%0", operands
);
9176 if (round_mode
!= I387_CW_ANY
)
9177 output_asm_insn ("fldcw\t%2", operands
);
9183 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9184 have the values zero or one, indicates the ffreep insn's operand
9185 from the OPERANDS array. */
9188 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9190 if (TARGET_USE_FFREEP
)
9191 #if HAVE_AS_IX86_FFREEP
9192 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9195 static char retval
[] = ".word\t0xc_df";
9196 int regno
= REGNO (operands
[opno
]);
9198 gcc_assert (FP_REGNO_P (regno
));
9200 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9205 return opno
? "fstp\t%y1" : "fstp\t%y0";
9209 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9210 should be used. UNORDERED_P is true when fucom should be used. */
9213 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9216 rtx cmp_op0
, cmp_op1
;
9217 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9221 cmp_op0
= operands
[0];
9222 cmp_op1
= operands
[1];
9226 cmp_op0
= operands
[1];
9227 cmp_op1
= operands
[2];
9232 if (GET_MODE (operands
[0]) == SFmode
)
9234 return "ucomiss\t{%1, %0|%0, %1}";
9236 return "comiss\t{%1, %0|%0, %1}";
9239 return "ucomisd\t{%1, %0|%0, %1}";
9241 return "comisd\t{%1, %0|%0, %1}";
9244 gcc_assert (STACK_TOP_P (cmp_op0
));
9246 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9248 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9252 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9253 return output_387_ffreep (operands
, 1);
9256 return "ftst\n\tfnstsw\t%0";
9259 if (STACK_REG_P (cmp_op1
)
9261 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9262 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9264 /* If both the top of the 387 stack dies, and the other operand
9265 is also a stack register that dies, then this must be a
9266 `fcompp' float compare */
9270 /* There is no double popping fcomi variant. Fortunately,
9271 eflags is immune from the fstp's cc clobbering. */
9273 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9275 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9276 return output_387_ffreep (operands
, 0);
9281 return "fucompp\n\tfnstsw\t%0";
9283 return "fcompp\n\tfnstsw\t%0";
9288 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9290 static const char * const alt
[16] =
9292 "fcom%z2\t%y2\n\tfnstsw\t%0",
9293 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9294 "fucom%z2\t%y2\n\tfnstsw\t%0",
9295 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9297 "ficom%z2\t%y2\n\tfnstsw\t%0",
9298 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9302 "fcomi\t{%y1, %0|%0, %y1}",
9303 "fcomip\t{%y1, %0|%0, %y1}",
9304 "fucomi\t{%y1, %0|%0, %y1}",
9305 "fucomip\t{%y1, %0|%0, %y1}",
9316 mask
= eflags_p
<< 3;
9317 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9318 mask
|= unordered_p
<< 1;
9319 mask
|= stack_top_dies
;
9321 gcc_assert (mask
< 16);
9330 ix86_output_addr_vec_elt (FILE *file
, int value
)
9332 const char *directive
= ASM_LONG
;
9336 directive
= ASM_QUAD
;
9338 gcc_assert (!TARGET_64BIT
);
9341 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9345 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9348 fprintf (file
, "%s%s%d-%s%d\n",
9349 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9350 else if (HAVE_AS_GOTOFF_IN_DATA
)
9351 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9353 else if (TARGET_MACHO
)
9355 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9356 machopic_output_function_base_name (file
);
9357 fprintf(file
, "\n");
9361 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9362 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9365 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9369 ix86_expand_clear (rtx dest
)
9373 /* We play register width games, which are only valid after reload. */
9374 gcc_assert (reload_completed
);
9376 /* Avoid HImode and its attendant prefix byte. */
9377 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9378 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9380 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9382 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9383 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9385 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9386 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9392 /* X is an unchanging MEM. If it is a constant pool reference, return
9393 the constant pool rtx, else NULL. */
9396 maybe_get_pool_constant (rtx x
)
9398 x
= ix86_delegitimize_address (XEXP (x
, 0));
9400 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9401 return get_pool_constant (x
);
9407 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9409 int strict
= (reload_in_progress
|| reload_completed
);
9411 enum tls_model model
;
9416 if (GET_CODE (op1
) == SYMBOL_REF
)
9418 model
= SYMBOL_REF_TLS_MODEL (op1
);
9421 op1
= legitimize_tls_address (op1
, model
, true);
9422 op1
= force_operand (op1
, op0
);
9427 else if (GET_CODE (op1
) == CONST
9428 && GET_CODE (XEXP (op1
, 0)) == PLUS
9429 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9431 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9434 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9435 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9436 op1
= force_operand (op1
, NULL
);
9437 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9438 op0
, 1, OPTAB_DIRECT
);
9444 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9446 if (TARGET_MACHO
&& !TARGET_64BIT
)
9451 rtx temp
= ((reload_in_progress
9452 || ((op0
&& REG_P (op0
))
9454 ? op0
: gen_reg_rtx (Pmode
));
9455 op1
= machopic_indirect_data_reference (op1
, temp
);
9456 op1
= machopic_legitimize_pic_address (op1
, mode
,
9457 temp
== op1
? 0 : temp
);
9459 else if (MACHOPIC_INDIRECT
)
9460 op1
= machopic_indirect_data_reference (op1
, 0);
9468 op1
= force_reg (Pmode
, op1
);
9470 op1
= legitimize_address (op1
, op1
, Pmode
);
9476 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9477 || !push_operand (op0
, mode
))
9479 op1
= force_reg (mode
, op1
);
9481 if (push_operand (op0
, mode
)
9482 && ! general_no_elim_operand (op1
, mode
))
9483 op1
= copy_to_mode_reg (mode
, op1
);
9485 /* Force large constants in 64bit compilation into register
9486 to get them CSEed. */
9487 if (TARGET_64BIT
&& mode
== DImode
9488 && immediate_operand (op1
, mode
)
9489 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9490 && !register_operand (op0
, mode
)
9491 && optimize
&& !reload_completed
&& !reload_in_progress
)
9492 op1
= copy_to_mode_reg (mode
, op1
);
9494 if (FLOAT_MODE_P (mode
))
9496 /* If we are loading a floating point constant to a register,
9497 force the value to memory now, since we'll get better code
9498 out the back end. */
9502 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9504 op1
= validize_mem (force_const_mem (mode
, op1
));
9505 if (!register_operand (op0
, mode
))
9507 rtx temp
= gen_reg_rtx (mode
);
9508 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9509 emit_move_insn (op0
, temp
);
9516 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9520 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9522 rtx op0
= operands
[0], op1
= operands
[1];
9524 /* Force constants other than zero into memory. We do not know how
9525 the instructions used to build constants modify the upper 64 bits
9526 of the register, once we have that information we may be able
9527 to handle some of them more efficiently. */
9528 if ((reload_in_progress
| reload_completed
) == 0
9529 && register_operand (op0
, mode
)
9531 && standard_sse_constant_p (op1
) <= 0)
9532 op1
= validize_mem (force_const_mem (mode
, op1
));
9534 /* Make operand1 a register if it isn't already. */
9536 && !register_operand (op0
, mode
)
9537 && !register_operand (op1
, mode
))
9539 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9543 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9546 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9547 straight to ix86_expand_vector_move. */
9548 /* Code generation for scalar reg-reg moves of single and double precision data:
9549 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9553 if (x86_sse_partial_reg_dependency == true)
9558 Code generation for scalar loads of double precision data:
9559 if (x86_sse_split_regs == true)
9560 movlpd mem, reg (gas syntax)
9564 Code generation for unaligned packed loads of single precision data
9565 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9566 if (x86_sse_unaligned_move_optimal)
9569 if (x86_sse_partial_reg_dependency == true)
9581 Code generation for unaligned packed loads of double precision data
9582 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9583 if (x86_sse_unaligned_move_optimal)
9586 if (x86_sse_split_regs == true)
9599 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9608 /* If we're optimizing for size, movups is the smallest. */
9611 op0
= gen_lowpart (V4SFmode
, op0
);
9612 op1
= gen_lowpart (V4SFmode
, op1
);
9613 emit_insn (gen_sse_movups (op0
, op1
));
9617 /* ??? If we have typed data, then it would appear that using
9618 movdqu is the only way to get unaligned data loaded with
9620 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9622 op0
= gen_lowpart (V16QImode
, op0
);
9623 op1
= gen_lowpart (V16QImode
, op1
);
9624 emit_insn (gen_sse2_movdqu (op0
, op1
));
9628 if (TARGET_SSE2
&& mode
== V2DFmode
)
9632 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9634 op0
= gen_lowpart (V2DFmode
, op0
);
9635 op1
= gen_lowpart (V2DFmode
, op1
);
9636 emit_insn (gen_sse2_movupd (op0
, op1
));
9640 /* When SSE registers are split into halves, we can avoid
9641 writing to the top half twice. */
9642 if (TARGET_SSE_SPLIT_REGS
)
9644 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9649 /* ??? Not sure about the best option for the Intel chips.
9650 The following would seem to satisfy; the register is
9651 entirely cleared, breaking the dependency chain. We
9652 then store to the upper half, with a dependency depth
9653 of one. A rumor has it that Intel recommends two movsd
9654 followed by an unpacklpd, but this is unconfirmed. And
9655 given that the dependency depth of the unpacklpd would
9656 still be one, I'm not sure why this would be better. */
9657 zero
= CONST0_RTX (V2DFmode
);
9660 m
= adjust_address (op1
, DFmode
, 0);
9661 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9662 m
= adjust_address (op1
, DFmode
, 8);
9663 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9667 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9669 op0
= gen_lowpart (V4SFmode
, op0
);
9670 op1
= gen_lowpart (V4SFmode
, op1
);
9671 emit_insn (gen_sse_movups (op0
, op1
));
9675 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9676 emit_move_insn (op0
, CONST0_RTX (mode
));
9678 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9680 if (mode
!= V4SFmode
)
9681 op0
= gen_lowpart (V4SFmode
, op0
);
9682 m
= adjust_address (op1
, V2SFmode
, 0);
9683 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9684 m
= adjust_address (op1
, V2SFmode
, 8);
9685 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9688 else if (MEM_P (op0
))
9690 /* If we're optimizing for size, movups is the smallest. */
9693 op0
= gen_lowpart (V4SFmode
, op0
);
9694 op1
= gen_lowpart (V4SFmode
, op1
);
9695 emit_insn (gen_sse_movups (op0
, op1
));
9699 /* ??? Similar to above, only less clear because of quote
9700 typeless stores unquote. */
9701 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9702 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9704 op0
= gen_lowpart (V16QImode
, op0
);
9705 op1
= gen_lowpart (V16QImode
, op1
);
9706 emit_insn (gen_sse2_movdqu (op0
, op1
));
9710 if (TARGET_SSE2
&& mode
== V2DFmode
)
9712 m
= adjust_address (op0
, DFmode
, 0);
9713 emit_insn (gen_sse2_storelpd (m
, op1
));
9714 m
= adjust_address (op0
, DFmode
, 8);
9715 emit_insn (gen_sse2_storehpd (m
, op1
));
9719 if (mode
!= V4SFmode
)
9720 op1
= gen_lowpart (V4SFmode
, op1
);
9721 m
= adjust_address (op0
, V2SFmode
, 0);
9722 emit_insn (gen_sse_storelps (m
, op1
));
9723 m
= adjust_address (op0
, V2SFmode
, 8);
9724 emit_insn (gen_sse_storehps (m
, op1
));
9731 /* Expand a push in MODE. This is some mode for which we do not support
9732 proper push instructions, at least from the registers that we expect
9733 the value to live in. */
9736 ix86_expand_push (enum machine_mode mode
, rtx x
)
9740 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9741 GEN_INT (-GET_MODE_SIZE (mode
)),
9742 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9743 if (tmp
!= stack_pointer_rtx
)
9744 emit_move_insn (stack_pointer_rtx
, tmp
);
9746 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9747 emit_move_insn (tmp
, x
);
9750 /* Helper function of ix86_fixup_binary_operands to canonicalize
9751 operand order. Returns true if the operands should be swapped. */
9754 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9757 rtx dst
= operands
[0];
9758 rtx src1
= operands
[1];
9759 rtx src2
= operands
[2];
9761 /* If the operation is not commutative, we can't do anything. */
9762 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9765 /* Highest priority is that src1 should match dst. */
9766 if (rtx_equal_p (dst
, src1
))
9768 if (rtx_equal_p (dst
, src2
))
9771 /* Next highest priority is that immediate constants come second. */
9772 if (immediate_operand (src2
, mode
))
9774 if (immediate_operand (src1
, mode
))
9777 /* Lowest priority is that memory references should come second. */
9787 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9788 destination to use for the operation. If different from the true
9789 destination in operands[0], a copy operation will be required. */
9792 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9795 rtx dst
= operands
[0];
9796 rtx src1
= operands
[1];
9797 rtx src2
= operands
[2];
9799 /* Canonicalize operand order. */
9800 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9807 /* Both source operands cannot be in memory. */
9808 if (MEM_P (src1
) && MEM_P (src2
))
9810 /* Optimization: Only read from memory once. */
9811 if (rtx_equal_p (src1
, src2
))
9813 src2
= force_reg (mode
, src2
);
9817 src2
= force_reg (mode
, src2
);
9820 /* If the destination is memory, and we do not have matching source
9821 operands, do things in registers. */
9822 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9823 dst
= gen_reg_rtx (mode
);
9825 /* Source 1 cannot be a constant. */
9826 if (CONSTANT_P (src1
))
9827 src1
= force_reg (mode
, src1
);
9829 /* Source 1 cannot be a non-matching memory. */
9830 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9831 src1
= force_reg (mode
, src1
);
9838 /* Similarly, but assume that the destination has already been
9842 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9843 enum machine_mode mode
, rtx operands
[])
9845 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9846 gcc_assert (dst
== operands
[0]);
9849 /* Attempt to expand a binary operator. Make the expansion closer to the
9850 actual machine, then just general_operand, which will allow 3 separate
9851 memory references (one output, two input) in a single insn. */
9854 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9857 rtx src1
, src2
, dst
, op
, clob
;
9859 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9863 /* Emit the instruction. */
9865 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9866 if (reload_in_progress
)
9868 /* Reload doesn't know about the flags register, and doesn't know that
9869 it doesn't want to clobber it. We can only do this with PLUS. */
9870 gcc_assert (code
== PLUS
);
9875 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9876 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9879 /* Fix up the destination if needed. */
9880 if (dst
!= operands
[0])
9881 emit_move_insn (operands
[0], dst
);
9884 /* Return TRUE or FALSE depending on whether the binary operator meets the
9885 appropriate constraints. */
9888 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9891 rtx dst
= operands
[0];
9892 rtx src1
= operands
[1];
9893 rtx src2
= operands
[2];
9895 /* Both source operands cannot be in memory. */
9896 if (MEM_P (src1
) && MEM_P (src2
))
9899 /* Canonicalize operand order for commutative operators. */
9900 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9907 /* If the destination is memory, we must have a matching source operand. */
9908 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9911 /* Source 1 cannot be a constant. */
9912 if (CONSTANT_P (src1
))
9915 /* Source 1 cannot be a non-matching memory. */
9916 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9922 /* Attempt to expand a unary operator. Make the expansion closer to the
9923 actual machine, then just general_operand, which will allow 2 separate
9924 memory references (one output, one input) in a single insn. */
9927 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9930 int matching_memory
;
9931 rtx src
, dst
, op
, clob
;
9936 /* If the destination is memory, and we do not have matching source
9937 operands, do things in registers. */
9938 matching_memory
= 0;
9941 if (rtx_equal_p (dst
, src
))
9942 matching_memory
= 1;
9944 dst
= gen_reg_rtx (mode
);
9947 /* When source operand is memory, destination must match. */
9948 if (MEM_P (src
) && !matching_memory
)
9949 src
= force_reg (mode
, src
);
9951 /* Emit the instruction. */
9953 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9954 if (reload_in_progress
|| code
== NOT
)
9956 /* Reload doesn't know about the flags register, and doesn't know that
9957 it doesn't want to clobber it. */
9958 gcc_assert (code
== NOT
);
9963 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9964 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9967 /* Fix up the destination if needed. */
9968 if (dst
!= operands
[0])
9969 emit_move_insn (operands
[0], dst
);
9972 /* Return TRUE or FALSE depending on whether the unary operator meets the
9973 appropriate constraints. */
9976 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9977 enum machine_mode mode ATTRIBUTE_UNUSED
,
9978 rtx operands
[2] ATTRIBUTE_UNUSED
)
9980 /* If one of operands is memory, source and destination must match. */
9981 if ((MEM_P (operands
[0])
9982 || MEM_P (operands
[1]))
9983 && ! rtx_equal_p (operands
[0], operands
[1]))
9988 /* Post-reload splitter for converting an SF or DFmode value in an
9989 SSE register into an unsigned SImode. */
9992 ix86_split_convert_uns_si_sse (rtx operands
[])
9994 enum machine_mode vecmode
;
9995 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
9997 large
= operands
[1];
9998 zero_or_two31
= operands
[2];
9999 input
= operands
[3];
10000 two31
= operands
[4];
10001 vecmode
= GET_MODE (large
);
10002 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10004 /* Load up the value into the low element. We must ensure that the other
10005 elements are valid floats -- zero is the easiest such value. */
10008 if (vecmode
== V4SFmode
)
10009 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10011 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10015 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10016 emit_move_insn (value
, CONST0_RTX (vecmode
));
10017 if (vecmode
== V4SFmode
)
10018 emit_insn (gen_sse_movss (value
, value
, input
));
10020 emit_insn (gen_sse2_movsd (value
, value
, input
));
10023 emit_move_insn (large
, two31
);
10024 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10026 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10027 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10029 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10030 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10032 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10033 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10035 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10036 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10038 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10039 if (vecmode
== V4SFmode
)
10040 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10042 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10045 emit_insn (gen_xorv4si3 (value
, value
, large
));
10048 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10049 Expects the 64-bit DImode to be supplied in a pair of integral
10050 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10051 -mfpmath=sse, !optimize_size only. */
10054 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10056 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10057 rtx int_xmm
, fp_xmm
;
10058 rtx biases
, exponents
;
10061 int_xmm
= gen_reg_rtx (V4SImode
);
10062 if (TARGET_INTER_UNIT_MOVES
)
10063 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10064 else if (TARGET_SSE_SPLIT_REGS
)
10066 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10067 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10071 x
= gen_reg_rtx (V2DImode
);
10072 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10073 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10076 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10077 gen_rtvec (4, GEN_INT (0x43300000UL
),
10078 GEN_INT (0x45300000UL
),
10079 const0_rtx
, const0_rtx
));
10080 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10082 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10083 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10085 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10086 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10087 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10088 (0x1.0p84 + double(fp_value_hi_xmm)).
10089 Note these exponents differ by 32. */
10091 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10093 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10094 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10095 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10096 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10097 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10098 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10099 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10100 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10101 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10103 /* Add the upper and lower DFmode values together. */
10105 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10108 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10109 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10110 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10113 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10116 /* Convert an unsigned SImode value into a DFmode. Only currently used
10117 for SSE, but applicable anywhere. */
10120 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10122 REAL_VALUE_TYPE TWO31r
;
10125 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10126 NULL
, 1, OPTAB_DIRECT
);
10128 fp
= gen_reg_rtx (DFmode
);
10129 emit_insn (gen_floatsidf2 (fp
, x
));
10131 real_ldexp (&TWO31r
, &dconst1
, 31);
10132 x
= const_double_from_real_value (TWO31r
, DFmode
);
10134 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10136 emit_move_insn (target
, x
);
10139 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10140 32-bit mode; otherwise we have a direct convert instruction. */
10143 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10145 REAL_VALUE_TYPE TWO32r
;
10146 rtx fp_lo
, fp_hi
, x
;
10148 fp_lo
= gen_reg_rtx (DFmode
);
10149 fp_hi
= gen_reg_rtx (DFmode
);
10151 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10153 real_ldexp (&TWO32r
, &dconst1
, 32);
10154 x
= const_double_from_real_value (TWO32r
, DFmode
);
10155 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10157 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10159 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10162 emit_move_insn (target
, x
);
10165 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10166 For x86_32, -mfpmath=sse, !optimize_size only. */
10168 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10170 REAL_VALUE_TYPE ONE16r
;
10171 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10173 real_ldexp (&ONE16r
, &dconst1
, 16);
10174 x
= const_double_from_real_value (ONE16r
, SFmode
);
10175 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10176 NULL
, 0, OPTAB_DIRECT
);
10177 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10178 NULL
, 0, OPTAB_DIRECT
);
10179 fp_hi
= gen_reg_rtx (SFmode
);
10180 fp_lo
= gen_reg_rtx (SFmode
);
10181 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10182 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10183 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10185 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10187 if (!rtx_equal_p (target
, fp_hi
))
10188 emit_move_insn (target
, fp_hi
);
10191 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10192 then replicate the value for all elements of the vector
10196 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10203 v
= gen_rtvec (4, value
, value
, value
, value
);
10205 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10206 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10207 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10211 v
= gen_rtvec (2, value
, value
);
10213 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10214 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10217 gcc_unreachable ();
10221 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10222 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10223 true, then replicate the mask for all elements of the vector register.
10224 If INVERT is true, then create a mask excluding the sign bit. */
10227 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10229 enum machine_mode vec_mode
;
10230 HOST_WIDE_INT hi
, lo
;
10235 /* Find the sign bit, sign extended to 2*HWI. */
10236 if (mode
== SFmode
)
10237 lo
= 0x80000000, hi
= lo
< 0;
10238 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10239 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10241 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10244 lo
= ~lo
, hi
= ~hi
;
10246 /* Force this value into the low part of a fp vector constant. */
10247 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10248 mask
= gen_lowpart (mode
, mask
);
10250 v
= ix86_build_const_vector (mode
, vect
, mask
);
10251 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10252 return force_reg (vec_mode
, v
);
10255 /* Generate code for floating point ABS or NEG. */
10258 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10261 rtx mask
, set
, use
, clob
, dst
, src
;
10262 bool matching_memory
;
10263 bool use_sse
= false;
10264 bool vector_mode
= VECTOR_MODE_P (mode
);
10265 enum machine_mode elt_mode
= mode
;
10269 elt_mode
= GET_MODE_INNER (mode
);
10272 else if (TARGET_SSE_MATH
)
10273 use_sse
= SSE_FLOAT_MODE_P (mode
);
10275 /* NEG and ABS performed with SSE use bitwise mask operations.
10276 Create the appropriate mask now. */
10278 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10285 /* If the destination is memory, and we don't have matching source
10286 operands or we're using the x87, do things in registers. */
10287 matching_memory
= false;
10290 if (use_sse
&& rtx_equal_p (dst
, src
))
10291 matching_memory
= true;
10293 dst
= gen_reg_rtx (mode
);
10295 if (MEM_P (src
) && !matching_memory
)
10296 src
= force_reg (mode
, src
);
10300 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10301 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10306 set
= gen_rtx_fmt_e (code
, mode
, src
);
10307 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10310 use
= gen_rtx_USE (VOIDmode
, mask
);
10311 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10312 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10313 gen_rtvec (3, set
, use
, clob
)));
10319 if (dst
!= operands
[0])
10320 emit_move_insn (operands
[0], dst
);
10323 /* Expand a copysign operation. Special case operand 0 being a constant. */
10326 ix86_expand_copysign (rtx operands
[])
10328 enum machine_mode mode
, vmode
;
10329 rtx dest
, op0
, op1
, mask
, nmask
;
10331 dest
= operands
[0];
10335 mode
= GET_MODE (dest
);
10336 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10338 if (GET_CODE (op0
) == CONST_DOUBLE
)
10342 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10343 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10345 if (op0
== CONST0_RTX (mode
))
10346 op0
= CONST0_RTX (vmode
);
10349 if (mode
== SFmode
)
10350 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10351 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10353 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10354 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10357 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10359 if (mode
== SFmode
)
10360 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10362 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10366 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10367 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10369 if (mode
== SFmode
)
10370 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10372 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10376 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10377 be a constant, and so has already been expanded into a vector constant. */
10380 ix86_split_copysign_const (rtx operands
[])
10382 enum machine_mode mode
, vmode
;
10383 rtx dest
, op0
, op1
, mask
, x
;
10385 dest
= operands
[0];
10388 mask
= operands
[3];
10390 mode
= GET_MODE (dest
);
10391 vmode
= GET_MODE (mask
);
10393 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10394 x
= gen_rtx_AND (vmode
, dest
, mask
);
10395 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10397 if (op0
!= CONST0_RTX (vmode
))
10399 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10400 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10404 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10405 so we have to do two masks. */
10408 ix86_split_copysign_var (rtx operands
[])
10410 enum machine_mode mode
, vmode
;
10411 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10413 dest
= operands
[0];
10414 scratch
= operands
[1];
10417 nmask
= operands
[4];
10418 mask
= operands
[5];
10420 mode
= GET_MODE (dest
);
10421 vmode
= GET_MODE (mask
);
10423 if (rtx_equal_p (op0
, op1
))
10425 /* Shouldn't happen often (it's useless, obviously), but when it does
10426 we'd generate incorrect code if we continue below. */
10427 emit_move_insn (dest
, op0
);
10431 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10433 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10435 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10436 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10439 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10440 x
= gen_rtx_NOT (vmode
, dest
);
10441 x
= gen_rtx_AND (vmode
, x
, op0
);
10442 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10446 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10448 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10450 else /* alternative 2,4 */
10452 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10453 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10454 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10456 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10458 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10460 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10461 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10463 else /* alternative 3,4 */
10465 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10467 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10468 x
= gen_rtx_AND (vmode
, dest
, op0
);
10470 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10473 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10474 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10477 /* Return TRUE or FALSE depending on whether the first SET in INSN
10478 has source and destination with matching CC modes, and that the
10479 CC mode is at least as constrained as REQ_MODE. */
10482 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10485 enum machine_mode set_mode
;
10487 set
= PATTERN (insn
);
10488 if (GET_CODE (set
) == PARALLEL
)
10489 set
= XVECEXP (set
, 0, 0);
10490 gcc_assert (GET_CODE (set
) == SET
);
10491 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10493 set_mode
= GET_MODE (SET_DEST (set
));
10497 if (req_mode
!= CCNOmode
10498 && (req_mode
!= CCmode
10499 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10503 if (req_mode
== CCGCmode
)
10507 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10511 if (req_mode
== CCZmode
)
10518 gcc_unreachable ();
10521 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10524 /* Generate insn patterns to do an integer compare of OPERANDS. */
10527 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10529 enum machine_mode cmpmode
;
10532 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10533 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10535 /* This is very simple, but making the interface the same as in the
10536 FP case makes the rest of the code easier. */
10537 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10538 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10540 /* Return the test that should be put into the flags user, i.e.
10541 the bcc, scc, or cmov instruction. */
10542 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10545 /* Figure out whether to use ordered or unordered fp comparisons.
10546 Return the appropriate mode to use. */
10549 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10551 /* ??? In order to make all comparisons reversible, we do all comparisons
10552 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10553 all forms trapping and nontrapping comparisons, we can make inequality
10554 comparisons trapping again, since it results in better code when using
10555 FCOM based compares. */
10556 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10560 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10562 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10563 return ix86_fp_compare_mode (code
);
10566 /* Only zero flag is needed. */
10567 case EQ
: /* ZF=0 */
10568 case NE
: /* ZF!=0 */
10570 /* Codes needing carry flag. */
10571 case GEU
: /* CF=0 */
10572 case GTU
: /* CF=0 & ZF=0 */
10573 case LTU
: /* CF=1 */
10574 case LEU
: /* CF=1 | ZF=1 */
10576 /* Codes possibly doable only with sign flag when
10577 comparing against zero. */
10578 case GE
: /* SF=OF or SF=0 */
10579 case LT
: /* SF<>OF or SF=1 */
10580 if (op1
== const0_rtx
)
10583 /* For other cases Carry flag is not required. */
10585 /* Codes doable only with sign flag when comparing
10586 against zero, but we miss jump instruction for it
10587 so we need to use relational tests against overflow
10588 that thus needs to be zero. */
10589 case GT
: /* ZF=0 & SF=OF */
10590 case LE
: /* ZF=1 | SF<>OF */
10591 if (op1
== const0_rtx
)
10595 /* strcmp pattern do (use flags) and combine may ask us for proper
10600 gcc_unreachable ();
10604 /* Return the fixed registers used for condition codes. */
10607 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10614 /* If two condition code modes are compatible, return a condition code
10615 mode which is compatible with both. Otherwise, return
10618 static enum machine_mode
10619 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10624 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10627 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10628 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10634 gcc_unreachable ();
10656 /* These are only compatible with themselves, which we already
10662 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10665 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10667 enum rtx_code swapped_code
= swap_condition (code
);
10668 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10669 || (ix86_fp_comparison_cost (swapped_code
)
10670 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10673 /* Swap, force into registers, or otherwise massage the two operands
10674 to a fp comparison. The operands are updated in place; the new
10675 comparison code is returned. */
10677 static enum rtx_code
10678 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10680 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10681 rtx op0
= *pop0
, op1
= *pop1
;
10682 enum machine_mode op_mode
= GET_MODE (op0
);
10683 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10685 /* All of the unordered compare instructions only work on registers.
10686 The same is true of the fcomi compare instructions. The XFmode
10687 compare instructions require registers except when comparing
10688 against zero or when converting operand 1 from fixed point to
10692 && (fpcmp_mode
== CCFPUmode
10693 || (op_mode
== XFmode
10694 && ! (standard_80387_constant_p (op0
) == 1
10695 || standard_80387_constant_p (op1
) == 1)
10696 && GET_CODE (op1
) != FLOAT
)
10697 || ix86_use_fcomi_compare (code
)))
10699 op0
= force_reg (op_mode
, op0
);
10700 op1
= force_reg (op_mode
, op1
);
10704 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10705 things around if they appear profitable, otherwise force op0
10706 into a register. */
10708 if (standard_80387_constant_p (op0
) == 0
10710 && ! (standard_80387_constant_p (op1
) == 0
10714 tmp
= op0
, op0
= op1
, op1
= tmp
;
10715 code
= swap_condition (code
);
10719 op0
= force_reg (op_mode
, op0
);
10721 if (CONSTANT_P (op1
))
10723 int tmp
= standard_80387_constant_p (op1
);
10725 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10729 op1
= force_reg (op_mode
, op1
);
10732 op1
= force_reg (op_mode
, op1
);
10736 /* Try to rearrange the comparison to make it cheaper. */
10737 if (ix86_fp_comparison_cost (code
)
10738 > ix86_fp_comparison_cost (swap_condition (code
))
10739 && (REG_P (op1
) || !no_new_pseudos
))
10742 tmp
= op0
, op0
= op1
, op1
= tmp
;
10743 code
= swap_condition (code
);
10745 op0
= force_reg (op_mode
, op0
);
10753 /* Convert comparison codes we use to represent FP comparison to integer
10754 code that will result in proper branch. Return UNKNOWN if no such code
10758 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10787 /* Split comparison code CODE into comparisons we can do using branch
10788 instructions. BYPASS_CODE is comparison code for branch that will
10789 branch around FIRST_CODE and SECOND_CODE. If some of branches
10790 is not required, set value to UNKNOWN.
10791 We never require more than two branches. */
10794 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10795 enum rtx_code
*first_code
,
10796 enum rtx_code
*second_code
)
10798 *first_code
= code
;
10799 *bypass_code
= UNKNOWN
;
10800 *second_code
= UNKNOWN
;
10802 /* The fcomi comparison sets flags as follows:
10812 case GT
: /* GTU - CF=0 & ZF=0 */
10813 case GE
: /* GEU - CF=0 */
10814 case ORDERED
: /* PF=0 */
10815 case UNORDERED
: /* PF=1 */
10816 case UNEQ
: /* EQ - ZF=1 */
10817 case UNLT
: /* LTU - CF=1 */
10818 case UNLE
: /* LEU - CF=1 | ZF=1 */
10819 case LTGT
: /* EQ - ZF=0 */
10821 case LT
: /* LTU - CF=1 - fails on unordered */
10822 *first_code
= UNLT
;
10823 *bypass_code
= UNORDERED
;
10825 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10826 *first_code
= UNLE
;
10827 *bypass_code
= UNORDERED
;
10829 case EQ
: /* EQ - ZF=1 - fails on unordered */
10830 *first_code
= UNEQ
;
10831 *bypass_code
= UNORDERED
;
10833 case NE
: /* NE - ZF=0 - fails on unordered */
10834 *first_code
= LTGT
;
10835 *second_code
= UNORDERED
;
10837 case UNGE
: /* GEU - CF=0 - fails on unordered */
10839 *second_code
= UNORDERED
;
10841 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10843 *second_code
= UNORDERED
;
10846 gcc_unreachable ();
10848 if (!TARGET_IEEE_FP
)
10850 *second_code
= UNKNOWN
;
10851 *bypass_code
= UNKNOWN
;
10855 /* Return cost of comparison done fcom + arithmetics operations on AX.
10856 All following functions do use number of instructions as a cost metrics.
10857 In future this should be tweaked to compute bytes for optimize_size and
10858 take into account performance of various instructions on various CPUs. */
10860 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10862 if (!TARGET_IEEE_FP
)
10864 /* The cost of code output by ix86_expand_fp_compare. */
10888 gcc_unreachable ();
10892 /* Return cost of comparison done using fcomi operation.
10893 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): extraction artifact -- the return-type line, braces, and
   original lines 10900-10901 are elided here.  The comment at 10898-10899
   refers to an elided guard that presumably returns a large cost when
   fcomi is unavailable -- TODO confirm against the full source.  */
10895 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10897 enum rtx_code bypass_code
, first_code
, second_code
;
10898 /* Return arbitrarily high cost when instruction is not supported - this
10899 prevents gcc from using it. */
/* Decompose CODE into the branch codes it needs; the count of extra
   jumps (bypass/second) determines the cost below.  */
10902 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
/* Base cost of 2 for the fcomi sequence, plus 1 when an extra bypass or
   second jump is required (the boolean contributes 0 or 1).  */
10903 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10906 /* Return cost of comparison done using sahf operation.
10907 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): extraction artifact -- the return-type line, braces, and
   original line 10915 (the consequent of the guard below, presumably a
   return of a large cost) are elided.  TODO confirm against full source.  */
10909 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10911 enum rtx_code bypass_code
, first_code
, second_code
;
10912 /* Return arbitrarily high cost when instruction is not preferred - this
10913 avoids gcc from using it. */
/* Only prefer fnstsw/sahf when the target tunes for it or when we are
   optimizing for size.  */
10914 if (!TARGET_USE_SAHF
&& !optimize_size
)
10916 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
/* Base cost of 3 for the fcom + fnstsw + sahf sequence, plus 1 when an
   extra bypass or second jump is required.  */
10917 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10920 /* Compute cost of the comparison done using any method.
10921 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): extraction artifact -- the return-type line, the
   declaration of `min', the consequents of the two ifs below (presumably
   `min = sahf_cost;' / `min = fcomi_cost;'), and the final return are all
   elided.  The visible logic takes the minimum of the arithmetic, sahf,
   and fcomi costs -- TODO confirm against the full source.  */
10923 ix86_fp_comparison_cost (enum rtx_code code
)
/* Start arithmetics_cost at a sentinel value; it is overwritten below.  */
10925 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10928 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10929 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
/* Seed the running minimum with the arithmetic-sequence cost.  */
10931 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10932 if (min
> sahf_cost
)
10934 if (min
> fcomi_cost
)
10939 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10942 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10943 rtx
*second_test
, rtx
*bypass_test
)
10945 enum machine_mode fpcmp_mode
, intcmp_mode
;
10947 int cost
= ix86_fp_comparison_cost (code
);
10948 enum rtx_code bypass_code
, first_code
, second_code
;
10950 fpcmp_mode
= ix86_fp_compare_mode (code
);
10951 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10954 *second_test
= NULL_RTX
;
10956 *bypass_test
= NULL_RTX
;
10958 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10960 /* Do fcomi/sahf based test when profitable. */
10961 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10962 && (second_code
== UNKNOWN
|| second_test
)
10963 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10967 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10968 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10974 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10975 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10977 scratch
= gen_reg_rtx (HImode
);
10978 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10979 emit_insn (gen_x86_sahf_1 (scratch
));
10982 /* The FP codes work out to act like unsigned. */
10983 intcmp_mode
= fpcmp_mode
;
10985 if (bypass_code
!= UNKNOWN
)
10986 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10987 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10989 if (second_code
!= UNKNOWN
)
10990 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10991 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10996 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10997 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10998 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11000 scratch
= gen_reg_rtx (HImode
);
11001 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11003 /* In the unordered case, we have to check C2 for NaN's, which
11004 doesn't happen to work out to anything nice combination-wise.
11005 So do some bit twiddling on the value we've got in AH to come
11006 up with an appropriate set of condition codes. */
11008 intcmp_mode
= CCNOmode
;
11013 if (code
== GT
|| !TARGET_IEEE_FP
)
11015 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11020 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11021 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11022 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11023 intcmp_mode
= CCmode
;
11029 if (code
== LT
&& TARGET_IEEE_FP
)
11031 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11032 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11033 intcmp_mode
= CCmode
;
11038 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11044 if (code
== GE
|| !TARGET_IEEE_FP
)
11046 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11051 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11052 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11059 if (code
== LE
&& TARGET_IEEE_FP
)
11061 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11062 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11063 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11064 intcmp_mode
= CCmode
;
11069 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11075 if (code
== EQ
&& TARGET_IEEE_FP
)
11077 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11078 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11079 intcmp_mode
= CCmode
;
11084 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11091 if (code
== NE
&& TARGET_IEEE_FP
)
11093 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11094 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11100 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11106 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11110 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11115 gcc_unreachable ();
11119 /* Return the test that should be put into the flags user, i.e.
11120 the bcc, scc, or cmov instruction. */
11121 return gen_rtx_fmt_ee (code
, VOIDmode
,
11122 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11127 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11130 op0
= ix86_compare_op0
;
11131 op1
= ix86_compare_op1
;
11134 *second_test
= NULL_RTX
;
11136 *bypass_test
= NULL_RTX
;
11138 if (ix86_compare_emitted
)
11140 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11141 ix86_compare_emitted
= NULL_RTX
;
11143 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11144 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11145 second_test
, bypass_test
);
11147 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11152 /* Return true if the CODE will result in nontrivial jump sequence. */
/* NOTE(review): extraction artifact -- the return-type line, braces, and
   original lines 11157-11158 (presumably an early-return guard) are
   elided.  TODO confirm against the full source.  */
11154 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11156 enum rtx_code bypass_code
, first_code
, second_code
;
/* Decompose CODE into the branch codes needed; a comparison is
   nontrivial exactly when it needs a bypass or a second branch.  */
11159 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11160 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11164 ix86_expand_branch (enum rtx_code code
, rtx label
)
11168 /* If we have emitted a compare insn, go straight to simple.
11169 ix86_expand_compare won't emit anything if ix86_compare_emitted
11171 if (ix86_compare_emitted
)
11174 switch (GET_MODE (ix86_compare_op0
))
11180 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11181 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11182 gen_rtx_LABEL_REF (VOIDmode
, label
),
11184 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11193 enum rtx_code bypass_code
, first_code
, second_code
;
11195 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11196 &ix86_compare_op1
);
11198 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11200 /* Check whether we will use the natural sequence with one jump. If
11201 so, we can expand jump early. Otherwise delay expansion by
11202 creating compound insn to not confuse optimizers. */
11203 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11206 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11207 gen_rtx_LABEL_REF (VOIDmode
, label
),
11208 pc_rtx
, NULL_RTX
, NULL_RTX
);
11212 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11213 ix86_compare_op0
, ix86_compare_op1
);
11214 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11215 gen_rtx_LABEL_REF (VOIDmode
, label
),
11217 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11219 use_fcomi
= ix86_use_fcomi_compare (code
);
11220 vec
= rtvec_alloc (3 + !use_fcomi
);
11221 RTVEC_ELT (vec
, 0) = tmp
;
11223 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11225 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11228 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11230 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11239 /* Expand DImode branch into multiple compare+branch. */
11241 rtx lo
[2], hi
[2], label2
;
11242 enum rtx_code code1
, code2
, code3
;
11243 enum machine_mode submode
;
11245 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11247 tmp
= ix86_compare_op0
;
11248 ix86_compare_op0
= ix86_compare_op1
;
11249 ix86_compare_op1
= tmp
;
11250 code
= swap_condition (code
);
11252 if (GET_MODE (ix86_compare_op0
) == DImode
)
11254 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11255 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11260 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11261 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11265 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11266 avoid two branches. This costs one extra insn, so disable when
11267 optimizing for size. */
11269 if ((code
== EQ
|| code
== NE
)
11271 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11276 if (hi
[1] != const0_rtx
)
11277 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11278 NULL_RTX
, 0, OPTAB_WIDEN
);
11281 if (lo
[1] != const0_rtx
)
11282 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11283 NULL_RTX
, 0, OPTAB_WIDEN
);
11285 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11286 NULL_RTX
, 0, OPTAB_WIDEN
);
11288 ix86_compare_op0
= tmp
;
11289 ix86_compare_op1
= const0_rtx
;
11290 ix86_expand_branch (code
, label
);
11294 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11295 op1 is a constant and the low word is zero, then we can just
11296 examine the high word. */
11298 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11301 case LT
: case LTU
: case GE
: case GEU
:
11302 ix86_compare_op0
= hi
[0];
11303 ix86_compare_op1
= hi
[1];
11304 ix86_expand_branch (code
, label
);
11310 /* Otherwise, we need two or three jumps. */
11312 label2
= gen_label_rtx ();
11315 code2
= swap_condition (code
);
11316 code3
= unsigned_condition (code
);
11320 case LT
: case GT
: case LTU
: case GTU
:
11323 case LE
: code1
= LT
; code2
= GT
; break;
11324 case GE
: code1
= GT
; code2
= LT
; break;
11325 case LEU
: code1
= LTU
; code2
= GTU
; break;
11326 case GEU
: code1
= GTU
; code2
= LTU
; break;
11328 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11329 case NE
: code2
= UNKNOWN
; break;
11332 gcc_unreachable ();
11337 * if (hi(a) < hi(b)) goto true;
11338 * if (hi(a) > hi(b)) goto false;
11339 * if (lo(a) < lo(b)) goto true;
11343 ix86_compare_op0
= hi
[0];
11344 ix86_compare_op1
= hi
[1];
11346 if (code1
!= UNKNOWN
)
11347 ix86_expand_branch (code1
, label
);
11348 if (code2
!= UNKNOWN
)
11349 ix86_expand_branch (code2
, label2
);
11351 ix86_compare_op0
= lo
[0];
11352 ix86_compare_op1
= lo
[1];
11353 ix86_expand_branch (code3
, label
);
11355 if (code2
!= UNKNOWN
)
11356 emit_label (label2
);
11361 gcc_unreachable ();
11365 /* Split branch based on floating point condition. */
11367 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11368 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11370 rtx second
, bypass
;
11371 rtx label
= NULL_RTX
;
11373 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11376 if (target2
!= pc_rtx
)
11379 code
= reverse_condition_maybe_unordered (code
);
11384 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11385 tmp
, &second
, &bypass
);
11387 /* Remove pushed operand from stack. */
11389 ix86_free_from_memory (GET_MODE (pushed
));
11391 if (split_branch_probability
>= 0)
11393 /* Distribute the probabilities across the jumps.
11394 Assume the BYPASS and SECOND to be always test
11396 probability
= split_branch_probability
;
11398 /* Value of 1 is low enough to make no need for probability
11399 to be updated. Later we may run some experiments and see
11400 if unordered values are more frequent in practice. */
11402 bypass_probability
= 1;
11404 second_probability
= 1;
11406 if (bypass
!= NULL_RTX
)
11408 label
= gen_label_rtx ();
11409 i
= emit_jump_insn (gen_rtx_SET
11411 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11413 gen_rtx_LABEL_REF (VOIDmode
,
11416 if (bypass_probability
>= 0)
11418 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11419 GEN_INT (bypass_probability
),
11422 i
= emit_jump_insn (gen_rtx_SET
11424 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11425 condition
, target1
, target2
)));
11426 if (probability
>= 0)
11428 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11429 GEN_INT (probability
),
11431 if (second
!= NULL_RTX
)
11433 i
= emit_jump_insn (gen_rtx_SET
11435 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11437 if (second_probability
>= 0)
11439 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11440 GEN_INT (second_probability
),
11443 if (label
!= NULL_RTX
)
11444 emit_label (label
);
11448 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11450 rtx ret
, tmp
, tmpreg
, equiv
;
11451 rtx second_test
, bypass_test
;
11453 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11454 return 0; /* FAIL */
11456 gcc_assert (GET_MODE (dest
) == QImode
);
11458 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11459 PUT_MODE (ret
, QImode
);
11464 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11465 if (bypass_test
|| second_test
)
11467 rtx test
= second_test
;
11469 rtx tmp2
= gen_reg_rtx (QImode
);
11472 gcc_assert (!second_test
);
11473 test
= bypass_test
;
11475 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11477 PUT_MODE (test
, QImode
);
11478 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11481 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11483 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11486 /* Attach a REG_EQUAL note describing the comparison result. */
11487 if (ix86_compare_op0
&& ix86_compare_op1
)
11489 equiv
= simplify_gen_relational (code
, QImode
,
11490 GET_MODE (ix86_compare_op0
),
11491 ix86_compare_op0
, ix86_compare_op1
);
11492 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11495 return 1; /* DONE */
11498 /* Expand comparison setting or clearing carry flag. Return true when
11499 successful and set pop for the operation. */
11501 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11503 enum machine_mode mode
=
11504 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11506 /* Do not handle DImode compares that go through special path. Also we can't
11507 deal with FP compares yet. This is possible to add. */
11508 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11510 if (FLOAT_MODE_P (mode
))
11512 rtx second_test
= NULL
, bypass_test
= NULL
;
11513 rtx compare_op
, compare_seq
;
11515 /* Shortcut: following common codes never translate into carry flag compares. */
11516 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11517 || code
== ORDERED
|| code
== UNORDERED
)
11520 /* These comparisons require zero flag; swap operands so they won't. */
11521 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11522 && !TARGET_IEEE_FP
)
11527 code
= swap_condition (code
);
11530 /* Try to expand the comparison and verify that we end up with carry flag
11531 based comparison. This is fails to be true only when we decide to expand
11532 comparison using arithmetic that is not too common scenario. */
11534 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11535 &second_test
, &bypass_test
);
11536 compare_seq
= get_insns ();
11539 if (second_test
|| bypass_test
)
11541 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11542 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11543 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11545 code
= GET_CODE (compare_op
);
11546 if (code
!= LTU
&& code
!= GEU
)
11548 emit_insn (compare_seq
);
11552 if (!INTEGRAL_MODE_P (mode
))
11560 /* Convert a==0 into (unsigned)a<1. */
11563 if (op1
!= const0_rtx
)
11566 code
= (code
== EQ
? LTU
: GEU
);
11569 /* Convert a>b into b<a or a>=b-1. */
11572 if (CONST_INT_P (op1
))
11574 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11575 /* Bail out on overflow. We still can swap operands but that
11576 would force loading of the constant into register. */
11577 if (op1
== const0_rtx
11578 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11580 code
= (code
== GTU
? GEU
: LTU
);
11587 code
= (code
== GTU
? LTU
: GEU
);
11591 /* Convert a>=0 into (unsigned)a<0x80000000. */
11594 if (mode
== DImode
|| op1
!= const0_rtx
)
11596 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11597 code
= (code
== LT
? GEU
: LTU
);
11601 if (mode
== DImode
|| op1
!= constm1_rtx
)
11603 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11604 code
= (code
== LE
? GEU
: LTU
);
11610 /* Swapping operands may cause constant to appear as first operand. */
11611 if (!nonimmediate_operand (op0
, VOIDmode
))
11613 if (no_new_pseudos
)
11615 op0
= force_reg (mode
, op0
);
11617 ix86_compare_op0
= op0
;
11618 ix86_compare_op1
= op1
;
11619 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11620 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11625 ix86_expand_int_movcc (rtx operands
[])
11627 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11628 rtx compare_seq
, compare_op
;
11629 rtx second_test
, bypass_test
;
11630 enum machine_mode mode
= GET_MODE (operands
[0]);
11631 bool sign_bit_compare_p
= false;;
11634 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11635 compare_seq
= get_insns ();
11638 compare_code
= GET_CODE (compare_op
);
11640 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11641 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11642 sign_bit_compare_p
= true;
11644 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11645 HImode insns, we'd be swallowed in word prefix ops. */
11647 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11648 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11649 && CONST_INT_P (operands
[2])
11650 && CONST_INT_P (operands
[3]))
11652 rtx out
= operands
[0];
11653 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11654 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11655 HOST_WIDE_INT diff
;
11658 /* Sign bit compares are better done using shifts than we do by using
11660 if (sign_bit_compare_p
11661 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11662 ix86_compare_op1
, &compare_op
))
11664 /* Detect overlap between destination and compare sources. */
11667 if (!sign_bit_compare_p
)
11669 bool fpcmp
= false;
11671 compare_code
= GET_CODE (compare_op
);
11673 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11674 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11677 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11680 /* To simplify rest of code, restrict to the GEU case. */
11681 if (compare_code
== LTU
)
11683 HOST_WIDE_INT tmp
= ct
;
11686 compare_code
= reverse_condition (compare_code
);
11687 code
= reverse_condition (code
);
11692 PUT_CODE (compare_op
,
11693 reverse_condition_maybe_unordered
11694 (GET_CODE (compare_op
)));
11696 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11700 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11701 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11702 tmp
= gen_reg_rtx (mode
);
11704 if (mode
== DImode
)
11705 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11707 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11711 if (code
== GT
|| code
== GE
)
11712 code
= reverse_condition (code
);
11715 HOST_WIDE_INT tmp
= ct
;
11720 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11721 ix86_compare_op1
, VOIDmode
, 0, -1);
11734 tmp
= expand_simple_binop (mode
, PLUS
,
11736 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11747 tmp
= expand_simple_binop (mode
, IOR
,
11749 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11751 else if (diff
== -1 && ct
)
11761 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11763 tmp
= expand_simple_binop (mode
, PLUS
,
11764 copy_rtx (tmp
), GEN_INT (cf
),
11765 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11773 * andl cf - ct, dest
11783 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11786 tmp
= expand_simple_binop (mode
, AND
,
11788 gen_int_mode (cf
- ct
, mode
),
11789 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11791 tmp
= expand_simple_binop (mode
, PLUS
,
11792 copy_rtx (tmp
), GEN_INT (ct
),
11793 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11796 if (!rtx_equal_p (tmp
, out
))
11797 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11799 return 1; /* DONE */
11805 tmp
= ct
, ct
= cf
, cf
= tmp
;
11807 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11809 /* We may be reversing unordered compare to normal compare, that
11810 is not valid in general (we may convert non-trapping condition
11811 to trapping one), however on i386 we currently emit all
11812 comparisons unordered. */
11813 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11814 code
= reverse_condition_maybe_unordered (code
);
11818 compare_code
= reverse_condition (compare_code
);
11819 code
= reverse_condition (code
);
11823 compare_code
= UNKNOWN
;
11824 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11825 && CONST_INT_P (ix86_compare_op1
))
11827 if (ix86_compare_op1
== const0_rtx
11828 && (code
== LT
|| code
== GE
))
11829 compare_code
= code
;
11830 else if (ix86_compare_op1
== constm1_rtx
)
11834 else if (code
== GT
)
11839 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11840 if (compare_code
!= UNKNOWN
11841 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11842 && (cf
== -1 || ct
== -1))
11844 /* If lea code below could be used, only optimize
11845 if it results in a 2 insn sequence. */
11847 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11848 || diff
== 3 || diff
== 5 || diff
== 9)
11849 || (compare_code
== LT
&& ct
== -1)
11850 || (compare_code
== GE
&& cf
== -1))
11853 * notl op1 (if necessary)
11861 code
= reverse_condition (code
);
11864 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11865 ix86_compare_op1
, VOIDmode
, 0, -1);
11867 out
= expand_simple_binop (mode
, IOR
,
11869 out
, 1, OPTAB_DIRECT
);
11870 if (out
!= operands
[0])
11871 emit_move_insn (operands
[0], out
);
11873 return 1; /* DONE */
11878 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11879 || diff
== 3 || diff
== 5 || diff
== 9)
11880 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11882 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11888 * lea cf(dest*(ct-cf)),dest
11892 * This also catches the degenerate setcc-only case.
11898 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11899 ix86_compare_op1
, VOIDmode
, 0, 1);
11902 /* On x86_64 the lea instruction operates on Pmode, so we need
11903 to get arithmetics done in proper mode to match. */
11905 tmp
= copy_rtx (out
);
11909 out1
= copy_rtx (out
);
11910 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11914 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11920 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11923 if (!rtx_equal_p (tmp
, out
))
11926 out
= force_operand (tmp
, copy_rtx (out
));
11928 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11930 if (!rtx_equal_p (out
, operands
[0]))
11931 emit_move_insn (operands
[0], copy_rtx (out
));
11933 return 1; /* DONE */
11937 * General case: Jumpful:
11938 * xorl dest,dest cmpl op1, op2
11939 * cmpl op1, op2 movl ct, dest
11940 * setcc dest jcc 1f
11941 * decl dest movl cf, dest
11942 * andl (cf-ct),dest 1:
11945 * Size 20. Size 14.
11947 * This is reasonably steep, but branch mispredict costs are
11948 * high on modern cpus, so consider failing only if optimizing
11952 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11953 && BRANCH_COST
>= 2)
11959 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11960 /* We may be reversing unordered compare to normal compare,
11961 that is not valid in general (we may convert non-trapping
11962 condition to trapping one), however on i386 we currently
11963 emit all comparisons unordered. */
11964 code
= reverse_condition_maybe_unordered (code
);
11967 code
= reverse_condition (code
);
11968 if (compare_code
!= UNKNOWN
)
11969 compare_code
= reverse_condition (compare_code
);
11973 if (compare_code
!= UNKNOWN
)
11975 /* notl op1 (if needed)
11980 For x < 0 (resp. x <= -1) there will be no notl,
11981 so if possible swap the constants to get rid of the
11983 True/false will be -1/0 while code below (store flag
11984 followed by decrement) is 0/-1, so the constants need
11985 to be exchanged once more. */
11987 if (compare_code
== GE
|| !cf
)
11989 code
= reverse_condition (code
);
11994 HOST_WIDE_INT tmp
= cf
;
11999 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12000 ix86_compare_op1
, VOIDmode
, 0, -1);
12004 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12005 ix86_compare_op1
, VOIDmode
, 0, 1);
12007 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12008 copy_rtx (out
), 1, OPTAB_DIRECT
);
12011 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12012 gen_int_mode (cf
- ct
, mode
),
12013 copy_rtx (out
), 1, OPTAB_DIRECT
);
12015 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12016 copy_rtx (out
), 1, OPTAB_DIRECT
);
12017 if (!rtx_equal_p (out
, operands
[0]))
12018 emit_move_insn (operands
[0], copy_rtx (out
));
12020 return 1; /* DONE */
12024 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12026 /* Try a few things more with specific constants and a variable. */
12029 rtx var
, orig_out
, out
, tmp
;
12031 if (BRANCH_COST
<= 2)
12032 return 0; /* FAIL */
12034 /* If one of the two operands is an interesting constant, load a
12035 constant with the above and mask it in with a logical operation. */
12037 if (CONST_INT_P (operands
[2]))
12040 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12041 operands
[3] = constm1_rtx
, op
= and_optab
;
12042 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12043 operands
[3] = const0_rtx
, op
= ior_optab
;
12045 return 0; /* FAIL */
12047 else if (CONST_INT_P (operands
[3]))
12050 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12051 operands
[2] = constm1_rtx
, op
= and_optab
;
12052 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12053 operands
[2] = const0_rtx
, op
= ior_optab
;
12055 return 0; /* FAIL */
12058 return 0; /* FAIL */
12060 orig_out
= operands
[0];
12061 tmp
= gen_reg_rtx (mode
);
12064 /* Recurse to get the constant loaded. */
12065 if (ix86_expand_int_movcc (operands
) == 0)
12066 return 0; /* FAIL */
12068 /* Mask in the interesting variable. */
12069 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12071 if (!rtx_equal_p (out
, orig_out
))
12072 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12074 return 1; /* DONE */
12078 * For comparison with above,
12088 if (! nonimmediate_operand (operands
[2], mode
))
12089 operands
[2] = force_reg (mode
, operands
[2]);
12090 if (! nonimmediate_operand (operands
[3], mode
))
12091 operands
[3] = force_reg (mode
, operands
[3]);
12093 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12095 rtx tmp
= gen_reg_rtx (mode
);
12096 emit_move_insn (tmp
, operands
[3]);
12099 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12101 rtx tmp
= gen_reg_rtx (mode
);
12102 emit_move_insn (tmp
, operands
[2]);
12106 if (! register_operand (operands
[2], VOIDmode
)
12108 || ! register_operand (operands
[3], VOIDmode
)))
12109 operands
[2] = force_reg (mode
, operands
[2]);
12112 && ! register_operand (operands
[3], VOIDmode
))
12113 operands
[3] = force_reg (mode
, operands
[3]);
12115 emit_insn (compare_seq
);
12116 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12117 gen_rtx_IF_THEN_ELSE (mode
,
12118 compare_op
, operands
[2],
12121 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12122 gen_rtx_IF_THEN_ELSE (mode
,
12124 copy_rtx (operands
[3]),
12125 copy_rtx (operands
[0]))));
12127 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12128 gen_rtx_IF_THEN_ELSE (mode
,
12130 copy_rtx (operands
[2]),
12131 copy_rtx (operands
[0]))));
12133 return 1; /* DONE */
12136 /* Swap, force into registers, or otherwise massage the two operands
12137 to an sse comparison with a mask result. Thus we differ a bit from
12138 ix86_prepare_fp_compare_args which expects to produce a flags result.
12140 The DEST operand exists to help determine whether to commute commutative
12141 operators. The POP0/POP1 operands are updated in place. The new
12142 comparison code is returned, or UNKNOWN if not implementable. */
12144 static enum rtx_code
12145 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12146 rtx
*pop0
, rtx
*pop1
)
12154 /* We have no LTGT as an operator. We could implement it with
12155 NE & ORDERED, but this requires an extra temporary. It's
12156 not clear that it's worth it. */
12163 /* These are supported directly. */
12170 /* For commutative operators, try to canonicalize the destination
12171 operand to be first in the comparison - this helps reload to
12172 avoid extra moves. */
12173 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12181 /* These are not supported directly. Swap the comparison operands
12182 to transform into something that is supported. */
12186 code
= swap_condition (code
);
12190 gcc_unreachable ();
12196 /* Detect conditional moves that exactly match min/max operational
12197 semantics. Note that this is IEEE safe, as long as we don't
12198 interchange the operands.
12200 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12201 and TRUE if the operation is successful and instructions are emitted. */
12204 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12205 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12207 enum machine_mode mode
;
12213 else if (code
== UNGE
)
12216 if_true
= if_false
;
12222 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12224 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12229 mode
= GET_MODE (dest
);
12231 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12232 but MODE may be a vector mode and thus not appropriate. */
12233 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12235 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12238 if_true
= force_reg (mode
, if_true
);
12239 v
= gen_rtvec (2, if_true
, if_false
);
12240 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12244 code
= is_min
? SMIN
: SMAX
;
12245 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12248 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12252 /* Expand an sse vector comparison. Return the register with the result. */
12255 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12256 rtx op_true
, rtx op_false
)
12258 enum machine_mode mode
= GET_MODE (dest
);
12261 cmp_op0
= force_reg (mode
, cmp_op0
);
12262 if (!nonimmediate_operand (cmp_op1
, mode
))
12263 cmp_op1
= force_reg (mode
, cmp_op1
);
12266 || reg_overlap_mentioned_p (dest
, op_true
)
12267 || reg_overlap_mentioned_p (dest
, op_false
))
12268 dest
= gen_reg_rtx (mode
);
12270 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12271 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12276 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12277 operations. This is used for both scalar and vector conditional moves. */
12280 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12282 enum machine_mode mode
= GET_MODE (dest
);
12285 if (op_false
== CONST0_RTX (mode
))
12287 op_true
= force_reg (mode
, op_true
);
12288 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12289 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12291 else if (op_true
== CONST0_RTX (mode
))
12293 op_false
= force_reg (mode
, op_false
);
12294 x
= gen_rtx_NOT (mode
, cmp
);
12295 x
= gen_rtx_AND (mode
, x
, op_false
);
12296 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12300 op_true
= force_reg (mode
, op_true
);
12301 op_false
= force_reg (mode
, op_false
);
12303 t2
= gen_reg_rtx (mode
);
12305 t3
= gen_reg_rtx (mode
);
12309 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12310 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12312 x
= gen_rtx_NOT (mode
, cmp
);
12313 x
= gen_rtx_AND (mode
, x
, op_false
);
12314 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12316 x
= gen_rtx_IOR (mode
, t3
, t2
);
12317 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12321 /* Expand a floating-point conditional move. Return true if successful. */
12324 ix86_expand_fp_movcc (rtx operands
[])
12326 enum machine_mode mode
= GET_MODE (operands
[0]);
12327 enum rtx_code code
= GET_CODE (operands
[1]);
12328 rtx tmp
, compare_op
, second_test
, bypass_test
;
12330 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12332 enum machine_mode cmode
;
12334 /* Since we've no cmove for sse registers, don't force bad register
12335 allocation just to gain access to it. Deny movcc when the
12336 comparison mode doesn't match the move mode. */
12337 cmode
= GET_MODE (ix86_compare_op0
);
12338 if (cmode
== VOIDmode
)
12339 cmode
= GET_MODE (ix86_compare_op1
);
12343 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12345 &ix86_compare_op1
);
12346 if (code
== UNKNOWN
)
12349 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12350 ix86_compare_op1
, operands
[2],
12354 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12355 ix86_compare_op1
, operands
[2], operands
[3]);
12356 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12360 /* The floating point conditional move instructions don't directly
12361 support conditions resulting from a signed integer comparison. */
12363 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12365 /* The floating point conditional move instructions don't directly
12366 support signed integer comparisons. */
12368 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12370 gcc_assert (!second_test
&& !bypass_test
);
12371 tmp
= gen_reg_rtx (QImode
);
12372 ix86_expand_setcc (code
, tmp
);
12374 ix86_compare_op0
= tmp
;
12375 ix86_compare_op1
= const0_rtx
;
12376 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12378 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12380 tmp
= gen_reg_rtx (mode
);
12381 emit_move_insn (tmp
, operands
[3]);
12384 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12386 tmp
= gen_reg_rtx (mode
);
12387 emit_move_insn (tmp
, operands
[2]);
12391 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12392 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12393 operands
[2], operands
[3])));
12395 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12396 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12397 operands
[3], operands
[0])));
12399 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12400 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12401 operands
[2], operands
[0])));
12406 /* Expand a floating-point vector conditional move; a vcond operation
12407 rather than a movcc operation. */
12410 ix86_expand_fp_vcond (rtx operands
[])
12412 enum rtx_code code
= GET_CODE (operands
[3]);
12415 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12416 &operands
[4], &operands
[5]);
12417 if (code
== UNKNOWN
)
12420 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12421 operands
[5], operands
[1], operands
[2]))
12424 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12425 operands
[1], operands
[2]);
12426 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12430 /* Expand a signed integral vector conditional move. */
12433 ix86_expand_int_vcond (rtx operands
[])
12435 enum machine_mode mode
= GET_MODE (operands
[0]);
12436 enum rtx_code code
= GET_CODE (operands
[3]);
12437 bool negate
= false;
12440 cop0
= operands
[4];
12441 cop1
= operands
[5];
12443 /* Canonicalize the comparison to EQ, GT, GTU. */
12454 code
= reverse_condition (code
);
12460 code
= reverse_condition (code
);
12466 code
= swap_condition (code
);
12467 x
= cop0
, cop0
= cop1
, cop1
= x
;
12471 gcc_unreachable ();
12474 /* Unsigned parallel compare is not supported by the hardware. Play some
12475 tricks to turn this into a signed comparison against 0. */
12478 cop0
= force_reg (mode
, cop0
);
12486 /* Perform a parallel modulo subtraction. */
12487 t1
= gen_reg_rtx (mode
);
12488 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12490 /* Extract the original sign bit of op0. */
12491 mask
= GEN_INT (-0x80000000);
12492 mask
= gen_rtx_CONST_VECTOR (mode
,
12493 gen_rtvec (4, mask
, mask
, mask
, mask
));
12494 mask
= force_reg (mode
, mask
);
12495 t2
= gen_reg_rtx (mode
);
12496 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12498 /* XOR it back into the result of the subtraction. This results
12499 in the sign bit set iff we saw unsigned underflow. */
12500 x
= gen_reg_rtx (mode
);
12501 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12509 /* Perform a parallel unsigned saturating subtraction. */
12510 x
= gen_reg_rtx (mode
);
12511 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12512 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12519 gcc_unreachable ();
12523 cop1
= CONST0_RTX (mode
);
12526 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12527 operands
[1+negate
], operands
[2-negate
]);
12529 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12530 operands
[2-negate
]);
12534 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12535 true if we should do zero extension, else sign extension. HIGH_P is
12536 true if we want the N/2 high elements, else the low elements. */
12539 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12541 enum machine_mode imode
= GET_MODE (operands
[1]);
12542 rtx (*unpack
)(rtx
, rtx
, rtx
);
12549 unpack
= gen_vec_interleave_highv16qi
;
12551 unpack
= gen_vec_interleave_lowv16qi
;
12555 unpack
= gen_vec_interleave_highv8hi
;
12557 unpack
= gen_vec_interleave_lowv8hi
;
12561 unpack
= gen_vec_interleave_highv4si
;
12563 unpack
= gen_vec_interleave_lowv4si
;
12566 gcc_unreachable ();
12569 dest
= gen_lowpart (imode
, operands
[0]);
12572 se
= force_reg (imode
, CONST0_RTX (imode
));
12574 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12575 operands
[1], pc_rtx
, pc_rtx
);
12577 emit_insn (unpack (dest
, operands
[1], se
));
12580 /* Expand conditional increment or decrement using adb/sbb instructions.
12581 The default case using setcc followed by the conditional move can be
12582 done by generic code. */
12584 ix86_expand_int_addcc (rtx operands
[])
12586 enum rtx_code code
= GET_CODE (operands
[1]);
12588 rtx val
= const0_rtx
;
12589 bool fpcmp
= false;
12590 enum machine_mode mode
= GET_MODE (operands
[0]);
12592 if (operands
[3] != const1_rtx
12593 && operands
[3] != constm1_rtx
)
12595 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12596 ix86_compare_op1
, &compare_op
))
12598 code
= GET_CODE (compare_op
);
12600 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12601 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12604 code
= ix86_fp_compare_code_to_integer (code
);
12611 PUT_CODE (compare_op
,
12612 reverse_condition_maybe_unordered
12613 (GET_CODE (compare_op
)));
12615 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12617 PUT_MODE (compare_op
, mode
);
12619 /* Construct either adc or sbb insn. */
12620 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12622 switch (GET_MODE (operands
[0]))
12625 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12628 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12631 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12634 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12637 gcc_unreachable ();
12642 switch (GET_MODE (operands
[0]))
12645 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12648 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12651 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12654 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12657 gcc_unreachable ();
12660 return 1; /* DONE */
12664 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12665 works for floating pointer parameters and nonoffsetable memories.
12666 For pushes, it returns just stack offsets; the values will be saved
12667 in the right order. Maximally three parts are generated. */
12670 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12675 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12677 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12679 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12680 gcc_assert (size
>= 2 && size
<= 3);
12682 /* Optimize constant pool reference to immediates. This is used by fp
12683 moves, that force all constants to memory to allow combining. */
12684 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12686 rtx tmp
= maybe_get_pool_constant (operand
);
12691 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12693 /* The only non-offsetable memories we handle are pushes. */
12694 int ok
= push_operand (operand
, VOIDmode
);
12698 operand
= copy_rtx (operand
);
12699 PUT_MODE (operand
, Pmode
);
12700 parts
[0] = parts
[1] = parts
[2] = operand
;
12704 if (GET_CODE (operand
) == CONST_VECTOR
)
12706 enum machine_mode imode
= int_mode_for_mode (mode
);
12707 /* Caution: if we looked through a constant pool memory above,
12708 the operand may actually have a different mode now. That's
12709 ok, since we want to pun this all the way back to an integer. */
12710 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12711 gcc_assert (operand
!= NULL
);
12717 if (mode
== DImode
)
12718 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12721 if (REG_P (operand
))
12723 gcc_assert (reload_completed
);
12724 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12725 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12727 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12729 else if (offsettable_memref_p (operand
))
12731 operand
= adjust_address (operand
, SImode
, 0);
12732 parts
[0] = operand
;
12733 parts
[1] = adjust_address (operand
, SImode
, 4);
12735 parts
[2] = adjust_address (operand
, SImode
, 8);
12737 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12742 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12746 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12747 parts
[2] = gen_int_mode (l
[2], SImode
);
12750 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12753 gcc_unreachable ();
12755 parts
[1] = gen_int_mode (l
[1], SImode
);
12756 parts
[0] = gen_int_mode (l
[0], SImode
);
12759 gcc_unreachable ();
12764 if (mode
== TImode
)
12765 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12766 if (mode
== XFmode
|| mode
== TFmode
)
12768 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12769 if (REG_P (operand
))
12771 gcc_assert (reload_completed
);
12772 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12773 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12775 else if (offsettable_memref_p (operand
))
12777 operand
= adjust_address (operand
, DImode
, 0);
12778 parts
[0] = operand
;
12779 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12781 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12786 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12787 real_to_target (l
, &r
, mode
);
12789 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12790 if (HOST_BITS_PER_WIDE_INT
>= 64)
12793 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12794 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12797 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12799 if (upper_mode
== SImode
)
12800 parts
[1] = gen_int_mode (l
[2], SImode
);
12801 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12804 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12805 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12808 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12811 gcc_unreachable ();
12818 /* Emit insns to perform a move or push of DI, DF, and XF values.
12819 Return false when normal moves are needed; true when all required
12820 insns have been emitted. Operands 2-4 contain the input values
12821 int the correct order; operands 5-7 contain the output values. */
12824 ix86_split_long_move (rtx operands
[])
12829 int collisions
= 0;
12830 enum machine_mode mode
= GET_MODE (operands
[0]);
12832 /* The DFmode expanders may ask us to move double.
12833 For 64bit target this is single move. By hiding the fact
12834 here we simplify i386.md splitters. */
12835 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12837 /* Optimize constant pool reference to immediates. This is used by
12838 fp moves, that force all constants to memory to allow combining. */
12840 if (MEM_P (operands
[1])
12841 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12842 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12843 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12844 if (push_operand (operands
[0], VOIDmode
))
12846 operands
[0] = copy_rtx (operands
[0]);
12847 PUT_MODE (operands
[0], Pmode
);
12850 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12851 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12852 emit_move_insn (operands
[0], operands
[1]);
12856 /* The only non-offsettable memory we handle is push. */
12857 if (push_operand (operands
[0], VOIDmode
))
12860 gcc_assert (!MEM_P (operands
[0])
12861 || offsettable_memref_p (operands
[0]));
12863 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12864 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12866 /* When emitting push, take care for source operands on the stack. */
12867 if (push
&& MEM_P (operands
[1])
12868 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12871 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12872 XEXP (part
[1][2], 0));
12873 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12874 XEXP (part
[1][1], 0));
12877 /* We need to do copy in the right order in case an address register
12878 of the source overlaps the destination. */
12879 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12881 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12883 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12886 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12889 /* Collision in the middle part can be handled by reordering. */
12890 if (collisions
== 1 && nparts
== 3
12891 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12894 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12895 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12898 /* If there are more collisions, we can't handle it by reordering.
12899 Do an lea to the last part and use only one colliding move. */
12900 else if (collisions
> 1)
12906 base
= part
[0][nparts
- 1];
12908 /* Handle the case when the last part isn't valid for lea.
12909 Happens in 64-bit mode storing the 12-byte XFmode. */
12910 if (GET_MODE (base
) != Pmode
)
12911 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12913 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12914 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12915 part
[1][1] = replace_equiv_address (part
[1][1],
12916 plus_constant (base
, UNITS_PER_WORD
));
12918 part
[1][2] = replace_equiv_address (part
[1][2],
12919 plus_constant (base
, 8));
12929 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12930 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12931 emit_move_insn (part
[0][2], part
[1][2]);
12936 /* In 64bit mode we don't have 32bit push available. In case this is
12937 register, it is OK - we will just use larger counterpart. We also
12938 retype memory - these comes from attempt to avoid REX prefix on
12939 moving of second half of TFmode value. */
12940 if (GET_MODE (part
[1][1]) == SImode
)
12942 switch (GET_CODE (part
[1][1]))
12945 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12949 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12953 gcc_unreachable ();
12956 if (GET_MODE (part
[1][0]) == SImode
)
12957 part
[1][0] = part
[1][1];
12960 emit_move_insn (part
[0][1], part
[1][1]);
12961 emit_move_insn (part
[0][0], part
[1][0]);
12965 /* Choose correct order to not overwrite the source before it is copied. */
12966 if ((REG_P (part
[0][0])
12967 && REG_P (part
[1][1])
12968 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12970 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12972 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12976 operands
[2] = part
[0][2];
12977 operands
[3] = part
[0][1];
12978 operands
[4] = part
[0][0];
12979 operands
[5] = part
[1][2];
12980 operands
[6] = part
[1][1];
12981 operands
[7] = part
[1][0];
12985 operands
[2] = part
[0][1];
12986 operands
[3] = part
[0][0];
12987 operands
[5] = part
[1][1];
12988 operands
[6] = part
[1][0];
12995 operands
[2] = part
[0][0];
12996 operands
[3] = part
[0][1];
12997 operands
[4] = part
[0][2];
12998 operands
[5] = part
[1][0];
12999 operands
[6] = part
[1][1];
13000 operands
[7] = part
[1][2];
13004 operands
[2] = part
[0][0];
13005 operands
[3] = part
[0][1];
13006 operands
[5] = part
[1][0];
13007 operands
[6] = part
[1][1];
13011 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13014 if (CONST_INT_P (operands
[5])
13015 && operands
[5] != const0_rtx
13016 && REG_P (operands
[2]))
13018 if (CONST_INT_P (operands
[6])
13019 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13020 operands
[6] = operands
[2];
13023 && CONST_INT_P (operands
[7])
13024 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13025 operands
[7] = operands
[2];
13029 && CONST_INT_P (operands
[6])
13030 && operands
[6] != const0_rtx
13031 && REG_P (operands
[3])
13032 && CONST_INT_P (operands
[7])
13033 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13034 operands
[7] = operands
[3];
13037 emit_move_insn (operands
[2], operands
[5]);
13038 emit_move_insn (operands
[3], operands
[6]);
13040 emit_move_insn (operands
[4], operands
[7]);
13045 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13046 left shift by a constant, either using a single shift or
13047 a sequence of add instructions. */
13050 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13054 emit_insn ((mode
== DImode
13056 : gen_adddi3
) (operand
, operand
, operand
));
13058 else if (!optimize_size
13059 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13062 for (i
=0; i
<count
; i
++)
13064 emit_insn ((mode
== DImode
13066 : gen_adddi3
) (operand
, operand
, operand
));
13070 emit_insn ((mode
== DImode
13072 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13076 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13078 rtx low
[2], high
[2];
13080 const int single_width
= mode
== DImode
? 32 : 64;
13082 if (CONST_INT_P (operands
[2]))
13084 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13085 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13087 if (count
>= single_width
)
13089 emit_move_insn (high
[0], low
[1]);
13090 emit_move_insn (low
[0], const0_rtx
);
13092 if (count
> single_width
)
13093 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13097 if (!rtx_equal_p (operands
[0], operands
[1]))
13098 emit_move_insn (operands
[0], operands
[1]);
13099 emit_insn ((mode
== DImode
13101 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13102 ix86_expand_ashl_const (low
[0], count
, mode
);
13107 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13109 if (operands
[1] == const1_rtx
)
13111 /* Assuming we've chosen a QImode capable registers, then 1 << N
13112 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13113 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13115 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13117 ix86_expand_clear (low
[0]);
13118 ix86_expand_clear (high
[0]);
13119 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13121 d
= gen_lowpart (QImode
, low
[0]);
13122 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13123 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13124 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13126 d
= gen_lowpart (QImode
, high
[0]);
13127 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13128 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13129 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13132 /* Otherwise, we can get the same results by manually performing
13133 a bit extract operation on bit 5/6, and then performing the two
13134 shifts. The two methods of getting 0/1 into low/high are exactly
13135 the same size. Avoiding the shift in the bit extract case helps
13136 pentium4 a bit; no one else seems to care much either way. */
13141 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13142 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13144 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13145 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13147 emit_insn ((mode
== DImode
13149 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13150 emit_insn ((mode
== DImode
13152 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13153 emit_move_insn (low
[0], high
[0]);
13154 emit_insn ((mode
== DImode
13156 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13159 emit_insn ((mode
== DImode
13161 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13162 emit_insn ((mode
== DImode
13164 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13168 if (operands
[1] == constm1_rtx
)
13170 /* For -1 << N, we can avoid the shld instruction, because we
13171 know that we're shifting 0...31/63 ones into a -1. */
13172 emit_move_insn (low
[0], constm1_rtx
);
13174 emit_move_insn (high
[0], low
[0]);
13176 emit_move_insn (high
[0], constm1_rtx
);
13180 if (!rtx_equal_p (operands
[0], operands
[1]))
13181 emit_move_insn (operands
[0], operands
[1]);
13183 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13184 emit_insn ((mode
== DImode
13186 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13189 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13191 if (TARGET_CMOVE
&& scratch
)
13193 ix86_expand_clear (scratch
);
13194 emit_insn ((mode
== DImode
13195 ? gen_x86_shift_adj_1
13196 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13199 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13203 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13205 rtx low
[2], high
[2];
13207 const int single_width
= mode
== DImode
? 32 : 64;
13209 if (CONST_INT_P (operands
[2]))
13211 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13212 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13214 if (count
== single_width
* 2 - 1)
13216 emit_move_insn (high
[0], high
[1]);
13217 emit_insn ((mode
== DImode
13219 : gen_ashrdi3
) (high
[0], high
[0],
13220 GEN_INT (single_width
- 1)));
13221 emit_move_insn (low
[0], high
[0]);
13224 else if (count
>= single_width
)
13226 emit_move_insn (low
[0], high
[1]);
13227 emit_move_insn (high
[0], low
[0]);
13228 emit_insn ((mode
== DImode
13230 : gen_ashrdi3
) (high
[0], high
[0],
13231 GEN_INT (single_width
- 1)));
13232 if (count
> single_width
)
13233 emit_insn ((mode
== DImode
13235 : gen_ashrdi3
) (low
[0], low
[0],
13236 GEN_INT (count
- single_width
)));
13240 if (!rtx_equal_p (operands
[0], operands
[1]))
13241 emit_move_insn (operands
[0], operands
[1]);
13242 emit_insn ((mode
== DImode
13244 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13245 emit_insn ((mode
== DImode
13247 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13252 if (!rtx_equal_p (operands
[0], operands
[1]))
13253 emit_move_insn (operands
[0], operands
[1]);
13255 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13257 emit_insn ((mode
== DImode
13259 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13260 emit_insn ((mode
== DImode
13262 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13264 if (TARGET_CMOVE
&& scratch
)
13266 emit_move_insn (scratch
, high
[0]);
13267 emit_insn ((mode
== DImode
13269 : gen_ashrdi3
) (scratch
, scratch
,
13270 GEN_INT (single_width
- 1)));
13271 emit_insn ((mode
== DImode
13272 ? gen_x86_shift_adj_1
13273 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13277 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13282 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13284 rtx low
[2], high
[2];
13286 const int single_width
= mode
== DImode
? 32 : 64;
13288 if (CONST_INT_P (operands
[2]))
13290 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13291 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13293 if (count
>= single_width
)
13295 emit_move_insn (low
[0], high
[1]);
13296 ix86_expand_clear (high
[0]);
13298 if (count
> single_width
)
13299 emit_insn ((mode
== DImode
13301 : gen_lshrdi3
) (low
[0], low
[0],
13302 GEN_INT (count
- single_width
)));
13306 if (!rtx_equal_p (operands
[0], operands
[1]))
13307 emit_move_insn (operands
[0], operands
[1]);
13308 emit_insn ((mode
== DImode
13310 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13311 emit_insn ((mode
== DImode
13313 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13318 if (!rtx_equal_p (operands
[0], operands
[1]))
13319 emit_move_insn (operands
[0], operands
[1]);
13321 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13323 emit_insn ((mode
== DImode
13325 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13326 emit_insn ((mode
== DImode
13328 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13330 /* Heh. By reversing the arguments, we can reuse this pattern. */
13331 if (TARGET_CMOVE
&& scratch
)
13333 ix86_expand_clear (scratch
);
13334 emit_insn ((mode
== DImode
13335 ? gen_x86_shift_adj_1
13336 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13340 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13344 /* Predict just emitted jump instruction to be taken with probability PROB. */
13346 predict_jump (int prob
)
13348 rtx insn
= get_last_insn ();
13349 gcc_assert (JUMP_P (insn
));
13351 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13356 /* Helper function for the string operations below. Dest VARIABLE whether
13357 it is aligned to VALUE bytes. If true, jump to the label. */
13359 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13361 rtx label
= gen_label_rtx ();
13362 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13363 if (GET_MODE (variable
) == DImode
)
13364 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13366 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13367 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13370 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13372 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13376 /* Adjust COUNTER by the VALUE. */
13378 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13380 if (GET_MODE (countreg
) == DImode
)
13381 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13383 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13386 /* Zero extend possibly SImode EXP to Pmode register. */
13388 ix86_zero_extend_to_Pmode (rtx exp
)
13391 if (GET_MODE (exp
) == VOIDmode
)
13392 return force_reg (Pmode
, exp
);
13393 if (GET_MODE (exp
) == Pmode
)
13394 return copy_to_mode_reg (Pmode
, exp
);
13395 r
= gen_reg_rtx (Pmode
);
13396 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13400 /* Divide COUNTREG by SCALE. */
13402 scale_counter (rtx countreg
, int scale
)
13405 rtx piece_size_mask
;
13409 if (CONST_INT_P (countreg
))
13410 return GEN_INT (INTVAL (countreg
) / scale
);
13411 gcc_assert (REG_P (countreg
));
13413 piece_size_mask
= GEN_INT (scale
- 1);
13414 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13415 GEN_INT (exact_log2 (scale
)),
13416 NULL
, 1, OPTAB_DIRECT
);
13420 /* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
13421 for constant loop counts. */
13423 static enum machine_mode
13424 counter_mode (rtx count_exp
)
13426 if (GET_MODE (count_exp
) != VOIDmode
)
13427 return GET_MODE (count_exp
);
13428 if (GET_CODE (count_exp
) != CONST_INT
)
13430 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13435 /* When SRCPTR is non-NULL, output simple loop to move memory
13436 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13437 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13438 equivalent loop to set memory by VALUE (supposed to be in MODE).
13440 The size is rounded down to whole number of chunk size moved at once.
13441 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13445 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13446 rtx destptr
, rtx srcptr
, rtx value
,
13447 rtx count
, enum machine_mode mode
, int unroll
,
13450 rtx out_label
, top_label
, iter
, tmp
;
13451 enum machine_mode iter_mode
= counter_mode (count
);
13452 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13453 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13459 top_label
= gen_label_rtx ();
13460 out_label
= gen_label_rtx ();
13461 iter
= gen_reg_rtx (iter_mode
);
13463 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13464 NULL
, 1, OPTAB_DIRECT
);
13465 /* Those two should combine. */
13466 if (piece_size
== const1_rtx
)
13468 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13470 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13472 emit_move_insn (iter
, const0_rtx
);
13474 emit_label (top_label
);
13476 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13477 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13478 destmem
= change_address (destmem
, mode
, x_addr
);
13482 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13483 srcmem
= change_address (srcmem
, mode
, y_addr
);
13485 /* When unrolling for chips that reorder memory reads and writes,
13486 we can save registers by using single temporary.
13487 Also using 4 temporaries is overkill in 32bit mode. */
13488 if (!TARGET_64BIT
&& 0)
13490 for (i
= 0; i
< unroll
; i
++)
13495 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13497 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13499 emit_move_insn (destmem
, srcmem
);
13505 gcc_assert (unroll
<= 4);
13506 for (i
= 0; i
< unroll
; i
++)
13508 tmpreg
[i
] = gen_reg_rtx (mode
);
13512 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13514 emit_move_insn (tmpreg
[i
], srcmem
);
13516 for (i
= 0; i
< unroll
; i
++)
13521 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13523 emit_move_insn (destmem
, tmpreg
[i
]);
13528 for (i
= 0; i
< unroll
; i
++)
13532 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13533 emit_move_insn (destmem
, value
);
13536 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13537 true, OPTAB_LIB_WIDEN
);
13539 emit_move_insn (iter
, tmp
);
13541 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13543 if (expected_size
!= -1)
13545 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13546 if (expected_size
== 0)
13548 else if (expected_size
> REG_BR_PROB_BASE
)
13549 predict_jump (REG_BR_PROB_BASE
- 1);
13551 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13554 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13555 iter
= ix86_zero_extend_to_Pmode (iter
);
13556 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13557 true, OPTAB_LIB_WIDEN
);
13558 if (tmp
!= destptr
)
13559 emit_move_insn (destptr
, tmp
);
13562 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13563 true, OPTAB_LIB_WIDEN
);
13565 emit_move_insn (srcptr
, tmp
);
13567 emit_label (out_label
);
13570 /* Output "rep; mov" instruction.
13571 Arguments have same meaning as for previous function */
13573 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13574 rtx destptr
, rtx srcptr
,
13576 enum machine_mode mode
)
13582 /* If the size is known, it is shorter to use rep movs. */
13583 if (mode
== QImode
&& CONST_INT_P (count
)
13584 && !(INTVAL (count
) & 3))
13587 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13588 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13589 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13590 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13591 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13592 if (mode
!= QImode
)
13594 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13595 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13596 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13597 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13598 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13599 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13603 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13604 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13606 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13610 /* Output "rep; stos" instruction.
13611 Arguments have same meaning as for previous function */
13613 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13615 enum machine_mode mode
)
13620 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13621 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13622 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13623 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13624 if (mode
!= QImode
)
13626 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13627 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13628 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13631 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13632 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13636 emit_strmov (rtx destmem
, rtx srcmem
,
13637 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13639 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13640 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13641 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13644 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13646 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13647 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13650 if (CONST_INT_P (count
))
13652 HOST_WIDE_INT countval
= INTVAL (count
);
13655 if ((countval
& 0x10) && max_size
> 16)
13659 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13660 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13663 gcc_unreachable ();
13666 if ((countval
& 0x08) && max_size
> 8)
13669 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13672 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13673 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13677 if ((countval
& 0x04) && max_size
> 4)
13679 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13682 if ((countval
& 0x02) && max_size
> 2)
13684 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13687 if ((countval
& 0x01) && max_size
> 1)
13689 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13696 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13697 count
, 1, OPTAB_DIRECT
);
13698 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13699 count
, QImode
, 1, 4);
13703 /* When there are stringops, we can cheaply increase dest and src pointers.
13704 Otherwise we save code size by maintaining offset (zero is readily
13705 available from preceding rep operation) and using x86 addressing modes.
13707 if (TARGET_SINGLE_STRINGOP
)
13711 rtx label
= ix86_expand_aligntest (count
, 4, true);
13712 src
= change_address (srcmem
, SImode
, srcptr
);
13713 dest
= change_address (destmem
, SImode
, destptr
);
13714 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13715 emit_label (label
);
13716 LABEL_NUSES (label
) = 1;
13720 rtx label
= ix86_expand_aligntest (count
, 2, true);
13721 src
= change_address (srcmem
, HImode
, srcptr
);
13722 dest
= change_address (destmem
, HImode
, destptr
);
13723 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13724 emit_label (label
);
13725 LABEL_NUSES (label
) = 1;
13729 rtx label
= ix86_expand_aligntest (count
, 1, true);
13730 src
= change_address (srcmem
, QImode
, srcptr
);
13731 dest
= change_address (destmem
, QImode
, destptr
);
13732 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13733 emit_label (label
);
13734 LABEL_NUSES (label
) = 1;
13739 rtx offset
= force_reg (Pmode
, const0_rtx
);
13744 rtx label
= ix86_expand_aligntest (count
, 4, true);
13745 src
= change_address (srcmem
, SImode
, srcptr
);
13746 dest
= change_address (destmem
, SImode
, destptr
);
13747 emit_move_insn (dest
, src
);
13748 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13749 true, OPTAB_LIB_WIDEN
);
13751 emit_move_insn (offset
, tmp
);
13752 emit_label (label
);
13753 LABEL_NUSES (label
) = 1;
13757 rtx label
= ix86_expand_aligntest (count
, 2, true);
13758 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13759 src
= change_address (srcmem
, HImode
, tmp
);
13760 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13761 dest
= change_address (destmem
, HImode
, tmp
);
13762 emit_move_insn (dest
, src
);
13763 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13764 true, OPTAB_LIB_WIDEN
);
13766 emit_move_insn (offset
, tmp
);
13767 emit_label (label
);
13768 LABEL_NUSES (label
) = 1;
13772 rtx label
= ix86_expand_aligntest (count
, 1, true);
13773 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13774 src
= change_address (srcmem
, QImode
, tmp
);
13775 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13776 dest
= change_address (destmem
, QImode
, tmp
);
13777 emit_move_insn (dest
, src
);
13778 emit_label (label
);
13779 LABEL_NUSES (label
) = 1;
13784 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13786 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13787 rtx count
, int max_size
)
13790 expand_simple_binop (counter_mode (count
), AND
, count
,
13791 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13792 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13793 gen_lowpart (QImode
, value
), count
, QImode
,
13797 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13799 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13803 if (CONST_INT_P (count
))
13805 HOST_WIDE_INT countval
= INTVAL (count
);
13808 if ((countval
& 0x10) && max_size
> 16)
13812 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13813 emit_insn (gen_strset (destptr
, dest
, value
));
13814 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13815 emit_insn (gen_strset (destptr
, dest
, value
));
13818 gcc_unreachable ();
13821 if ((countval
& 0x08) && max_size
> 8)
13825 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13826 emit_insn (gen_strset (destptr
, dest
, value
));
13830 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13831 emit_insn (gen_strset (destptr
, dest
, value
));
13832 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13833 emit_insn (gen_strset (destptr
, dest
, value
));
13837 if ((countval
& 0x04) && max_size
> 4)
13839 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13840 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13843 if ((countval
& 0x02) && max_size
> 2)
13845 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13846 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13849 if ((countval
& 0x01) && max_size
> 1)
13851 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13852 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13859 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13864 rtx label
= ix86_expand_aligntest (count
, 16, true);
13867 dest
= change_address (destmem
, DImode
, destptr
);
13868 emit_insn (gen_strset (destptr
, dest
, value
));
13869 emit_insn (gen_strset (destptr
, dest
, value
));
13873 dest
= change_address (destmem
, SImode
, destptr
);
13874 emit_insn (gen_strset (destptr
, dest
, value
));
13875 emit_insn (gen_strset (destptr
, dest
, value
));
13876 emit_insn (gen_strset (destptr
, dest
, value
));
13877 emit_insn (gen_strset (destptr
, dest
, value
));
13879 emit_label (label
);
13880 LABEL_NUSES (label
) = 1;
13884 rtx label
= ix86_expand_aligntest (count
, 8, true);
13887 dest
= change_address (destmem
, DImode
, destptr
);
13888 emit_insn (gen_strset (destptr
, dest
, value
));
13892 dest
= change_address (destmem
, SImode
, destptr
);
13893 emit_insn (gen_strset (destptr
, dest
, value
));
13894 emit_insn (gen_strset (destptr
, dest
, value
));
13896 emit_label (label
);
13897 LABEL_NUSES (label
) = 1;
13901 rtx label
= ix86_expand_aligntest (count
, 4, true);
13902 dest
= change_address (destmem
, SImode
, destptr
);
13903 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13904 emit_label (label
);
13905 LABEL_NUSES (label
) = 1;
13909 rtx label
= ix86_expand_aligntest (count
, 2, true);
13910 dest
= change_address (destmem
, HImode
, destptr
);
13911 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13912 emit_label (label
);
13913 LABEL_NUSES (label
) = 1;
13917 rtx label
= ix86_expand_aligntest (count
, 1, true);
13918 dest
= change_address (destmem
, QImode
, destptr
);
13919 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13920 emit_label (label
);
13921 LABEL_NUSES (label
) = 1;
13925 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13926 DESIRED_ALIGNMENT. */
13928 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13929 rtx destptr
, rtx srcptr
, rtx count
,
13930 int align
, int desired_alignment
)
13932 if (align
<= 1 && desired_alignment
> 1)
13934 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13935 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13936 destmem
= change_address (destmem
, QImode
, destptr
);
13937 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13938 ix86_adjust_counter (count
, 1);
13939 emit_label (label
);
13940 LABEL_NUSES (label
) = 1;
13942 if (align
<= 2 && desired_alignment
> 2)
13944 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13945 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13946 destmem
= change_address (destmem
, HImode
, destptr
);
13947 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13948 ix86_adjust_counter (count
, 2);
13949 emit_label (label
);
13950 LABEL_NUSES (label
) = 1;
13952 if (align
<= 4 && desired_alignment
> 4)
13954 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13955 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13956 destmem
= change_address (destmem
, SImode
, destptr
);
13957 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13958 ix86_adjust_counter (count
, 4);
13959 emit_label (label
);
13960 LABEL_NUSES (label
) = 1;
13962 gcc_assert (desired_alignment
<= 8);
13965 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13966 DESIRED_ALIGNMENT. */
13968 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13969 int align
, int desired_alignment
)
13971 if (align
<= 1 && desired_alignment
> 1)
13973 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13974 destmem
= change_address (destmem
, QImode
, destptr
);
13975 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13976 ix86_adjust_counter (count
, 1);
13977 emit_label (label
);
13978 LABEL_NUSES (label
) = 1;
13980 if (align
<= 2 && desired_alignment
> 2)
13982 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13983 destmem
= change_address (destmem
, HImode
, destptr
);
13984 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13985 ix86_adjust_counter (count
, 2);
13986 emit_label (label
);
13987 LABEL_NUSES (label
) = 1;
13989 if (align
<= 4 && desired_alignment
> 4)
13991 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13992 destmem
= change_address (destmem
, SImode
, destptr
);
13993 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13994 ix86_adjust_counter (count
, 4);
13995 emit_label (label
);
13996 LABEL_NUSES (label
) = 1;
13998 gcc_assert (desired_alignment
<= 8);
14001 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14002 static enum stringop_alg
14003 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14004 int *dynamic_check
)
14006 const struct stringop_algs
* algs
;
14008 *dynamic_check
= -1;
14010 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14012 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14013 if (stringop_alg
!= no_stringop
)
14014 return stringop_alg
;
14015 /* rep; movq or rep; movl is the smallest variant. */
14016 else if (optimize_size
)
14018 if (!count
|| (count
& 3))
14019 return rep_prefix_1_byte
;
14021 return rep_prefix_4_byte
;
14023 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14025 else if (expected_size
!= -1 && expected_size
< 4)
14026 return loop_1_byte
;
14027 else if (expected_size
!= -1)
14030 enum stringop_alg alg
= libcall
;
14031 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14033 gcc_assert (algs
->size
[i
].max
);
14034 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14036 if (algs
->size
[i
].alg
!= libcall
)
14037 alg
= algs
->size
[i
].alg
;
14038 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14039 last non-libcall inline algorithm. */
14040 if (TARGET_INLINE_ALL_STRINGOPS
)
14042 /* When the current size is best to be copied by a libcall,
14043 but we are still forced to inline, run the heuristic bellow
14044 that will pick code for medium sized blocks. */
14045 if (alg
!= libcall
)
14050 return algs
->size
[i
].alg
;
14053 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14055 /* When asked to inline the call anyway, try to pick meaningful choice.
14056 We look for maximal size of block that is faster to copy by hand and
14057 take blocks of at most of that size guessing that average size will
14058 be roughly half of the block.
14060 If this turns out to be bad, we might simply specify the preferred
14061 choice in ix86_costs. */
14062 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14063 && algs
->unknown_size
== libcall
)
14066 enum stringop_alg alg
;
14069 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14070 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14071 max
= algs
->size
[i
].max
;
14074 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14075 gcc_assert (*dynamic_check
== -1);
14076 gcc_assert (alg
!= libcall
);
14077 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14078 *dynamic_check
= max
;
14081 return algs
->unknown_size
;
14084 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14085 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14087 decide_alignment (int align
,
14088 enum stringop_alg alg
,
14091 int desired_align
= 0;
14095 gcc_unreachable ();
14097 case unrolled_loop
:
14098 desired_align
= GET_MODE_SIZE (Pmode
);
14100 case rep_prefix_8_byte
:
14103 case rep_prefix_4_byte
:
14104 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14105 copying whole cacheline at once. */
14106 if (TARGET_PENTIUMPRO
)
14111 case rep_prefix_1_byte
:
14112 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14113 copying whole cacheline at once. */
14114 if (TARGET_PENTIUMPRO
)
14128 if (desired_align
< align
)
14129 desired_align
= align
;
14130 if (expected_size
!= -1 && expected_size
< 4)
14131 desired_align
= align
;
14132 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  VAL is assumed
   non-negative and small (a residual byte count), so the shift cannot
   overflow in practice.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14145 /* Expand string move (memcpy) operation. Use i386 string operations when
14146 profitable. expand_clrmem contains similar code. The code depends upon
14147 architecture, block size and alignment, but always has the same
14150 1) Prologue guard: Conditional that jumps up to epilogues for small
14151 blocks that can be handled by epilogue alone. This is faster but
14152 also needed for correctness, since prologue assume the block is larger
14153 than the desired alignment.
14155 Optional dynamic check for size and libcall for large
14156 blocks is emitted here too, with -minline-stringops-dynamically.
14158 2) Prologue: copy first few bytes in order to get destination aligned
14159 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14160 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14161 We emit either a jump tree on power of two sized blocks, or a byte loop.
14163 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14164 with specified algorithm.
14166 4) Epilogue: code copying tail of the block that is too small to be
14167 handled by main body (or up to size guarded by prologue guard). */
14170 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14171 rtx expected_align_exp
, rtx expected_size_exp
)
14177 rtx jump_around_label
= NULL
;
14178 HOST_WIDE_INT align
= 1;
14179 unsigned HOST_WIDE_INT count
= 0;
14180 HOST_WIDE_INT expected_size
= -1;
14181 int size_needed
= 0, epilogue_size_needed
;
14182 int desired_align
= 0;
14183 enum stringop_alg alg
;
14186 if (CONST_INT_P (align_exp
))
14187 align
= INTVAL (align_exp
);
14188 /* i386 can do misaligned access on reasonably increased cost. */
14189 if (CONST_INT_P (expected_align_exp
)
14190 && INTVAL (expected_align_exp
) > align
)
14191 align
= INTVAL (expected_align_exp
);
14192 if (CONST_INT_P (count_exp
))
14193 count
= expected_size
= INTVAL (count_exp
);
14194 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14195 expected_size
= INTVAL (expected_size_exp
);
14197 /* Step 0: Decide on preferred algorithm, desired alignment and
14198 size of chunks to be copied by main loop. */
14200 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14201 desired_align
= decide_alignment (align
, alg
, expected_size
);
14203 if (!TARGET_ALIGN_STRINGOPS
)
14204 align
= desired_align
;
14206 if (alg
== libcall
)
14208 gcc_assert (alg
!= no_stringop
);
14210 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14211 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14212 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14217 gcc_unreachable ();
14219 size_needed
= GET_MODE_SIZE (Pmode
);
14221 case unrolled_loop
:
14222 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14224 case rep_prefix_8_byte
:
14227 case rep_prefix_4_byte
:
14230 case rep_prefix_1_byte
:
14236 epilogue_size_needed
= size_needed
;
14238 /* Step 1: Prologue guard. */
14240 /* Alignment code needs count to be in register. */
14241 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14243 enum machine_mode mode
= SImode
;
14244 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14246 count_exp
= force_reg (mode
, count_exp
);
14248 gcc_assert (desired_align
>= 1 && align
>= 1);
14250 /* Ensure that alignment prologue won't copy past end of block. */
14251 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14253 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14254 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14255 Make sure it is power of 2. */
14256 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14258 label
= gen_label_rtx ();
14259 emit_cmp_and_jump_insns (count_exp
,
14260 GEN_INT (epilogue_size_needed
),
14261 LTU
, 0, counter_mode (count_exp
), 1, label
);
14262 if (GET_CODE (count_exp
) == CONST_INT
)
14264 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14265 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14267 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14269 /* Emit code to decide on runtime whether library call or inline should be
14271 if (dynamic_check
!= -1)
14273 rtx hot_label
= gen_label_rtx ();
14274 jump_around_label
= gen_label_rtx ();
14275 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14276 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14277 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14278 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14279 emit_jump (jump_around_label
);
14280 emit_label (hot_label
);
14283 /* Step 2: Alignment prologue. */
14285 if (desired_align
> align
)
14287 /* Except for the first move in epilogue, we no longer know
14288 constant offset in aliasing info. It don't seems to worth
14289 the pain to maintain it for the first move, so throw away
14291 src
= change_address (src
, BLKmode
, srcreg
);
14292 dst
= change_address (dst
, BLKmode
, destreg
);
14293 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14296 if (label
&& size_needed
== 1)
14298 emit_label (label
);
14299 LABEL_NUSES (label
) = 1;
14303 /* Step 3: Main loop. */
14309 gcc_unreachable ();
14311 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14312 count_exp
, QImode
, 1, expected_size
);
14315 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14316 count_exp
, Pmode
, 1, expected_size
);
14318 case unrolled_loop
:
14319 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14320 registers for 4 temporaries anyway. */
14321 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14322 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14325 case rep_prefix_8_byte
:
14326 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14329 case rep_prefix_4_byte
:
14330 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14333 case rep_prefix_1_byte
:
14334 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14338 /* Adjust properly the offset of src and dest memory for aliasing. */
14339 if (CONST_INT_P (count_exp
))
14341 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14342 (count
/ size_needed
) * size_needed
);
14343 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14344 (count
/ size_needed
) * size_needed
);
14348 src
= change_address (src
, BLKmode
, srcreg
);
14349 dst
= change_address (dst
, BLKmode
, destreg
);
14352 /* Step 4: Epilogue to copy the remaining bytes. */
14356 /* When the main loop is done, COUNT_EXP might hold original count,
14357 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14358 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14359 bytes. Compensate if needed. */
14361 if (size_needed
< epilogue_size_needed
)
14364 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14365 GEN_INT (size_needed
- 1), count_exp
, 1,
14367 if (tmp
!= count_exp
)
14368 emit_move_insn (count_exp
, tmp
);
14370 emit_label (label
);
14371 LABEL_NUSES (label
) = 1;
14374 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14375 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14376 epilogue_size_needed
);
14377 if (jump_around_label
)
14378 emit_label (jump_around_label
);
14382 /* Helper function for memcpy. For QImode value 0xXY produce
14383 0xXYXYXYXY of wide specified by MODE. This is essentially
14384 a * 0x10101010, but we can do slightly better than
14385 synth_mult by unwinding the sequence by hand on CPUs with
14388 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14390 enum machine_mode valmode
= GET_MODE (val
);
14392 int nops
= mode
== DImode
? 3 : 2;
14394 gcc_assert (mode
== SImode
|| mode
== DImode
);
14395 if (val
== const0_rtx
)
14396 return copy_to_mode_reg (mode
, const0_rtx
);
14397 if (CONST_INT_P (val
))
14399 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14403 if (mode
== DImode
)
14404 v
|= (v
<< 16) << 16;
14405 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14408 if (valmode
== VOIDmode
)
14410 if (valmode
!= QImode
)
14411 val
= gen_lowpart (QImode
, val
);
14412 if (mode
== QImode
)
14414 if (!TARGET_PARTIAL_REG_STALL
)
14416 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14417 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14418 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14419 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14421 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14422 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14423 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14428 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14430 if (!TARGET_PARTIAL_REG_STALL
)
14431 if (mode
== SImode
)
14432 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14434 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14437 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14438 NULL
, 1, OPTAB_DIRECT
);
14440 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14442 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14443 NULL
, 1, OPTAB_DIRECT
);
14444 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14445 if (mode
== SImode
)
14447 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14448 NULL
, 1, OPTAB_DIRECT
);
14449 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14454 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14455 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14456 alignment from ALIGN to DESIRED_ALIGN. */
14458 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14463 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14464 promoted_val
= promote_duplicated_reg (DImode
, val
);
14465 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14466 promoted_val
= promote_duplicated_reg (SImode
, val
);
14467 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14468 promoted_val
= promote_duplicated_reg (HImode
, val
);
14470 promoted_val
= val
;
14472 return promoted_val
;
14475 /* Expand string clear operation (bzero). Use i386 string operations when
14476 profitable. See expand_movmem comment for explanation of individual
14477 steps performed. */
14479 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14480 rtx expected_align_exp
, rtx expected_size_exp
)
14485 rtx jump_around_label
= NULL
;
14486 HOST_WIDE_INT align
= 1;
14487 unsigned HOST_WIDE_INT count
= 0;
14488 HOST_WIDE_INT expected_size
= -1;
14489 int size_needed
= 0, epilogue_size_needed
;
14490 int desired_align
= 0;
14491 enum stringop_alg alg
;
14492 rtx promoted_val
= NULL
;
14493 bool force_loopy_epilogue
= false;
14496 if (CONST_INT_P (align_exp
))
14497 align
= INTVAL (align_exp
);
14498 /* i386 can do misaligned access on reasonably increased cost. */
14499 if (CONST_INT_P (expected_align_exp
)
14500 && INTVAL (expected_align_exp
) > align
)
14501 align
= INTVAL (expected_align_exp
);
14502 if (CONST_INT_P (count_exp
))
14503 count
= expected_size
= INTVAL (count_exp
);
14504 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14505 expected_size
= INTVAL (expected_size_exp
);
14507 /* Step 0: Decide on preferred algorithm, desired alignment and
14508 size of chunks to be copied by main loop. */
14510 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14511 desired_align
= decide_alignment (align
, alg
, expected_size
);
14513 if (!TARGET_ALIGN_STRINGOPS
)
14514 align
= desired_align
;
14516 if (alg
== libcall
)
14518 gcc_assert (alg
!= no_stringop
);
14520 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14521 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14526 gcc_unreachable ();
14528 size_needed
= GET_MODE_SIZE (Pmode
);
14530 case unrolled_loop
:
14531 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14533 case rep_prefix_8_byte
:
14536 case rep_prefix_4_byte
:
14539 case rep_prefix_1_byte
:
14544 epilogue_size_needed
= size_needed
;
14546 /* Step 1: Prologue guard. */
14548 /* Alignment code needs count to be in register. */
14549 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14551 enum machine_mode mode
= SImode
;
14552 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14554 count_exp
= force_reg (mode
, count_exp
);
14556 /* Do the cheap promotion to allow better CSE across the
14557 main loop and epilogue (ie one load of the big constant in the
14558 front of all code. */
14559 if (CONST_INT_P (val_exp
))
14560 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14561 desired_align
, align
);
14562 /* Ensure that alignment prologue won't copy past end of block. */
14563 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14565 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14566 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14567 Make sure it is power of 2. */
14568 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14570 /* To improve performance of small blocks, we jump around the VAL
14571 promoting mode. This mean that if the promoted VAL is not constant,
14572 we might not use it in the epilogue and have to use byte
14574 if (epilogue_size_needed
> 2 && !promoted_val
)
14575 force_loopy_epilogue
= true;
14576 label
= gen_label_rtx ();
14577 emit_cmp_and_jump_insns (count_exp
,
14578 GEN_INT (epilogue_size_needed
),
14579 LTU
, 0, counter_mode (count_exp
), 1, label
);
14580 if (GET_CODE (count_exp
) == CONST_INT
)
14582 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14583 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14585 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14587 if (dynamic_check
!= -1)
14589 rtx hot_label
= gen_label_rtx ();
14590 jump_around_label
= gen_label_rtx ();
14591 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14592 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14593 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14594 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14595 emit_jump (jump_around_label
);
14596 emit_label (hot_label
);
14599 /* Step 2: Alignment prologue. */
14601 /* Do the expensive promotion once we branched off the small blocks. */
14603 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14604 desired_align
, align
);
14605 gcc_assert (desired_align
>= 1 && align
>= 1);
14607 if (desired_align
> align
)
14609 /* Except for the first move in epilogue, we no longer know
14610 constant offset in aliasing info. It don't seems to worth
14611 the pain to maintain it for the first move, so throw away
14613 dst
= change_address (dst
, BLKmode
, destreg
);
14614 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14617 if (label
&& size_needed
== 1)
14619 emit_label (label
);
14620 LABEL_NUSES (label
) = 1;
14624 /* Step 3: Main loop. */
14630 gcc_unreachable ();
14632 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14633 count_exp
, QImode
, 1, expected_size
);
14636 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14637 count_exp
, Pmode
, 1, expected_size
);
14639 case unrolled_loop
:
14640 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14641 count_exp
, Pmode
, 4, expected_size
);
14643 case rep_prefix_8_byte
:
14644 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14647 case rep_prefix_4_byte
:
14648 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14651 case rep_prefix_1_byte
:
14652 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14656 /* Adjust properly the offset of src and dest memory for aliasing. */
14657 if (CONST_INT_P (count_exp
))
14658 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14659 (count
/ size_needed
) * size_needed
);
14661 dst
= change_address (dst
, BLKmode
, destreg
);
14663 /* Step 4: Epilogue to copy the remaining bytes. */
14667 /* When the main loop is done, COUNT_EXP might hold original count,
14668 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14669 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14670 bytes. Compensate if needed. */
14672 if (size_needed
< desired_align
- align
)
14675 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14676 GEN_INT (size_needed
- 1), count_exp
, 1,
14678 size_needed
= desired_align
- align
+ 1;
14679 if (tmp
!= count_exp
)
14680 emit_move_insn (count_exp
, tmp
);
14682 emit_label (label
);
14683 LABEL_NUSES (label
) = 1;
14685 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14687 if (force_loopy_epilogue
)
14688 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14691 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14694 if (jump_around_label
)
14695 emit_label (jump_around_label
);
14699 /* Expand strlen. */
14701 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14703 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14705 /* The generic case of strlen expander is long. Avoid it's
14706 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14708 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14709 && !TARGET_INLINE_ALL_STRINGOPS
14711 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14714 addr
= force_reg (Pmode
, XEXP (src
, 0));
14715 scratch1
= gen_reg_rtx (Pmode
);
14717 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14720 /* Well it seems that some optimizer does not combine a call like
14721 foo(strlen(bar), strlen(bar));
14722 when the move and the subtraction is done here. It does calculate
14723 the length just once when these instructions are done inside of
14724 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14725 often used and I use one fewer register for the lifetime of
14726 output_strlen_unroll() this is better. */
14728 emit_move_insn (out
, addr
);
14730 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14732 /* strlensi_unroll_1 returns the address of the zero at the end of
14733 the string, like memchr(), so compute the length by subtracting
14734 the start address. */
14736 emit_insn (gen_subdi3 (out
, out
, addr
));
14738 emit_insn (gen_subsi3 (out
, out
, addr
));
14743 scratch2
= gen_reg_rtx (Pmode
);
14744 scratch3
= gen_reg_rtx (Pmode
);
14745 scratch4
= force_reg (Pmode
, constm1_rtx
);
14747 emit_move_insn (scratch3
, addr
);
14748 eoschar
= force_reg (QImode
, eoschar
);
14750 src
= replace_equiv_address_nv (src
, scratch3
);
14752 /* If .md starts supporting :P, this can be done in .md. */
14753 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14754 scratch4
), UNSPEC_SCAS
);
14755 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14758 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14759 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14763 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14764 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14770 /* Expand the appropriate insns for doing strlen if not just doing
14773 out = result, initialized with the start address
14774 align_rtx = alignment of the address.
14775 scratch = scratch register, initialized with the startaddress when
14776 not aligned, otherwise undefined
14778 This is just the body. It needs the initializations mentioned above and
14779 some address computing at the end. These things are done in i386.md. */
14782 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14786 rtx align_2_label
= NULL_RTX
;
14787 rtx align_3_label
= NULL_RTX
;
14788 rtx align_4_label
= gen_label_rtx ();
14789 rtx end_0_label
= gen_label_rtx ();
14791 rtx tmpreg
= gen_reg_rtx (SImode
);
14792 rtx scratch
= gen_reg_rtx (SImode
);
14796 if (CONST_INT_P (align_rtx
))
14797 align
= INTVAL (align_rtx
);
14799 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14801 /* Is there a known alignment and is it less than 4? */
14804 rtx scratch1
= gen_reg_rtx (Pmode
);
14805 emit_move_insn (scratch1
, out
);
14806 /* Is there a known alignment and is it not 2? */
14809 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14810 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14812 /* Leave just the 3 lower bits. */
14813 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14814 NULL_RTX
, 0, OPTAB_WIDEN
);
14816 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14817 Pmode
, 1, align_4_label
);
14818 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14819 Pmode
, 1, align_2_label
);
14820 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14821 Pmode
, 1, align_3_label
);
14825 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14826 check if is aligned to 4 - byte. */
14828 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14829 NULL_RTX
, 0, OPTAB_WIDEN
);
14831 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14832 Pmode
, 1, align_4_label
);
14835 mem
= change_address (src
, QImode
, out
);
14837 /* Now compare the bytes. */
14839 /* Compare the first n unaligned byte on a byte per byte basis. */
14840 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14841 QImode
, 1, end_0_label
);
14843 /* Increment the address. */
14845 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14847 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14849 /* Not needed with an alignment of 2 */
14852 emit_label (align_2_label
);
14854 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14858 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14860 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14862 emit_label (align_3_label
);
14865 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14869 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14871 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14874 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14875 align this loop. It gives only huge programs, but does not help to
14877 emit_label (align_4_label
);
14879 mem
= change_address (src
, SImode
, out
);
14880 emit_move_insn (scratch
, mem
);
14882 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14884 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14886 /* This formula yields a nonzero result iff one of the bytes is zero.
14887 This saves three branches inside loop and many cycles. */
14889 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14890 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14891 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14892 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14893 gen_int_mode (0x80808080, SImode
)));
14894 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14899 rtx reg
= gen_reg_rtx (SImode
);
14900 rtx reg2
= gen_reg_rtx (Pmode
);
14901 emit_move_insn (reg
, tmpreg
);
14902 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14904 /* If zero is not in the first two bytes, move two bytes forward. */
14905 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14906 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14907 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14908 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14909 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14912 /* Emit lea manually to avoid clobbering of flags. */
14913 emit_insn (gen_rtx_SET (SImode
, reg2
,
14914 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14916 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14917 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14918 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14919 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14926 rtx end_2_label
= gen_label_rtx ();
14927 /* Is zero in the first two bytes? */
14929 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14930 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14931 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14932 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14933 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14935 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14936 JUMP_LABEL (tmp
) = end_2_label
;
14938 /* Not in the first two. Move two bytes forward. */
14939 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14941 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14943 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14945 emit_label (end_2_label
);
14949 /* Avoid branch in fixing the byte. */
14950 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14951 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
14952 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14954 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14956 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14958 emit_label (end_0_label
);
14962 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14963 rtx callarg2 ATTRIBUTE_UNUSED
,
14964 rtx pop
, int sibcall
)
14966 rtx use
= NULL
, call
;
14968 if (pop
== const0_rtx
)
14970 gcc_assert (!TARGET_64BIT
|| !pop
);
14972 if (TARGET_MACHO
&& !TARGET_64BIT
)
14975 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14976 fnaddr
= machopic_indirect_call_target (fnaddr
);
14981 /* Static functions and indirect calls don't need the pic register. */
14982 if (! TARGET_64BIT
&& flag_pic
14983 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14984 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14985 use_reg (&use
, pic_offset_table_rtx
);
14988 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14990 rtx al
= gen_rtx_REG (QImode
, 0);
14991 emit_move_insn (al
, callarg2
);
14992 use_reg (&use
, al
);
14995 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14997 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14998 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15000 if (sibcall
&& TARGET_64BIT
15001 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15004 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15005 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15006 emit_move_insn (fnaddr
, addr
);
15007 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15010 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15012 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15015 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15016 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15017 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15020 call
= emit_call_insn (call
);
15022 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15026 /* Clear stack slot assignments remembered from previous functions.
15027 This is called from INIT_EXPANDERS once before RTL is emitted for each
15030 static struct machine_function
*
15031 ix86_init_machine_status (void)
15033 struct machine_function
*f
;
15035 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
15036 f
->use_fast_prologue_epilogue_nregs
= -1;
15037 f
->tls_descriptor_call_expanded_p
= 0;
15042 /* Return a MEM corresponding to a stack slot with mode MODE.
15043 Allocate a new slot if necessary.
15045 The RTL for a function can have several slots available: N is
15046 which slot to use. */
15049 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15051 struct stack_local_entry
*s
;
15053 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15055 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15056 if (s
->mode
== mode
&& s
->n
== n
)
15057 return copy_rtx (s
->rtl
);
15059 s
= (struct stack_local_entry
*)
15060 ggc_alloc (sizeof (struct stack_local_entry
));
15063 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15065 s
->next
= ix86_stack_locals
;
15066 ix86_stack_locals
= s
;
15070 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15072 static GTY(()) rtx ix86_tls_symbol
;
15074 ix86_tls_get_addr (void)
15077 if (!ix86_tls_symbol
)
15079 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15080 (TARGET_ANY_GNU_TLS
15082 ? "___tls_get_addr"
15083 : "__tls_get_addr");
15086 return ix86_tls_symbol
;
15089 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15091 static GTY(()) rtx ix86_tls_module_base_symbol
;
15093 ix86_tls_module_base (void)
15096 if (!ix86_tls_module_base_symbol
)
15098 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15099 "_TLS_MODULE_BASE_");
15100 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15101 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15104 return ix86_tls_module_base_symbol
;
15107 /* Calculate the length of the memory address in the instruction
15108 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15111 memory_address_length (rtx addr
)
15113 struct ix86_address parts
;
15114 rtx base
, index
, disp
;
15118 if (GET_CODE (addr
) == PRE_DEC
15119 || GET_CODE (addr
) == POST_INC
15120 || GET_CODE (addr
) == PRE_MODIFY
15121 || GET_CODE (addr
) == POST_MODIFY
)
15124 ok
= ix86_decompose_address (addr
, &parts
);
15127 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15128 parts
.base
= SUBREG_REG (parts
.base
);
15129 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15130 parts
.index
= SUBREG_REG (parts
.index
);
15133 index
= parts
.index
;
15138 - esp as the base always wants an index,
15139 - ebp as the base always wants a displacement. */
15141 /* Register Indirect. */
15142 if (base
&& !index
&& !disp
)
15144 /* esp (for its index) and ebp (for its displacement) need
15145 the two-byte modrm form. */
15146 if (addr
== stack_pointer_rtx
15147 || addr
== arg_pointer_rtx
15148 || addr
== frame_pointer_rtx
15149 || addr
== hard_frame_pointer_rtx
)
15153 /* Direct Addressing. */
15154 else if (disp
&& !base
&& !index
)
15159 /* Find the length of the displacement constant. */
15162 if (base
&& satisfies_constraint_K (disp
))
15167 /* ebp always wants a displacement. */
15168 else if (base
== hard_frame_pointer_rtx
)
15171 /* An index requires the two-byte modrm form.... */
15173 /* ...like esp, which always wants an index. */
15174 || base
== stack_pointer_rtx
15175 || base
== arg_pointer_rtx
15176 || base
== frame_pointer_rtx
)
15183 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15184 is set, expect that insn have 8bit immediate alternative. */
15186 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15190 extract_insn_cached (insn
);
15191 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15192 if (CONSTANT_P (recog_data
.operand
[i
]))
15195 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15199 switch (get_attr_mode (insn
))
15210 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15215 fatal_insn ("unknown insn mode", insn
);
15221 /* Compute default value for "length_address" attribute. */
15223 ix86_attr_length_address_default (rtx insn
)
15227 if (get_attr_type (insn
) == TYPE_LEA
)
15229 rtx set
= PATTERN (insn
);
15231 if (GET_CODE (set
) == PARALLEL
)
15232 set
= XVECEXP (set
, 0, 0);
15234 gcc_assert (GET_CODE (set
) == SET
);
15236 return memory_address_length (SET_SRC (set
));
15239 extract_insn_cached (insn
);
15240 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15241 if (MEM_P (recog_data
.operand
[i
]))
15243 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15249 /* Return the maximum number of instructions a cpu can issue. */
15252 ix86_issue_rate (void)
15256 case PROCESSOR_PENTIUM
:
15260 case PROCESSOR_PENTIUMPRO
:
15261 case PROCESSOR_PENTIUM4
:
15262 case PROCESSOR_ATHLON
:
15264 case PROCESSOR_AMDFAM10
:
15265 case PROCESSOR_NOCONA
:
15266 case PROCESSOR_GENERIC32
:
15267 case PROCESSOR_GENERIC64
:
15270 case PROCESSOR_CORE2
:
15278 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15279 by DEP_INSN and nothing set by DEP_INSN. */
15282 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15286 /* Simplify the test for uninteresting insns. */
15287 if (insn_type
!= TYPE_SETCC
15288 && insn_type
!= TYPE_ICMOV
15289 && insn_type
!= TYPE_FCMOV
15290 && insn_type
!= TYPE_IBR
)
15293 if ((set
= single_set (dep_insn
)) != 0)
15295 set
= SET_DEST (set
);
15298 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15299 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15300 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15301 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15303 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15304 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15309 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15312 /* This test is true if the dependent insn reads the flags but
15313 not any other potentially set register. */
15314 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15317 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15323 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15324 address with operands set by DEP_INSN. */
15327 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15331 if (insn_type
== TYPE_LEA
15334 addr
= PATTERN (insn
);
15336 if (GET_CODE (addr
) == PARALLEL
)
15337 addr
= XVECEXP (addr
, 0, 0);
15339 gcc_assert (GET_CODE (addr
) == SET
);
15341 addr
= SET_SRC (addr
);
15346 extract_insn_cached (insn
);
15347 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15348 if (MEM_P (recog_data
.operand
[i
]))
15350 addr
= XEXP (recog_data
.operand
[i
], 0);
15357 return modified_in_p (addr
, dep_insn
);
15361 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15363 enum attr_type insn_type
, dep_insn_type
;
15364 enum attr_memory memory
;
15366 int dep_insn_code_number
;
15368 /* Anti and output dependencies have zero cost on all CPUs. */
15369 if (REG_NOTE_KIND (link
) != 0)
15372 dep_insn_code_number
= recog_memoized (dep_insn
);
15374 /* If we can't recognize the insns, we can't really do anything. */
15375 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15378 insn_type
= get_attr_type (insn
);
15379 dep_insn_type
= get_attr_type (dep_insn
);
15383 case PROCESSOR_PENTIUM
:
15384 /* Address Generation Interlock adds a cycle of latency. */
15385 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15388 /* ??? Compares pair with jump/setcc. */
15389 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15392 /* Floating point stores require value to be ready one cycle earlier. */
15393 if (insn_type
== TYPE_FMOV
15394 && get_attr_memory (insn
) == MEMORY_STORE
15395 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15399 case PROCESSOR_PENTIUMPRO
:
15400 memory
= get_attr_memory (insn
);
15402 /* INT->FP conversion is expensive. */
15403 if (get_attr_fp_int_src (dep_insn
))
15406 /* There is one cycle extra latency between an FP op and a store. */
15407 if (insn_type
== TYPE_FMOV
15408 && (set
= single_set (dep_insn
)) != NULL_RTX
15409 && (set2
= single_set (insn
)) != NULL_RTX
15410 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15411 && MEM_P (SET_DEST (set2
)))
15414 /* Show ability of reorder buffer to hide latency of load by executing
15415 in parallel with previous instruction in case
15416 previous instruction is not needed to compute the address. */
15417 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15418 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15420 /* Claim moves to take one cycle, as core can issue one load
15421 at time and the next load can start cycle later. */
15422 if (dep_insn_type
== TYPE_IMOV
15423 || dep_insn_type
== TYPE_FMOV
)
15431 memory
= get_attr_memory (insn
);
15433 /* The esp dependency is resolved before the instruction is really
15435 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15436 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15439 /* INT->FP conversion is expensive. */
15440 if (get_attr_fp_int_src (dep_insn
))
15443 /* Show ability of reorder buffer to hide latency of load by executing
15444 in parallel with previous instruction in case
15445 previous instruction is not needed to compute the address. */
15446 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15447 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15449 /* Claim moves to take one cycle, as core can issue one load
15450 at time and the next load can start cycle later. */
15451 if (dep_insn_type
== TYPE_IMOV
15452 || dep_insn_type
== TYPE_FMOV
)
15461 case PROCESSOR_ATHLON
:
15463 case PROCESSOR_AMDFAM10
:
15464 case PROCESSOR_GENERIC32
:
15465 case PROCESSOR_GENERIC64
:
15466 memory
= get_attr_memory (insn
);
15468 /* Show ability of reorder buffer to hide latency of load by executing
15469 in parallel with previous instruction in case
15470 previous instruction is not needed to compute the address. */
15471 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15472 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15474 enum attr_unit unit
= get_attr_unit (insn
);
15477 /* Because of the difference between the length of integer and
15478 floating unit pipeline preparation stages, the memory operands
15479 for floating point are cheaper.
15481 ??? For Athlon it the difference is most probably 2. */
15482 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15485 loadcost
= TARGET_ATHLON
? 2 : 0;
15487 if (cost
>= loadcost
)
15500 /* How many alternative schedules to try. This should be as wide as the
15501 scheduling freedom in the DFA, but no wider. Making this value too
15502 large results extra work for the scheduler. */
15505 ia32_multipass_dfa_lookahead (void)
15507 if (ix86_tune
== PROCESSOR_PENTIUM
)
15510 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15511 || ix86_tune
== PROCESSOR_K6
)
15519 /* Compute the alignment given to a constant that is being placed in memory.
15520 EXP is the constant and ALIGN is the alignment that the object would
15522 The value of this function is used instead of that alignment to align
15526 ix86_constant_alignment (tree exp
, int align
)
15528 if (TREE_CODE (exp
) == REAL_CST
)
15530 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15532 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15535 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15536 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15537 return BITS_PER_WORD
;
15542 /* Compute the alignment for a static variable.
15543 TYPE is the data type, and ALIGN is the alignment that
15544 the object would ordinarily have. The value of this function is used
15545 instead of that alignment to align the object. */
15548 ix86_data_alignment (tree type
, int align
)
15550 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15552 if (AGGREGATE_TYPE_P (type
)
15553 && TYPE_SIZE (type
)
15554 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15555 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15556 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15557 && align
< max_align
)
15560 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15561 to 16byte boundary. */
15564 if (AGGREGATE_TYPE_P (type
)
15565 && TYPE_SIZE (type
)
15566 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15567 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15568 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15572 if (TREE_CODE (type
) == ARRAY_TYPE
)
15574 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15576 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15579 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15582 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15584 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15587 else if ((TREE_CODE (type
) == RECORD_TYPE
15588 || TREE_CODE (type
) == UNION_TYPE
15589 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15590 && TYPE_FIELDS (type
))
15592 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15594 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15597 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15598 || TREE_CODE (type
) == INTEGER_TYPE
)
15600 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15602 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15609 /* Compute the alignment for a local variable.
15610 TYPE is the data type, and ALIGN is the alignment that
15611 the object would ordinarily have. The value of this macro is used
15612 instead of that alignment to align the object. */
15615 ix86_local_alignment (tree type
, int align
)
15617 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15618 to 16byte boundary. */
15621 if (AGGREGATE_TYPE_P (type
)
15622 && TYPE_SIZE (type
)
15623 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15624 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15625 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15628 if (TREE_CODE (type
) == ARRAY_TYPE
)
15630 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15632 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15635 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15637 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15639 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15642 else if ((TREE_CODE (type
) == RECORD_TYPE
15643 || TREE_CODE (type
) == UNION_TYPE
15644 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15645 && TYPE_FIELDS (type
))
15647 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15649 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15652 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15653 || TREE_CODE (type
) == INTEGER_TYPE
)
15656 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15658 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15664 /* Emit RTL insns to initialize the variable parts of a trampoline.
15665 FNADDR is an RTX for the address of the function's pure code.
15666 CXT is an RTX for the static chain value for the function. */
15668 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15672 /* Compute offset from the end of the jmp to the target function. */
15673 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15674 plus_constant (tramp
, 10),
15675 NULL_RTX
, 1, OPTAB_DIRECT
);
15676 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15677 gen_int_mode (0xb9, QImode
));
15678 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15679 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15680 gen_int_mode (0xe9, QImode
));
15681 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15686 /* Try to load address using shorter movl instead of movabs.
15687 We may want to support movq for kernel mode, but kernel does not use
15688 trampolines at the moment. */
15689 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15691 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15692 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15693 gen_int_mode (0xbb41, HImode
));
15694 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15695 gen_lowpart (SImode
, fnaddr
));
15700 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15701 gen_int_mode (0xbb49, HImode
));
15702 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15706 /* Load static chain using movabs to r10. */
15707 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15708 gen_int_mode (0xba49, HImode
));
15709 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15712 /* Jump to the r11 */
15713 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15714 gen_int_mode (0xff49, HImode
));
15715 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15716 gen_int_mode (0xe3, QImode
));
15718 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15721 #ifdef ENABLE_EXECUTE_STACK
15722 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15723 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Codes for all the SSE/MMX builtins.  Order is significant: the value of
   each enumerator indexes the ix86_builtins[] decl table.

   NOTE(review): the extraction dropped a handful of members (PAND, POR,
   PXOR, ORPS, ORPD, and the IX86_BUILTIN_MAX terminator); they are
   restored here, grounded by references to those enumerators elsewhere
   in this file (bdesc_2arg and the ix86_builtins[] declaration).  */
enum ix86_builtins
{
  /* SSE arithmetic.  */
  IX86_BUILTIN_ADDPS, IX86_BUILTIN_ADDSS, IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS, IX86_BUILTIN_MULPS, IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS, IX86_BUILTIN_SUBSS,

  /* SSE packed/scalar comparisons.  */
  IX86_BUILTIN_CMPEQPS, IX86_BUILTIN_CMPLTPS, IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS, IX86_BUILTIN_CMPGEPS, IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS, IX86_BUILTIN_CMPNLEPS, IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS, IX86_BUILTIN_CMPORDPS, IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS, IX86_BUILTIN_CMPLTSS, IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS, IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  /* SSE ordered/unordered scalar comparisons setting EFLAGS.  */
  IX86_BUILTIN_COMIEQSS, IX86_BUILTIN_COMILTSS, IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS, IX86_BUILTIN_COMIGESS, IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS, IX86_BUILTIN_UCOMILTSS, IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS, IX86_BUILTIN_UCOMIGESS, IX86_BUILTIN_UCOMINEQSS,

  /* SSE conversions.  */
  IX86_BUILTIN_CVTPI2PS, IX86_BUILTIN_CVTPS2PI, IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS, IX86_BUILTIN_CVTSS2SI, IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI, IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS, IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS, IX86_BUILTIN_STOREUPS, IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS, IX86_BUILTIN_MOVLHPS, IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS, IX86_BUILTIN_STOREHPS, IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ, IX86_BUILTIN_MOVMSKPS, IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS, IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU, IX86_BUILTIN_STOREDQU,

  /* MMX pack/unpack and arithmetic.  */
  IX86_BUILTIN_PACKSSWB, IX86_BUILTIN_PACKSSDW, IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB, IX86_BUILTIN_PADDW, IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ, IX86_BUILTIN_PADDSB, IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB, IX86_BUILTIN_PADDUSW, IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW, IX86_BUILTIN_PSUBD, IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB, IX86_BUILTIN_PSUBSW, IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND, IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR, IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB, IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB, IX86_BUILTIN_PCMPEQW, IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB, IX86_BUILTIN_PCMPGTW, IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW, IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW, IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW, IX86_BUILTIN_PMULHW, IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW, IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW, IX86_BUILTIN_PSLLD, IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW, IX86_BUILTIN_PSRAD, IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD, IX86_BUILTIN_PSRLQ, IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI, IX86_BUILTIN_PSLLQI, IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI, IX86_BUILTIN_PSRLWI, IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW, IX86_BUILTIN_PUNPCKHWD, IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW, IX86_BUILTIN_PUNPCKLWD, IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS, IX86_BUILTIN_RCPSS, IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS, IX86_BUILTIN_SQRTPS, IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS, IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS, IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS, IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR, IX86_BUILTIN_STMXCSR, IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS, IX86_BUILTIN_PAVGUSB, IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC, IX86_BUILTIN_PFADD, IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE, IX86_BUILTIN_PFCMPGT, IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN, IX86_BUILTIN_PFMUL, IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1, IX86_BUILTIN_PFRCPIT2, IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT, IX86_BUILTIN_PFSUB, IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD, IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW, IX86_BUILTIN_PFNACC, IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW, IX86_BUILTIN_PSWAPDSI, IX86_BUILTIN_PSWAPDSF,

  /* SSE2 arithmetic.  */
  IX86_BUILTIN_ADDPD, IX86_BUILTIN_ADDSD, IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD, IX86_BUILTIN_MULPD, IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD, IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD, IX86_BUILTIN_CMPLTPD, IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD, IX86_BUILTIN_CMPGEPD, IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD, IX86_BUILTIN_CMPNLEPD, IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD, IX86_BUILTIN_CMPORDPD, IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD, IX86_BUILTIN_CMPEQSD, IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD, IX86_BUILTIN_CMPNEQSD, IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD, IX86_BUILTIN_CMPORDSD, IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD, IX86_BUILTIN_COMILTSD, IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD, IX86_BUILTIN_COMIGESD, IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD, IX86_BUILTIN_UCOMILTSD, IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD, IX86_BUILTIN_UCOMIGESD, IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD, IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD, IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD, IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD, IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD, IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD, IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD, IX86_BUILTIN_STOREUPD, IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD, IX86_BUILTIN_LOADLPD,

  /* SSE2 conversions.  */
  IX86_BUILTIN_CVTDQ2PD, IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ, IX86_BUILTIN_CVTPD2PI, IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ, IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD, IX86_BUILTIN_CVTSI2SD, IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI, IX86_BUILTIN_CVTSD2SI64, IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD, IX86_BUILTIN_CVTTSD2SI, IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ, IX86_BUILTIN_CVTPS2PD, IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI, IX86_BUILTIN_MOVNTPD, IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MASKMOVDQU, IX86_BUILTIN_MOVMSKPD, IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128, IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128, IX86_BUILTIN_PADDW128, IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128, IX86_BUILTIN_PADDSB128, IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128, IX86_BUILTIN_PADDUSW128, IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128, IX86_BUILTIN_PSUBD128, IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128, IX86_BUILTIN_PSUBSW128, IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128, IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128, IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128, IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128, IX86_BUILTIN_PCMPEQW128, IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128, IX86_BUILTIN_PCMPGTW128, IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128, IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128, IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ, IX86_BUILTIN_PMULUDQ128, IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128, IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128, IX86_BUILTIN_PSHUFHW, IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128, IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128, IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128, IX86_BUILTIN_PSRLQ128, IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128, IX86_BUILTIN_PSLLDI128, IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128, IX86_BUILTIN_PSRADI128, IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128, IX86_BUILTIN_PSRLDI128, IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128, IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128, IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128, IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH, IX86_BUILTIN_MFENCE, IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS, IX86_BUILTIN_HADDPS, IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP, IX86_BUILTIN_MOVSLDUP, IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD, IX86_BUILTIN_HSUBPD, IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD, IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD, IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW, IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128, IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128, IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128, IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128, IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128, IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128, IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD, IX86_BUILTIN_MOVNTSS, IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ, IX86_BUILTIN_INSERTQI, IX86_BUILTIN_INSERTQ,

  /* Generic vector init/extract/set helpers.  */
  IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI, IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI, IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI, IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI, IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI, IX86_BUILTIN_VEC_SET_V4HI,

  IX86_BUILTIN_MAX
};
16200 /* Table for the ix86 builtin decls. */
16201 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16203 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16204 * if the target_flags include one of MASK. Stores the function decl
16205 * in the ix86_builtins array.
16206 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16209 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16211 tree decl
= NULL_TREE
;
16213 if (mask
& target_flags
16214 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16216 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16218 ix86_builtins
[(int) code
] = decl
;
16224 /* Like def_builtin, but also marks the function decl "const". */
16227 def_builtin_const (int mask
, const char *name
, tree type
,
16228 enum ix86_builtins code
)
16230 tree decl
= def_builtin (mask
, name
, type
, code
);
16232 TREE_READONLY (decl
) = 1;
16236 /* Bits for builtin_description.flag. */
16238 /* Set when we don't support the comparison natively, and should
16239 swap_comparison in order to support it. */
16240 #define BUILTIN_DESC_SWAP_OPERANDS 1
16242 struct builtin_description
16244 const unsigned int mask
;
16245 const enum insn_code icode
;
16246 const char *const name
;
16247 const enum ix86_builtins code
;
16248 const enum rtx_code comparison
;
16249 const unsigned int flag
;
16252 static const struct builtin_description bdesc_comi
[] =
16254 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16255 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16256 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16257 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16258 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16259 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16260 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16261 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16262 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16263 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16264 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16265 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16266 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16267 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16268 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16269 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16270 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16271 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16272 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16273 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16274 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16275 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16276 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16277 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16280 static const struct builtin_description bdesc_2arg
[] =
16283 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16284 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16285 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16286 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16287 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16288 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16289 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16290 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16292 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16293 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16294 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16295 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16296 BUILTIN_DESC_SWAP_OPERANDS
},
16297 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16298 BUILTIN_DESC_SWAP_OPERANDS
},
16299 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16300 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16301 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16302 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16303 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16304 BUILTIN_DESC_SWAP_OPERANDS
},
16305 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16306 BUILTIN_DESC_SWAP_OPERANDS
},
16307 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16308 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16309 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16310 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16311 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16312 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16313 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16314 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16315 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16316 BUILTIN_DESC_SWAP_OPERANDS
},
16317 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16318 BUILTIN_DESC_SWAP_OPERANDS
},
16319 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16321 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16322 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16323 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16324 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16326 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16327 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16328 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16329 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16331 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16332 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16333 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16334 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16335 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16338 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16339 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16340 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16341 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16342 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16343 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16344 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16345 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16347 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16348 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16349 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16350 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16351 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16352 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16353 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16354 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16356 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16357 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16358 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16360 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16361 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16362 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16363 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16365 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16366 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16368 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16369 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16370 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16371 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16372 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16373 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16375 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16376 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16377 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16378 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16380 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16381 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16382 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16383 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16384 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16385 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16388 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16389 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16390 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16392 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16393 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16394 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16396 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16397 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16398 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16399 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16400 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16401 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16403 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16404 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16405 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16406 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16407 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16408 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16410 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16411 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16412 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16413 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16415 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16416 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16419 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16420 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16421 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16422 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16423 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16424 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16425 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16426 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16428 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16429 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16430 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16431 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16432 BUILTIN_DESC_SWAP_OPERANDS
},
16433 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16434 BUILTIN_DESC_SWAP_OPERANDS
},
16435 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16436 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16437 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16438 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16439 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16440 BUILTIN_DESC_SWAP_OPERANDS
},
16441 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16442 BUILTIN_DESC_SWAP_OPERANDS
},
16443 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16444 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16445 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16446 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16447 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16448 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16449 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16450 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16451 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16453 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16454 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16455 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16456 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16458 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16459 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16460 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16461 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16463 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16464 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16465 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16468 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16469 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16470 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16471 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16472 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16473 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16474 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16475 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16477 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16478 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16479 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16480 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16481 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16482 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16483 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16484 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16486 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16487 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16489 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16490 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16491 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16492 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16494 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16495 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16497 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16498 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16499 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16500 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16501 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16502 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16504 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16505 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16506 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16507 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16509 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16510 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16511 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16512 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16513 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16514 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16515 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16516 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16518 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16519 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16520 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16522 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16523 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16525 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16526 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16528 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16529 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16530 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16532 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16533 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16534 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16536 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16537 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16539 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16541 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16542 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16543 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16544 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16547 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16548 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16549 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16550 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16551 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16552 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16555 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16556 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16557 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16558 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16559 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16560 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16561 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16562 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16563 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16564 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16565 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16566 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16567 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16568 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16569 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16570 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16571 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16572 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16573 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16574 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16575 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16576 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16577 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16578 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16581 static const struct builtin_description bdesc_1arg
[] =
16583 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16584 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16586 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16587 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16588 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16590 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16591 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16592 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16593 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16594 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16595 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16597 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16598 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16600 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16602 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16603 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16605 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16606 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16607 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16608 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16609 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16611 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16613 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16614 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16615 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16616 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16618 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16619 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16620 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16623 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16624 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16627 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16628 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16629 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16630 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16631 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16632 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16636 ix86_init_builtins (void)
16639 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the vector-arithmetic builtins.  */
16646 ix86_init_mmx_sse_builtins (void)
16648 const struct builtin_description
* d
;
16651 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16652 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16653 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16654 tree V2DI_type_node
16655 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16656 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16657 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16658 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16659 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16660 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16661 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16663 tree pchar_type_node
= build_pointer_type (char_type_node
);
16664 tree pcchar_type_node
= build_pointer_type (
16665 build_type_variant (char_type_node
, 1, 0));
16666 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16667 tree pcfloat_type_node
= build_pointer_type (
16668 build_type_variant (float_type_node
, 1, 0));
16669 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16670 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16671 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16674 tree int_ftype_v4sf_v4sf
16675 = build_function_type_list (integer_type_node
,
16676 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16677 tree v4si_ftype_v4sf_v4sf
16678 = build_function_type_list (V4SI_type_node
,
16679 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16680 /* MMX/SSE/integer conversions. */
16681 tree int_ftype_v4sf
16682 = build_function_type_list (integer_type_node
,
16683 V4SF_type_node
, NULL_TREE
);
16684 tree int64_ftype_v4sf
16685 = build_function_type_list (long_long_integer_type_node
,
16686 V4SF_type_node
, NULL_TREE
);
16687 tree int_ftype_v8qi
16688 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16689 tree v4sf_ftype_v4sf_int
16690 = build_function_type_list (V4SF_type_node
,
16691 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16692 tree v4sf_ftype_v4sf_int64
16693 = build_function_type_list (V4SF_type_node
,
16694 V4SF_type_node
, long_long_integer_type_node
,
16696 tree v4sf_ftype_v4sf_v2si
16697 = build_function_type_list (V4SF_type_node
,
16698 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16700 /* Miscellaneous. */
16701 tree v8qi_ftype_v4hi_v4hi
16702 = build_function_type_list (V8QI_type_node
,
16703 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16704 tree v4hi_ftype_v2si_v2si
16705 = build_function_type_list (V4HI_type_node
,
16706 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16707 tree v4sf_ftype_v4sf_v4sf_int
16708 = build_function_type_list (V4SF_type_node
,
16709 V4SF_type_node
, V4SF_type_node
,
16710 integer_type_node
, NULL_TREE
);
16711 tree v2si_ftype_v4hi_v4hi
16712 = build_function_type_list (V2SI_type_node
,
16713 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16714 tree v4hi_ftype_v4hi_int
16715 = build_function_type_list (V4HI_type_node
,
16716 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16717 tree v4hi_ftype_v4hi_di
16718 = build_function_type_list (V4HI_type_node
,
16719 V4HI_type_node
, long_long_unsigned_type_node
,
16721 tree v2si_ftype_v2si_di
16722 = build_function_type_list (V2SI_type_node
,
16723 V2SI_type_node
, long_long_unsigned_type_node
,
16725 tree void_ftype_void
16726 = build_function_type (void_type_node
, void_list_node
);
16727 tree void_ftype_unsigned
16728 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16729 tree void_ftype_unsigned_unsigned
16730 = build_function_type_list (void_type_node
, unsigned_type_node
,
16731 unsigned_type_node
, NULL_TREE
);
16732 tree void_ftype_pcvoid_unsigned_unsigned
16733 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16734 unsigned_type_node
, unsigned_type_node
,
16736 tree unsigned_ftype_void
16737 = build_function_type (unsigned_type_node
, void_list_node
);
16738 tree v2si_ftype_v4sf
16739 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16740 /* Loads/stores. */
16741 tree void_ftype_v8qi_v8qi_pchar
16742 = build_function_type_list (void_type_node
,
16743 V8QI_type_node
, V8QI_type_node
,
16744 pchar_type_node
, NULL_TREE
);
16745 tree v4sf_ftype_pcfloat
16746 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16747 /* @@@ the type is bogus */
16748 tree v4sf_ftype_v4sf_pv2si
16749 = build_function_type_list (V4SF_type_node
,
16750 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16751 tree void_ftype_pv2si_v4sf
16752 = build_function_type_list (void_type_node
,
16753 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16754 tree void_ftype_pfloat_v4sf
16755 = build_function_type_list (void_type_node
,
16756 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16757 tree void_ftype_pdi_di
16758 = build_function_type_list (void_type_node
,
16759 pdi_type_node
, long_long_unsigned_type_node
,
16761 tree void_ftype_pv2di_v2di
16762 = build_function_type_list (void_type_node
,
16763 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16764 /* Normal vector unops. */
16765 tree v4sf_ftype_v4sf
16766 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16767 tree v16qi_ftype_v16qi
16768 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16769 tree v8hi_ftype_v8hi
16770 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16771 tree v4si_ftype_v4si
16772 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16773 tree v8qi_ftype_v8qi
16774 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16775 tree v4hi_ftype_v4hi
16776 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16778 /* Normal vector binops. */
16779 tree v4sf_ftype_v4sf_v4sf
16780 = build_function_type_list (V4SF_type_node
,
16781 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16782 tree v8qi_ftype_v8qi_v8qi
16783 = build_function_type_list (V8QI_type_node
,
16784 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16785 tree v4hi_ftype_v4hi_v4hi
16786 = build_function_type_list (V4HI_type_node
,
16787 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16788 tree v2si_ftype_v2si_v2si
16789 = build_function_type_list (V2SI_type_node
,
16790 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16791 tree di_ftype_di_di
16792 = build_function_type_list (long_long_unsigned_type_node
,
16793 long_long_unsigned_type_node
,
16794 long_long_unsigned_type_node
, NULL_TREE
);
16796 tree di_ftype_di_di_int
16797 = build_function_type_list (long_long_unsigned_type_node
,
16798 long_long_unsigned_type_node
,
16799 long_long_unsigned_type_node
,
16800 integer_type_node
, NULL_TREE
);
16802 tree v2si_ftype_v2sf
16803 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16804 tree v2sf_ftype_v2si
16805 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16806 tree v2si_ftype_v2si
16807 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16808 tree v2sf_ftype_v2sf
16809 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16810 tree v2sf_ftype_v2sf_v2sf
16811 = build_function_type_list (V2SF_type_node
,
16812 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16813 tree v2si_ftype_v2sf_v2sf
16814 = build_function_type_list (V2SI_type_node
,
16815 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16816 tree pint_type_node
= build_pointer_type (integer_type_node
);
16817 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16818 tree pcdouble_type_node
= build_pointer_type (
16819 build_type_variant (double_type_node
, 1, 0));
16820 tree int_ftype_v2df_v2df
16821 = build_function_type_list (integer_type_node
,
16822 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16824 tree void_ftype_pcvoid
16825 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16826 tree v4sf_ftype_v4si
16827 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16828 tree v4si_ftype_v4sf
16829 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16830 tree v2df_ftype_v4si
16831 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16832 tree v4si_ftype_v2df
16833 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16834 tree v2si_ftype_v2df
16835 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16836 tree v4sf_ftype_v2df
16837 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16838 tree v2df_ftype_v2si
16839 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16840 tree v2df_ftype_v4sf
16841 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16842 tree int_ftype_v2df
16843 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16844 tree int64_ftype_v2df
16845 = build_function_type_list (long_long_integer_type_node
,
16846 V2DF_type_node
, NULL_TREE
);
16847 tree v2df_ftype_v2df_int
16848 = build_function_type_list (V2DF_type_node
,
16849 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16850 tree v2df_ftype_v2df_int64
16851 = build_function_type_list (V2DF_type_node
,
16852 V2DF_type_node
, long_long_integer_type_node
,
16854 tree v4sf_ftype_v4sf_v2df
16855 = build_function_type_list (V4SF_type_node
,
16856 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16857 tree v2df_ftype_v2df_v4sf
16858 = build_function_type_list (V2DF_type_node
,
16859 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16860 tree v2df_ftype_v2df_v2df_int
16861 = build_function_type_list (V2DF_type_node
,
16862 V2DF_type_node
, V2DF_type_node
,
16865 tree v2df_ftype_v2df_pcdouble
16866 = build_function_type_list (V2DF_type_node
,
16867 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16868 tree void_ftype_pdouble_v2df
16869 = build_function_type_list (void_type_node
,
16870 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16871 tree void_ftype_pint_int
16872 = build_function_type_list (void_type_node
,
16873 pint_type_node
, integer_type_node
, NULL_TREE
);
16874 tree void_ftype_v16qi_v16qi_pchar
16875 = build_function_type_list (void_type_node
,
16876 V16QI_type_node
, V16QI_type_node
,
16877 pchar_type_node
, NULL_TREE
);
16878 tree v2df_ftype_pcdouble
16879 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16880 tree v2df_ftype_v2df_v2df
16881 = build_function_type_list (V2DF_type_node
,
16882 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16883 tree v16qi_ftype_v16qi_v16qi
16884 = build_function_type_list (V16QI_type_node
,
16885 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16886 tree v8hi_ftype_v8hi_v8hi
16887 = build_function_type_list (V8HI_type_node
,
16888 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16889 tree v4si_ftype_v4si_v4si
16890 = build_function_type_list (V4SI_type_node
,
16891 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16892 tree v2di_ftype_v2di_v2di
16893 = build_function_type_list (V2DI_type_node
,
16894 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16895 tree v2di_ftype_v2df_v2df
16896 = build_function_type_list (V2DI_type_node
,
16897 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16898 tree v2df_ftype_v2df
16899 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16900 tree v2di_ftype_v2di_int
16901 = build_function_type_list (V2DI_type_node
,
16902 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16903 tree v2di_ftype_v2di_v2di_int
16904 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16905 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16906 tree v4si_ftype_v4si_int
16907 = build_function_type_list (V4SI_type_node
,
16908 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16909 tree v8hi_ftype_v8hi_int
16910 = build_function_type_list (V8HI_type_node
,
16911 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16912 tree v8hi_ftype_v8hi_v2di
16913 = build_function_type_list (V8HI_type_node
,
16914 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
16915 tree v4si_ftype_v4si_v2di
16916 = build_function_type_list (V4SI_type_node
,
16917 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
16918 tree v4si_ftype_v8hi_v8hi
16919 = build_function_type_list (V4SI_type_node
,
16920 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16921 tree di_ftype_v8qi_v8qi
16922 = build_function_type_list (long_long_unsigned_type_node
,
16923 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16924 tree di_ftype_v2si_v2si
16925 = build_function_type_list (long_long_unsigned_type_node
,
16926 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16927 tree v2di_ftype_v16qi_v16qi
16928 = build_function_type_list (V2DI_type_node
,
16929 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16930 tree v2di_ftype_v4si_v4si
16931 = build_function_type_list (V2DI_type_node
,
16932 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16933 tree int_ftype_v16qi
16934 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16935 tree v16qi_ftype_pcchar
16936 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16937 tree void_ftype_pchar_v16qi
16938 = build_function_type_list (void_type_node
,
16939 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16941 tree v2di_ftype_v2di_unsigned_unsigned
16942 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16943 unsigned_type_node
, unsigned_type_node
,
16945 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16946 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
16947 unsigned_type_node
, unsigned_type_node
,
16949 tree v2di_ftype_v2di_v16qi
16950 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
16954 tree float128_type
;
16957 /* The __float80 type. */
16958 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16959 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16963 /* The __float80 type. */
16964 float80_type
= make_node (REAL_TYPE
);
16965 TYPE_PRECISION (float80_type
) = 80;
16966 layout_type (float80_type
);
16967 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16972 float128_type
= make_node (REAL_TYPE
);
16973 TYPE_PRECISION (float128_type
) = 128;
16974 layout_type (float128_type
);
16975 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16978 /* Add all builtins that are more or less simple operations on two
16980 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16982 /* Use one of the operands; the target can have a different mode for
16983 mask-generating compares. */
16984 enum machine_mode mode
;
16989 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16994 type
= v16qi_ftype_v16qi_v16qi
;
16997 type
= v8hi_ftype_v8hi_v8hi
;
17000 type
= v4si_ftype_v4si_v4si
;
17003 type
= v2di_ftype_v2di_v2di
;
17006 type
= v2df_ftype_v2df_v2df
;
17009 type
= v4sf_ftype_v4sf_v4sf
;
17012 type
= v8qi_ftype_v8qi_v8qi
;
17015 type
= v4hi_ftype_v4hi_v4hi
;
17018 type
= v2si_ftype_v2si_v2si
;
17021 type
= di_ftype_di_di
;
17025 gcc_unreachable ();
17028 /* Override for comparisons. */
17029 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17030 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17031 type
= v4si_ftype_v4sf_v4sf
;
17033 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17034 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17035 type
= v2di_ftype_v2df_v2df
;
17037 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17040 /* Add all builtins that are more or less simple operations on 1 operand. */
17041 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17043 enum machine_mode mode
;
17048 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17053 type
= v16qi_ftype_v16qi
;
17056 type
= v8hi_ftype_v8hi
;
17059 type
= v4si_ftype_v4si
;
17062 type
= v2df_ftype_v2df
;
17065 type
= v4sf_ftype_v4sf
;
17068 type
= v8qi_ftype_v8qi
;
17071 type
= v4hi_ftype_v4hi
;
17074 type
= v2si_ftype_v2si
;
17081 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17084 /* Add the remaining MMX insns with somewhat more complicated types. */
17085 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
17086 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
17087 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
17088 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
17090 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
17091 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
17092 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
17094 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17095 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17097 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17098 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17100 /* comi/ucomi insns. */
17101 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17102 if (d
->mask
== MASK_SSE2
)
17103 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17105 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17107 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17108 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17109 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17111 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17112 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17113 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17114 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17115 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17116 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17117 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17118 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17119 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17120 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17121 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17123 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17125 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17126 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17128 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17129 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17130 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17131 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17133 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17134 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17135 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17136 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17138 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17140 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17142 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17143 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17144 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17145 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17146 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17147 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17149 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17151 /* Original 3DNow! */
17152 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17153 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17154 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17155 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17156 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17157 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17158 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17159 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17160 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17161 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17162 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17163 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17164 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17165 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17166 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17167 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17168 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17169 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17170 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17171 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17173 /* 3DNow! extension as used in the Athlon CPU. */
17174 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17175 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17176 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17177 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17178 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17179 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17182 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17184 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17185 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17187 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17188 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17190 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17191 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17192 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17193 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17194 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17196 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17197 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17198 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17199 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17201 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17202 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17204 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17206 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17207 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17209 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17210 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17211 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17212 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17213 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17215 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17217 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17218 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17219 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17220 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17222 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17223 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17224 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17226 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17227 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17228 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17229 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17231 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17232 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17233 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17235 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17236 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17238 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17239 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17241 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17242 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17243 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17245 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17246 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17247 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17249 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17250 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17252 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17253 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17254 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17255 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17257 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17258 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17259 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17260 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17262 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17263 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17265 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17267 /* Prescott New Instructions. */
17268 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17269 void_ftype_pcvoid_unsigned_unsigned
,
17270 IX86_BUILTIN_MONITOR
);
17271 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17272 void_ftype_unsigned_unsigned
,
17273 IX86_BUILTIN_MWAIT
);
17274 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17275 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17278 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17279 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17280 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17281 IX86_BUILTIN_PALIGNR
);
17283 /* AMDFAM10 SSE4A New built-ins */
17284 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17285 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17286 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17287 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17288 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17289 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17290 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17291 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17292 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17293 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17294 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17295 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17297 /* Access to the vec_init patterns. */
17298 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17299 integer_type_node
, NULL_TREE
);
17300 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17301 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17303 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17304 short_integer_type_node
,
17305 short_integer_type_node
,
17306 short_integer_type_node
, NULL_TREE
);
17307 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17308 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17310 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17311 char_type_node
, char_type_node
,
17312 char_type_node
, char_type_node
,
17313 char_type_node
, char_type_node
,
17314 char_type_node
, NULL_TREE
);
17315 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17316 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17318 /* Access to the vec_extract patterns. */
17319 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17320 integer_type_node
, NULL_TREE
);
17321 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17322 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17324 ftype
= build_function_type_list (long_long_integer_type_node
,
17325 V2DI_type_node
, integer_type_node
,
17327 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17328 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17330 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17331 integer_type_node
, NULL_TREE
);
17332 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17333 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17335 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17336 integer_type_node
, NULL_TREE
);
17337 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17338 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17340 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17341 integer_type_node
, NULL_TREE
);
17342 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17343 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17345 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17346 integer_type_node
, NULL_TREE
);
17347 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17348 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17350 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17351 integer_type_node
, NULL_TREE
);
17352 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17353 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17355 /* Access to the vec_set patterns. */
17356 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17358 integer_type_node
, NULL_TREE
);
17359 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17360 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17362 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17364 integer_type_node
, NULL_TREE
);
17365 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17366 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17369 /* Errors in the source file can cause expand_expr to return const0_rtx
17370 where we expect a vector. To avoid crashing, use one of the vector
17371 clear instructions. */
17373 safe_vector_operand (rtx x
, enum machine_mode mode
)
17375 if (x
== const0_rtx
)
17376 x
= CONST0_RTX (mode
);
/* Subroutine of ix86_expand_builtin to take care of binop insns.

   ICODE is the insn pattern to emit, EXP the two-argument CALL_EXPR,
   and TARGET a suggested result register (may be NULL or of the wrong
   mode).  Emits the insn and returns the result rtx, or 0 if the
   pattern could not be generated.  */
static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  /* Modes the insn pattern wants for the result and the two inputs.  */
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Replace erroneous scalar zeros with zero vectors (see
     safe_vector_operand).  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Use TARGET only if it is a register of the right mode that the
     pattern accepts; otherwise grab a fresh pseudo.  */
  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* NOTE(review): an SImode operand where the insn wants TImode is
     loaded through sse2_loadd into a V4SI register and re-viewed as
     TImode — presumably for the shift-count builtins; confirm.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result (VOIDmode covers constants).  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      /* All modes match: let the common fixup canonicalize the
	 operands (it may legitimize memory/commutativity).  */
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      /* Mismatched modes: force everything into registers.  */
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
17448 /* Subroutine of ix86_expand_builtin to take care of stores. */
17451 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17454 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17455 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17456 rtx op0
= expand_normal (arg0
);
17457 rtx op1
= expand_normal (arg1
);
17458 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17459 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17461 if (VECTOR_MODE_P (mode1
))
17462 op1
= safe_vector_operand (op1
, mode1
);
17464 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17465 op1
= copy_to_mode_reg (mode1
, op1
);
17467 pat
= GEN_FCN (icode
) (op0
, op1
);
17473 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17476 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17477 rtx target
, int do_load
)
17480 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17481 rtx op0
= expand_normal (arg0
);
17482 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17483 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17485 if (optimize
|| !target
17486 || GET_MODE (target
) != tmode
17487 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17488 target
= gen_reg_rtx (tmode
);
17490 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17493 if (VECTOR_MODE_P (mode0
))
17494 op0
= safe_vector_operand (op0
, mode0
);
17496 if ((optimize
&& !register_operand (op0
, mode0
))
17497 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17498 op0
= copy_to_mode_reg (mode0
, op0
);
17501 pat
= GEN_FCN (icode
) (target
, op0
);
17508 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17509 sqrtss, rsqrtss, rcpss. */
17512 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17515 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17516 rtx op1
, op0
= expand_normal (arg0
);
17517 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17518 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17520 if (optimize
|| !target
17521 || GET_MODE (target
) != tmode
17522 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17523 target
= gen_reg_rtx (tmode
);
17525 if (VECTOR_MODE_P (mode0
))
17526 op0
= safe_vector_operand (op0
, mode0
);
17528 if ((optimize
&& !register_operand (op0
, mode0
))
17529 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17530 op0
= copy_to_mode_reg (mode0
, op0
);
17533 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17534 op1
= copy_to_mode_reg (mode0
, op1
);
17536 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* Subroutine of ix86_expand_builtin to take care of comparison insns.

   D describes the builtin (icode, rtx comparison code, flags); EXP is
   the two-argument CALL_EXPR; TARGET a suggested result register.
   Returns the result rtx, or 0 on pattern failure.  */
static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
			 rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Replace erroneous scalar zeros with zero vectors.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  The copy into a fresh register keeps op1 intact in
     case op0 and op1 share rtx structure.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  /* Use TARGET only if it is a register of the right mode that the
     pattern accepts.  */
  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the pattern is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.

   Emits the flag-setting comparison, then materializes the predicate
   as a 0/1 value via a setcc into the low byte of a zeroed SImode
   pseudo.  Returns the SImode register holding the result.  */
static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  /* Replace erroneous scalar zeros with zero vectors.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Zero an SImode pseudo and take a QImode SUBREG of it, so the
     STRICT_LOW_PART store below only writes the low byte and the
     upper bytes stay zero.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* NOTE(review): op2 is computed but never used — looks like dead
     code left over from a refactor; confirm before removing.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* setcc of the comparison against the flags register the comi
     pattern just set (SET_DEST (pat)).  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
17650 /* Return the integer constant in ARG. Constrain it to be in the range
17651 of the subparts of VEC_TYPE; issue an error if not. */
17654 get_element_number (tree vec_type
, tree arg
)
17656 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17658 if (!host_integerp (arg
, 1)
17659 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17661 error ("selector must be an integer constant in the range 0..%wi", max
);
17668 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17669 ix86_expand_vector_init. We DO have language-level syntax for this, in
17670 the form of (type){ init-list }. Except that since we can't place emms
17671 instructions from inside the compiler, we can't allow the use of MMX
17672 registers unless the user explicitly asks for it. So we do *not* define
17673 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17674 we have builtins invoked by mmintrin.h that gives us license to emit
17675 these sorts of instructions. */
17678 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17680 enum machine_mode tmode
= TYPE_MODE (type
);
17681 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17682 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17683 rtvec v
= rtvec_alloc (n_elt
);
17685 gcc_assert (VECTOR_MODE_P (tmode
));
17686 gcc_assert (call_expr_nargs (exp
) == n_elt
);
17688 for (i
= 0; i
< n_elt
; ++i
)
17690 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17691 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17694 if (!target
|| !register_operand (target
, tmode
))
17695 target
= gen_reg_rtx (tmode
);
17697 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17701 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17702 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17703 had a language-level syntax for referencing vector elements. */
17706 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17708 enum machine_mode tmode
, mode0
;
17713 arg0
= CALL_EXPR_ARG (exp
, 0);
17714 arg1
= CALL_EXPR_ARG (exp
, 1);
17716 op0
= expand_normal (arg0
);
17717 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17719 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17720 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17721 gcc_assert (VECTOR_MODE_P (mode0
));
17723 op0
= force_reg (mode0
, op0
);
17725 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17726 target
= gen_reg_rtx (tmode
);
17728 ix86_expand_vector_extract (true, target
, op0
, elt
);
17733 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17734 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17735 a language-level syntax for referencing vector elements. */
17738 ix86_expand_vec_set_builtin (tree exp
)
17740 enum machine_mode tmode
, mode1
;
17741 tree arg0
, arg1
, arg2
;
17745 arg0
= CALL_EXPR_ARG (exp
, 0);
17746 arg1
= CALL_EXPR_ARG (exp
, 1);
17747 arg2
= CALL_EXPR_ARG (exp
, 2);
17749 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17750 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17751 gcc_assert (VECTOR_MODE_P (tmode
));
17753 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17754 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17755 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17757 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17758 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17760 op0
= force_reg (tmode
, op0
);
17761 op1
= force_reg (mode1
, op1
);
17763 ix86_expand_vector_set (true, op0
, op1
, elt
);
17768 /* Expand an expression EXP that calls a built-in function,
17769 with result going to TARGET if that's convenient
17770 (and in mode MODE if that's convenient).
17771 SUBTARGET may be used as the target for computing one of EXP's operands.
17772 IGNORE is nonzero if the value is to be ignored. */
17775 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17776 enum machine_mode mode ATTRIBUTE_UNUSED
,
17777 int ignore ATTRIBUTE_UNUSED
)
17779 const struct builtin_description
*d
;
17781 enum insn_code icode
;
17782 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17783 tree arg0
, arg1
, arg2
, arg3
;
17784 rtx op0
, op1
, op2
, op3
, pat
;
17785 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17786 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17790 case IX86_BUILTIN_EMMS
:
17791 emit_insn (gen_mmx_emms ());
17794 case IX86_BUILTIN_SFENCE
:
17795 emit_insn (gen_sse_sfence ());
17798 case IX86_BUILTIN_MASKMOVQ
:
17799 case IX86_BUILTIN_MASKMOVDQU
:
17800 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17801 ? CODE_FOR_mmx_maskmovq
17802 : CODE_FOR_sse2_maskmovdqu
);
17803 /* Note the arg order is different from the operand order. */
17804 arg1
= CALL_EXPR_ARG (exp
, 0);
17805 arg2
= CALL_EXPR_ARG (exp
, 1);
17806 arg0
= CALL_EXPR_ARG (exp
, 2);
17807 op0
= expand_normal (arg0
);
17808 op1
= expand_normal (arg1
);
17809 op2
= expand_normal (arg2
);
17810 mode0
= insn_data
[icode
].operand
[0].mode
;
17811 mode1
= insn_data
[icode
].operand
[1].mode
;
17812 mode2
= insn_data
[icode
].operand
[2].mode
;
17814 op0
= force_reg (Pmode
, op0
);
17815 op0
= gen_rtx_MEM (mode1
, op0
);
17817 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17818 op0
= copy_to_mode_reg (mode0
, op0
);
17819 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17820 op1
= copy_to_mode_reg (mode1
, op1
);
17821 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17822 op2
= copy_to_mode_reg (mode2
, op2
);
17823 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17829 case IX86_BUILTIN_SQRTSS
:
17830 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17831 case IX86_BUILTIN_RSQRTSS
:
17832 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17833 case IX86_BUILTIN_RCPSS
:
17834 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17836 case IX86_BUILTIN_LOADUPS
:
17837 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17839 case IX86_BUILTIN_STOREUPS
:
17840 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17842 case IX86_BUILTIN_LOADHPS
:
17843 case IX86_BUILTIN_LOADLPS
:
17844 case IX86_BUILTIN_LOADHPD
:
17845 case IX86_BUILTIN_LOADLPD
:
17846 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17847 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17848 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17849 : CODE_FOR_sse2_loadlpd
);
17850 arg0
= CALL_EXPR_ARG (exp
, 0);
17851 arg1
= CALL_EXPR_ARG (exp
, 1);
17852 op0
= expand_normal (arg0
);
17853 op1
= expand_normal (arg1
);
17854 tmode
= insn_data
[icode
].operand
[0].mode
;
17855 mode0
= insn_data
[icode
].operand
[1].mode
;
17856 mode1
= insn_data
[icode
].operand
[2].mode
;
17858 op0
= force_reg (mode0
, op0
);
17859 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17860 if (optimize
|| target
== 0
17861 || GET_MODE (target
) != tmode
17862 || !register_operand (target
, tmode
))
17863 target
= gen_reg_rtx (tmode
);
17864 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17870 case IX86_BUILTIN_STOREHPS
:
17871 case IX86_BUILTIN_STORELPS
:
17872 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17873 : CODE_FOR_sse_storelps
);
17874 arg0
= CALL_EXPR_ARG (exp
, 0);
17875 arg1
= CALL_EXPR_ARG (exp
, 1);
17876 op0
= expand_normal (arg0
);
17877 op1
= expand_normal (arg1
);
17878 mode0
= insn_data
[icode
].operand
[0].mode
;
17879 mode1
= insn_data
[icode
].operand
[1].mode
;
17881 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17882 op1
= force_reg (mode1
, op1
);
17884 pat
= GEN_FCN (icode
) (op0
, op1
);
17890 case IX86_BUILTIN_MOVNTPS
:
17891 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
17892 case IX86_BUILTIN_MOVNTQ
:
17893 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
17895 case IX86_BUILTIN_LDMXCSR
:
17896 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
17897 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17898 emit_move_insn (target
, op0
);
17899 emit_insn (gen_sse_ldmxcsr (target
));
17902 case IX86_BUILTIN_STMXCSR
:
17903 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17904 emit_insn (gen_sse_stmxcsr (target
));
17905 return copy_to_mode_reg (SImode
, target
);
17907 case IX86_BUILTIN_SHUFPS
:
17908 case IX86_BUILTIN_SHUFPD
:
17909 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17910 ? CODE_FOR_sse_shufps
17911 : CODE_FOR_sse2_shufpd
);
17912 arg0
= CALL_EXPR_ARG (exp
, 0);
17913 arg1
= CALL_EXPR_ARG (exp
, 1);
17914 arg2
= CALL_EXPR_ARG (exp
, 2);
17915 op0
= expand_normal (arg0
);
17916 op1
= expand_normal (arg1
);
17917 op2
= expand_normal (arg2
);
17918 tmode
= insn_data
[icode
].operand
[0].mode
;
17919 mode0
= insn_data
[icode
].operand
[1].mode
;
17920 mode1
= insn_data
[icode
].operand
[2].mode
;
17921 mode2
= insn_data
[icode
].operand
[3].mode
;
17923 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17924 op0
= copy_to_mode_reg (mode0
, op0
);
17925 if ((optimize
&& !register_operand (op1
, mode1
))
17926 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17927 op1
= copy_to_mode_reg (mode1
, op1
);
17928 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17930 /* @@@ better error message */
17931 error ("mask must be an immediate");
17932 return gen_reg_rtx (tmode
);
17934 if (optimize
|| target
== 0
17935 || GET_MODE (target
) != tmode
17936 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17937 target
= gen_reg_rtx (tmode
);
17938 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17944 case IX86_BUILTIN_PSHUFW
:
17945 case IX86_BUILTIN_PSHUFD
:
17946 case IX86_BUILTIN_PSHUFHW
:
17947 case IX86_BUILTIN_PSHUFLW
:
17948 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17949 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17950 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17951 : CODE_FOR_mmx_pshufw
);
17952 arg0
= CALL_EXPR_ARG (exp
, 0);
17953 arg1
= CALL_EXPR_ARG (exp
, 1);
17954 op0
= expand_normal (arg0
);
17955 op1
= expand_normal (arg1
);
17956 tmode
= insn_data
[icode
].operand
[0].mode
;
17957 mode1
= insn_data
[icode
].operand
[1].mode
;
17958 mode2
= insn_data
[icode
].operand
[2].mode
;
17960 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17961 op0
= copy_to_mode_reg (mode1
, op0
);
17962 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17964 /* @@@ better error message */
17965 error ("mask must be an immediate");
17969 || GET_MODE (target
) != tmode
17970 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17971 target
= gen_reg_rtx (tmode
);
17972 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17978 case IX86_BUILTIN_PSLLDQI128
:
17979 case IX86_BUILTIN_PSRLDQI128
:
17980 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17981 : CODE_FOR_sse2_lshrti3
);
17982 arg0
= CALL_EXPR_ARG (exp
, 0);
17983 arg1
= CALL_EXPR_ARG (exp
, 1);
17984 op0
= expand_normal (arg0
);
17985 op1
= expand_normal (arg1
);
17986 tmode
= insn_data
[icode
].operand
[0].mode
;
17987 mode1
= insn_data
[icode
].operand
[1].mode
;
17988 mode2
= insn_data
[icode
].operand
[2].mode
;
17990 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17992 op0
= copy_to_reg (op0
);
17993 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17995 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17997 error ("shift must be an immediate");
18000 target
= gen_reg_rtx (V2DImode
);
18001 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
18007 case IX86_BUILTIN_FEMMS
:
18008 emit_insn (gen_mmx_femms ());
18011 case IX86_BUILTIN_PAVGUSB
:
18012 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
18014 case IX86_BUILTIN_PF2ID
:
18015 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
18017 case IX86_BUILTIN_PFACC
:
18018 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
18020 case IX86_BUILTIN_PFADD
:
18021 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
18023 case IX86_BUILTIN_PFCMPEQ
:
18024 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
18026 case IX86_BUILTIN_PFCMPGE
:
18027 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
18029 case IX86_BUILTIN_PFCMPGT
:
18030 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
18032 case IX86_BUILTIN_PFMAX
:
18033 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
18035 case IX86_BUILTIN_PFMIN
:
18036 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
18038 case IX86_BUILTIN_PFMUL
:
18039 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
18041 case IX86_BUILTIN_PFRCP
:
18042 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
18044 case IX86_BUILTIN_PFRCPIT1
:
18045 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
18047 case IX86_BUILTIN_PFRCPIT2
:
18048 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
18050 case IX86_BUILTIN_PFRSQIT1
:
18051 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
18053 case IX86_BUILTIN_PFRSQRT
:
18054 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
18056 case IX86_BUILTIN_PFSUB
:
18057 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
18059 case IX86_BUILTIN_PFSUBR
:
18060 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
18062 case IX86_BUILTIN_PI2FD
:
18063 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
18065 case IX86_BUILTIN_PMULHRW
:
18066 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
18068 case IX86_BUILTIN_PF2IW
:
18069 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18071 case IX86_BUILTIN_PFNACC
:
18072 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18074 case IX86_BUILTIN_PFPNACC
:
18075 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18077 case IX86_BUILTIN_PI2FW
:
18078 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18080 case IX86_BUILTIN_PSWAPDSI
:
18081 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18083 case IX86_BUILTIN_PSWAPDSF
:
18084 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18086 case IX86_BUILTIN_SQRTSD
:
18087 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18088 case IX86_BUILTIN_LOADUPD
:
18089 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18090 case IX86_BUILTIN_STOREUPD
:
18091 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18093 case IX86_BUILTIN_MFENCE
:
18094 emit_insn (gen_sse2_mfence ());
18096 case IX86_BUILTIN_LFENCE
:
18097 emit_insn (gen_sse2_lfence ());
18100 case IX86_BUILTIN_CLFLUSH
:
18101 arg0
= CALL_EXPR_ARG (exp
, 0);
18102 op0
= expand_normal (arg0
);
18103 icode
= CODE_FOR_sse2_clflush
;
18104 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18105 op0
= copy_to_mode_reg (Pmode
, op0
);
18107 emit_insn (gen_sse2_clflush (op0
));
18110 case IX86_BUILTIN_MOVNTPD
:
18111 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18112 case IX86_BUILTIN_MOVNTDQ
:
18113 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18114 case IX86_BUILTIN_MOVNTI
:
18115 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18117 case IX86_BUILTIN_LOADDQU
:
18118 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18119 case IX86_BUILTIN_STOREDQU
:
18120 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18122 case IX86_BUILTIN_MONITOR
:
18123 arg0
= CALL_EXPR_ARG (exp
, 0);
18124 arg1
= CALL_EXPR_ARG (exp
, 1);
18125 arg2
= CALL_EXPR_ARG (exp
, 2);
18126 op0
= expand_normal (arg0
);
18127 op1
= expand_normal (arg1
);
18128 op2
= expand_normal (arg2
);
18130 op0
= copy_to_mode_reg (Pmode
, op0
);
18132 op1
= copy_to_mode_reg (SImode
, op1
);
18134 op2
= copy_to_mode_reg (SImode
, op2
);
18136 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18138 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18141 case IX86_BUILTIN_MWAIT
:
18142 arg0
= CALL_EXPR_ARG (exp
, 0);
18143 arg1
= CALL_EXPR_ARG (exp
, 1);
18144 op0
= expand_normal (arg0
);
18145 op1
= expand_normal (arg1
);
18147 op0
= copy_to_mode_reg (SImode
, op0
);
18149 op1
= copy_to_mode_reg (SImode
, op1
);
18150 emit_insn (gen_sse3_mwait (op0
, op1
));
18153 case IX86_BUILTIN_LDDQU
:
18154 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18157 case IX86_BUILTIN_PALIGNR
:
18158 case IX86_BUILTIN_PALIGNR128
:
18159 if (fcode
== IX86_BUILTIN_PALIGNR
)
18161 icode
= CODE_FOR_ssse3_palignrdi
;
18166 icode
= CODE_FOR_ssse3_palignrti
;
18169 arg0
= CALL_EXPR_ARG (exp
, 0);
18170 arg1
= CALL_EXPR_ARG (exp
, 1);
18171 arg2
= CALL_EXPR_ARG (exp
, 2);
18172 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18173 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18174 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18175 tmode
= insn_data
[icode
].operand
[0].mode
;
18176 mode1
= insn_data
[icode
].operand
[1].mode
;
18177 mode2
= insn_data
[icode
].operand
[2].mode
;
18178 mode3
= insn_data
[icode
].operand
[3].mode
;
18180 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18182 op0
= copy_to_reg (op0
);
18183 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18185 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18187 op1
= copy_to_reg (op1
);
18188 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18190 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18192 error ("shift must be an immediate");
18195 target
= gen_reg_rtx (mode
);
18196 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18203 case IX86_BUILTIN_MOVNTSD
:
18204 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18206 case IX86_BUILTIN_MOVNTSS
:
18207 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18209 case IX86_BUILTIN_INSERTQ
:
18210 case IX86_BUILTIN_EXTRQ
:
18211 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18212 ? CODE_FOR_sse4a_extrq
18213 : CODE_FOR_sse4a_insertq
);
18214 arg0
= CALL_EXPR_ARG (exp
, 0);
18215 arg1
= CALL_EXPR_ARG (exp
, 1);
18216 op0
= expand_normal (arg0
);
18217 op1
= expand_normal (arg1
);
18218 tmode
= insn_data
[icode
].operand
[0].mode
;
18219 mode1
= insn_data
[icode
].operand
[1].mode
;
18220 mode2
= insn_data
[icode
].operand
[2].mode
;
18221 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18222 op0
= copy_to_mode_reg (mode1
, op0
);
18223 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18224 op1
= copy_to_mode_reg (mode2
, op1
);
18225 if (optimize
|| target
== 0
18226 || GET_MODE (target
) != tmode
18227 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18228 target
= gen_reg_rtx (tmode
);
18229 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18235 case IX86_BUILTIN_EXTRQI
:
18236 icode
= CODE_FOR_sse4a_extrqi
;
18237 arg0
= CALL_EXPR_ARG (exp
, 0);
18238 arg1
= CALL_EXPR_ARG (exp
, 1);
18239 arg2
= CALL_EXPR_ARG (exp
, 2);
18240 op0
= expand_normal (arg0
);
18241 op1
= expand_normal (arg1
);
18242 op2
= expand_normal (arg2
);
18243 tmode
= insn_data
[icode
].operand
[0].mode
;
18244 mode1
= insn_data
[icode
].operand
[1].mode
;
18245 mode2
= insn_data
[icode
].operand
[2].mode
;
18246 mode3
= insn_data
[icode
].operand
[3].mode
;
18247 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18248 op0
= copy_to_mode_reg (mode1
, op0
);
18249 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18251 error ("index mask must be an immediate");
18252 return gen_reg_rtx (tmode
);
18254 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18256 error ("length mask must be an immediate");
18257 return gen_reg_rtx (tmode
);
18259 if (optimize
|| target
== 0
18260 || GET_MODE (target
) != tmode
18261 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18262 target
= gen_reg_rtx (tmode
);
18263 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18269 case IX86_BUILTIN_INSERTQI
:
18270 icode
= CODE_FOR_sse4a_insertqi
;
18271 arg0
= CALL_EXPR_ARG (exp
, 0);
18272 arg1
= CALL_EXPR_ARG (exp
, 1);
18273 arg2
= CALL_EXPR_ARG (exp
, 2);
18274 arg3
= CALL_EXPR_ARG (exp
, 3);
18275 op0
= expand_normal (arg0
);
18276 op1
= expand_normal (arg1
);
18277 op2
= expand_normal (arg2
);
18278 op3
= expand_normal (arg3
);
18279 tmode
= insn_data
[icode
].operand
[0].mode
;
18280 mode1
= insn_data
[icode
].operand
[1].mode
;
18281 mode2
= insn_data
[icode
].operand
[2].mode
;
18282 mode3
= insn_data
[icode
].operand
[3].mode
;
18283 mode4
= insn_data
[icode
].operand
[4].mode
;
18285 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18286 op0
= copy_to_mode_reg (mode1
, op0
);
18288 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18289 op1
= copy_to_mode_reg (mode2
, op1
);
18291 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18293 error ("index mask must be an immediate");
18294 return gen_reg_rtx (tmode
);
18296 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18298 error ("length mask must be an immediate");
18299 return gen_reg_rtx (tmode
);
18301 if (optimize
|| target
== 0
18302 || GET_MODE (target
) != tmode
18303 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18304 target
= gen_reg_rtx (tmode
);
18305 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18311 case IX86_BUILTIN_VEC_INIT_V2SI
:
18312 case IX86_BUILTIN_VEC_INIT_V4HI
:
18313 case IX86_BUILTIN_VEC_INIT_V8QI
:
18314 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18316 case IX86_BUILTIN_VEC_EXT_V2DF
:
18317 case IX86_BUILTIN_VEC_EXT_V2DI
:
18318 case IX86_BUILTIN_VEC_EXT_V4SF
:
18319 case IX86_BUILTIN_VEC_EXT_V4SI
:
18320 case IX86_BUILTIN_VEC_EXT_V8HI
:
18321 case IX86_BUILTIN_VEC_EXT_V2SI
:
18322 case IX86_BUILTIN_VEC_EXT_V4HI
:
18323 return ix86_expand_vec_ext_builtin (exp
, target
);
18325 case IX86_BUILTIN_VEC_SET_V8HI
:
18326 case IX86_BUILTIN_VEC_SET_V4HI
:
18327 return ix86_expand_vec_set_builtin (exp
);
18333 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18334 if (d
->code
== fcode
)
18336 /* Compares are treated specially. */
18337 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18338 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18339 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18340 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18341 return ix86_expand_sse_compare (d
, exp
, target
);
18343 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18346 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18347 if (d
->code
== fcode
)
18348 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18350 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18351 if (d
->code
== fcode
)
18352 return ix86_expand_sse_comi (d
, exp
, target
);
18354 gcc_unreachable ();
/* NOTE(review): extraction-mangled fragment.  The fused numbers (18357,
   18362, ...) are original file line numbers; gaps in them show that interior
   lines are missing here (e.g. the `tree type_in' parameter, the declarations
   of out_n/in_n, the `switch (fn)' head, each `break;', the default case and
   the trailing `return NULL_TREE;').  Only comments are added; recover the
   missing text from upstream GCC i386.c before relying on this code.  */
18357 /* Returns a function decl for a vectorized version of the builtin function
18358 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18359 if it is not available. */
18362 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18365 enum machine_mode in_mode
, out_mode
;
/* Both types must be vectors; presumably the function otherwise returns
   NULL_TREE -- the return line is missing from this fragment.  */
18368 if (TREE_CODE (type_out
) != VECTOR_TYPE
18369 || TREE_CODE (type_in
) != VECTOR_TYPE
)
/* Element mode and lane count of the result (out_*) and argument (in_*)
   vector types.  */
18372 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18373 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18374 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18375 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
/* sqrt on 2 doubles maps to SSE2 sqrtpd.  */
18379 case BUILT_IN_SQRT
:
18380 if (out_mode
== DFmode
&& out_n
== 2
18381 && in_mode
== DFmode
&& in_n
== 2)
18382 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
/* sqrtf on 4 floats maps to SSE sqrtps.  */
18385 case BUILT_IN_SQRTF
:
18386 if (out_mode
== SFmode
&& out_n
== 4
18387 && in_mode
== SFmode
&& in_n
== 4)
18388 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
/* lrintf on 4 floats maps to SSE2 cvtps2dq (rounding f32 lanes to i32).  */
18391 case BUILT_IN_LRINTF
:
18392 if (out_mode
== SImode
&& out_n
== 4
18393 && in_mode
== SFmode
&& in_n
== 4)
18394 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
/* NOTE(review): extraction-mangled fragment -- the outer `switch (code)',
   the vector-mode case labels, the `default:' branches and the return type
   of the function are missing from view (fused original line numbers jump
   18410 -> 18416 -> 18419 etc.).  Comments only; do not treat this text as
   compilable.  */
18404 /* Returns a decl of a function that implements conversion of the
18405 input vector of type TYPE, or NULL_TREE if it is not available. */
18408 ix86_builtin_conversion (enum tree_code code
, tree type
)
18410 if (TREE_CODE (type
) != VECTOR_TYPE
)
/* Presumably the FLOAT_EXPR arm: int vector -> float vector via cvtdq2ps.
   The case label itself is missing here -- confirm against upstream.  */
18416 switch (TYPE_MODE (type
))
18419 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
/* Truncating float->int conversion maps to SSE2 cvttps2dq.  */
18424 case FIX_TRUNC_EXPR
:
18425 switch (TYPE_MODE (type
))
18428 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
/* NOTE(review): extraction-mangled fragment.  The emit_insn wrappers around
   the gen_rtx_SET calls, the switch over GET_MODE (operand), several case
   labels, the `operands' array declaration and the final `return result;'
   are all missing (original lines 18442-18444, 18455-18459, 18461-18463,
   18474-18481, etc.).  Comments only.  */
18438 /* Store OPERAND to the memory after reload is completed. This means
18439 that we can't easily use assign_stack_local. */
18441 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
/* Only legal after reload: we manipulate the hard stack pointer directly.  */
18445 gcc_assert (reload_completed
);
/* With a red zone we can store below the stack pointer without adjusting
   it: address is sp - RED_ZONE_SIZE (the PLUS operand sp itself is on a
   missing line).  */
18446 if (TARGET_RED_ZONE
)
18448 result
= gen_rtx_MEM (mode
,
18449 gen_rtx_PLUS (Pmode
,
18451 GEN_INT (-RED_ZONE_SIZE
)));
18452 emit_move_insn (result
, operand
);
/* 64-bit without red zone: push the value as DImode via PRE_DEC of the
   stack pointer.  */
18454 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18460 operand
= gen_lowpart (DImode
, operand
);
18464 gen_rtx_SET (VOIDmode
,
18465 gen_rtx_MEM (DImode
,
18466 gen_rtx_PRE_DEC (DImode
,
18467 stack_pointer_rtx
)),
18471 gcc_unreachable ();
18473 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* 32-bit path (case labels missing): DImode operands are split into two
   SImode halves and pushed as two stores.  */
18482 split_di (&operand
, 1, operands
, operands
+ 1);
18484 gen_rtx_SET (VOIDmode
,
18485 gen_rtx_MEM (SImode
,
18486 gen_rtx_PRE_DEC (Pmode
,
18487 stack_pointer_rtx
)),
18490 gen_rtx_SET (VOIDmode
,
18491 gen_rtx_MEM (SImode
,
18492 gen_rtx_PRE_DEC (Pmode
,
18493 stack_pointer_rtx
)),
18498 /* Store HImodes as SImodes. */
18499 operand
= gen_lowpart (SImode
, operand
);
18503 gen_rtx_SET (VOIDmode
,
18504 gen_rtx_MEM (GET_MODE (operand
),
18505 gen_rtx_PRE_DEC (SImode
,
18506 stack_pointer_rtx
)),
18510 gcc_unreachable ();
/* Result is a MEM at the (now decremented) stack pointer; pair with
   ix86_free_from_memory to pop the slot.  */
18512 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* NOTE(review): extraction-mangled fragment -- the size computation for the
   deallocation (lines 18522-18528) and the GEN_INT offset/closing parens of
   the emit_insn are missing.  Comments only.  Pops the slot that
   ix86_force_to_memory pushed; with a red zone nothing was pushed, hence
   the !TARGET_RED_ZONE guard.  */
18517 /* Free operand from the memory. */
18519 ix86_free_from_memory (enum machine_mode mode
)
18521 if (!TARGET_RED_ZONE
)
/* Slot size selection: DImode (or any mode on 64-bit) presumably uses 8
   bytes, otherwise 4 -- the assignments are on missing lines; confirm.  */
18525 if (mode
== DImode
|| TARGET_64BIT
)
18529 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18530 to pop or add instruction if registers are available. */
18531 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18532 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* NOTE(review): extraction-mangled fragment.  Several returns are missing
   (e.g. the NO_REGS return after the NO_REGS test, the FLOAT_REGS /
   FP_TOP_REG returns inside the CONST_DOUBLE branch, the trailing `return
   class;') along with the CONSTANT_P condition on original line 18558.
   Comments only; pre-IRA GCC still uses `class' as an identifier, so this
   is C89-style target code, not C++.  */
18537 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18538 QImode must go into class Q_REGS.
18539 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18540 movdf to do mem-to-mem moves through integer regs. */
18542 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18544 enum machine_mode mode
= GET_MODE (x
);
18546 /* We're only allowed to return a subclass of CLASS. Many of the
18547 following checks fail for NO_REGS, so eliminate that early. */
18548 if (class == NO_REGS
)
18551 /* All classes can load zeros. */
18552 if (x
== CONST0_RTX (mode
))
18555 /* Force constants into memory if we are loading a (nonzero) constant into
18556 an MMX or SSE register. This is because there are no MMX/SSE instructions
18557 to load from a constant. */
18559 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18562 /* Prefer SSE regs only, if we can use them for math. */
18563 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18564 return SSE_CLASS_P (class) ? class : NO_REGS
;
18566 /* Floating-point constants need more complex checks. */
18567 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18569 /* General regs can load everything. */
18570 if (reg_class_subset_p (class, GENERAL_REGS
))
18573 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18574 zero above. We only want to wind up preferring 80387 registers if
18575 we plan on doing computation with them. */
18577 && standard_80387_constant_p (x
))
18579 /* Limit class to non-sse. */
18580 if (class == FLOAT_SSE_REGS
)
18582 if (class == FP_TOP_SSE_REGS
)
18584 if (class == FP_SECOND_SSE_REGS
)
18585 return FP_SECOND_REG
;
18586 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18593 /* Generally when we see PLUS here, it's the function invariant
18594 (plus soft-fp const_int). Which can only be computed into general
18596 if (GET_CODE (x
) == PLUS
)
18597 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18599 /* QImode constants are easy to load, but non-constant QImode data
18600 must go into Q_REGS. */
18601 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
/* The two subset tests presumably return class resp. Q_REGS -- the return
   lines are missing from this fragment.  */
18603 if (reg_class_subset_p (class, Q_REGS
))
18605 if (reg_class_subset_p (Q_REGS
, class))
/* NOTE(review): extraction-mangled fragment -- the opening brace, the
   `if (class == FP_TOP_SSE_REGS)' return value (FP_TOP_REG) and the final
   `return class;' are on missing lines.  Comments only.  Implements
   PREFERRED_OUTPUT_RELOAD_CLASS: narrow CLASS to the register bank the
   target actually does FP math in.  */
18613 /* Discourage putting floating-point values in SSE registers unless
18614 SSE math is being used, and likewise for the 387 registers. */
18616 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18618 enum machine_mode mode
= GET_MODE (x
);
18620 /* Restrict the output reload class to the register bank that we are doing
18621 math on. If we would like not to return a subset of CLASS, reject this
18622 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): this re-reads GET_MODE (x) into `mode', duplicating the
   initializer above -- redundant but harmless; present in upstream too.  */
18623 mode
= GET_MODE (x
);
18624 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18625 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18627 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18629 if (class == FP_TOP_SSE_REGS
)
18631 else if (class == FP_SECOND_SSE_REGS
)
18632 return FP_SECOND_REG
;
18634 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
/* NOTE(review): extraction-mangled fragment.  The return type, opening
   brace and the `return true;'/`return false;' bodies after each test are
   missing (fused line numbers jump 18662 -> 18666 -> 18669 ...).  Comments
   only.  Implements SECONDARY_MEMORY_NEEDED: true when a CLASS1 -> CLASS2
   copy of MODE must bounce through memory.  */
18640 /* If we are copying between general and FP registers, we need a memory
18641 location. The same is true for SSE and MMX registers.
18643 The macro can't work reliably when one of the CLASSES is class containing
18644 registers from multiple units (SSE, MMX, integer). We avoid this by never
18645 combining those units in single alternative in the machine description.
18646 Ensure that this constraint holds to avoid unexpected surprises.
18648 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18649 enforce these sanity checks. */
18652 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18653 enum machine_mode mode
, int strict
)
/* Sanity check: a class must not straddle units (float vs. SSE vs. MMX);
   MAYBE_*_P differing from *_P means the class mixes units.  */
18655 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18656 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18657 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18658 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18659 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18660 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
/* Mixed-unit classes are only tolerated in the non-strict
   (REGISTER_MOVE_COST) call path.  */
18662 gcc_assert (!strict
);
18666 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18669 /* ??? This is a lie. We do have moves between mmx/general, and for
18670 mmx/sse2. But by saying we need secondary memory we discourage the
18671 register allocator from using the mmx registers unless needed. */
18672 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18675 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18677 /* SSE1 doesn't have any direct moves from other classes. */
18681 /* If the target says that inter-unit moves are more expensive
18682 than moving through memory, then don't generate them. */
18683 if (!TARGET_INTER_UNIT_MOVES
)
18686 /* Between SSE and general, we have moves no larger than word size. */
18687 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
/* NOTE(review): extraction-mangled fragment -- the return type, the size
   equality guard on original line 18701 and the `return true/false' lines
   are missing.  Comments only.  Implements CANNOT_CHANGE_MODE_CLASS.  */
18694 /* Return true if the registers in CLASS cannot represent the change from
18695 modes FROM to TO. */
18698 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18699 enum reg_class
class)
18704 /* x87 registers can't do subreg at all, as all values are reformatted
18705 to extended precision. */
18706 if (MAYBE_FLOAT_CLASS_P (class))
18709 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18711 /* Vector registers do not support QI or HImode loads. If we don't
18712 disallow a change to these modes, reload will assume it's ok to
18713 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18714 the vec_dupv4hi pattern. */
18715 if (GET_MODE_SIZE (from
) < 4)
18718 /* Vector registers do not support subreg with nonzero offsets, which
18719 are otherwise valid for integer registers. Since we can't see
18720 whether we have a nonzero offset from here, prohibit all
18721 nonparadoxical subregs changing size. */
18722 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
/* NOTE(review): extraction-mangled fragment.  The `int cost' declaration,
   the `return cost;' of the secondary-memory branch, the `return 20;' of
   the size-mismatch case, the FP<->MMX cost return and the trailing
   fallback (presumably `return 2;') are on missing lines.  Comments only.
   Implements REGISTER_MOVE_COST for CLASS1 -> CLASS2 copies of MODE.  */
18729 /* Return the cost of moving data from a register in class CLASS1 to
18730 one in class CLASS2.
18732 It is not required that the cost always equal 2 when FROM is the same as TO;
18733 on some machines it is expensive to move between registers if they are not
18734 general registers. */
18737 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18738 enum reg_class class2
)
18740 /* In case we require secondary memory, compute cost of the store followed
18741 by load. In order to avoid bad register allocation choices, we need
18742 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* strict == 0: called from the cost hook, sanity checks relaxed (see
   ix86_secondary_memory_needed).  */
18744 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* Store into CLASS1's preferred memory form, load into CLASS2's -- take
   the max of in/out cost for each side.  */
18748 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18749 MEMORY_MOVE_COST (mode
, class1
, 1));
18750 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18751 MEMORY_MOVE_COST (mode
, class2
, 1));
18753 /* In case of copying from general_purpose_register we may emit multiple
18754 stores followed by single load causing memory size mismatch stall.
18755 Count this as arbitrarily high cost of 20. */
18756 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18759 /* In the case of FP/MMX moves, the registers actually overlap, and we
18760 have to switch modes in order to treat them differently. */
18761 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18762 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18768 /* Moves between SSE/MMX and integer unit are expensive. */
18769 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18770 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18771 return ix86_cost
->mmxsse_to_integer
;
/* Intra-unit moves: per-tuning cost tables.  */
18772 if (MAYBE_FLOAT_CLASS_P (class1
))
18773 return ix86_cost
->fp_move
;
18774 if (MAYBE_SSE_CLASS_P (class1
))
18775 return ix86_cost
->sse_move
;
18776 if (MAYBE_MMX_CLASS_P (class1
))
18777 return ix86_cost
->mmx_move
;
/* NOTE(review): extraction-mangled fragment -- the return type, several
   `return 0;'/`return 1;' lines and the closing branches are missing
   (fused numbering jumps 18791 -> 18793, 18818 -> 18820, etc.).  Comments
   only.  Implements HARD_REGNO_MODE_OK.  */
18781 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18784 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18786 /* Flags and only flags can only hold CCmode values. */
18787 if (CC_REGNO_P (regno
))
18788 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Non-flags registers never hold CC/RANDOM/PARTIAL_INT modes; the
   `return 0;' for this test is on a missing line.  */
18789 if (GET_MODE_CLASS (mode
) == MODE_CC
18790 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18791 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18793 if (FP_REGNO_P (regno
))
18794 return VALID_FP_MODE_P (mode
);
18795 if (SSE_REGNO_P (regno
))
18797 /* We implement the move patterns for all vector modes into and
18798 out of SSE registers, even when no operation instructions
18800 return (VALID_SSE_REG_MODE (mode
)
18801 || VALID_SSE2_REG_MODE (mode
)
18802 || VALID_MMX_REG_MODE (mode
)
18803 || VALID_MMX_REG_MODE_3DNOW (mode
));
18805 if (MMX_REGNO_P (regno
))
18807 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18808 so if the register is available at all, then we can move data of
18809 the given mode into or out of it. */
18810 return (VALID_MMX_REG_MODE (mode
)
18811 || VALID_MMX_REG_MODE_3DNOW (mode
));
/* Remaining cases are general-purpose registers.  */
18814 if (mode
== QImode
)
18816 /* Take care for QImode values - they can be in non-QI regs,
18817 but then they do cause partial register stalls. */
18818 if (regno
< 4 || TARGET_64BIT
)
18820 if (!TARGET_PARTIAL_REG_STALL
)
/* With partial-register stalls, only allow QImode in high regs once
   register allocation is underway/finished.  */
18822 return reload_in_progress
|| reload_completed
;
18824 /* We handle both integer and floats in the general purpose registers. */
18825 else if (VALID_INT_MODE_P (mode
))
18827 else if (VALID_FP_MODE_P (mode
))
18829 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18830 on to use that value in smaller contexts, this can easily force a
18831 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18832 supporting DImode, allow it. */
18833 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
/* NOTE(review): extraction-mangled fragment -- the `switch (mode)' head,
   its case labels (presumably HImode/QImode vs. SImode/DImode groups) and
   the default `return false;' are missing.  Comments only.  */
18839 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18840 tieable integer mode. */
18843 ix86_tieable_integer_mode_p (enum machine_mode mode
)
/* Sub-word modes tie freely only when partial-register stalls are not a
   concern (or on 64-bit).  */
18852 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
/* DImode presumably ties only on 64-bit targets -- the case label is on a
   missing line; confirm against upstream.  */
18855 return TARGET_64BIT
;
/* NOTE(review): extraction-mangled fragment -- the return type, the
   `return true;' bodies of the first two tests and the final `return
   false;' are on missing lines.  Comments only.  Implements
   MODES_TIEABLE_P.  */
18862 /* Return true if MODE1 is accessible in a register that can hold MODE2
18863 without copying. That is, all register classes that can hold MODE2
18864 can also hold MODE1. */
18867 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
/* Identical modes always tie.  */
18869 if (mode1
== mode2
)
/* Two tieable integer modes tie with each other.  */
18872 if (ix86_tieable_integer_mode_p (mode1
)
18873 && ix86_tieable_integer_mode_p (mode2
))
18876 /* MODE2 being XFmode implies fp stack or general regs, which means we
18877 can tie any smaller floating point modes to it. Note that we do not
18878 tie this with TFmode. */
18879 if (mode2
== XFmode
)
18880 return mode1
== SFmode
|| mode1
== DFmode
;
18882 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18883 that we can tie it with SFmode. */
18884 if (mode2
== DFmode
)
18885 return mode1
== SFmode
;
18887 /* If MODE2 is only appropriate for an SSE register, then tie with
18888 any other mode acceptable to SSE registers. */
18889 if (GET_MODE_SIZE (mode2
) == 16
18890 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18891 return (GET_MODE_SIZE (mode1
) == 16
18892 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18894 /* If MODE2 is appropriate for an MMX register, then tie
18895 with any other mode acceptable to MMX registers. */
18896 if (GET_MODE_SIZE (mode2
) == 8
18897 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18898 return (GET_MODE_SIZE (mode1
) == 8
18899 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
/* NOTE(review): extraction-mangled fragment.  The `int index' declarations,
   the switch cases that compute `index' from GET_MODE_SIZE for the FP/SSE/
   MMX branches (original lines 18919-18934, 18941-18953, 18960-18969), the
   general-register case labels and the closing brace are missing.  Comments
   only.  Implements MEMORY_MOVE_COST; IN nonzero means load, zero means
   store.  */
18904 /* Return the cost of moving data of mode M between a
18905 register and memory. A value of 2 is the default; this cost is
18906 relative to those in `REGISTER_MOVE_COST'.
18908 If moving between registers and memory is more expensive than
18909 between two registers, you should define this macro to express the
18912 Model also increased moving costs of QImode registers in non
18916 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
/* x87 classes: per-size load/store cost tables indexed by `index' (set in
   a missing switch over GET_MODE_SIZE).  */
18918 if (FLOAT_CLASS_P (class))
18935 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
18937 if (SSE_CLASS_P (class))
18940 switch (GET_MODE_SIZE (mode
))
18954 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
18956 if (MMX_CLASS_P (class))
18959 switch (GET_MODE_SIZE (mode
))
18970 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* General-purpose registers from here on; case labels are missing, but
   the bodies distinguish 1-byte, 2-byte and word-size accesses.  */
18972 switch (GET_MODE_SIZE (mode
))
/* Byte loads into non-Q regs go through movzbl, which has its own cost.  */
18976 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18977 : ix86_cost
->movzbl_load
);
/* Byte stores from non-Q regs are penalized by 4.  */
18979 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18980 : ix86_cost
->int_store
[0] + 4);
18983 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18985 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18986 if (mode
== TFmode
)
18988 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18989 * (((int) GET_MODE_SIZE (mode
)
18990 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18994 /* Compute a (partial) cost for rtx X. Return true if the complete
18995 cost has been computed, and false if subexpressions should be
18996 scanned. In either case, *TOTAL contains the cost result. */
18999 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
19001 enum machine_mode mode
= GET_MODE (x
);
19009 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
19011 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
19013 else if (flag_pic
&& SYMBOLIC_CONST (x
)
19015 || (!GET_CODE (x
) != LABEL_REF
19016 && (GET_CODE (x
) != SYMBOL_REF
19017 || !SYMBOL_REF_LOCAL_P (x
)))))
19024 if (mode
== VOIDmode
)
19027 switch (standard_80387_constant_p (x
))
19032 default: /* Other constants */
19037 /* Start with (MEM (SYMBOL_REF)), since that's where
19038 it'll probably end up. Add a penalty for size. */
19039 *total
= (COSTS_N_INSNS (1)
19040 + (flag_pic
!= 0 && !TARGET_64BIT
)
19041 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
19047 /* The zero extensions is often completely free on x86_64, so make
19048 it as cheap as possible. */
19049 if (TARGET_64BIT
&& mode
== DImode
19050 && GET_MODE (XEXP (x
, 0)) == SImode
)
19052 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19053 *total
= ix86_cost
->add
;
19055 *total
= ix86_cost
->movzx
;
19059 *total
= ix86_cost
->movsx
;
19063 if (CONST_INT_P (XEXP (x
, 1))
19064 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
19066 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19069 *total
= ix86_cost
->add
;
19072 if ((value
== 2 || value
== 3)
19073 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19075 *total
= ix86_cost
->lea
;
19085 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19087 if (CONST_INT_P (XEXP (x
, 1)))
19089 if (INTVAL (XEXP (x
, 1)) > 32)
19090 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19092 *total
= ix86_cost
->shift_const
* 2;
19096 if (GET_CODE (XEXP (x
, 1)) == AND
)
19097 *total
= ix86_cost
->shift_var
* 2;
19099 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19104 if (CONST_INT_P (XEXP (x
, 1)))
19105 *total
= ix86_cost
->shift_const
;
19107 *total
= ix86_cost
->shift_var
;
19112 if (FLOAT_MODE_P (mode
))
19114 *total
= ix86_cost
->fmul
;
19119 rtx op0
= XEXP (x
, 0);
19120 rtx op1
= XEXP (x
, 1);
19122 if (CONST_INT_P (XEXP (x
, 1)))
19124 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19125 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19129 /* This is arbitrary. */
19132 /* Compute costs correctly for widening multiplication. */
19133 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19134 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19135 == GET_MODE_SIZE (mode
))
19137 int is_mulwiden
= 0;
19138 enum machine_mode inner_mode
= GET_MODE (op0
);
19140 if (GET_CODE (op0
) == GET_CODE (op1
))
19141 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19142 else if (CONST_INT_P (op1
))
19144 if (GET_CODE (op0
) == SIGN_EXTEND
)
19145 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19148 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19152 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19155 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19156 + nbits
* ix86_cost
->mult_bit
19157 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19166 if (FLOAT_MODE_P (mode
))
19167 *total
= ix86_cost
->fdiv
;
19169 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19173 if (FLOAT_MODE_P (mode
))
19174 *total
= ix86_cost
->fadd
;
19175 else if (GET_MODE_CLASS (mode
) == MODE_INT
19176 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19178 if (GET_CODE (XEXP (x
, 0)) == PLUS
19179 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19180 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19181 && CONSTANT_P (XEXP (x
, 1)))
19183 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19184 if (val
== 2 || val
== 4 || val
== 8)
19186 *total
= ix86_cost
->lea
;
19187 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19188 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19190 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19194 else if (GET_CODE (XEXP (x
, 0)) == MULT
19195 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19197 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19198 if (val
== 2 || val
== 4 || val
== 8)
19200 *total
= ix86_cost
->lea
;
19201 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19202 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19206 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19208 *total
= ix86_cost
->lea
;
19209 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19210 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19211 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19218 if (FLOAT_MODE_P (mode
))
19220 *total
= ix86_cost
->fadd
;
19228 if (!TARGET_64BIT
&& mode
== DImode
)
19230 *total
= (ix86_cost
->add
* 2
19231 + (rtx_cost (XEXP (x
, 0), outer_code
)
19232 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19233 + (rtx_cost (XEXP (x
, 1), outer_code
)
19234 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19240 if (FLOAT_MODE_P (mode
))
19242 *total
= ix86_cost
->fchs
;
19248 if (!TARGET_64BIT
&& mode
== DImode
)
19249 *total
= ix86_cost
->add
* 2;
19251 *total
= ix86_cost
->add
;
19255 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19256 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19257 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19258 && XEXP (x
, 1) == const0_rtx
)
19260 /* This kind of construct is implemented using test[bwl].
19261 Treat it as if we had an AND. */
19262 *total
= (ix86_cost
->add
19263 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19264 + rtx_cost (const1_rtx
, outer_code
));
19270 if (!TARGET_SSE_MATH
19272 || (mode
== DFmode
&& !TARGET_SSE2
))
19277 if (FLOAT_MODE_P (mode
))
19278 *total
= ix86_cost
->fabs
;
19282 if (FLOAT_MODE_P (mode
))
19283 *total
= ix86_cost
->fsqrt
;
19287 if (XINT (x
, 1) == UNSPEC_TP
)
19298 static int current_machopic_label_num
;
19300 /* Given a symbol name and its associated stub, write out the
19301 definition of the stub. */
19304 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19306 unsigned int length
;
19307 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19308 int label
= ++current_machopic_label_num
;
19310 /* For 64-bit we shouldn't get here. */
19311 gcc_assert (!TARGET_64BIT
);
19313 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19314 symb
= (*targetm
.strip_name_encoding
) (symb
);
19316 length
= strlen (stub
);
19317 binder_name
= alloca (length
+ 32);
19318 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19320 length
= strlen (symb
);
19321 symbol_name
= alloca (length
+ 32);
19322 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19324 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19327 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19329 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19331 fprintf (file
, "%s:\n", stub
);
19332 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19336 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19337 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19338 fprintf (file
, "\tjmp\t*%%edx\n");
19341 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19343 fprintf (file
, "%s:\n", binder_name
);
19347 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19348 fprintf (file
, "\tpushl\t%%eax\n");
19351 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19353 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19355 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19356 fprintf (file
, "%s:\n", lazy_ptr_name
);
19357 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19358 fprintf (file
, "\t.long %s\n", binder_name
);
/* Darwin variant of TARGET_ASM_FILE_END.  */

static void
darwin_x86_file_end (void)
{
  darwin_file_end ();
}
19367 #endif /* TARGET_MACHO */
19369 /* Order the registers for register allocator. */
19372 x86_order_regs_for_local_alloc (void)
19377 /* First allocate the local general purpose registers. */
19378 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19379 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19380 reg_alloc_order
[pos
++] = i
;
19382 /* Global general purpose registers. */
19383 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19384 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19385 reg_alloc_order
[pos
++] = i
;
19387 /* x87 registers come first in case we are doing FP math
19389 if (!TARGET_SSE_MATH
)
19390 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19391 reg_alloc_order
[pos
++] = i
;
19393 /* SSE registers. */
19394 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19395 reg_alloc_order
[pos
++] = i
;
19396 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19397 reg_alloc_order
[pos
++] = i
;
19399 /* x87 registers. */
19400 if (TARGET_SSE_MATH
)
19401 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19402 reg_alloc_order
[pos
++] = i
;
19404 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19405 reg_alloc_order
[pos
++] = i
;
19407 /* Initialize the rest of array as we do not allocate some registers
19409 while (pos
< FIRST_PSEUDO_REGISTER
)
19410 reg_alloc_order
[pos
++] = 0;
19413 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19414 struct attribute_spec.handler. */
19416 ix86_handle_struct_attribute (tree
*node
, tree name
,
19417 tree args ATTRIBUTE_UNUSED
,
19418 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19421 if (DECL_P (*node
))
19423 if (TREE_CODE (*node
) == TYPE_DECL
)
19424 type
= &TREE_TYPE (*node
);
19429 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19430 || TREE_CODE (*type
) == UNION_TYPE
)))
19432 warning (OPT_Wattributes
, "%qs attribute ignored",
19433 IDENTIFIER_POINTER (name
));
19434 *no_add_attrs
= true;
19437 else if ((is_attribute_p ("ms_struct", name
)
19438 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19439 || ((is_attribute_p ("gcc_struct", name
)
19440 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19442 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19443 IDENTIFIER_POINTER (name
));
19444 *no_add_attrs
= true;
19451 ix86_ms_bitfield_layout_p (tree record_type
)
19453 return (TARGET_MS_BITFIELD_LAYOUT
&&
19454 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19455 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19458 /* Returns an expression indicating where the this parameter is
19459 located on entry to the FUNCTION. */
19462 x86_this_parameter (tree function
)
19464 tree type
= TREE_TYPE (function
);
19468 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19469 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19472 if (ix86_function_regparm (type
, function
) > 0)
19476 parm
= TYPE_ARG_TYPES (type
);
19477 /* Figure out whether or not the function has a variable number of
19479 for (; parm
; parm
= TREE_CHAIN (parm
))
19480 if (TREE_VALUE (parm
) == void_type_node
)
19482 /* If not, the this parameter is in the first argument. */
19486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19488 return gen_rtx_REG (SImode
, regno
);
19492 if (aggregate_value_p (TREE_TYPE (type
), type
))
19493 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19495 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19498 /* Determine whether x86_output_mi_thunk can succeed. */
19501 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19502 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19503 HOST_WIDE_INT vcall_offset
, tree function
)
19505 /* 64-bit can handle anything. */
19509 /* For 32-bit, everything's fine if we have one free register. */
19510 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19513 /* Need a free register for vcall_offset. */
19517 /* Need a free register for GOT references. */
19518 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19521 /* Otherwise ok. */
19525 /* Output the assembler code for a thunk function. THUNK_DECL is the
19526 declaration for the thunk function itself, FUNCTION is the decl for
19527 the target function. DELTA is an immediate constant offset to be
19528 added to THIS. If VCALL_OFFSET is nonzero, the word at
19529 *(*this + vcall_offset) should be added to THIS. */
19532 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19533 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19534 HOST_WIDE_INT vcall_offset
, tree function
)
19537 rtx
this = x86_this_parameter (function
);
19540 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19541 pull it in now and let DELTA benefit. */
19544 else if (vcall_offset
)
19546 /* Put the this parameter into %eax. */
19548 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19549 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19552 this_reg
= NULL_RTX
;
19554 /* Adjust the this parameter by a fixed constant. */
19557 xops
[0] = GEN_INT (delta
);
19558 xops
[1] = this_reg
? this_reg
: this;
19561 if (!x86_64_general_operand (xops
[0], DImode
))
19563 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19565 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19569 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19572 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19575 /* Adjust the this parameter by a value stored in the vtable. */
19579 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19582 int tmp_regno
= 2 /* ECX */;
19583 if (lookup_attribute ("fastcall",
19584 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19585 tmp_regno
= 0 /* EAX */;
19586 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19589 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19592 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19594 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19596 /* Adjust the this parameter. */
19597 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19598 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19600 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19601 xops
[0] = GEN_INT (vcall_offset
);
19603 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19604 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19606 xops
[1] = this_reg
;
19608 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19610 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19613 /* If necessary, drop THIS back to its stack slot. */
19614 if (this_reg
&& this_reg
!= this)
19616 xops
[0] = this_reg
;
19618 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19621 xops
[0] = XEXP (DECL_RTL (function
), 0);
19624 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19625 output_asm_insn ("jmp\t%P0", xops
);
19628 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19629 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19630 tmp
= gen_rtx_MEM (QImode
, tmp
);
19632 output_asm_insn ("jmp\t%A0", xops
);
19637 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19638 output_asm_insn ("jmp\t%P0", xops
);
19643 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19644 tmp
= (gen_rtx_SYMBOL_REF
19646 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19647 tmp
= gen_rtx_MEM (QImode
, tmp
);
19649 output_asm_insn ("jmp\t%0", xops
);
19652 #endif /* TARGET_MACHO */
19654 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19655 output_set_got (tmp
, NULL_RTX
);
19658 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19659 output_asm_insn ("jmp\t{*}%1", xops
);
19665 x86_file_start (void)
19667 default_file_start ();
19669 darwin_file_start ();
19671 if (X86_FILE_START_VERSION_DIRECTIVE
)
19672 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19673 if (X86_FILE_START_FLTUSED
)
19674 fputs ("\t.global\t__fltused\n", asm_out_file
);
19675 if (ix86_asm_dialect
== ASM_INTEL
)
19676 fputs ("\t.intel_syntax\n", asm_out_file
);
19680 x86_field_alignment (tree field
, int computed
)
19682 enum machine_mode mode
;
19683 tree type
= TREE_TYPE (field
);
19685 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19687 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19688 ? get_inner_array_type (type
) : type
);
19689 if (mode
== DFmode
|| mode
== DCmode
19690 || GET_MODE_CLASS (mode
) == MODE_INT
19691 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19692 return MIN (32, computed
);
19696 /* Output assembler code to FILE to increment profiler label # LABELNO
19697 for profiling a function entry. */
19699 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19704 #ifndef NO_PROFILE_COUNTERS
19705 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19707 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19711 #ifndef NO_PROFILE_COUNTERS
19712 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19714 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19718 #ifndef NO_PROFILE_COUNTERS
19719 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19720 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19722 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19726 #ifndef NO_PROFILE_COUNTERS
19727 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19728 PROFILE_COUNT_REGISTER
);
19730 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19734 /* We don't have exact information about the insn sizes, but we may assume
19735 quite safely that we are informed about all 1 byte insns and memory
19736 address sizes. This is enough to eliminate unnecessary padding in
19740 min_insn_size (rtx insn
)
19744 if (!INSN_P (insn
) || !active_insn_p (insn
))
19747 /* Discard alignments we've emit and jump instructions. */
19748 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19749 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19752 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19753 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19756 /* Important case - calls are always 5 bytes.
19757 It is common to have many calls in the row. */
19759 && symbolic_reference_mentioned_p (PATTERN (insn
))
19760 && !SIBLING_CALL_P (insn
))
19762 if (get_attr_length (insn
) <= 1)
19765 /* For normal instructions we may rely on the sizes of addresses
19766 and the presence of symbol to require 4 bytes of encoding.
19767 This is not the case for jumps where references are PC relative. */
19768 if (!JUMP_P (insn
))
19770 l
= get_attr_length_address (insn
);
19771 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19780 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19784 ix86_avoid_jump_misspredicts (void)
19786 rtx insn
, start
= get_insns ();
19787 int nbytes
= 0, njumps
= 0;
19790 /* Look for all minimal intervals of instructions containing 4 jumps.
19791 The intervals are bounded by START and INSN. NBYTES is the total
19792 size of instructions in the interval including INSN and not including
19793 START. When the NBYTES is smaller than 16 bytes, it is possible
19794 that the end of START and INSN ends up in the same 16byte page.
19796 The smallest offset in the page INSN can start is the case where START
19797 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19798 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19800 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19803 nbytes
+= min_insn_size (insn
);
19805 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19806 INSN_UID (insn
), min_insn_size (insn
));
19808 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19809 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19817 start
= NEXT_INSN (start
);
19818 if ((JUMP_P (start
)
19819 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19820 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19822 njumps
--, isjump
= 1;
19825 nbytes
-= min_insn_size (start
);
19827 gcc_assert (njumps
>= 0);
19829 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19830 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19832 if (njumps
== 3 && isjump
&& nbytes
< 16)
19834 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19837 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19838 INSN_UID (insn
), padsize
);
19839 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19844 /* AMD Athlon works faster
19845 when RET is not destination of conditional jump or directly preceded
19846 by other jump instruction. We avoid the penalty by inserting NOP just
19847 before the RET instructions in such cases. */
19849 ix86_pad_returns (void)
19854 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19856 basic_block bb
= e
->src
;
19857 rtx ret
= BB_END (bb
);
19859 bool replace
= false;
19861 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19862 || !maybe_hot_bb_p (bb
))
19864 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19865 if (active_insn_p (prev
) || LABEL_P (prev
))
19867 if (prev
&& LABEL_P (prev
))
19872 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19873 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19874 && !(e
->flags
& EDGE_FALLTHRU
))
19879 prev
= prev_active_insn (ret
);
19881 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19884 /* Empty functions get branch mispredict even when the jump destination
19885 is not visible to us. */
19886 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19891 emit_insn_before (gen_return_internal_long (), ret
);
19897 /* Implement machine specific optimizations. We implement padding of returns
19898 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19902 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19903 ix86_pad_returns ();
19904 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19905 ix86_avoid_jump_misspredicts ();
19908 /* Return nonzero when QImode register that must be represented via REX prefix
19911 x86_extended_QIreg_mentioned_p (rtx insn
)
19914 extract_insn_cached (insn
);
19915 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19916 if (REG_P (recog_data
.operand
[i
])
19917 && REGNO (recog_data
.operand
[i
]) >= 4)
19922 /* Return nonzero when P points to register encoded via REX prefix.
19923 Called via for_each_rtx. */
19925 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19927 unsigned int regno
;
19930 regno
= REGNO (*p
);
19931 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19934 /* Return true when INSN mentions register that must be encoded using REX
19937 x86_extended_reg_mentioned_p (rtx insn
)
19939 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19942 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19943 optabs would emit if we didn't have TFmode patterns. */
19946 x86_emit_floatuns (rtx operands
[2])
19948 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19949 enum machine_mode mode
, inmode
;
19951 inmode
= GET_MODE (operands
[1]);
19952 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19955 in
= force_reg (inmode
, operands
[1]);
19956 mode
= GET_MODE (out
);
19957 neglab
= gen_label_rtx ();
19958 donelab
= gen_label_rtx ();
19959 f0
= gen_reg_rtx (mode
);
19961 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
19963 expand_float (out
, in
, 0);
19965 emit_jump_insn (gen_jump (donelab
));
19968 emit_label (neglab
);
19970 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
19972 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
19974 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19976 expand_float (f0
, i0
, 0);
19978 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19980 emit_label (donelab
);
19983 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19984 with all elements equal to VAR. Return true if successful. */
19987 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19988 rtx target
, rtx val
)
19990 enum machine_mode smode
, wsmode
, wvmode
;
20005 val
= force_reg (GET_MODE_INNER (mode
), val
);
20006 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20007 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20013 if (TARGET_SSE
|| TARGET_3DNOW_A
)
20015 val
= gen_lowpart (SImode
, val
);
20016 x
= gen_rtx_TRUNCATE (HImode
, val
);
20017 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
20018 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20040 /* Extend HImode to SImode using a paradoxical SUBREG. */
20041 tmp1
= gen_reg_rtx (SImode
);
20042 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20043 /* Insert the SImode value as low element of V4SImode vector. */
20044 tmp2
= gen_reg_rtx (V4SImode
);
20045 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20046 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20047 CONST0_RTX (V4SImode
),
20049 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20050 /* Cast the V4SImode vector back to a V8HImode vector. */
20051 tmp1
= gen_reg_rtx (V8HImode
);
20052 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
20053 /* Duplicate the low short through the whole low SImode word. */
20054 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
20055 /* Cast the V8HImode vector back to a V4SImode vector. */
20056 tmp2
= gen_reg_rtx (V4SImode
);
20057 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20058 /* Replicate the low element of the V4SImode vector. */
20059 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20060 /* Cast the V2SImode back to V8HImode, and store in target. */
20061 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20072 /* Extend QImode to SImode using a paradoxical SUBREG. */
20073 tmp1
= gen_reg_rtx (SImode
);
20074 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20075 /* Insert the SImode value as low element of V4SImode vector. */
20076 tmp2
= gen_reg_rtx (V4SImode
);
20077 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20078 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20079 CONST0_RTX (V4SImode
),
20081 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20082 /* Cast the V4SImode vector back to a V16QImode vector. */
20083 tmp1
= gen_reg_rtx (V16QImode
);
20084 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20085 /* Duplicate the low byte through the whole low SImode word. */
20086 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20087 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20088 /* Cast the V16QImode vector back to a V4SImode vector. */
20089 tmp2
= gen_reg_rtx (V4SImode
);
20090 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20091 /* Replicate the low element of the V4SImode vector. */
20092 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20093 /* Cast the V2SImode back to V16QImode, and store in target. */
20094 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20102 /* Replicate the value once into the next wider mode and recurse. */
20103 val
= convert_modes (wsmode
, smode
, val
, true);
20104 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20105 GEN_INT (GET_MODE_BITSIZE (smode
)),
20106 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20107 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20109 x
= gen_reg_rtx (wvmode
);
20110 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20111 gcc_unreachable ();
20112 emit_move_insn (target
, gen_lowpart (mode
, x
));
20120 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20121 whose ONE_VAR element is VAR, and other elements are zero. Return true
20125 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20126 rtx target
, rtx var
, int one_var
)
20128 enum machine_mode vsimode
;
20144 var
= force_reg (GET_MODE_INNER (mode
), var
);
20145 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20146 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20151 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20152 new_target
= gen_reg_rtx (mode
);
20154 new_target
= target
;
20155 var
= force_reg (GET_MODE_INNER (mode
), var
);
20156 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20157 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20158 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20161 /* We need to shuffle the value to the correct position, so
20162 create a new pseudo to store the intermediate result. */
20164 /* With SSE2, we can use the integer shuffle insns. */
20165 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20167 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20169 GEN_INT (one_var
== 1 ? 0 : 1),
20170 GEN_INT (one_var
== 2 ? 0 : 1),
20171 GEN_INT (one_var
== 3 ? 0 : 1)));
20172 if (target
!= new_target
)
20173 emit_move_insn (target
, new_target
);
20177 /* Otherwise convert the intermediate result to V4SFmode and
20178 use the SSE1 shuffle instructions. */
20179 if (mode
!= V4SFmode
)
20181 tmp
= gen_reg_rtx (V4SFmode
);
20182 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20187 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20189 GEN_INT (one_var
== 1 ? 0 : 1),
20190 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20191 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20193 if (mode
!= V4SFmode
)
20194 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20195 else if (tmp
!= target
)
20196 emit_move_insn (target
, tmp
);
20198 else if (target
!= new_target
)
20199 emit_move_insn (target
, new_target
);
20204 vsimode
= V4SImode
;
20210 vsimode
= V2SImode
;
20216 /* Zero extend the variable element to SImode and recurse. */
20217 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20219 x
= gen_reg_rtx (vsimode
);
20220 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20222 gcc_unreachable ();
20224 emit_move_insn (target
, gen_lowpart (mode
, x
));
20232 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20233 consisting of the values in VALS. It is known that all elements
20234 except ONE_VAR are constants. Return true if successful. */
20237 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20238 rtx target
, rtx vals
, int one_var
)
20240 rtx var
= XVECEXP (vals
, 0, one_var
);
20241 enum machine_mode wmode
;
20244 const_vec
= copy_rtx (vals
);
20245 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20246 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20254 /* For the two element vectors, it's just as easy to use
20255 the general case. */
20271 /* There's no way to set one QImode entry easily. Combine
20272 the variable value with its adjacent constant value, and
20273 promote to an HImode set. */
20274 x
= XVECEXP (vals
, 0, one_var
^ 1);
20277 var
= convert_modes (HImode
, QImode
, var
, true);
20278 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20279 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20280 x
= GEN_INT (INTVAL (x
) & 0xff);
20284 var
= convert_modes (HImode
, QImode
, var
, true);
20285 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20287 if (x
!= const0_rtx
)
20288 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20289 1, OPTAB_LIB_WIDEN
);
20291 x
= gen_reg_rtx (wmode
);
20292 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20293 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20295 emit_move_insn (target
, gen_lowpart (mode
, x
));
20302 emit_move_insn (target
, const_vec
);
20303 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20307 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20308 all values variable, and none identical. */
20311 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20312 rtx target
, rtx vals
)
20314 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20315 rtx op0
= NULL
, op1
= NULL
;
20316 bool use_vec_concat
= false;
20322 if (!mmx_ok
&& !TARGET_SSE
)
20328 /* For the two element vectors, we always implement VEC_CONCAT. */
20329 op0
= XVECEXP (vals
, 0, 0);
20330 op1
= XVECEXP (vals
, 0, 1);
20331 use_vec_concat
= true;
20335 half_mode
= V2SFmode
;
20338 half_mode
= V2SImode
;
20344 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20345 Recurse to load the two halves. */
20347 op0
= gen_reg_rtx (half_mode
);
20348 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20349 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20351 op1
= gen_reg_rtx (half_mode
);
20352 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20353 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20355 use_vec_concat
= true;
20366 gcc_unreachable ();
20369 if (use_vec_concat
)
20371 if (!register_operand (op0
, half_mode
))
20372 op0
= force_reg (half_mode
, op0
);
20373 if (!register_operand (op1
, half_mode
))
20374 op1
= force_reg (half_mode
, op1
);
20376 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20377 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20381 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20382 enum machine_mode inner_mode
;
20383 rtx words
[4], shift
;
20385 inner_mode
= GET_MODE_INNER (mode
);
20386 n_elts
= GET_MODE_NUNITS (mode
);
20387 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20388 n_elt_per_word
= n_elts
/ n_words
;
20389 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20391 for (i
= 0; i
< n_words
; ++i
)
20393 rtx word
= NULL_RTX
;
20395 for (j
= 0; j
< n_elt_per_word
; ++j
)
20397 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20398 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20404 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20405 word
, 1, OPTAB_LIB_WIDEN
);
20406 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20407 word
, 1, OPTAB_LIB_WIDEN
);
20415 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20416 else if (n_words
== 2)
20418 rtx tmp
= gen_reg_rtx (mode
);
20419 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20420 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20421 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20422 emit_move_insn (target
, tmp
);
20424 else if (n_words
== 4)
20426 rtx tmp
= gen_reg_rtx (V4SImode
);
20427 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20428 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20429 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20432 gcc_unreachable ();
20436 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20437 instructions unless MMX_OK is true. */
20440 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20442 enum machine_mode mode
= GET_MODE (target
);
20443 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20444 int n_elts
= GET_MODE_NUNITS (mode
);
20445 int n_var
= 0, one_var
= -1;
20446 bool all_same
= true, all_const_zero
= true;
20450 for (i
= 0; i
< n_elts
; ++i
)
20452 x
= XVECEXP (vals
, 0, i
);
20453 if (!CONSTANT_P (x
))
20454 n_var
++, one_var
= i
;
20455 else if (x
!= CONST0_RTX (inner_mode
))
20456 all_const_zero
= false;
20457 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20461 /* Constants are best loaded from the constant pool. */
20464 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20468 /* If all values are identical, broadcast the value. */
20470 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20471 XVECEXP (vals
, 0, 0)))
20474 /* Values where only one field is non-constant are best loaded from
20475 the pool and overwritten via move later. */
20479 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20480 XVECEXP (vals
, 0, one_var
),
20484 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20488 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20492 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20494 enum machine_mode mode
= GET_MODE (target
);
20495 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20496 bool use_vec_merge
= false;
20505 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20506 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20508 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20510 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20511 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20521 /* For the two element vectors, we implement a VEC_CONCAT with
20522 the extraction of the other element. */
20524 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20525 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20528 op0
= val
, op1
= tmp
;
20530 op0
= tmp
, op1
= val
;
20532 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20533 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20541 use_vec_merge
= true;
20545 /* tmp = target = A B C D */
20546 tmp
= copy_to_reg (target
);
20547 /* target = A A B B */
20548 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20549 /* target = X A B B */
20550 ix86_expand_vector_set (false, target
, val
, 0);
20551 /* target = A X C D */
20552 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20553 GEN_INT (1), GEN_INT (0),
20554 GEN_INT (2+4), GEN_INT (3+4)));
20558 /* tmp = target = A B C D */
20559 tmp
= copy_to_reg (target
);
20560 /* tmp = X B C D */
20561 ix86_expand_vector_set (false, tmp
, val
, 0);
20562 /* target = A B X D */
20563 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20564 GEN_INT (0), GEN_INT (1),
20565 GEN_INT (0+4), GEN_INT (3+4)));
20569 /* tmp = target = A B C D */
20570 tmp
= copy_to_reg (target
);
20571 /* tmp = X B C D */
20572 ix86_expand_vector_set (false, tmp
, val
, 0);
20573 /* target = A B X D */
20574 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20575 GEN_INT (0), GEN_INT (1),
20576 GEN_INT (2+4), GEN_INT (0+4)));
20580 gcc_unreachable ();
20585 /* Element 0 handled by vec_merge below. */
20588 use_vec_merge
= true;
20594 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20595 store into element 0, then shuffle them back. */
20599 order
[0] = GEN_INT (elt
);
20600 order
[1] = const1_rtx
;
20601 order
[2] = const2_rtx
;
20602 order
[3] = GEN_INT (3);
20603 order
[elt
] = const0_rtx
;
20605 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20606 order
[1], order
[2], order
[3]));
20608 ix86_expand_vector_set (false, target
, val
, 0);
20610 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20611 order
[1], order
[2], order
[3]));
20615 /* For SSE1, we have to reuse the V4SF code. */
20616 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20617 gen_lowpart (SFmode
, val
), elt
);
20622 use_vec_merge
= TARGET_SSE2
;
20625 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20636 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20637 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20638 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20642 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20644 emit_move_insn (mem
, target
);
20646 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20647 emit_move_insn (tmp
, val
);
20649 emit_move_insn (target
, mem
);
20654 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20656 enum machine_mode mode
= GET_MODE (vec
);
20657 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20658 bool use_vec_extr
= false;
20671 use_vec_extr
= true;
20683 tmp
= gen_reg_rtx (mode
);
20684 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20685 GEN_INT (elt
), GEN_INT (elt
),
20686 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20690 tmp
= gen_reg_rtx (mode
);
20691 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20695 gcc_unreachable ();
20698 use_vec_extr
= true;
20713 tmp
= gen_reg_rtx (mode
);
20714 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20715 GEN_INT (elt
), GEN_INT (elt
),
20716 GEN_INT (elt
), GEN_INT (elt
)));
20720 tmp
= gen_reg_rtx (mode
);
20721 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20725 gcc_unreachable ();
20728 use_vec_extr
= true;
20733 /* For SSE1, we have to reuse the V4SF code. */
20734 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20735 gen_lowpart (V4SFmode
, vec
), elt
);
20741 use_vec_extr
= TARGET_SSE2
;
20744 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20749 /* ??? Could extract the appropriate HImode element and shift. */
20756 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20757 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20759 /* Let the rtl optimizers know about the zero extension performed. */
20760 if (inner_mode
== HImode
)
20762 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20763 target
= gen_lowpart (SImode
, target
);
20766 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20770 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20772 emit_move_insn (mem
, vec
);
20774 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20775 emit_move_insn (target
, tmp
);
20779 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20780 pattern to reduce; DEST is the destination; IN is the input vector. */
20783 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20785 rtx tmp1
, tmp2
, tmp3
;
20787 tmp1
= gen_reg_rtx (V4SFmode
);
20788 tmp2
= gen_reg_rtx (V4SFmode
);
20789 tmp3
= gen_reg_rtx (V4SFmode
);
20791 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20792 emit_insn (fn (tmp2
, tmp1
, in
));
20794 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20795 GEN_INT (1), GEN_INT (1),
20796 GEN_INT (1+4), GEN_INT (1+4)));
20797 emit_insn (fn (dest
, tmp2
, tmp3
));
20800 /* Target hook for scalar_mode_supported_p. */
20802 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20804 if (DECIMAL_FLOAT_MODE_P (mode
))
20807 return default_scalar_mode_supported_p (mode
);
20810 /* Implements target hook vector_mode_supported_p. */
20812 ix86_vector_mode_supported_p (enum machine_mode mode
)
20814 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20816 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20818 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20820 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20825 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20827 We do this in the new i386 backend to maintain source compatibility
20828 with the old cc0-based compiler. */
20831 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20832 tree inputs ATTRIBUTE_UNUSED
,
20835 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20837 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20842 /* Return true if this goes in small data/bss. */
20845 ix86_in_large_data_p (tree exp
)
20847 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20850 /* Functions are never large data. */
20851 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20854 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20856 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20857 if (strcmp (section
, ".ldata") == 0
20858 || strcmp (section
, ".lbss") == 0)
20864 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20866 /* If this is an incomplete type with size 0, then we can't put it
20867 in data because it might be too big when completed. */
20868 if (!size
|| size
> ix86_section_threshold
)
20875 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20877 default_encode_section_info (decl
, rtl
, first
);
20879 if (TREE_CODE (decl
) == VAR_DECL
20880 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20881 && ix86_in_large_data_p (decl
))
20882 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20885 /* Worker function for REVERSE_CONDITION. */
20888 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20890 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20891 ? reverse_condition (code
)
20892 : reverse_condition_maybe_unordered (code
));
20895 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20899 output_387_reg_move (rtx insn
, rtx
*operands
)
20901 if (REG_P (operands
[1])
20902 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20904 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20905 return output_387_ffreep (operands
, 0);
20906 return "fstp\t%y0";
20908 if (STACK_TOP_P (operands
[0]))
20909 return "fld%z1\t%y1";
20913 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20914 FP status register is set. */
20917 ix86_emit_fp_unordered_jump (rtx label
)
20919 rtx reg
= gen_reg_rtx (HImode
);
20922 emit_insn (gen_x86_fnstsw_1 (reg
));
20924 if (TARGET_USE_SAHF
)
20926 emit_insn (gen_x86_sahf_1 (reg
));
20928 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20929 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
20933 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20935 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20936 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20939 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20940 gen_rtx_LABEL_REF (VOIDmode
, label
),
20942 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20943 emit_jump_insn (temp
);
20946 /* Output code to perform a log1p XFmode calculation. */
20948 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20950 rtx label1
= gen_label_rtx ();
20951 rtx label2
= gen_label_rtx ();
20953 rtx tmp
= gen_reg_rtx (XFmode
);
20954 rtx tmp2
= gen_reg_rtx (XFmode
);
20956 emit_insn (gen_absxf2 (tmp
, op1
));
20957 emit_insn (gen_cmpxf (tmp
,
20958 CONST_DOUBLE_FROM_REAL_VALUE (
20959 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20961 emit_jump_insn (gen_bge (label1
));
20963 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20964 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20965 emit_jump (label2
);
20967 emit_label (label1
);
20968 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20969 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20970 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20971 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20973 emit_label (label2
);
20976 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20979 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20982 /* With Binutils 2.15, the "@unwind" marker must be specified on
20983 every occurrence of the ".eh_frame" section, not just the first
20986 && strcmp (name
, ".eh_frame") == 0)
20988 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20989 flags
& SECTION_WRITE
? "aw" : "a");
20992 default_elf_asm_named_section (name
, flags
, decl
);
20995 /* Return the mangling of TYPE if it is an extended fundamental type. */
20997 static const char *
20998 ix86_mangle_fundamental_type (tree type
)
21000 switch (TYPE_MODE (type
))
21003 /* __float128 is "g". */
21006 /* "long double" or __float80 is "e". */
21013 /* For 32-bit code we can save PIC register setup by using
21014 __stack_chk_fail_local hidden function instead of calling
21015 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21016 register, so it is better to call __stack_chk_fail directly. */
21019 ix86_stack_protect_fail (void)
21021 return TARGET_64BIT
21022 ? default_external_stack_protect_fail ()
21023 : default_hidden_stack_protect_fail ();
21026 /* Select a format to encode pointers in exception handling data. CODE
21027 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21028 true if the symbol may be affected by dynamic relocations.
21030 ??? All x86 object file formats are capable of representing this.
21031 After all, the relocation needed is the same as for the call insn.
21032 Whether or not a particular assembler allows us to enter such, I
21033 guess we'll have to see. */
21035 asm_preferred_eh_data_format (int code
, int global
)
21039 int type
= DW_EH_PE_sdata8
;
21041 || ix86_cmodel
== CM_SMALL_PIC
21042 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21043 type
= DW_EH_PE_sdata4
;
21044 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21046 if (ix86_cmodel
== CM_SMALL
21047 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21048 return DW_EH_PE_udata4
;
21049 return DW_EH_PE_absptr
;
21052 /* Expand copysign from SIGN to the positive value ABS_VALUE
21053 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21056 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
21058 enum machine_mode mode
= GET_MODE (sign
);
21059 rtx sgn
= gen_reg_rtx (mode
);
21060 if (mask
== NULL_RTX
)
21062 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
21063 if (!VECTOR_MODE_P (mode
))
21065 /* We need to generate a scalar mode mask in this case. */
21066 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21067 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21068 mask
= gen_reg_rtx (mode
);
21069 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21073 mask
= gen_rtx_NOT (mode
, mask
);
21074 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
21075 gen_rtx_AND (mode
, mask
, sign
)));
21076 emit_insn (gen_rtx_SET (VOIDmode
, result
,
21077 gen_rtx_IOR (mode
, abs_value
, sgn
)));
21080 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21081 mask for masking out the sign-bit is stored in *SMASK, if that is
21084 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
21086 enum machine_mode mode
= GET_MODE (op0
);
21089 xa
= gen_reg_rtx (mode
);
21090 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
21091 if (!VECTOR_MODE_P (mode
))
21093 /* We need to generate a scalar mode mask in this case. */
21094 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21095 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21096 mask
= gen_reg_rtx (mode
);
21097 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21099 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21100 gen_rtx_AND (mode
, op0
, mask
)));
21108 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21109 swapping the operands if SWAP_OPERANDS is true. The expanded
21110 code is a forward jump to a newly created label in case the
21111 comparison is true. The generated label rtx is returned. */
21113 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21114 bool swap_operands
)
21125 label
= gen_label_rtx ();
21126 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21127 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21128 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21129 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21130 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21131 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21132 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21133 JUMP_LABEL (tmp
) = label
;
21138 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21139 using comparison code CODE. Operands are swapped for the comparison if
21140 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21142 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21143 bool swap_operands
)
21145 enum machine_mode mode
= GET_MODE (op0
);
21146 rtx mask
= gen_reg_rtx (mode
);
21155 if (mode
== DFmode
)
21156 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21157 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21159 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21160 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21165 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21166 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21168 ix86_gen_TWO52 (enum machine_mode mode
)
21170 REAL_VALUE_TYPE TWO52r
;
21173 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21174 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21175 TWO52
= force_reg (mode
, TWO52
);
21180 /* Expand SSE sequence for computing lround from OP1 storing
21183 ix86_expand_lround (rtx op0
, rtx op1
)
21185 /* C code for the stuff we're doing below:
21186 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21189 enum machine_mode mode
= GET_MODE (op1
);
21190 const struct real_format
*fmt
;
21191 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21194 /* load nextafter (0.5, 0.0) */
21195 fmt
= REAL_MODE_FORMAT (mode
);
21196 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21197 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21199 /* adj = copysign (0.5, op1) */
21200 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21201 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21203 /* adj = op1 + adj */
21204 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21206 /* op0 = (imode)adj */
21207 expand_fix (op0
, adj
, 0);
21210 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21213 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21215 /* C code for the stuff we're doing below (for do_floor):
21217 xi -= (double)xi > op1 ? 1 : 0;
21220 enum machine_mode fmode
= GET_MODE (op1
);
21221 enum machine_mode imode
= GET_MODE (op0
);
21222 rtx ireg
, freg
, label
, tmp
;
21224 /* reg = (long)op1 */
21225 ireg
= gen_reg_rtx (imode
);
21226 expand_fix (ireg
, op1
, 0);
21228 /* freg = (double)reg */
21229 freg
= gen_reg_rtx (fmode
);
21230 expand_float (freg
, ireg
, 0);
21232 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21233 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21234 freg
, op1
, !do_floor
);
21235 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21236 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21237 emit_move_insn (ireg
, tmp
);
21239 emit_label (label
);
21240 LABEL_NUSES (label
) = 1;
21242 emit_move_insn (op0
, ireg
);
21245 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21246 result in OPERAND0. */
21248 ix86_expand_rint (rtx operand0
, rtx operand1
)
21250 /* C code for the stuff we're doing below:
21251 xa = fabs (operand1);
21252 if (!isless (xa, 2**52))
21254 xa = xa + 2**52 - 2**52;
21255 return copysign (xa, operand1);
21257 enum machine_mode mode
= GET_MODE (operand0
);
21258 rtx res
, xa
, label
, TWO52
, mask
;
21260 res
= gen_reg_rtx (mode
);
21261 emit_move_insn (res
, operand1
);
21263 /* xa = abs (operand1) */
21264 xa
= ix86_expand_sse_fabs (res
, &mask
);
21266 /* if (!isless (xa, TWO52)) goto label; */
21267 TWO52
= ix86_gen_TWO52 (mode
);
21268 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21270 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21271 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21273 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21275 emit_label (label
);
21276 LABEL_NUSES (label
) = 1;
21278 emit_move_insn (operand0
, res
);
21281 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21284 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21286 /* C code for the stuff we expand below.
21287 double xa = fabs (x), x2;
21288 if (!isless (xa, TWO52))
21290 xa = xa + TWO52 - TWO52;
21291 x2 = copysign (xa, x);
21300 enum machine_mode mode
= GET_MODE (operand0
);
21301 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21303 TWO52
= ix86_gen_TWO52 (mode
);
21305 /* Temporary for holding the result, initialized to the input
21306 operand to ease control flow. */
21307 res
= gen_reg_rtx (mode
);
21308 emit_move_insn (res
, operand1
);
21310 /* xa = abs (operand1) */
21311 xa
= ix86_expand_sse_fabs (res
, &mask
);
21313 /* if (!isless (xa, TWO52)) goto label; */
21314 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21316 /* xa = xa + TWO52 - TWO52; */
21317 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21318 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21320 /* xa = copysign (xa, operand1) */
21321 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21323 /* generate 1.0 or -1.0 */
21324 one
= force_reg (mode
,
21325 const_double_from_real_value (do_floor
21326 ? dconst1
: dconstm1
, mode
));
21328 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21329 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21330 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21331 gen_rtx_AND (mode
, one
, tmp
)));
21332 /* We always need to subtract here to preserve signed zero. */
21333 tmp
= expand_simple_binop (mode
, MINUS
,
21334 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21335 emit_move_insn (res
, tmp
);
21337 emit_label (label
);
21338 LABEL_NUSES (label
) = 1;
21340 emit_move_insn (operand0
, res
);
21343 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21346 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21348 /* C code for the stuff we expand below.
21349 double xa = fabs (x), x2;
21350 if (!isless (xa, TWO52))
21352 x2 = (double)(long)x;
21359 if (HONOR_SIGNED_ZEROS (mode))
21360 return copysign (x2, x);
21363 enum machine_mode mode
= GET_MODE (operand0
);
21364 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21366 TWO52
= ix86_gen_TWO52 (mode
);
21368 /* Temporary for holding the result, initialized to the input
21369 operand to ease control flow. */
21370 res
= gen_reg_rtx (mode
);
21371 emit_move_insn (res
, operand1
);
21373 /* xa = abs (operand1) */
21374 xa
= ix86_expand_sse_fabs (res
, &mask
);
21376 /* if (!isless (xa, TWO52)) goto label; */
21377 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21379 /* xa = (double)(long)x */
21380 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21381 expand_fix (xi
, res
, 0);
21382 expand_float (xa
, xi
, 0);
21385 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21387 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21388 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21389 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21390 gen_rtx_AND (mode
, one
, tmp
)));
21391 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21392 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21393 emit_move_insn (res
, tmp
);
21395 if (HONOR_SIGNED_ZEROS (mode
))
21396 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21398 emit_label (label
);
21399 LABEL_NUSES (label
) = 1;
21401 emit_move_insn (operand0
, res
);
21404 /* Expand SSE sequence for computing round from OPERAND1 storing
21405 into OPERAND0. Sequence that works without relying on DImode truncation
21406 via cvttsd2siq that is only available on 64bit targets. */
21408 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21410 /* C code for the stuff we expand below.
21411 double xa = fabs (x), xa2, x2;
21412 if (!isless (xa, TWO52))
21414 Using the absolute value and copying back sign makes
21415 -0.0 -> -0.0 correct.
21416 xa2 = xa + TWO52 - TWO52;
21421 else if (dxa > 0.5)
21423 x2 = copysign (xa2, x);
21426 enum machine_mode mode
= GET_MODE (operand0
);
21427 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21429 TWO52
= ix86_gen_TWO52 (mode
);
21431 /* Temporary for holding the result, initialized to the input
21432 operand to ease control flow. */
21433 res
= gen_reg_rtx (mode
);
21434 emit_move_insn (res
, operand1
);
21436 /* xa = abs (operand1) */
21437 xa
= ix86_expand_sse_fabs (res
, &mask
);
21439 /* if (!isless (xa, TWO52)) goto label; */
21440 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21442 /* xa2 = xa + TWO52 - TWO52; */
21443 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21444 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21446 /* dxa = xa2 - xa; */
21447 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21449 /* generate 0.5, 1.0 and -0.5 */
21450 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21451 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21452 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21456 tmp
= gen_reg_rtx (mode
);
21457 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21458 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21459 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21460 gen_rtx_AND (mode
, one
, tmp
)));
21461 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21462 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21463 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21464 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21465 gen_rtx_AND (mode
, one
, tmp
)));
21466 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21468 /* res = copysign (xa2, operand1) */
21469 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21471 emit_label (label
);
21472 LABEL_NUSES (label
) = 1;
21474 emit_move_insn (operand0
, res
);
21477 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21480 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21482 /* C code for SSE variant we expand below.
21483 double xa = fabs (x), x2;
21484 if (!isless (xa, TWO52))
21486 x2 = (double)(long)x;
21487 if (HONOR_SIGNED_ZEROS (mode))
21488 return copysign (x2, x);
21491 enum machine_mode mode
= GET_MODE (operand0
);
21492 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21494 TWO52
= ix86_gen_TWO52 (mode
);
21496 /* Temporary for holding the result, initialized to the input
21497 operand to ease control flow. */
21498 res
= gen_reg_rtx (mode
);
21499 emit_move_insn (res
, operand1
);
21501 /* xa = abs (operand1) */
21502 xa
= ix86_expand_sse_fabs (res
, &mask
);
21504 /* if (!isless (xa, TWO52)) goto label; */
21505 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21507 /* x = (double)(long)x */
21508 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21509 expand_fix (xi
, res
, 0);
21510 expand_float (res
, xi
, 0);
21512 if (HONOR_SIGNED_ZEROS (mode
))
21513 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21515 emit_label (label
);
21516 LABEL_NUSES (label
) = 1;
21518 emit_move_insn (operand0
, res
);
21521 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21524 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21526 enum machine_mode mode
= GET_MODE (operand0
);
21527 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21529 /* C code for SSE variant we expand below.
21530 double xa = fabs (x), x2;
21531 if (!isless (xa, TWO52))
21533 xa2 = xa + TWO52 - TWO52;
21537 x2 = copysign (xa2, x);
21541 TWO52
= ix86_gen_TWO52 (mode
);
21543 /* Temporary for holding the result, initialized to the input
21544 operand to ease control flow. */
21545 res
= gen_reg_rtx (mode
);
21546 emit_move_insn (res
, operand1
);
21548 /* xa = abs (operand1) */
21549 xa
= ix86_expand_sse_fabs (res
, &smask
);
21551 /* if (!isless (xa, TWO52)) goto label; */
21552 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21554 /* res = xa + TWO52 - TWO52; */
21555 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21556 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21557 emit_move_insn (res
, tmp
);
21560 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21562 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21563 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21564 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21565 gen_rtx_AND (mode
, mask
, one
)));
21566 tmp
= expand_simple_binop (mode
, MINUS
,
21567 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21568 emit_move_insn (res
, tmp
);
21570 /* res = copysign (res, operand1) */
21571 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21573 emit_label (label
);
21574 LABEL_NUSES (label
) = 1;
21576 emit_move_insn (operand0
, res
);
21579 /* Expand SSE sequence for computing round from OPERAND1 storing
21582 ix86_expand_round (rtx operand0
, rtx operand1
)
21584 /* C code for the stuff we're doing below:
21585 double xa = fabs (x);
21586 if (!isless (xa, TWO52))
21588 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21589 return copysign (xa, x);
21591 enum machine_mode mode
= GET_MODE (operand0
);
21592 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21593 const struct real_format
*fmt
;
21594 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21596 /* Temporary for holding the result, initialized to the input
21597 operand to ease control flow. */
21598 res
= gen_reg_rtx (mode
);
21599 emit_move_insn (res
, operand1
);
21601 TWO52
= ix86_gen_TWO52 (mode
);
21602 xa
= ix86_expand_sse_fabs (res
, &mask
);
21603 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21605 /* load nextafter (0.5, 0.0) */
21606 fmt
= REAL_MODE_FORMAT (mode
);
21607 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21608 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21610 /* xa = xa + 0.5 */
21611 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21612 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21614 /* xa = (double)(int64_t)xa */
21615 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21616 expand_fix (xi
, xa
, 0);
21617 expand_float (xa
, xi
, 0);
21619 /* res = copysign (xa, operand1) */
21620 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21622 emit_label (label
);
21623 LABEL_NUSES (label
) = 1;
21625 emit_move_insn (operand0
, res
);
21628 #include "gt-i386.h"