1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit: -1 means "no limit" unless the target
   configuration overrides CHECK_STACK_LIMIT.  The matching #endif was
   lost in extraction; restored here so the conditional is balanced.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DI map to slots 0-3; any other mode (e.g. TImode) falls
   through to slot 4 ("other").  The trailing ": 4)" arm was lost in
   extraction; restored so the conditional expression is complete.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Size-tuning analogue of COSTS_N_INSNS: expresses cost as N two-byte
   instruction slots, so an add (2 bytes) costs COSTS_N_BYTES (1) * 2.
   Used only by the size_cost table below.  */
69 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop-strategy entry for tables that do not provide a
   separate 64-bit variant: always fall back to a library call.  */
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size rather than speed: every entry is
   measured in bytes (via COSTS_N_BYTES), not cycles.
   NOTE(review): this initializer is garbled by extraction -- the embedded
   upstream line numbers skip, so some fields (e.g. the "large insn" and
   branch-cost slots) and the closing "};" are missing; restore from upstream
   GCC config/i386/i386.c rather than hand-editing values.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* Cycle-based cost table for the original Intel 386.
   NOTE(review): this initializer is garbled by extraction -- the embedded
   upstream line numbers skip, so some fields and the closing "};" are
   missing; restore from upstream GCC config/i386/i386.c.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Cycle-based cost table for the Intel 486.
   NOTE(review): this initializer is garbled by extraction -- the embedded
   upstream line numbers skip, so some fields, the memset strategy entry and
   the closing "};" are missing; restore from upstream GCC
   config/i386/i386.c.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-based cost table for the Intel Pentium (P5).
   NOTE(review): this initializer is garbled by extraction -- the embedded
   upstream line numbers skip, so some fields, the tail of the memset
   strategy and the closing "};" are missing; restore from upstream GCC
   config/i386/i386.c.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-based cost table for the Intel PentiumPro / P6 family.
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields and the closing "};" are missing, and the block
   comment about rep-prefix tuning below has lost its terminating "*" "/"
   (the dropped line 357); restore from upstream GCC config/i386/i386.c.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD Geode.
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields and the closing "};" are missing; restore from
   upstream GCC config/i386/i386.c.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD K6.
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields and the closing "};" are missing; restore from
   upstream GCC config/i386/i386.c.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD Athlon (K7).
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields and the closing "};" are missing; restore from
   upstream GCC config/i386/i386.c.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields and the closing "};" are missing, and the
   prefetch-queue comment below has lost its terminating "*" "/" (the
   dropped line 588); restore from upstream GCC config/i386/i386.c.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-based cost table for AMD Family 10h (Barcelona).
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields and the closing "};" are missing; the MOVD latency
   table below has lost its enclosing comment delimiters, and the
   prefetch-queue comment its terminating "*" "/"; restore from upstream
   GCC config/i386/i386.c.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-based cost table for the Intel Pentium 4 (NetBurst).
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields, part of the memset strategy and the closing "};"
   are missing; restore from upstream GCC config/i386/i386.c.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Cycle-based cost table for the Intel Nocona (64-bit Pentium 4 / Xeon).
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields, part of the memset strategy and the closing "};"
   are missing; restore from upstream GCC config/i386/i386.c.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-based cost table for the Intel Core 2.
   NOTE(review): garbled by extraction -- embedded upstream line numbers
   skip, so some fields and the closing "};" are missing; also note the
   slot commented "cost of loading integer registers" at upstream line 828
   sits where the other tables document the fp-store costs -- looks like a
   copy-paste comment, verify against upstream GCC config/i386/i386.c.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of loading integer registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table currently in effect; points at the table for the processor
   being tuned for.  Starts at pentium_cost and is presumably reset when
   options are processed -- TODO confirm against override_options.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_XXX is a one-bit mask
   keyed by the PROCESSOR_XXX enum value; the x86_* tuning flags below are
   unions of these masks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
/* NOTE: m_K6_GEODE mentions m_K6 before its #define below; this is fine
   because macro expansion happens at use, not at definition.  */
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1002 /* Generic instruction choice should be common subset of supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
1006 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
1007 generic because it is not working well with PPro base chips. */
1008 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
/* Tuning flags: each is a bitmask over the per-processor m_* masks above;
   a set bit enables the corresponding transformation when tuning for that
   processor.  */
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
			    | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Enable to zero extend integer registers to avoid partial dependencies */
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
		     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
			      | m_K6 | m_CORE2 | m_GENERIC;
1020 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
/* More per-processor tuning masks (see the m_* macros above).  */
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
			    | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation result.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  It also increases the code size.  As the result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA
			 | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
				  | m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
/* Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
const int x86_use_xchgb = m_PENT4;
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
			       | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
/* All-ones: QImode arithmetic is always usable.  */
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
			  | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
			  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4
			  | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
			  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Enable if integer moves are preferred for DFmode copies */
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4
				       | m_NOCONA | m_PPRO | m_CORE2
				       | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4
				       | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4
				      | m_NOCONA | m_CORE2 | m_GENERIC;
/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
   for outgoing arguments will be computed and placed into the variable
   `current_function_outgoing_args_size'.  No space will be pushed onto the stack
   for each call; instead, the function prologue should increase the stack frame
   size by this amount.  Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
   not proper.  */
1086 const int x86_accumulate_outgoing_args
= m_ATHLON_K8_AMDFAM10
| m_PENT4
1087 | m_NOCONA
| m_PPRO
| m_CORE2
/* Prologue/epilogue shape and legacy-x87 tuning masks.  */
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
					   | m_ATHLON_K8_AMDFAM10 | m_PENT4
					   | m_NOCONA | m_CORE2 | m_GENERIC;
/* In Generic model we have a conflict here in between PPro/Pentium4 based chips
   that treat 128bit SSE registers as single units versus K8 based chips that
   divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promote all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results shows that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful
   scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO
					   | m_CORE2 | m_GENERIC | m_AMDFAM10;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       ...
     if (x86_sse_partial_reg_dependency == true)
       ...
   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
       ...
   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       ...
     if (x86_sse_partial_reg_dependency == true)
       ...
   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       ...
     if (x86_sse_split_regs == true)
       ...  */
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
/* SSE store/load-zero and x87 stack tuning masks.  */
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
/* CPUs where the extended 80387 constants (fldpi etc., see the
   ext_80387_constants_table below) are profitable to use.  */
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
				    | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
				| m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
			 | m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486.  */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;

/* String-operation algorithm forced by the user; no_stringop means none.  */
static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1199 /* Array of the smallest class containing reg number REGNO, indexed by
1200 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1202 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1204 /* ax, dx, cx, bx */
1205 AREG
, DREG
, CREG
, BREG
,
1206 /* si, di, bp, sp */
1207 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1209 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1210 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1213 /* flags, fpsr, fpcr, frame */
1214 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1215 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1217 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1219 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1220 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1221 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1225 /* The "default" register map used in 32bit mode. */
1227 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1229 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1230 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1231 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1232 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1233 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1234 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1235 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1238 static int const x86_64_int_parameter_registers
[6] =
1240 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1241 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1244 static int const x86_64_int_return_registers
[4] =
1246 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1249 /* The "default" register map used in 64bit mode. */
1250 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1252 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1253 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1255 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1256 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1257 8,9,10,11,12,13,14,15, /* extended integer registers */
1258 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1261 /* Define the register numbers to be used in Dwarf debugging information.
1262 The SVR4 reference port C compiler uses the following register numbers
1263 in its Dwarf output code:
1264 0 for %eax (gcc regno = 0)
1265 1 for %ecx (gcc regno = 2)
1266 2 for %edx (gcc regno = 1)
1267 3 for %ebx (gcc regno = 3)
1268 4 for %esp (gcc regno = 7)
1269 5 for %ebp (gcc regno = 6)
1270 6 for %esi (gcc regno = 4)
1271 7 for %edi (gcc regno = 5)
1272 The following three DWARF register numbers are never generated by
1273 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1274 believes these numbers have these meanings.
1275 8 for %eip (no gcc equivalent)
1276 9 for %eflags (gcc regno = 17)
1277 10 for %trapno (no gcc equivalent)
1278 It is not at all clear how we should number the FP stack registers
1279 for the x86 architecture. If the version of SDB on x86/svr4 were
1280 a bit less brain dead with respect to floating-point then we would
1281 have a precedent to follow with respect to DWARF register numbers
1282 for x86 FP registers, but the SDB on x86/svr4 is so completely
1283 broken with respect to FP registers that it is hardly worth thinking
1284 of it as something to strive for compatibility with.
1285 The version of x86/svr4 SDB I have at the moment does (partially)
1286 seem to believe that DWARF register number 11 is associated with
1287 the x86 register %st(0), but that's about all. Higher DWARF
1288 register numbers don't seem to be associated with anything in
1289 particular, and even for DWARF regno 11, SDB only seems to under-
1290 stand that it should say that a variable lives in %st(0) (when
1291 asked via an `=' command) if we said it was in DWARF regno 11,
1292 but SDB still prints garbage when asked for the value of the
1293 variable in question (via a `/' command).
1294 (Also note that the labels SDB prints for various FP stack regs
1295 when doing an `x' command are all wrong.)
1296 Note that these problems generally don't affect the native SVR4
1297 C compiler because it doesn't allow the use of -O with -g and
1298 because when it is *not* optimizing, it allocates a memory
1299 location for each floating-point variable, and the memory
1300 location is what gets described in the DWARF AT_location
1301 attribute for the variable in question.
1302 Regardless of the severe mental illness of the x86/svr4 SDB, we
1303 do something sensible here and we use the following DWARF
1304 register numbers. Note that these are all stack-top-relative
1306 11 for %st(0) (gcc regno = 8)
1307 12 for %st(1) (gcc regno = 9)
1308 13 for %st(2) (gcc regno = 10)
1309 14 for %st(3) (gcc regno = 11)
1310 15 for %st(4) (gcc regno = 12)
1311 16 for %st(5) (gcc regno = 13)
1312 17 for %st(6) (gcc regno = 14)
   18 for %st(7) (gcc regno = 15)  */
1315 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1317 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1318 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1319 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1320 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1321 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1322 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1323 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
rtx ix86_compare_op0 = NULL_RTX;	/* first comparison operand */
rtx ix86_compare_op1 = NULL_RTX;	/* second comparison operand */
/* NOTE(review): presumably set when a compare insn has already been
   emitted -- confirm against i386.md uses.  */
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1336 /* Define the structure for the machine field in struct function. */
1338 struct stack_local_entry
GTY(())
1340 unsigned short mode
;
1343 struct stack_local_entry
*next
;
1346 /* Structure describing stack frame layout.
1347 Stack grows downward:
1353 saved frame pointer if frame_pointer_needed
1354 <- HARD_FRAME_POINTER
1359 [va_arg registers] (
1360 > to_allocate <- FRAME_POINTER
1370 HOST_WIDE_INT frame
;
1372 int outgoing_arguments_size
;
1375 HOST_WIDE_INT to_allocate
;
1376 /* The offsets relative to ARG_POINTER. */
1377 HOST_WIDE_INT frame_pointer_offset
;
1378 HOST_WIDE_INT hard_frame_pointer_offset
;
1379 HOST_WIDE_INT stack_pointer_offset
;
1381 /* When save_regs_using_mov is set, emit prologue using
1382 move instead of push instructions. */
1383 bool save_regs_using_mov
;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Assembler dialect: defaults to AT&T syntax.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect: defaults to the GNU model.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* true if cmpxchg16b is supported.  */

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1429 static bool ix86_handle_option (size_t, const char *, int);
1430 static void output_pic_addr_const (FILE *, rtx
, int);
1431 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1433 static const char *get_some_local_dynamic_name (void);
1434 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1435 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1436 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1438 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1439 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1441 static rtx
get_thread_pointer (int);
1442 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1443 static void get_pc_thunk_name (char [32], unsigned int);
1444 static rtx
gen_push (rtx
);
1445 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1446 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1447 static struct machine_function
* ix86_init_machine_status (void);
1448 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1449 static int ix86_nsaved_regs (void);
1450 static void ix86_emit_save_regs (void);
1451 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1452 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1453 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1454 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1455 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1456 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1457 static int ix86_issue_rate (void);
1458 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1459 static int ia32_multipass_dfa_lookahead (void);
1460 static void ix86_init_mmx_sse_builtins (void);
1461 static rtx
x86_this_parameter (tree
);
1462 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1463 HOST_WIDE_INT
, tree
);
1464 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1465 static void x86_file_start (void);
1466 static void ix86_reorg (void);
1467 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1468 static tree
ix86_build_builtin_va_list (void);
1469 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1471 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1472 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1473 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1475 static int ix86_address_cost (rtx
);
1476 static bool ix86_cannot_force_const_mem (rtx
);
1477 static rtx
ix86_delegitimize_address (rtx
);
1479 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1481 struct builtin_description
;
1482 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1484 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1486 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1487 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1488 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1489 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1490 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1491 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1492 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1493 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1494 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1495 static int ix86_fp_comparison_cost (enum rtx_code code
);
1496 static unsigned int ix86_select_alt_pic_regnum (void);
1497 static int ix86_save_reg (unsigned int, int);
1498 static void ix86_compute_frame_layout (struct ix86_frame
*);
1499 static int ix86_comp_type_attributes (tree
, tree
);
1500 static int ix86_function_regparm (tree
, tree
);
1501 const struct attribute_spec ix86_attribute_table
[];
1502 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1503 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1504 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1505 static bool contains_128bit_aligned_vector_p (tree
);
1506 static rtx
ix86_struct_value_rtx (tree
, int);
1507 static bool ix86_ms_bitfield_layout_p (tree
);
1508 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1509 static int extended_reg_mentioned_1 (rtx
*, void *);
1510 static bool ix86_rtx_costs (rtx
, int, int, int *);
1511 static int min_insn_size (rtx
);
1512 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1513 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1514 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1516 static void ix86_init_builtins (void);
1517 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1518 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1519 static tree
ix86_builtin_conversion (enum tree_code
, tree
);
1520 static const char *ix86_mangle_fundamental_type (tree
);
1521 static tree
ix86_stack_protect_fail (void);
1522 static rtx
ix86_internal_arg_pointer (void);
1523 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1524 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1527 /* This function is only used on Solaris. */
1528 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1531 /* Register class used for passing given 64bit part of the argument.
1532 These represent classes as documented by the PS ABI, with the exception
1533 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1534 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1536 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1537 whenever possible (upper half does contain padding).
1539 enum x86_64_reg_class
1542 X86_64_INTEGER_CLASS
,
1543 X86_64_INTEGERSI_CLASS
,
1550 X86_64_COMPLEX_X87_CLASS
,
1553 static const char * const x86_64_reg_class_name
[] = {
1554 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1555 "sseup", "x87", "x87up", "cplx87", "no"
1558 #define MAX_CLASSES 4
1560 /* Table of constants used by fldpi, fldln2, etc.... */
1561 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1562 static bool ext_80387_constants_init
= 0;
1563 static void init_ext_80387_constants (void);
1564 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1565 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1566 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1567 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1568 unsigned HOST_WIDE_INT align
)
1571 /* Initialize the GCC target structure. */
1572 #undef TARGET_ATTRIBUTE_TABLE
1573 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1574 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1575 # undef TARGET_MERGE_DECL_ATTRIBUTES
1576 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1579 #undef TARGET_COMP_TYPE_ATTRIBUTES
1580 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1582 #undef TARGET_INIT_BUILTINS
1583 #define TARGET_INIT_BUILTINS ix86_init_builtins
1584 #undef TARGET_EXPAND_BUILTIN
1585 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1587 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1588 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1589 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1590 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1592 #undef TARGET_ASM_FUNCTION_EPILOGUE
1593 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1595 #undef TARGET_ENCODE_SECTION_INFO
1596 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1597 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1599 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1602 #undef TARGET_ASM_OPEN_PAREN
1603 #define TARGET_ASM_OPEN_PAREN ""
1604 #undef TARGET_ASM_CLOSE_PAREN
1605 #define TARGET_ASM_CLOSE_PAREN ""
1607 #undef TARGET_ASM_ALIGNED_HI_OP
1608 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1609 #undef TARGET_ASM_ALIGNED_SI_OP
1610 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1612 #undef TARGET_ASM_ALIGNED_DI_OP
1613 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1616 #undef TARGET_ASM_UNALIGNED_HI_OP
1617 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1618 #undef TARGET_ASM_UNALIGNED_SI_OP
1619 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1620 #undef TARGET_ASM_UNALIGNED_DI_OP
1621 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1623 #undef TARGET_SCHED_ADJUST_COST
1624 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1625 #undef TARGET_SCHED_ISSUE_RATE
1626 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1627 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1628 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1629 ia32_multipass_dfa_lookahead
1631 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1632 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1635 #undef TARGET_HAVE_TLS
1636 #define TARGET_HAVE_TLS true
1638 #undef TARGET_CANNOT_FORCE_CONST_MEM
1639 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1640 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1641 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1643 #undef TARGET_DELEGITIMIZE_ADDRESS
1644 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1646 #undef TARGET_MS_BITFIELD_LAYOUT_P
1647 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1650 #undef TARGET_BINDS_LOCAL_P
1651 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1654 #undef TARGET_ASM_OUTPUT_MI_THUNK
1655 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1656 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1657 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1659 #undef TARGET_ASM_FILE_START
1660 #define TARGET_ASM_FILE_START x86_file_start
1662 #undef TARGET_DEFAULT_TARGET_FLAGS
1663 #define TARGET_DEFAULT_TARGET_FLAGS \
1665 | TARGET_64BIT_DEFAULT \
1666 | TARGET_SUBTARGET_DEFAULT \
1667 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1669 #undef TARGET_HANDLE_OPTION
1670 #define TARGET_HANDLE_OPTION ix86_handle_option
1672 #undef TARGET_RTX_COSTS
1673 #define TARGET_RTX_COSTS ix86_rtx_costs
1674 #undef TARGET_ADDRESS_COST
1675 #define TARGET_ADDRESS_COST ix86_address_cost
1677 #undef TARGET_FIXED_CONDITION_CODE_REGS
1678 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1679 #undef TARGET_CC_MODES_COMPATIBLE
1680 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1682 #undef TARGET_MACHINE_DEPENDENT_REORG
1683 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1685 #undef TARGET_BUILD_BUILTIN_VA_LIST
1686 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1688 #undef TARGET_MD_ASM_CLOBBERS
1689 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1691 #undef TARGET_PROMOTE_PROTOTYPES
1692 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1693 #undef TARGET_STRUCT_VALUE_RTX
1694 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1695 #undef TARGET_SETUP_INCOMING_VARARGS
1696 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1697 #undef TARGET_MUST_PASS_IN_STACK
1698 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1699 #undef TARGET_PASS_BY_REFERENCE
1700 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1701 #undef TARGET_INTERNAL_ARG_POINTER
1702 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1703 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1704 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1707 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1709 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1710 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1712 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1713 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1716 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1717 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1720 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1721 #undef TARGET_INSERT_ATTRIBUTES
1722 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1725 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1726 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1728 #undef TARGET_STACK_PROTECT_FAIL
1729 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1731 #undef TARGET_FUNCTION_VALUE
1732 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The global target hook vector; each TARGET_* macro redefined above
   overrides the corresponding default inside TARGET_INITIALIZER.  */
1734 struct gcc_target targetm
= TARGET_INITIALIZER
;
1737 /* The svr4 ABI for the i386 says that records and unions are returned
1739 #ifndef DEFAULT_PCC_STRUCT_RETURN
1740 #define DEFAULT_PCC_STRUCT_RETURN 1
1743 /* Implement TARGET_HANDLE_OPTION. */
1746 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1753 target_flags
&= ~MASK_3DNOW_A
;
1754 target_flags_explicit
|= MASK_3DNOW_A
;
1761 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1762 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1769 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1770 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1777 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1778 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1785 target_flags
&= ~MASK_SSE4A
;
1786 target_flags_explicit
|= MASK_SSE4A
;
1795 /* Sometimes certain combinations of command options do not make
1796 sense on a particular target machine. You can define a macro
1797 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1798 defined, is executed once just after all the command options have
1801 Don't use this macro to turn on various extra optimizations for
1802 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1805 override_options (void)
1808 int ix86_tune_defaulted
= 0;
1810 /* Comes from final.c -- no real reason to change it. */
1811 #define MAX_CODE_ALIGN 16
1815 const struct processor_costs
*cost
; /* Processor costs */
1816 const int target_enable
; /* Target flags to enable. */
1817 const int target_disable
; /* Target flags to disable. */
1818 const int align_loop
; /* Default alignments. */
1819 const int align_loop_max_skip
;
1820 const int align_jump
;
1821 const int align_jump_max_skip
;
1822 const int align_func
;
1824 const processor_target_table
[PROCESSOR_max
] =
1826 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1827 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1828 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1829 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1830 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1831 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1832 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1833 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1834 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1835 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1836 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1837 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1838 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1839 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1842 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1845 const char *const name
; /* processor name or nickname. */
1846 const enum processor_type processor
;
1847 const enum pta_flags
1853 PTA_PREFETCH_SSE
= 16,
1864 const processor_alias_table
[] =
1866 {"i386", PROCESSOR_I386
, 0},
1867 {"i486", PROCESSOR_I486
, 0},
1868 {"i586", PROCESSOR_PENTIUM
, 0},
1869 {"pentium", PROCESSOR_PENTIUM
, 0},
1870 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1871 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1872 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1873 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1874 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1875 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1876 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1877 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1878 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1879 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1880 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1881 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1882 | PTA_MMX
| PTA_PREFETCH_SSE
},
1883 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1884 | PTA_MMX
| PTA_PREFETCH_SSE
},
1885 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1886 | PTA_MMX
| PTA_PREFETCH_SSE
},
1887 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1888 | PTA_MMX
| PTA_PREFETCH_SSE
| PTA_CX16
},
1889 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1890 | PTA_64BIT
| PTA_MMX
1891 | PTA_PREFETCH_SSE
| PTA_CX16
},
1892 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1894 {"k6", PROCESSOR_K6
, PTA_MMX
},
1895 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1896 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1897 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1899 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1900 | PTA_3DNOW
| PTA_3DNOW_A
},
1901 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1902 | PTA_3DNOW_A
| PTA_SSE
},
1903 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1904 | PTA_3DNOW_A
| PTA_SSE
},
1905 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1906 | PTA_3DNOW_A
| PTA_SSE
},
1907 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1908 | PTA_SSE
| PTA_SSE2
},
1909 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1910 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1911 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1912 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1913 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1914 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1915 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1916 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1917 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1918 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1919 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1920 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1921 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1922 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1925 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1927 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1928 SUBTARGET_OVERRIDE_OPTIONS
;
1931 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1932 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1935 /* -fPIC is the default for x86_64. */
1936 if (TARGET_MACHO
&& TARGET_64BIT
)
1939 /* Set the default values for switches whose default depends on TARGET_64BIT
1940 in case they weren't overwritten by command line options. */
1943 /* Mach-O doesn't support omitting the frame pointer for now. */
1944 if (flag_omit_frame_pointer
== 2)
1945 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1946 if (flag_asynchronous_unwind_tables
== 2)
1947 flag_asynchronous_unwind_tables
= 1;
1948 if (flag_pcc_struct_return
== 2)
1949 flag_pcc_struct_return
= 0;
1953 if (flag_omit_frame_pointer
== 2)
1954 flag_omit_frame_pointer
= 0;
1955 if (flag_asynchronous_unwind_tables
== 2)
1956 flag_asynchronous_unwind_tables
= 0;
1957 if (flag_pcc_struct_return
== 2)
1958 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1961 /* Need to check -mtune=generic first. */
1962 if (ix86_tune_string
)
1964 if (!strcmp (ix86_tune_string
, "generic")
1965 || !strcmp (ix86_tune_string
, "i686")
1966 /* As special support for cross compilers we read -mtune=native
1967 as -mtune=generic. With native compilers we won't see the
1968 -mtune=native, as it was changed by the driver. */
1969 || !strcmp (ix86_tune_string
, "native"))
1972 ix86_tune_string
= "generic64";
1974 ix86_tune_string
= "generic32";
1976 else if (!strncmp (ix86_tune_string
, "generic", 7))
1977 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1981 if (ix86_arch_string
)
1982 ix86_tune_string
= ix86_arch_string
;
1983 if (!ix86_tune_string
)
1985 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1986 ix86_tune_defaulted
= 1;
1989 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1990 need to use a sensible tune option. */
1991 if (!strcmp (ix86_tune_string
, "generic")
1992 || !strcmp (ix86_tune_string
, "x86-64")
1993 || !strcmp (ix86_tune_string
, "i686"))
1996 ix86_tune_string
= "generic64";
1998 ix86_tune_string
= "generic32";
2001 if (ix86_stringop_string
)
2003 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2004 stringop_alg
= rep_prefix_1_byte
;
2005 else if (!strcmp (ix86_stringop_string
, "libcall"))
2006 stringop_alg
= libcall
;
2007 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2008 stringop_alg
= rep_prefix_4_byte
;
2009 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2010 stringop_alg
= rep_prefix_8_byte
;
2011 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2012 stringop_alg
= loop_1_byte
;
2013 else if (!strcmp (ix86_stringop_string
, "loop"))
2014 stringop_alg
= loop
;
2015 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2016 stringop_alg
= unrolled_loop
;
2018 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2020 if (!strcmp (ix86_tune_string
, "x86-64"))
2021 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2022 "-mtune=generic instead as appropriate.");
2024 if (!ix86_arch_string
)
2025 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2026 if (!strcmp (ix86_arch_string
, "generic"))
2027 error ("generic CPU can be used only for -mtune= switch");
2028 if (!strncmp (ix86_arch_string
, "generic", 7))
2029 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2031 if (ix86_cmodel_string
!= 0)
2033 if (!strcmp (ix86_cmodel_string
, "small"))
2034 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2035 else if (!strcmp (ix86_cmodel_string
, "medium"))
2036 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2038 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
2039 else if (!strcmp (ix86_cmodel_string
, "32"))
2040 ix86_cmodel
= CM_32
;
2041 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2042 ix86_cmodel
= CM_KERNEL
;
2043 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
2044 ix86_cmodel
= CM_LARGE
;
2046 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2050 ix86_cmodel
= CM_32
;
2052 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2054 if (ix86_asm_string
!= 0)
2057 && !strcmp (ix86_asm_string
, "intel"))
2058 ix86_asm_dialect
= ASM_INTEL
;
2059 else if (!strcmp (ix86_asm_string
, "att"))
2060 ix86_asm_dialect
= ASM_ATT
;
2062 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2064 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2065 error ("code model %qs not supported in the %s bit mode",
2066 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2067 if (ix86_cmodel
== CM_LARGE
)
2068 sorry ("code model %<large%> not supported yet");
2069 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2070 sorry ("%i-bit mode not compiled in",
2071 (target_flags
& MASK_64BIT
) ? 64 : 32);
2073 for (i
= 0; i
< pta_size
; i
++)
2074 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2076 ix86_arch
= processor_alias_table
[i
].processor
;
2077 /* Default cpu tuning to the architecture. */
2078 ix86_tune
= ix86_arch
;
2079 if (processor_alias_table
[i
].flags
& PTA_MMX
2080 && !(target_flags_explicit
& MASK_MMX
))
2081 target_flags
|= MASK_MMX
;
2082 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2083 && !(target_flags_explicit
& MASK_3DNOW
))
2084 target_flags
|= MASK_3DNOW
;
2085 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2086 && !(target_flags_explicit
& MASK_3DNOW_A
))
2087 target_flags
|= MASK_3DNOW_A
;
2088 if (processor_alias_table
[i
].flags
& PTA_SSE
2089 && !(target_flags_explicit
& MASK_SSE
))
2090 target_flags
|= MASK_SSE
;
2091 if (processor_alias_table
[i
].flags
& PTA_SSE2
2092 && !(target_flags_explicit
& MASK_SSE2
))
2093 target_flags
|= MASK_SSE2
;
2094 if (processor_alias_table
[i
].flags
& PTA_SSE3
2095 && !(target_flags_explicit
& MASK_SSE3
))
2096 target_flags
|= MASK_SSE3
;
2097 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2098 && !(target_flags_explicit
& MASK_SSSE3
))
2099 target_flags
|= MASK_SSSE3
;
2100 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2101 x86_prefetch_sse
= true;
2102 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2103 x86_cmpxchg16b
= true;
2104 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2105 && !(target_flags_explicit
& MASK_POPCNT
))
2106 target_flags
|= MASK_POPCNT
;
2107 if (processor_alias_table
[i
].flags
& PTA_ABM
2108 && !(target_flags_explicit
& MASK_ABM
))
2109 target_flags
|= MASK_ABM
;
2110 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2111 && !(target_flags_explicit
& MASK_SSE4A
))
2112 target_flags
|= MASK_SSE4A
;
2113 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2114 error ("CPU you selected does not support x86-64 "
2120 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2122 for (i
= 0; i
< pta_size
; i
++)
2123 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2125 ix86_tune
= processor_alias_table
[i
].processor
;
2126 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2128 if (ix86_tune_defaulted
)
2130 ix86_tune_string
= "x86-64";
2131 for (i
= 0; i
< pta_size
; i
++)
2132 if (! strcmp (ix86_tune_string
,
2133 processor_alias_table
[i
].name
))
2135 ix86_tune
= processor_alias_table
[i
].processor
;
2138 error ("CPU you selected does not support x86-64 "
2141 /* Intel CPUs have always interpreted SSE prefetch instructions as
2142 NOPs; so, we can enable SSE prefetch instructions even when
2143 -mtune (rather than -march) points us to a processor that has them.
2144 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2145 higher processors. */
2146 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2147 x86_prefetch_sse
= true;
2151 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2154 ix86_cost
= &size_cost
;
2156 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2157 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2158 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2160 /* Arrange to set up i386_stack_locals for all functions. */
2161 init_machine_status
= ix86_init_machine_status
;
2163 /* Validate -mregparm= value. */
2164 if (ix86_regparm_string
)
2166 i
= atoi (ix86_regparm_string
);
2167 if (i
< 0 || i
> REGPARM_MAX
)
2168 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2174 ix86_regparm
= REGPARM_MAX
;
2176 /* If the user has provided any of the -malign-* options,
2177 warn and use that value only if -falign-* is not set.
2178 Remove this code in GCC 3.2 or later. */
2179 if (ix86_align_loops_string
)
2181 warning (0, "-malign-loops is obsolete, use -falign-loops");
2182 if (align_loops
== 0)
2184 i
= atoi (ix86_align_loops_string
);
2185 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2186 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2188 align_loops
= 1 << i
;
2192 if (ix86_align_jumps_string
)
2194 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2195 if (align_jumps
== 0)
2197 i
= atoi (ix86_align_jumps_string
);
2198 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2199 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2201 align_jumps
= 1 << i
;
2205 if (ix86_align_funcs_string
)
2207 warning (0, "-malign-functions is obsolete, use -falign-functions");
2208 if (align_functions
== 0)
2210 i
= atoi (ix86_align_funcs_string
);
2211 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2212 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2214 align_functions
= 1 << i
;
2218 /* Default align_* from the processor table. */
2219 if (align_loops
== 0)
2221 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2222 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2224 if (align_jumps
== 0)
2226 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2227 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2229 if (align_functions
== 0)
2231 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2234 /* Validate -mbranch-cost= value, or provide default. */
2235 ix86_branch_cost
= ix86_cost
->branch_cost
;
2236 if (ix86_branch_cost_string
)
2238 i
= atoi (ix86_branch_cost_string
);
2240 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2242 ix86_branch_cost
= i
;
2244 if (ix86_section_threshold_string
)
2246 i
= atoi (ix86_section_threshold_string
);
2248 error ("-mlarge-data-threshold=%d is negative", i
);
2250 ix86_section_threshold
= i
;
2253 if (ix86_tls_dialect_string
)
2255 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2256 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2257 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2258 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2259 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2260 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2262 error ("bad value (%s) for -mtls-dialect= switch",
2263 ix86_tls_dialect_string
);
2266 /* Keep nonleaf frame pointers. */
2267 if (flag_omit_frame_pointer
)
2268 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2269 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2270 flag_omit_frame_pointer
= 1;
2272 /* If we're doing fast math, we don't care about comparison order
2273 wrt NaNs. This lets us use a shorter comparison sequence. */
2274 if (flag_finite_math_only
)
2275 target_flags
&= ~MASK_IEEE_FP
;
2277 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2278 since the insns won't need emulation. */
2279 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
2280 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2282 /* Likewise, if the target doesn't have a 387, or we've specified
2283 software floating point, don't use 387 inline intrinsics. */
2285 target_flags
|= MASK_NO_FANCY_MATH_387
;
2287 /* Turn on SSE3 builtins for -mssse3. */
2289 target_flags
|= MASK_SSE3
;
2291 /* Turn on SSE3 builtins for -msse4a. */
2293 target_flags
|= MASK_SSE3
;
2295 /* Turn on SSE2 builtins for -msse3. */
2297 target_flags
|= MASK_SSE2
;
2299 /* Turn on SSE builtins for -msse2. */
2301 target_flags
|= MASK_SSE
;
2303 /* Turn on MMX builtins for -msse. */
2306 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2307 x86_prefetch_sse
= true;
2310 /* Turn on MMX builtins for 3Dnow. */
2312 target_flags
|= MASK_MMX
;
2314 /* Turn on POPCNT builtins for -mabm. */
2316 target_flags
|= MASK_POPCNT
;
2320 if (TARGET_ALIGN_DOUBLE
)
2321 error ("-malign-double makes no sense in the 64bit mode");
2323 error ("-mrtd calling convention not supported in the 64bit mode");
2325 /* Enable by default the SSE and MMX builtins. Do allow the user to
2326 explicitly disable any of these. In particular, disabling SSE and
2327 MMX for kernel code is extremely useful. */
2329 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2330 & ~target_flags_explicit
);
2334 /* i386 ABI does not specify red zone. It still makes sense to use it
2335 when programmer takes care to stack from being destroyed. */
2336 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2337 target_flags
|= MASK_NO_RED_ZONE
;
2340 /* Validate -mpreferred-stack-boundary= value, or provide default.
2341 The default of 128 bits is for Pentium III's SSE __m128. We can't
2342 change it because of optimize_size. Otherwise, we can't mix object
2343 files compiled with -Os and -On. */
2344 ix86_preferred_stack_boundary
= 128;
2345 if (ix86_preferred_stack_boundary_string
)
2347 i
= atoi (ix86_preferred_stack_boundary_string
);
2348 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2349 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2350 TARGET_64BIT
? 4 : 2);
2352 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2355 /* Accept -msseregparm only if at least SSE support is enabled. */
2356 if (TARGET_SSEREGPARM
2358 error ("-msseregparm used without SSE enabled");
2360 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2362 if (ix86_fpmath_string
!= 0)
2364 if (! strcmp (ix86_fpmath_string
, "387"))
2365 ix86_fpmath
= FPMATH_387
;
2366 else if (! strcmp (ix86_fpmath_string
, "sse"))
2370 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2371 ix86_fpmath
= FPMATH_387
;
2374 ix86_fpmath
= FPMATH_SSE
;
2376 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2377 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2381 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2382 ix86_fpmath
= FPMATH_387
;
2384 else if (!TARGET_80387
)
2386 warning (0, "387 instruction set disabled, using SSE arithmetics");
2387 ix86_fpmath
= FPMATH_SSE
;
2390 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2393 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2396 /* If the i387 is disabled, then do not return values in it. */
2398 target_flags
&= ~MASK_FLOAT_RETURNS
;
2400 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2401 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2403 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2405 /* ??? Unwind info is not correct around the CFG unless either a frame
2406 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2407 unwind info generation to be aware of the CFG and propagating states
2409 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2410 || flag_exceptions
|| flag_non_call_exceptions
)
2411 && flag_omit_frame_pointer
2412 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2414 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2415 warning (0, "unwind tables currently require either a frame pointer "
2416 "or -maccumulate-outgoing-args for correctness");
2417 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2420 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2423 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2424 p
= strchr (internal_label_prefix
, 'X');
2425 internal_label_prefix_len
= p
- internal_label_prefix
;
2429 /* When scheduling description is not available, disable scheduler pass
2430 so it won't slow down the compilation and make x87 code slower. */
2431 if (!TARGET_SCHEDULE
)
2432 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2434 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2435 set_param_value ("simultaneous-prefetches",
2436 ix86_cost
->simultaneous_prefetches
);
2437 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2438 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2441 /* switch to the appropriate section for output of DECL.
2442 DECL is either a `VAR_DECL' node or a constant of some sort.
2443 RELOC indicates whether forming the initial value of DECL requires
2444 link-time relocations. */
2447 x86_64_elf_select_section (tree decl
, int reloc
,
2448 unsigned HOST_WIDE_INT align
)
2450 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2451 && ix86_in_large_data_p (decl
))
2453 const char *sname
= NULL
;
2454 unsigned int flags
= SECTION_WRITE
;
2455 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2460 case SECCAT_DATA_REL
:
2461 sname
= ".ldata.rel";
2463 case SECCAT_DATA_REL_LOCAL
:
2464 sname
= ".ldata.rel.local";
2466 case SECCAT_DATA_REL_RO
:
2467 sname
= ".ldata.rel.ro";
2469 case SECCAT_DATA_REL_RO_LOCAL
:
2470 sname
= ".ldata.rel.ro.local";
2474 flags
|= SECTION_BSS
;
2477 case SECCAT_RODATA_MERGE_STR
:
2478 case SECCAT_RODATA_MERGE_STR_INIT
:
2479 case SECCAT_RODATA_MERGE_CONST
:
2483 case SECCAT_SRODATA
:
2490 /* We don't split these for medium model. Place them into
2491 default sections and hope for best. */
2496 /* We might get called with string constants, but get_named_section
2497 doesn't like them as they are not DECLs. Also, we need to set
2498 flags in that case. */
2500 return get_section (sname
, flags
, NULL
);
2501 return get_named_section (decl
, sname
, reloc
);
2504 return default_elf_select_section (decl
, reloc
, align
);
2507 /* Build up a unique section name, expressed as a
2508 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2509 RELOC indicates whether the initial value of EXP requires
2510 link-time relocations. */
2513 x86_64_elf_unique_section (tree decl
, int reloc
)
2515 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2516 && ix86_in_large_data_p (decl
))
2518 const char *prefix
= NULL
;
2519 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2520 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2522 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2525 case SECCAT_DATA_REL
:
2526 case SECCAT_DATA_REL_LOCAL
:
2527 case SECCAT_DATA_REL_RO
:
2528 case SECCAT_DATA_REL_RO_LOCAL
:
2529 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2532 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2535 case SECCAT_RODATA_MERGE_STR
:
2536 case SECCAT_RODATA_MERGE_STR_INIT
:
2537 case SECCAT_RODATA_MERGE_CONST
:
2538 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2540 case SECCAT_SRODATA
:
2547 /* We don't split these for medium model. Place them into
2548 default sections and hope for best. */
2556 plen
= strlen (prefix
);
2558 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2559 name
= targetm
.strip_name_encoding (name
);
2560 nlen
= strlen (name
);
2562 string
= alloca (nlen
+ plen
+ 1);
2563 memcpy (string
, prefix
, plen
);
2564 memcpy (string
+ plen
, name
, nlen
+ 1);
2566 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2570 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects: NAME of SIZE bytes, aligned to ALIGN bits, is
   emitted to FILE with either ".largecomm" (medium-model objects
   above the -mlarge-data-threshold) or the normal COMMON_ASM_OP.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2594 /* Utility function for targets to use in implementing
2595 ASM_OUTPUT_ALIGNED_BSS. */
2598 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2599 const char *name
, unsigned HOST_WIDE_INT size
,
2602 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2603 && size
> (unsigned int)ix86_section_threshold
)
2604 switch_to_section (get_named_section (decl
, ".lbss", 0));
2606 switch_to_section (bss_section
);
2607 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2608 #ifdef ASM_DECLARE_OBJECT_NAME
2609 last_assemble_variable_decl
= decl
;
2610 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2612 /* Standard thing is just output label for the object. */
2613 ASM_OUTPUT_LABEL (file
, name
);
2614 #endif /* ASM_DECLARE_OBJECT_NAME */
2615 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2619 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2621 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2622 make the problem with not enough registers even worse. */
2623 #ifdef INSN_SCHEDULING
2625 flag_schedule_insns
= 0;
2629 /* The Darwin libraries never set errno, so we might as well
2630 avoid calling them when that's the only reason we would. */
2631 flag_errno_math
= 0;
2633 /* The default values of these switches depend on the TARGET_64BIT
2634 that is not known at this moment. Mark these values with 2 and
2635 let user the to override these. In case there is no command line option
2636 specifying them, we will set the defaults in override_options. */
2638 flag_omit_frame_pointer
= 2;
2639 flag_pcc_struct_return
= 2;
2640 flag_asynchronous_unwind_tables
= 2;
2641 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2642 SUBTARGET_OPTIMIZATION_OPTIONS
;
2646 /* Table of valid machine attributes. */
2647 const struct attribute_spec ix86_attribute_table
[] =
2649 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2650 /* Stdcall attribute says callee is responsible for popping arguments
2651 if they are not variable. */
2652 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2653 /* Fastcall attribute says callee is responsible for popping arguments
2654 if they are not variable. */
2655 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2656 /* Cdecl attribute says the callee is a normal C declaration */
2657 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2658 /* Regparm attribute specifies how many integer arguments are to be
2659 passed in registers. */
2660 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2661 /* Sseregparm attribute says we are using x86_64 calling conventions
2662 for FP arguments. */
2663 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2664 /* force_align_arg_pointer says this function realigns the stack at entry. */
2665 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2666 false, true, true, ix86_handle_cconv_attribute
},
2667 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2668 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2669 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2670 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2672 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2673 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2674 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2675 SUBTARGET_ATTRIBUTE_TABLE
,
2677 { NULL
, 0, 0, false, false, false, NULL
}
2680 /* Decide whether we can make a sibling call to a function. DECL is the
2681 declaration of the function being targeted by the call and EXP is the
2682 CALL_EXPR representing the call. */
2685 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2690 /* If we are generating position-independent code, we cannot sibcall
2691 optimize any indirect call, or a direct call to a global function,
2692 as the PLT requires %ebx be live. */
2693 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2700 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2701 if (POINTER_TYPE_P (func
))
2702 func
= TREE_TYPE (func
);
2705 /* Check that the return value locations are the same. Like
2706 if we are returning floats on the 80387 register stack, we cannot
2707 make a sibcall from a function that doesn't return a float to a
2708 function that does or, conversely, from a function that does return
2709 a float to a function that doesn't; the necessary stack adjustment
2710 would not be executed. This is also the place we notice
2711 differences in the return value ABI. Note that it is ok for one
2712 of the functions to have void return type as long as the return
2713 value of the other is passed in a register. */
2714 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2715 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2717 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2719 if (!rtx_equal_p (a
, b
))
2722 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2724 else if (!rtx_equal_p (a
, b
))
2727 /* If this call is indirect, we'll need to be able to use a call-clobbered
2728 register for the address of the target function. Make sure that all
2729 such registers are not used for passing parameters. */
2730 if (!decl
&& !TARGET_64BIT
)
2734 /* We're looking at the CALL_EXPR, we need the type of the function. */
2735 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2736 type
= TREE_TYPE (type
); /* pointer type */
2737 type
= TREE_TYPE (type
); /* function type */
2739 if (ix86_function_regparm (type
, NULL
) >= 3)
2741 /* ??? Need to count the actual number of registers to be used,
2742 not the possible number of registers. Fix later. */
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 /* Dllimport'd functions are also called indirectly. */
2749 if (decl
&& DECL_DLLIMPORT_P (decl
)
2750 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2754 /* If we forced aligned the stack, then sibcalling would unalign the
2755 stack, which may break the called function. */
2756 if (cfun
->machine
->force_align_arg_pointer
)
2759 /* Otherwise okay. That also includes certain types of indirect calls. */
2763 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2764 calling convention attributes;
2765 arguments as in struct attribute_spec.handler. */
2768 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2770 int flags ATTRIBUTE_UNUSED
,
2773 if (TREE_CODE (*node
) != FUNCTION_TYPE
2774 && TREE_CODE (*node
) != METHOD_TYPE
2775 && TREE_CODE (*node
) != FIELD_DECL
2776 && TREE_CODE (*node
) != TYPE_DECL
)
2778 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2779 IDENTIFIER_POINTER (name
));
2780 *no_add_attrs
= true;
2784 /* Can combine regparm with all attributes but fastcall. */
2785 if (is_attribute_p ("regparm", name
))
2789 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2791 error ("fastcall and regparm attributes are not compatible");
2794 cst
= TREE_VALUE (args
);
2795 if (TREE_CODE (cst
) != INTEGER_CST
)
2797 warning (OPT_Wattributes
,
2798 "%qs attribute requires an integer constant argument",
2799 IDENTIFIER_POINTER (name
));
2800 *no_add_attrs
= true;
2802 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2804 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2805 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2806 *no_add_attrs
= true;
2810 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2811 TYPE_ATTRIBUTES (*node
))
2812 && compare_tree_int (cst
, REGPARM_MAX
-1))
2814 error ("%s functions limited to %d register parameters",
2815 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2823 warning (OPT_Wattributes
, "%qs attribute ignored",
2824 IDENTIFIER_POINTER (name
));
2825 *no_add_attrs
= true;
2829 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2830 if (is_attribute_p ("fastcall", name
))
2832 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2834 error ("fastcall and cdecl attributes are not compatible");
2836 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2838 error ("fastcall and stdcall attributes are not compatible");
2840 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2842 error ("fastcall and regparm attributes are not compatible");
2846 /* Can combine stdcall with fastcall (redundant), regparm and
2848 else if (is_attribute_p ("stdcall", name
))
2850 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2852 error ("stdcall and cdecl attributes are not compatible");
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2856 error ("stdcall and fastcall attributes are not compatible");
2860 /* Can combine cdecl with regparm and sseregparm. */
2861 else if (is_attribute_p ("cdecl", name
))
2863 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2865 error ("stdcall and cdecl attributes are not compatible");
2867 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2869 error ("fastcall and cdecl attributes are not compatible");
2873 /* Can combine sseregparm with all attributes. */
2878 /* Return 0 if the attributes for two types are incompatible, 1 if they
2879 are compatible, and 2 if they are nearly compatible (which causes a
2880 warning to be generated). */
2883 ix86_comp_type_attributes (tree type1
, tree type2
)
2885 /* Check for mismatch of non-default calling convention. */
2886 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2888 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2891 /* Check for mismatched fastcall/regparm types. */
2892 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2893 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2894 || (ix86_function_regparm (type1
, NULL
)
2895 != ix86_function_regparm (type2
, NULL
)))
2898 /* Check for mismatched sseregparm types. */
2899 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2900 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2903 /* Check for mismatched return types (cdecl vs stdcall). */
2904 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2905 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2911 /* Return the regparm value for a function with the indicated TYPE and DECL.
2912 DECL may be NULL when calling function indirectly
2913 or considering a libcall. */
2916 ix86_function_regparm (tree type
, tree decl
)
2919 int regparm
= ix86_regparm
;
2920 bool user_convention
= false;
2924 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2927 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2928 user_convention
= true;
2931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2934 user_convention
= true;
2937 /* Use register calling convention for local functions when possible. */
2938 if (!TARGET_64BIT
&& !user_convention
&& decl
2939 && flag_unit_at_a_time
&& !profile_flag
)
2941 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2944 int local_regparm
, globals
= 0, regno
;
2946 /* Make sure no regparm register is taken by a global register
2948 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2949 if (global_regs
[local_regparm
])
2951 /* We can't use regparm(3) for nested functions as these use
2952 static chain pointer in third argument. */
2953 if (local_regparm
== 3
2954 && decl_function_context (decl
)
2955 && !DECL_NO_STATIC_CHAIN (decl
))
2957 /* If the function realigns its stackpointer, the
2958 prologue will clobber %ecx. If we've already
2959 generated code for the callee, the callee
2960 DECL_STRUCT_FUNCTION is gone, so we fall back to
2961 scanning the attributes for the self-realigning
2963 if ((DECL_STRUCT_FUNCTION (decl
)
2964 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2965 || (!DECL_STRUCT_FUNCTION (decl
)
2966 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2967 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2969 /* Each global register variable increases register preassure,
2970 so the more global reg vars there are, the smaller regparm
2971 optimization use, unless requested by the user explicitly. */
2972 for (regno
= 0; regno
< 6; regno
++)
2973 if (global_regs
[regno
])
2976 = globals
< local_regparm
? local_regparm
- globals
: 0;
2978 if (local_regparm
> regparm
)
2979 regparm
= local_regparm
;
2986 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2987 DFmode (2) arguments in SSE registers for a function with the
2988 indicated TYPE and DECL. DECL may be NULL when calling function
2989 indirectly or considering a libcall. Otherwise return 0. */
2992 ix86_function_sseregparm (tree type
, tree decl
)
2994 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2995 by the sseregparm attribute. */
2996 if (TARGET_SSEREGPARM
2998 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3003 error ("Calling %qD with attribute sseregparm without "
3004 "SSE/SSE2 enabled", decl
);
3006 error ("Calling %qT with attribute sseregparm without "
3007 "SSE/SSE2 enabled", type
);
3014 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3015 (and DFmode for SSE2) arguments in SSE registers,
3016 even for 32-bit targets. */
3017 if (!TARGET_64BIT
&& decl
3018 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3020 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3022 return TARGET_SSE2
? 2 : 1;
3028 /* Return true if EAX is live at the start of the function. Used by
3029 ix86_expand_prologue to determine if we need special help before
3030 calling allocate_stack_worker. */
3033 ix86_eax_live_at_start_p (void)
3035 /* Cheat. Don't bother working forward from ix86_function_regparm
3036 to the function type to whether an actual argument is located in
3037 eax. Instead just look at cfg info, which is still close enough
3038 to correct at this point. This gives false positives for broken
3039 functions that might use uninitialized data that happens to be
3040 allocated in eax, but who cares? */
3041 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3044 /* Value is the number of bytes of arguments automatically
3045 popped when returning from a subroutine call.
3046 FUNDECL is the declaration node of the function (as a tree),
3047 FUNTYPE is the data type of the function (as a tree),
3048 or for a library call it is an identifier node for the subroutine name.
3049 SIZE is the number of bytes of arguments passed on the stack.
3051 On the 80386, the RTD insn may be used to pop them if the number
3052 of args is fixed, but if the number is variable then the caller
3053 must pop them all. RTD can't be used for library calls now
3054 because the library is compiled with the Unix compiler.
3055 Use of RTD is a selectable option, since it is incompatible with
3056 standard Unix calling sequences. If the option is not selected,
3057 the caller must always pop the args.
3059 The attribute stdcall is equivalent to RTD on a per module basis. */
3062 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3064 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3066 /* Cdecl functions override -mrtd, and never pop the stack. */
3067 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3069 /* Stdcall and fastcall functions will pop the stack if not
3071 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3072 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3076 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3077 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3078 == void_type_node
)))
3082 /* Lose any fake structure return argument if it is passed on the stack. */
3083 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3085 && !KEEP_AGGREGATE_RETURN_POINTER
)
3087 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3090 return GET_MODE_SIZE (Pmode
);
3096 /* Argument support functions. */
3098 /* Return true when register may be used to pass function parameters. */
3100 ix86_function_arg_regno_p (int regno
)
3106 return (regno
< REGPARM_MAX
3107 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3109 return (regno
< REGPARM_MAX
3110 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3111 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3112 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3113 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3118 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3123 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3124 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3127 /* RAX is used as hidden argument to va_arg functions. */
3130 for (i
= 0; i
< REGPARM_MAX
; i
++)
3131 if (regno
== x86_64_int_parameter_registers
[i
])
3136 /* Return if we do not know how to pass TYPE solely in registers. */
3139 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3141 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3144 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3145 The layout_type routine is crafty and tries to trick us into passing
3146 currently unsupported vector types on the stack by using TImode. */
3147 return (!TARGET_64BIT
&& mode
== TImode
3148 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3151 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3152 for a call to a function whose data type is FNTYPE.
3153 For a library call, FNTYPE is 0. */
3156 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3157 tree fntype
, /* tree ptr for function decl */
3158 rtx libname
, /* SYMBOL_REF of library name or 0 */
3161 static CUMULATIVE_ARGS zero_cum
;
3162 tree param
, next_param
;
3164 if (TARGET_DEBUG_ARG
)
3166 fprintf (stderr
, "\ninit_cumulative_args (");
3168 fprintf (stderr
, "fntype code = %s, ret code = %s",
3169 tree_code_name
[(int) TREE_CODE (fntype
)],
3170 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3172 fprintf (stderr
, "no fntype");
3175 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3180 /* Set up the number of registers to use for passing arguments. */
3181 cum
->nregs
= ix86_regparm
;
3183 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3185 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3186 cum
->warn_sse
= true;
3187 cum
->warn_mmx
= true;
3188 cum
->maybe_vaarg
= false;
3190 /* Use ecx and edx registers if function has fastcall attribute,
3191 else look for regparm information. */
3192 if (fntype
&& !TARGET_64BIT
)
3194 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3200 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3203 /* Set up the number of SSE registers used for passing SFmode
3204 and DFmode arguments. Warn for mismatching ABI. */
3205 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3207 /* Determine if this function has variable arguments. This is
3208 indicated by the last argument being 'void_type_mode' if there
3209 are no variable arguments. If there are variable arguments, then
3210 we won't pass anything in registers in 32-bit mode. */
3212 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3214 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3215 param
!= 0; param
= next_param
)
3217 next_param
= TREE_CHAIN (param
);
3218 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3228 cum
->float_in_sse
= 0;
3230 cum
->maybe_vaarg
= true;
3234 if ((!fntype
&& !libname
)
3235 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3236 cum
->maybe_vaarg
= true;
3238 if (TARGET_DEBUG_ARG
)
3239 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3244 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3245 But in the case of vector types, it is some vector mode.
3247 When we have only some of our vector isa extensions enabled, then there
3248 are some modes for which vector_mode_supported_p is false. For these
3249 modes, the generic vector support in gcc will choose some non-vector mode
3250 in order to implement the type. By computing the natural mode, we'll
3251 select the proper ABI location for the operand and not depend on whatever
3252 the middle-end decides to do with these vector types. */
3254 static enum machine_mode
3255 type_natural_mode (tree type
)
3257 enum machine_mode mode
= TYPE_MODE (type
);
3259 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3261 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3262 if ((size
== 8 || size
== 16)
3263 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3264 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3266 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3268 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3269 mode
= MIN_MODE_VECTOR_FLOAT
;
3271 mode
= MIN_MODE_VECTOR_INT
;
3273 /* Get the mode which has this inner mode and number of units. */
3274 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3275 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3276 && GET_MODE_INNER (mode
) == innermode
)
3286 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3287 this may not agree with the mode that the type system has chosen for the
3288 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3289 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3292 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3297 if (orig_mode
!= BLKmode
)
3298 tmp
= gen_rtx_REG (orig_mode
, regno
);
3301 tmp
= gen_rtx_REG (mode
, regno
);
3302 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3303 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3309 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3310 of this code is to classify each 8bytes of incoming argument by the register
3311 class and assign registers accordingly. */
3313 /* Return the union class of CLASS1 and CLASS2.
3314 See the x86-64 PS ABI for details. */
3316 static enum x86_64_reg_class
3317 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3319 /* Rule #1: If both classes are equal, this is the resulting class. */
3320 if (class1
== class2
)
3323 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3325 if (class1
== X86_64_NO_CLASS
)
3327 if (class2
== X86_64_NO_CLASS
)
3330 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3331 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3332 return X86_64_MEMORY_CLASS
;
3334 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3335 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3336 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3337 return X86_64_INTEGERSI_CLASS
;
3338 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3339 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3340 return X86_64_INTEGER_CLASS
;
3342 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3344 if (class1
== X86_64_X87_CLASS
3345 || class1
== X86_64_X87UP_CLASS
3346 || class1
== X86_64_COMPLEX_X87_CLASS
3347 || class2
== X86_64_X87_CLASS
3348 || class2
== X86_64_X87UP_CLASS
3349 || class2
== X86_64_COMPLEX_X87_CLASS
)
3350 return X86_64_MEMORY_CLASS
;
3352 /* Rule #6: Otherwise class SSE is used. */
3353 return X86_64_SSE_CLASS
;
3356 /* Classify the argument of type TYPE and mode MODE.
3357 CLASSES will be filled by the register class used to pass each word
3358 of the operand. The number of words is returned. In case the parameter
3359 should be passed in memory, 0 is returned. As a special case for zero
3360 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3362 BIT_OFFSET is used internally for handling records and specifies offset
3363 of the offset in bits modulo 256 to avoid overflow cases.
3365 See the x86-64 PS ABI for details.
3369 classify_argument (enum machine_mode mode
, tree type
,
3370 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3372 HOST_WIDE_INT bytes
=
3373 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3374 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3376 /* Variable sized entities are always passed/returned in memory. */
3380 if (mode
!= VOIDmode
3381 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3384 if (type
&& AGGREGATE_TYPE_P (type
))
3388 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3390 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3394 for (i
= 0; i
< words
; i
++)
3395 classes
[i
] = X86_64_NO_CLASS
;
3397 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3398 signalize memory class, so handle it as special case. */
3401 classes
[0] = X86_64_NO_CLASS
;
3405 /* Classify each field of record and merge classes. */
3406 switch (TREE_CODE (type
))
3409 /* And now merge the fields of structure. */
3410 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3412 if (TREE_CODE (field
) == FIELD_DECL
)
3416 if (TREE_TYPE (field
) == error_mark_node
)
3419 /* Bitfields are always classified as integer. Handle them
3420 early, since later code would consider them to be
3421 misaligned integers. */
3422 if (DECL_BIT_FIELD (field
))
3424 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3425 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3426 + tree_low_cst (DECL_SIZE (field
), 0)
3429 merge_classes (X86_64_INTEGER_CLASS
,
3434 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3435 TREE_TYPE (field
), subclasses
,
3436 (int_bit_position (field
)
3437 + bit_offset
) % 256);
3440 for (i
= 0; i
< num
; i
++)
3443 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3445 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3453 /* Arrays are handled as small records. */
3456 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3457 TREE_TYPE (type
), subclasses
, bit_offset
);
3461 /* The partial classes are now full classes. */
3462 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3463 subclasses
[0] = X86_64_SSE_CLASS
;
3464 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3465 subclasses
[0] = X86_64_INTEGER_CLASS
;
3467 for (i
= 0; i
< words
; i
++)
3468 classes
[i
] = subclasses
[i
% num
];
3473 case QUAL_UNION_TYPE
:
3474 /* Unions are similar to RECORD_TYPE but offset is always 0.
3476 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3478 if (TREE_CODE (field
) == FIELD_DECL
)
3482 if (TREE_TYPE (field
) == error_mark_node
)
3485 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3486 TREE_TYPE (field
), subclasses
,
3490 for (i
= 0; i
< num
; i
++)
3491 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3500 /* Final merger cleanup. */
3501 for (i
= 0; i
< words
; i
++)
3503 /* If one class is MEMORY, everything should be passed in
3505 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3508 /* The X86_64_SSEUP_CLASS should be always preceded by
3509 X86_64_SSE_CLASS. */
3510 if (classes
[i
] == X86_64_SSEUP_CLASS
3511 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3512 classes
[i
] = X86_64_SSE_CLASS
;
3514 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3515 if (classes
[i
] == X86_64_X87UP_CLASS
3516 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3517 classes
[i
] = X86_64_SSE_CLASS
;
3522 /* Compute alignment needed. We align all types to natural boundaries with
3523 exception of XFmode that is aligned to 64bits. */
3524 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3526 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3529 mode_alignment
= 128;
3530 else if (mode
== XCmode
)
3531 mode_alignment
= 256;
3532 if (COMPLEX_MODE_P (mode
))
3533 mode_alignment
/= 2;
3534 /* Misaligned fields are always returned in memory. */
3535 if (bit_offset
% mode_alignment
)
3539 /* for V1xx modes, just use the base mode */
3540 if (VECTOR_MODE_P (mode
)
3541 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3542 mode
= GET_MODE_INNER (mode
);
3544 /* Classification of atomic types. */
3549 classes
[0] = X86_64_SSE_CLASS
;
3552 classes
[0] = X86_64_SSE_CLASS
;
3553 classes
[1] = X86_64_SSEUP_CLASS
;
3562 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3563 classes
[0] = X86_64_INTEGERSI_CLASS
;
3565 classes
[0] = X86_64_INTEGER_CLASS
;
3569 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3574 if (!(bit_offset
% 64))
3575 classes
[0] = X86_64_SSESF_CLASS
;
3577 classes
[0] = X86_64_SSE_CLASS
;
3580 classes
[0] = X86_64_SSEDF_CLASS
;
3583 classes
[0] = X86_64_X87_CLASS
;
3584 classes
[1] = X86_64_X87UP_CLASS
;
3587 classes
[0] = X86_64_SSE_CLASS
;
3588 classes
[1] = X86_64_SSEUP_CLASS
;
3591 classes
[0] = X86_64_SSE_CLASS
;
3594 classes
[0] = X86_64_SSEDF_CLASS
;
3595 classes
[1] = X86_64_SSEDF_CLASS
;
3598 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3601 /* This modes is larger than 16 bytes. */
3609 classes
[0] = X86_64_SSE_CLASS
;
3610 classes
[1] = X86_64_SSEUP_CLASS
;
3616 classes
[0] = X86_64_SSE_CLASS
;
3622 gcc_assert (VECTOR_MODE_P (mode
));
3627 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3629 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3630 classes
[0] = X86_64_INTEGERSI_CLASS
;
3632 classes
[0] = X86_64_INTEGER_CLASS
;
3633 classes
[1] = X86_64_INTEGER_CLASS
;
3634 return 1 + (bytes
> 8);
3638 /* Examine the argument and return set number of register required in each
3639 class. Return 0 iff parameter should be passed in memory. */
3641 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3642 int *int_nregs
, int *sse_nregs
)
3644 enum x86_64_reg_class
class[MAX_CLASSES
];
3645 int n
= classify_argument (mode
, type
, class, 0);
3651 for (n
--; n
>= 0; n
--)
3654 case X86_64_INTEGER_CLASS
:
3655 case X86_64_INTEGERSI_CLASS
:
3658 case X86_64_SSE_CLASS
:
3659 case X86_64_SSESF_CLASS
:
3660 case X86_64_SSEDF_CLASS
:
3663 case X86_64_NO_CLASS
:
3664 case X86_64_SSEUP_CLASS
:
3666 case X86_64_X87_CLASS
:
3667 case X86_64_X87UP_CLASS
:
3671 case X86_64_COMPLEX_X87_CLASS
:
3672 return in_return
? 2 : 0;
3673 case X86_64_MEMORY_CLASS
:
3679 /* Construct container for the argument used by GCC interface. See
3680 FUNCTION_ARG for the detailed description. */
3683 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3684 tree type
, int in_return
, int nintregs
, int nsseregs
,
3685 const int *intreg
, int sse_regno
)
3687 /* The following variables hold the static issued_error state. */
3688 static bool issued_sse_arg_error
;
3689 static bool issued_sse_ret_error
;
3690 static bool issued_x87_ret_error
;
3692 enum machine_mode tmpmode
;
3694 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3695 enum x86_64_reg_class
class[MAX_CLASSES
];
3699 int needed_sseregs
, needed_intregs
;
3700 rtx exp
[MAX_CLASSES
];
3703 n
= classify_argument (mode
, type
, class, 0);
3704 if (TARGET_DEBUG_ARG
)
3707 fprintf (stderr
, "Memory class\n");
3710 fprintf (stderr
, "Classes:");
3711 for (i
= 0; i
< n
; i
++)
3713 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3715 fprintf (stderr
, "\n");
3720 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3723 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3726 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3727 some less clueful developer tries to use floating-point anyway. */
3728 if (needed_sseregs
&& !TARGET_SSE
)
3732 if (!issued_sse_ret_error
)
3734 error ("SSE register return with SSE disabled");
3735 issued_sse_ret_error
= true;
3738 else if (!issued_sse_arg_error
)
3740 error ("SSE register argument with SSE disabled");
3741 issued_sse_arg_error
= true;
3746 /* Likewise, error if the ABI requires us to return values in the
3747 x87 registers and the user specified -mno-80387. */
3748 if (!TARGET_80387
&& in_return
)
3749 for (i
= 0; i
< n
; i
++)
3750 if (class[i
] == X86_64_X87_CLASS
3751 || class[i
] == X86_64_X87UP_CLASS
3752 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3754 if (!issued_x87_ret_error
)
3756 error ("x87 register return with x87 disabled");
3757 issued_x87_ret_error
= true;
3762 /* First construct simple cases. Avoid SCmode, since we want to use
3763 single register to pass this type. */
3764 if (n
== 1 && mode
!= SCmode
)
3767 case X86_64_INTEGER_CLASS
:
3768 case X86_64_INTEGERSI_CLASS
:
3769 return gen_rtx_REG (mode
, intreg
[0]);
3770 case X86_64_SSE_CLASS
:
3771 case X86_64_SSESF_CLASS
:
3772 case X86_64_SSEDF_CLASS
:
3773 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3774 case X86_64_X87_CLASS
:
3775 case X86_64_COMPLEX_X87_CLASS
:
3776 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3777 case X86_64_NO_CLASS
:
3778 /* Zero sized array, struct or class. */
3783 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3785 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3787 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3788 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3789 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3790 && class[1] == X86_64_INTEGER_CLASS
3791 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3792 && intreg
[0] + 1 == intreg
[1])
3793 return gen_rtx_REG (mode
, intreg
[0]);
3795 /* Otherwise figure out the entries of the PARALLEL. */
3796 for (i
= 0; i
< n
; i
++)
3800 case X86_64_NO_CLASS
:
3802 case X86_64_INTEGER_CLASS
:
3803 case X86_64_INTEGERSI_CLASS
:
3804 /* Merge TImodes on aligned occasions here too. */
3805 if (i
* 8 + 8 > bytes
)
3806 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3807 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3811 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3812 if (tmpmode
== BLKmode
)
3814 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3815 gen_rtx_REG (tmpmode
, *intreg
),
3819 case X86_64_SSESF_CLASS
:
3820 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3821 gen_rtx_REG (SFmode
,
3822 SSE_REGNO (sse_regno
)),
3826 case X86_64_SSEDF_CLASS
:
3827 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3828 gen_rtx_REG (DFmode
,
3829 SSE_REGNO (sse_regno
)),
3833 case X86_64_SSE_CLASS
:
3834 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3838 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3839 gen_rtx_REG (tmpmode
,
3840 SSE_REGNO (sse_regno
)),
3842 if (tmpmode
== TImode
)
3851 /* Empty aligned struct, union or class. */
3855 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3856 for (i
= 0; i
< nexps
; i
++)
3857 XVECEXP (ret
, 0, i
) = exp
[i
];
3861 /* Update the data in CUM to advance over an argument
3862 of mode MODE and data type TYPE.
3863 (TYPE is null for libcalls where that information may not be available.) */
3866 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3867 tree type
, int named
)
3870 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3871 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3874 mode
= type_natural_mode (type
);
3876 if (TARGET_DEBUG_ARG
)
3877 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3878 "mode=%s, named=%d)\n\n",
3879 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3880 GET_MODE_NAME (mode
), named
);
3884 int int_nregs
, sse_nregs
;
3885 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3886 cum
->words
+= words
;
3887 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3889 cum
->nregs
-= int_nregs
;
3890 cum
->sse_nregs
-= sse_nregs
;
3891 cum
->regno
+= int_nregs
;
3892 cum
->sse_regno
+= sse_nregs
;
3895 cum
->words
+= words
;
3913 cum
->words
+= words
;
3914 cum
->nregs
-= words
;
3915 cum
->regno
+= words
;
3917 if (cum
->nregs
<= 0)
3925 if (cum
->float_in_sse
< 2)
3928 if (cum
->float_in_sse
< 1)
3939 if (!type
|| !AGGREGATE_TYPE_P (type
))
3941 cum
->sse_words
+= words
;
3942 cum
->sse_nregs
-= 1;
3943 cum
->sse_regno
+= 1;
3944 if (cum
->sse_nregs
<= 0)
3956 if (!type
|| !AGGREGATE_TYPE_P (type
))
3958 cum
->mmx_words
+= words
;
3959 cum
->mmx_nregs
-= 1;
3960 cum
->mmx_regno
+= 1;
3961 if (cum
->mmx_nregs
<= 0)
3972 /* Define where to put the arguments to a function.
3973 Value is zero to push the argument on the stack,
3974 or a hard register in which to store the argument.
3976 MODE is the argument's machine mode.
3977 TYPE is the data type of the argument (as a tree).
3978 This is null for libcalls where that information may
3980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3981 the preceding args and about the function being called.
3982 NAMED is nonzero if this argument is a named parameter
3983 (otherwise it is an extra parameter matching an ellipsis). */
3986 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3987 tree type
, int named
)
3989 enum machine_mode mode
= orig_mode
;
3992 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3993 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3994 static bool warnedsse
, warnedmmx
;
3996 /* To simplify the code below, represent vector types with a vector mode
3997 even if MMX/SSE are not active. */
3998 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3999 mode
= type_natural_mode (type
);
4001 /* Handle a hidden AL argument containing number of registers for varargs
4002 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4004 if (mode
== VOIDmode
)
4007 return GEN_INT (cum
->maybe_vaarg
4008 ? (cum
->sse_nregs
< 0
4016 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4018 &x86_64_int_parameter_registers
[cum
->regno
],
4023 /* For now, pass fp/complex values on the stack. */
4035 if (words
<= cum
->nregs
)
4037 int regno
= cum
->regno
;
4039 /* Fastcall allocates the first two DWORD (SImode) or
4040 smaller arguments to ECX and EDX. */
4043 if (mode
== BLKmode
|| mode
== DImode
)
4046 /* ECX not EAX is the first allocated register. */
4050 ret
= gen_rtx_REG (mode
, regno
);
4054 if (cum
->float_in_sse
< 2)
4057 if (cum
->float_in_sse
< 1)
4067 if (!type
|| !AGGREGATE_TYPE_P (type
))
4069 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4072 warning (0, "SSE vector argument without SSE enabled "
4076 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4077 cum
->sse_regno
+ FIRST_SSE_REG
);
4084 if (!type
|| !AGGREGATE_TYPE_P (type
))
4086 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4089 warning (0, "MMX vector argument without MMX enabled "
4093 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4094 cum
->mmx_regno
+ FIRST_MMX_REG
);
4099 if (TARGET_DEBUG_ARG
)
4102 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4103 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4106 print_simple_rtl (stderr
, ret
);
4108 fprintf (stderr
, ", stack");
4110 fprintf (stderr
, " )\n");
4116 /* A C expression that indicates when an argument must be passed by
4117 reference. If nonzero for an argument, a copy of that argument is
4118 made in memory and a pointer to the argument is passed instead of
4119 the argument itself. The pointer is passed in whatever way is
4120 appropriate for passing a pointer to that type. */
4123 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4124 enum machine_mode mode ATTRIBUTE_UNUSED
,
4125 tree type
, bool named ATTRIBUTE_UNUSED
)
4130 if (type
&& int_size_in_bytes (type
) == -1)
4132 if (TARGET_DEBUG_ARG
)
4133 fprintf (stderr
, "function_arg_pass_by_reference\n");
4140 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4141 ABI. Only called if TARGET_SSE. */
4143 contains_128bit_aligned_vector_p (tree type
)
4145 enum machine_mode mode
= TYPE_MODE (type
);
4146 if (SSE_REG_MODE_P (mode
)
4147 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4149 if (TYPE_ALIGN (type
) < 128)
4152 if (AGGREGATE_TYPE_P (type
))
4154 /* Walk the aggregates recursively. */
4155 switch (TREE_CODE (type
))
4159 case QUAL_UNION_TYPE
:
4163 /* Walk all the structure fields. */
4164 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4166 if (TREE_CODE (field
) == FIELD_DECL
4167 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4174 /* Just for use if some languages passes arrays by value. */
4175 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4186 /* Gives the alignment boundary, in bits, of an argument with the
4187 specified mode and type. */
4190 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4194 align
= TYPE_ALIGN (type
);
4196 align
= GET_MODE_ALIGNMENT (mode
);
4197 if (align
< PARM_BOUNDARY
)
4198 align
= PARM_BOUNDARY
;
4201 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4202 make an exception for SSE modes since these require 128bit
4205 The handling here differs from field_alignment. ICC aligns MMX
4206 arguments to 4 byte boundaries, while structure fields are aligned
4207 to 8 byte boundaries. */
4209 align
= PARM_BOUNDARY
;
4212 if (!SSE_REG_MODE_P (mode
))
4213 align
= PARM_BOUNDARY
;
4217 if (!contains_128bit_aligned_vector_p (type
))
4218 align
= PARM_BOUNDARY
;
4226 /* Return true if N is a possible register number of function value. */
4228 ix86_function_value_regno_p (int regno
)
4234 return ((regno
) == 0
4235 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4236 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4238 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4239 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4240 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4245 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4246 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4250 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4257 /* Define how to find the value returned by a function.
4258 VALTYPE is the data type of the value (as a tree).
4259 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4260 otherwise, FUNC is 0. */
4262 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4263 bool outgoing ATTRIBUTE_UNUSED
)
4265 enum machine_mode natmode
= type_natural_mode (valtype
);
4269 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4270 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4271 x86_64_int_return_registers
, 0);
4272 /* For zero sized structures, construct_container return NULL, but we
4273 need to keep rest of compiler happy by returning meaningful value. */
4275 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4280 tree fn
= NULL_TREE
, fntype
;
4282 && DECL_P (fntype_or_decl
))
4283 fn
= fntype_or_decl
;
4284 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4285 return gen_rtx_REG (TYPE_MODE (valtype
),
4286 ix86_value_regno (natmode
, fn
, fntype
));
4290 /* Return true iff type is returned in memory. */
4292 ix86_return_in_memory (tree type
)
4294 int needed_intregs
, needed_sseregs
, size
;
4295 enum machine_mode mode
= type_natural_mode (type
);
4298 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4300 if (mode
== BLKmode
)
4303 size
= int_size_in_bytes (type
);
4305 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4308 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4310 /* User-created vectors small enough to fit in EAX. */
4314 /* MMX/3dNow values are returned in MM0,
4315 except when it doesn't exits. */
4317 return (TARGET_MMX
? 0 : 1);
4319 /* SSE values are returned in XMM0, except when it doesn't exist. */
4321 return (TARGET_SSE
? 0 : 1);
4335 /* When returning SSE vector types, we have a choice of either
4336 (1) being abi incompatible with a -march switch, or
4337 (2) generating an error.
4338 Given no good solution, I think the safest thing is one warning.
4339 The user won't be able to use -Werror, but....
4341 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4342 called in response to actually generating a caller or callee that
4343 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4344 via aggregate_value_p for general type probing from tree-ssa. */
4347 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4349 static bool warnedsse
, warnedmmx
;
4353 /* Look at the return type of the function, not the function type. */
4354 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4356 if (!TARGET_SSE
&& !warnedsse
)
4359 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4362 warning (0, "SSE vector return without SSE enabled "
4367 if (!TARGET_MMX
&& !warnedmmx
)
4369 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4372 warning (0, "MMX vector return without MMX enabled "
4381 /* Define how to find the value returned by a library function
4382 assuming the value has mode MODE. */
4384 ix86_libcall_value (enum machine_mode mode
)
4398 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4401 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4405 return gen_rtx_REG (mode
, 0);
4409 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4412 /* Given a mode, return the register to use for a return value. */
4415 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4417 gcc_assert (!TARGET_64BIT
);
4419 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4420 we normally prevent this case when mmx is not available. However
4421 some ABIs may require the result to be returned like DImode. */
4422 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4423 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4425 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4426 we prevent this case when sse is not available. However some ABIs
4427 may require the result to be returned like integer TImode. */
4428 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4429 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4431 /* Decimal floating point values can go in %eax, unlike other float modes. */
4432 if (DECIMAL_FLOAT_MODE_P (mode
))
4435 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4436 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4439 /* Floating point return values in %st(0), except for local functions when
4440 SSE math is enabled or for functions with sseregparm attribute. */
4441 if ((func
|| fntype
)
4442 && (mode
== SFmode
|| mode
== DFmode
))
4444 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4445 if ((sse_level
>= 1 && mode
== SFmode
)
4446 || (sse_level
== 2 && mode
== DFmode
))
4447 return FIRST_SSE_REG
;
4450 return FIRST_FLOAT_REG
;
4453 /* Create the va_list data type. */
4456 ix86_build_builtin_va_list (void)
4458 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4460 /* For i386 we use plain pointer to argument area. */
4462 return build_pointer_type (char_type_node
);
4464 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4465 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4467 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4468 unsigned_type_node
);
4469 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4470 unsigned_type_node
);
4471 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4473 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4476 va_list_gpr_counter_field
= f_gpr
;
4477 va_list_fpr_counter_field
= f_fpr
;
4479 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4480 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4481 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4482 DECL_FIELD_CONTEXT (f_sav
) = record
;
4484 TREE_CHAIN (record
) = type_decl
;
4485 TYPE_NAME (record
) = type_decl
;
4486 TYPE_FIELDS (record
) = f_gpr
;
4487 TREE_CHAIN (f_gpr
) = f_fpr
;
4488 TREE_CHAIN (f_fpr
) = f_ovf
;
4489 TREE_CHAIN (f_ovf
) = f_sav
;
4491 layout_type (record
);
4493 /* The correct type is an array type of one element. */
4494 return build_array_type (record
, build_index_type (size_zero_node
));
4497 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4500 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4501 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4504 CUMULATIVE_ARGS next_cum
;
4505 rtx save_area
= NULL_RTX
, mem
;
4518 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4521 /* Indicate to allocate space on the stack for varargs save area. */
4522 ix86_save_varrargs_registers
= 1;
4524 cfun
->stack_alignment_needed
= 128;
4526 fntype
= TREE_TYPE (current_function_decl
);
4527 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4528 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4529 != void_type_node
));
4531 /* For varargs, we do not want to skip the dummy va_dcl argument.
4532 For stdargs, we do want to skip the last named argument. */
4535 function_arg_advance (&next_cum
, mode
, type
, 1);
4538 save_area
= frame_pointer_rtx
;
4540 set
= get_varargs_alias_set ();
4542 for (i
= next_cum
.regno
;
4544 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4547 mem
= gen_rtx_MEM (Pmode
,
4548 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4549 MEM_NOTRAP_P (mem
) = 1;
4550 set_mem_alias_set (mem
, set
);
4551 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4552 x86_64_int_parameter_registers
[i
]));
4555 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4557 /* Now emit code to save SSE registers. The AX parameter contains number
4558 of SSE parameter registers used to call this function. We use
4559 sse_prologue_save insn template that produces computed jump across
4560 SSE saves. We need some preparation work to get this working. */
4562 label
= gen_label_rtx ();
4563 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4565 /* Compute address to jump to :
4566 label - 5*eax + nnamed_sse_arguments*5 */
4567 tmp_reg
= gen_reg_rtx (Pmode
);
4568 nsse_reg
= gen_reg_rtx (Pmode
);
4569 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4570 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4571 gen_rtx_MULT (Pmode
, nsse_reg
,
4573 if (next_cum
.sse_regno
)
4576 gen_rtx_CONST (DImode
,
4577 gen_rtx_PLUS (DImode
,
4579 GEN_INT (next_cum
.sse_regno
* 4))));
4581 emit_move_insn (nsse_reg
, label_ref
);
4582 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4584 /* Compute address of memory block we save into. We always use pointer
4585 pointing 127 bytes after first byte to store - this is needed to keep
4586 instruction size limited by 4 bytes. */
4587 tmp_reg
= gen_reg_rtx (Pmode
);
4588 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4589 plus_constant (save_area
,
4590 8 * REGPARM_MAX
+ 127)));
4591 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4592 MEM_NOTRAP_P (mem
) = 1;
4593 set_mem_alias_set (mem
, set
);
4594 set_mem_align (mem
, BITS_PER_WORD
);
4596 /* And finally do the dirty job! */
4597 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4598 GEN_INT (next_cum
.sse_regno
), label
));
4603 /* Implement va_start. */
4606 ix86_va_start (tree valist
, rtx nextarg
)
4608 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4609 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4610 tree gpr
, fpr
, ovf
, sav
, t
;
4613 /* Only 64bit target needs something special. */
4616 std_expand_builtin_va_start (valist
, nextarg
);
4620 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4621 f_fpr
= TREE_CHAIN (f_gpr
);
4622 f_ovf
= TREE_CHAIN (f_fpr
);
4623 f_sav
= TREE_CHAIN (f_ovf
);
4625 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4626 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4627 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4628 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4629 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4631 /* Count number of gp and fp argument registers used. */
4632 words
= current_function_args_info
.words
;
4633 n_gpr
= current_function_args_info
.regno
;
4634 n_fpr
= current_function_args_info
.sse_regno
;
4636 if (TARGET_DEBUG_ARG
)
4637 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4638 (int) words
, (int) n_gpr
, (int) n_fpr
);
4640 if (cfun
->va_list_gpr_size
)
4642 type
= TREE_TYPE (gpr
);
4643 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4644 build_int_cst (type
, n_gpr
* 8));
4645 TREE_SIDE_EFFECTS (t
) = 1;
4646 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4649 if (cfun
->va_list_fpr_size
)
4651 type
= TREE_TYPE (fpr
);
4652 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4653 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4654 TREE_SIDE_EFFECTS (t
) = 1;
4655 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4658 /* Find the overflow area. */
4659 type
= TREE_TYPE (ovf
);
4660 t
= make_tree (type
, virtual_incoming_args_rtx
);
4662 t
= build2 (PLUS_EXPR
, type
, t
,
4663 build_int_cst (type
, words
* UNITS_PER_WORD
));
4664 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4665 TREE_SIDE_EFFECTS (t
) = 1;
4666 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4668 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4670 /* Find the register save area.
4671 Prologue of the function save it right above stack frame. */
4672 type
= TREE_TYPE (sav
);
4673 t
= make_tree (type
, frame_pointer_rtx
);
4674 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4675 TREE_SIDE_EFFECTS (t
) = 1;
4676 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4680 /* Implement va_arg. */
4683 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4685 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4686 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4687 tree gpr
, fpr
, ovf
, sav
, t
;
4689 tree lab_false
, lab_over
= NULL_TREE
;
4694 enum machine_mode nat_mode
;
4696 /* Only 64bit target needs something special. */
4698 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4700 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4701 f_fpr
= TREE_CHAIN (f_gpr
);
4702 f_ovf
= TREE_CHAIN (f_fpr
);
4703 f_sav
= TREE_CHAIN (f_ovf
);
4705 valist
= build_va_arg_indirect_ref (valist
);
4706 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4707 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4708 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4709 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4711 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4713 type
= build_pointer_type (type
);
4714 size
= int_size_in_bytes (type
);
4715 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4717 nat_mode
= type_natural_mode (type
);
4718 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4719 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4721 /* Pull the value out of the saved registers. */
4723 addr
= create_tmp_var (ptr_type_node
, "addr");
4724 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4728 int needed_intregs
, needed_sseregs
;
4730 tree int_addr
, sse_addr
;
4732 lab_false
= create_artificial_label ();
4733 lab_over
= create_artificial_label ();
4735 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4737 need_temp
= (!REG_P (container
)
4738 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4739 || TYPE_ALIGN (type
) > 128));
4741 /* In case we are passing structure, verify that it is consecutive block
4742 on the register save area. If not we need to do moves. */
4743 if (!need_temp
&& !REG_P (container
))
4745 /* Verify that all registers are strictly consecutive */
4746 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4750 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4752 rtx slot
= XVECEXP (container
, 0, i
);
4753 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4754 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4762 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4764 rtx slot
= XVECEXP (container
, 0, i
);
4765 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4766 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4778 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4779 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4780 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4781 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4784 /* First ensure that we fit completely in registers. */
4787 t
= build_int_cst (TREE_TYPE (gpr
),
4788 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4789 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4790 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4791 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4792 gimplify_and_add (t
, pre_p
);
4796 t
= build_int_cst (TREE_TYPE (fpr
),
4797 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4799 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4800 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4801 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4802 gimplify_and_add (t
, pre_p
);
4805 /* Compute index to start of area used for integer regs. */
4808 /* int_addr = gpr + sav; */
4809 t
= fold_convert (ptr_type_node
, gpr
);
4810 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4811 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4812 gimplify_and_add (t
, pre_p
);
4816 /* sse_addr = fpr + sav; */
4817 t
= fold_convert (ptr_type_node
, fpr
);
4818 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4819 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4820 gimplify_and_add (t
, pre_p
);
4825 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4828 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4829 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4830 gimplify_and_add (t
, pre_p
);
4832 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4834 rtx slot
= XVECEXP (container
, 0, i
);
4835 rtx reg
= XEXP (slot
, 0);
4836 enum machine_mode mode
= GET_MODE (reg
);
4837 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4838 tree addr_type
= build_pointer_type (piece_type
);
4841 tree dest_addr
, dest
;
4843 if (SSE_REGNO_P (REGNO (reg
)))
4845 src_addr
= sse_addr
;
4846 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4850 src_addr
= int_addr
;
4851 src_offset
= REGNO (reg
) * 8;
4853 src_addr
= fold_convert (addr_type
, src_addr
);
4854 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4855 size_int (src_offset
)));
4856 src
= build_va_arg_indirect_ref (src_addr
);
4858 dest_addr
= fold_convert (addr_type
, addr
);
4859 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4860 size_int (INTVAL (XEXP (slot
, 1)))));
4861 dest
= build_va_arg_indirect_ref (dest_addr
);
4863 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4864 gimplify_and_add (t
, pre_p
);
4870 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4871 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4872 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4873 gimplify_and_add (t
, pre_p
);
4877 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4878 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4879 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4880 gimplify_and_add (t
, pre_p
);
4883 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4884 gimplify_and_add (t
, pre_p
);
4886 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4887 append_to_statement_list (t
, pre_p
);
4890 /* ... otherwise out of the overflow area. */
4892 /* Care for on-stack alignment if needed. */
4893 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4894 || integer_zerop (TYPE_SIZE (type
)))
4898 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4899 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4900 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4901 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4902 build_int_cst (TREE_TYPE (t
), -align
));
4904 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4906 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4907 gimplify_and_add (t2
, pre_p
);
4909 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4910 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4911 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4912 gimplify_and_add (t
, pre_p
);
4916 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4917 append_to_statement_list (t
, pre_p
);
4920 ptrtype
= build_pointer_type (type
);
4921 addr
= fold_convert (ptrtype
, addr
);
4924 addr
= build_va_arg_indirect_ref (addr
);
4925 return build_va_arg_indirect_ref (addr
);
4928 /* Return nonzero if OPNUM's MEM should be matched
4929 in movabs* patterns. */
4932 ix86_check_movabs (rtx insn
, int opnum
)
4936 set
= PATTERN (insn
);
4937 if (GET_CODE (set
) == PARALLEL
)
4938 set
= XVECEXP (set
, 0, 0);
4939 gcc_assert (GET_CODE (set
) == SET
);
4940 mem
= XEXP (set
, opnum
);
4941 while (GET_CODE (mem
) == SUBREG
)
4942 mem
= SUBREG_REG (mem
);
4943 gcc_assert (MEM_P (mem
));
4944 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4947 /* Initialize the table of extra 80387 mathematical constants. */
4950 init_ext_80387_constants (void)
4952 static const char * cst
[5] =
4954 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4955 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4956 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4957 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4958 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4962 for (i
= 0; i
< 5; i
++)
4964 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4965 /* Ensure each constant is rounded to XFmode precision. */
4966 real_convert (&ext_80387_constants_table
[i
],
4967 XFmode
, &ext_80387_constants_table
[i
]);
4970 ext_80387_constants_init
= 1;
4973 /* Return true if the constant is something that can be loaded with
4974 a special instruction. */
4977 standard_80387_constant_p (rtx x
)
4981 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4984 if (x
== CONST0_RTX (GET_MODE (x
)))
4986 if (x
== CONST1_RTX (GET_MODE (x
)))
4989 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4991 /* For XFmode constants, try to find a special 80387 instruction when
4992 optimizing for size or on those CPUs that benefit from them. */
4993 if (GET_MODE (x
) == XFmode
4994 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4998 if (! ext_80387_constants_init
)
4999 init_ext_80387_constants ();
5001 for (i
= 0; i
< 5; i
++)
5002 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5006 /* Load of the constant -0.0 or -1.0 will be split as
5007 fldz;fchs or fld1;fchs sequence. */
5008 if (real_isnegzero (&r
))
5010 if (real_identical (&r
, &dconstm1
))
5016 /* Return the opcode of the special instruction to be used to load
5020 standard_80387_constant_opcode (rtx x
)
5022 switch (standard_80387_constant_p (x
))
5046 /* Return the CONST_DOUBLE representing the 80387 constant that is
5047 loaded by the specified special instruction. The argument IDX
5048 matches the return value from standard_80387_constant_p. */
5051 standard_80387_constant_rtx (int idx
)
5055 if (! ext_80387_constants_init
)
5056 init_ext_80387_constants ();
5072 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5076 /* Return 1 if mode is a valid mode for sse. */
5078 standard_sse_mode_p (enum machine_mode mode
)
5095 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5098 standard_sse_constant_p (rtx x
)
5100 enum machine_mode mode
= GET_MODE (x
);
5102 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5104 if (vector_all_ones_operand (x
, mode
)
5105 && standard_sse_mode_p (mode
))
5106 return TARGET_SSE2
? 2 : -1;
5111 /* Return the opcode of the special instruction to be used to load
5115 standard_sse_constant_opcode (rtx insn
, rtx x
)
5117 switch (standard_sse_constant_p (x
))
5120 if (get_attr_mode (insn
) == MODE_V4SF
)
5121 return "xorps\t%0, %0";
5122 else if (get_attr_mode (insn
) == MODE_V2DF
)
5123 return "xorpd\t%0, %0";
5125 return "pxor\t%0, %0";
5127 return "pcmpeqd\t%0, %0";
5132 /* Returns 1 if OP contains a symbol reference */
5135 symbolic_reference_mentioned_p (rtx op
)
5140 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5143 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5144 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5150 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5151 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5155 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5162 /* Return 1 if it is appropriate to emit `ret' instructions in the
5163 body of a function. Do this only if the epilogue is simple, needing a
5164 couple of insns. Prior to reloading, we can't tell how many registers
5165 must be saved, so return 0 then. Return 0 if there is no frame
5166 marker to de-allocate. */
5169 ix86_can_use_return_insn_p (void)
5171 struct ix86_frame frame
;
5173 if (! reload_completed
|| frame_pointer_needed
)
5176 /* Don't allow more than 32 pop, since that's all we can do
5177 with one instruction. */
5178 if (current_function_pops_args
5179 && current_function_args_size
>= 32768)
5182 ix86_compute_frame_layout (&frame
);
5183 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5186 /* Value should be nonzero if functions must have frame pointers.
5187 Zero means the frame pointer need not be set up (and parms may
5188 be accessed via the stack pointer) in functions that seem suitable. */
5191 ix86_frame_pointer_required (void)
5193 /* If we accessed previous frames, then the generated code expects
5194 to be able to access the saved ebp value in our frame. */
5195 if (cfun
->machine
->accesses_prev_frame
)
5198 /* Several x86 os'es need a frame pointer for other reasons,
5199 usually pertaining to setjmp. */
5200 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5203 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5204 the frame pointer by default. Turn it back on now if we've not
5205 got a leaf function. */
5206 if (TARGET_OMIT_LEAF_FRAME_POINTER
5207 && (!current_function_is_leaf
5208 || ix86_current_function_calls_tls_descriptor
))
5211 if (current_function_profile
)
5217 /* Record that the current function accesses previous call frames. */
5220 ix86_setup_frame_addresses (void)
5222 cfun
->machine
->accesses_prev_frame
= 1;
5225 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5226 # define USE_HIDDEN_LINKONCE 1
5228 # define USE_HIDDEN_LINKONCE 0
5231 static int pic_labels_used
;
5233 /* Fills in the label name that should be used for a pc thunk for
5234 the given register. */
5237 get_pc_thunk_name (char name
[32], unsigned int regno
)
5239 gcc_assert (!TARGET_64BIT
);
5241 if (USE_HIDDEN_LINKONCE
)
5242 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5244 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5248 /* This function generates code for -fpic that loads %ebx with
5249 the return address of the caller and then returns. */
5252 ix86_file_end (void)
5257 for (regno
= 0; regno
< 8; ++regno
)
5261 if (! ((pic_labels_used
>> regno
) & 1))
5264 get_pc_thunk_name (name
, regno
);
5269 switch_to_section (darwin_sections
[text_coal_section
]);
5270 fputs ("\t.weak_definition\t", asm_out_file
);
5271 assemble_name (asm_out_file
, name
);
5272 fputs ("\n\t.private_extern\t", asm_out_file
);
5273 assemble_name (asm_out_file
, name
);
5274 fputs ("\n", asm_out_file
);
5275 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5279 if (USE_HIDDEN_LINKONCE
)
5283 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5285 TREE_PUBLIC (decl
) = 1;
5286 TREE_STATIC (decl
) = 1;
5287 DECL_ONE_ONLY (decl
) = 1;
5289 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5290 switch_to_section (get_named_section (decl
, NULL
, 0));
5292 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5293 fputs ("\t.hidden\t", asm_out_file
);
5294 assemble_name (asm_out_file
, name
);
5295 fputc ('\n', asm_out_file
);
5296 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5300 switch_to_section (text_section
);
5301 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5304 xops
[0] = gen_rtx_REG (SImode
, regno
);
5305 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5306 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5307 output_asm_insn ("ret", xops
);
5310 if (NEED_INDICATE_EXEC_STACK
)
5311 file_end_indicate_exec_stack ();
5314 /* Emit code for the SET_GOT patterns. */
5317 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5322 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5324 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5326 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5329 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5331 output_asm_insn ("call\t%a2", xops
);
5334 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5335 is what will be referenced by the Mach-O PIC subsystem. */
5337 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5340 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5341 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5344 output_asm_insn ("pop{l}\t%0", xops
);
5349 get_pc_thunk_name (name
, REGNO (dest
));
5350 pic_labels_used
|= 1 << REGNO (dest
);
5352 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5353 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5354 output_asm_insn ("call\t%X2", xops
);
5355 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5356 is what will be referenced by the Mach-O PIC subsystem. */
5359 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5361 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5362 CODE_LABEL_NUMBER (label
));
5369 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5370 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5372 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5377 /* Generate an "push" pattern for input ARG. */
5382 return gen_rtx_SET (VOIDmode
,
5384 gen_rtx_PRE_DEC (Pmode
,
5385 stack_pointer_rtx
)),
5389 /* Return >= 0 if there is an unused call-clobbered register available
5390 for the entire function. */
5393 ix86_select_alt_pic_regnum (void)
5395 if (current_function_is_leaf
&& !current_function_profile
5396 && !ix86_current_function_calls_tls_descriptor
)
5399 for (i
= 2; i
>= 0; --i
)
5400 if (!regs_ever_live
[i
])
5404 return INVALID_REGNUM
;
5407 /* Return 1 if we need to save REGNO. */
5409 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5411 if (pic_offset_table_rtx
5412 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5413 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5414 || current_function_profile
5415 || current_function_calls_eh_return
5416 || current_function_uses_const_pool
))
5418 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5423 if (current_function_calls_eh_return
&& maybe_eh_return
)
5428 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5429 if (test
== INVALID_REGNUM
)
5436 if (cfun
->machine
->force_align_arg_pointer
5437 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5440 return (regs_ever_live
[regno
]
5441 && !call_used_regs
[regno
]
5442 && !fixed_regs
[regno
]
5443 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5446 /* Return number of registers to be saved on the stack. */
5449 ix86_nsaved_regs (void)
5454 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5455 if (ix86_save_reg (regno
, true))
5460 /* Return the offset between two registers, one to be eliminated, and the other
5461 its replacement, at the start of a routine. */
5464 ix86_initial_elimination_offset (int from
, int to
)
5466 struct ix86_frame frame
;
5467 ix86_compute_frame_layout (&frame
);
5469 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5470 return frame
.hard_frame_pointer_offset
;
5471 else if (from
== FRAME_POINTER_REGNUM
5472 && to
== HARD_FRAME_POINTER_REGNUM
)
5473 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5476 gcc_assert (to
== STACK_POINTER_REGNUM
);
5478 if (from
== ARG_POINTER_REGNUM
)
5479 return frame
.stack_pointer_offset
;
5481 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5482 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5486 /* Fill structure ix86_frame about frame of currently computed function. */
5489 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5491 HOST_WIDE_INT total_size
;
5492 unsigned int stack_alignment_needed
;
5493 HOST_WIDE_INT offset
;
5494 unsigned int preferred_alignment
;
5495 HOST_WIDE_INT size
= get_frame_size ();
5497 frame
->nregs
= ix86_nsaved_regs ();
5500 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5501 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5503 /* During reload iteration the amount of registers saved can change.
5504 Recompute the value as needed. Do not recompute when amount of registers
5505 didn't change as reload does multiple calls to the function and does not
5506 expect the decision to change within single iteration. */
5508 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5510 int count
= frame
->nregs
;
5512 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5513 /* The fast prologue uses move instead of push to save registers. This
5514 is significantly longer, but also executes faster as modern hardware
5515 can execute the moves in parallel, but can't do that for push/pop.
5517 Be careful about choosing what prologue to emit: When function takes
5518 many instructions to execute we may use slow version as well as in
5519 case function is known to be outside hot spot (this is known with
5520 feedback only). Weight the size of function by number of registers
5521 to save as it is cheap to use one or two push instructions but very
5522 slow to use many of them. */
5524 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5525 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5526 || (flag_branch_probabilities
5527 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5528 cfun
->machine
->use_fast_prologue_epilogue
= false;
5530 cfun
->machine
->use_fast_prologue_epilogue
5531 = !expensive_function_p (count
);
5533 if (TARGET_PROLOGUE_USING_MOVE
5534 && cfun
->machine
->use_fast_prologue_epilogue
)
5535 frame
->save_regs_using_mov
= true;
5537 frame
->save_regs_using_mov
= false;
5540 /* Skip return address and saved base pointer. */
5541 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5543 frame
->hard_frame_pointer_offset
= offset
;
5545 /* Do some sanity checking of stack_alignment_needed and
5546 preferred_alignment, since i386 port is the only using those features
5547 that may break easily. */
5549 gcc_assert (!size
|| stack_alignment_needed
);
5550 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5551 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5552 gcc_assert (stack_alignment_needed
5553 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5555 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5556 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5558 /* Register save area */
5559 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5562 if (ix86_save_varrargs_registers
)
5564 offset
+= X86_64_VARARGS_SIZE
;
5565 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5568 frame
->va_arg_size
= 0;
5570 /* Align start of frame for local function. */
5571 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5572 & -stack_alignment_needed
) - offset
;
5574 offset
+= frame
->padding1
;
5576 /* Frame pointer points here. */
5577 frame
->frame_pointer_offset
= offset
;
5581 /* Add outgoing arguments area. Can be skipped if we eliminated
5582 all the function calls as dead code.
5583 Skipping is however impossible when function calls alloca. Alloca
5584 expander assumes that last current_function_outgoing_args_size
5585 of stack frame are unused. */
5586 if (ACCUMULATE_OUTGOING_ARGS
5587 && (!current_function_is_leaf
|| current_function_calls_alloca
5588 || ix86_current_function_calls_tls_descriptor
))
5590 offset
+= current_function_outgoing_args_size
;
5591 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5594 frame
->outgoing_arguments_size
= 0;
5596 /* Align stack boundary. Only needed if we're calling another function
5598 if (!current_function_is_leaf
|| current_function_calls_alloca
5599 || ix86_current_function_calls_tls_descriptor
)
5600 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5601 & -preferred_alignment
) - offset
;
5603 frame
->padding2
= 0;
5605 offset
+= frame
->padding2
;
5607 /* We've reached end of stack frame. */
5608 frame
->stack_pointer_offset
= offset
;
5610 /* Size prologue needs to allocate. */
5611 frame
->to_allocate
=
5612 (size
+ frame
->padding1
+ frame
->padding2
5613 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5615 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5616 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5617 frame
->save_regs_using_mov
= false;
5619 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5620 && current_function_is_leaf
5621 && !ix86_current_function_calls_tls_descriptor
)
5623 frame
->red_zone_size
= frame
->to_allocate
;
5624 if (frame
->save_regs_using_mov
)
5625 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5626 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5627 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5630 frame
->red_zone_size
= 0;
5631 frame
->to_allocate
-= frame
->red_zone_size
;
5632 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5634 fprintf (stderr
, "\n");
5635 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5636 fprintf (stderr
, "size: %ld\n", (long)size
);
5637 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5638 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5639 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5640 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5641 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5642 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5643 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5644 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5645 (long)frame
->hard_frame_pointer_offset
);
5646 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5647 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5648 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5649 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5653 /* Emit code to save registers in the prologue. */
5656 ix86_emit_save_regs (void)
5661 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5662 if (ix86_save_reg (regno
, true))
5664 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5665 RTX_FRAME_RELATED_P (insn
) = 1;
5669 /* Emit code to save registers using MOV insns. First register
5670 is restored from POINTER + OFFSET. */
5672 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5677 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5678 if (ix86_save_reg (regno
, true))
5680 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5682 gen_rtx_REG (Pmode
, regno
));
5683 RTX_FRAME_RELATED_P (insn
) = 1;
5684 offset
+= UNITS_PER_WORD
;
5688 /* Expand prologue or epilogue stack adjustment.
5689 The pattern exist to put a dependency on all ebp-based memory accesses.
5690 STYLE should be negative if instructions should be marked as frame related,
5691 zero if %r11 register is live and cannot be freely used and positive
5695 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5700 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5701 else if (x86_64_immediate_operand (offset
, DImode
))
5702 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5706 /* r11 is used by indirect sibcall return as well, set before the
5707 epilogue and used after the epilogue. ATM indirect sibcall
5708 shouldn't be used together with huge frame sizes in one
5709 function because of the frame_size check in sibcall.c. */
5711 r11
= gen_rtx_REG (DImode
, R11_REG
);
5712 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5714 RTX_FRAME_RELATED_P (insn
) = 1;
5715 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5719 RTX_FRAME_RELATED_P (insn
) = 1;
5722 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5725 ix86_internal_arg_pointer (void)
5727 bool has_force_align_arg_pointer
=
5728 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5729 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5730 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5731 && DECL_NAME (current_function_decl
)
5732 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5733 && DECL_FILE_SCOPE_P (current_function_decl
))
5734 || ix86_force_align_arg_pointer
5735 || has_force_align_arg_pointer
)
5737 /* Nested functions can't realign the stack due to a register
5739 if (DECL_CONTEXT (current_function_decl
)
5740 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5742 if (ix86_force_align_arg_pointer
)
5743 warning (0, "-mstackrealign ignored for nested functions");
5744 if (has_force_align_arg_pointer
)
5745 error ("%s not supported for nested functions",
5746 ix86_force_align_arg_pointer_string
);
5747 return virtual_incoming_args_rtx
;
5749 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5750 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5753 return virtual_incoming_args_rtx
;
5756 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5757 This is called from dwarf2out.c to emit call frame instructions
5758 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5760 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5762 rtx unspec
= SET_SRC (pattern
);
5763 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5767 case UNSPEC_REG_SAVE
:
5768 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5769 SET_DEST (pattern
));
5771 case UNSPEC_DEF_CFA
:
5772 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5773 INTVAL (XVECEXP (unspec
, 0, 0)));
5780 /* Expand the prologue into a bunch of separate insns. */
5783 ix86_expand_prologue (void)
5787 struct ix86_frame frame
;
5788 HOST_WIDE_INT allocate
;
5790 ix86_compute_frame_layout (&frame
);
5792 if (cfun
->machine
->force_align_arg_pointer
)
5796 /* Grab the argument pointer. */
5797 x
= plus_constant (stack_pointer_rtx
, 4);
5798 y
= cfun
->machine
->force_align_arg_pointer
;
5799 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5800 RTX_FRAME_RELATED_P (insn
) = 1;
5802 /* The unwind info consists of two parts: install the fafp as the cfa,
5803 and record the fafp as the "save register" of the stack pointer.
5804 The later is there in order that the unwinder can see where it
5805 should restore the stack pointer across the and insn. */
5806 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5807 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5808 RTX_FRAME_RELATED_P (x
) = 1;
5809 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5811 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5812 RTX_FRAME_RELATED_P (y
) = 1;
5813 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5814 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5815 REG_NOTES (insn
) = x
;
5817 /* Align the stack. */
5818 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5821 /* And here we cheat like madmen with the unwind info. We force the
5822 cfa register back to sp+4, which is exactly what it was at the
5823 start of the function. Re-pushing the return address results in
5824 the return at the same spot relative to the cfa, and thus is
5825 correct wrt the unwind info. */
5826 x
= cfun
->machine
->force_align_arg_pointer
;
5827 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5828 insn
= emit_insn (gen_push (x
));
5829 RTX_FRAME_RELATED_P (insn
) = 1;
5832 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5833 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5834 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5835 REG_NOTES (insn
) = x
;
5838 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5839 slower on all targets. Also sdb doesn't like it. */
5841 if (frame_pointer_needed
)
5843 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5844 RTX_FRAME_RELATED_P (insn
) = 1;
5846 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5847 RTX_FRAME_RELATED_P (insn
) = 1;
5850 allocate
= frame
.to_allocate
;
5852 if (!frame
.save_regs_using_mov
)
5853 ix86_emit_save_regs ();
5855 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5857 /* When using red zone we may start register saving before allocating
5858 the stack frame saving one cycle of the prologue. */
5859 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5860 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5861 : stack_pointer_rtx
,
5862 -frame
.nregs
* UNITS_PER_WORD
);
5866 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5867 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5868 GEN_INT (-allocate
), -1);
5871 /* Only valid for Win32. */
5872 rtx eax
= gen_rtx_REG (SImode
, 0);
5873 bool eax_live
= ix86_eax_live_at_start_p ();
5876 gcc_assert (!TARGET_64BIT
);
5880 emit_insn (gen_push (eax
));
5884 emit_move_insn (eax
, GEN_INT (allocate
));
5886 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5887 RTX_FRAME_RELATED_P (insn
) = 1;
5888 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5889 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5890 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5891 t
, REG_NOTES (insn
));
5895 if (frame_pointer_needed
)
5896 t
= plus_constant (hard_frame_pointer_rtx
,
5899 - frame
.nregs
* UNITS_PER_WORD
);
5901 t
= plus_constant (stack_pointer_rtx
, allocate
);
5902 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5906 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5908 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5909 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5911 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5912 -frame
.nregs
* UNITS_PER_WORD
);
5915 pic_reg_used
= false;
5916 if (pic_offset_table_rtx
5917 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5918 || current_function_profile
))
5920 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5922 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5923 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5925 pic_reg_used
= true;
5931 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5933 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5935 /* Even with accurate pre-reload life analysis, we can wind up
5936 deleting all references to the pic register after reload.
5937 Consider if cross-jumping unifies two sides of a branch
5938 controlled by a comparison vs the only read from a global.
5939 In which case, allow the set_got to be deleted, though we're
5940 too late to do anything about the ebx save in the prologue. */
5941 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5944 /* Prevent function calls from be scheduled before the call to mcount.
5945 In the pic_reg_used case, make sure that the got load isn't deleted. */
5946 if (current_function_profile
)
5947 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5950 /* Emit code to restore saved registers using MOV insns. First register
5951 is restored from POINTER + OFFSET. */
5953 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5954 int maybe_eh_return
)
5957 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5959 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5960 if (ix86_save_reg (regno
, maybe_eh_return
))
5962 /* Ensure that adjust_address won't be forced to produce pointer
5963 out of range allowed by x86-64 instruction set. */
5964 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5968 r11
= gen_rtx_REG (DImode
, R11_REG
);
5969 emit_move_insn (r11
, GEN_INT (offset
));
5970 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5971 base_address
= gen_rtx_MEM (Pmode
, r11
);
5974 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5975 adjust_address (base_address
, Pmode
, offset
));
5976 offset
+= UNITS_PER_WORD
;
5980 /* Restore function stack, frame, and registers. */
5983 ix86_expand_epilogue (int style
)
5986 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5987 struct ix86_frame frame
;
5988 HOST_WIDE_INT offset
;
5990 ix86_compute_frame_layout (&frame
);
5992 /* Calculate start of saved registers relative to ebp. Special care
5993 must be taken for the normal return case of a function using
5994 eh_return: the eax and edx registers are marked as saved, but not
5995 restored along this path. */
5996 offset
= frame
.nregs
;
5997 if (current_function_calls_eh_return
&& style
!= 2)
5999 offset
*= -UNITS_PER_WORD
;
6001 /* If we're only restoring one register and sp is not valid then
6002 using a move instruction to restore the register since it's
6003 less work than reloading sp and popping the register.
6005 The default code result in stack adjustment using add/lea instruction,
6006 while this code results in LEAVE instruction (or discrete equivalent),
6007 so it is profitable in some other cases as well. Especially when there
6008 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6009 and there is exactly one register to pop. This heuristic may need some
6010 tuning in future. */
6011 if ((!sp_valid
&& frame
.nregs
<= 1)
6012 || (TARGET_EPILOGUE_USING_MOVE
6013 && cfun
->machine
->use_fast_prologue_epilogue
6014 && (frame
.nregs
> 1 || frame
.to_allocate
))
6015 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6016 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6017 && cfun
->machine
->use_fast_prologue_epilogue
6018 && frame
.nregs
== 1)
6019 || current_function_calls_eh_return
)
6021 /* Restore registers. We can use ebp or esp to address the memory
6022 locations. If both are available, default to ebp, since offsets
6023 are known to be small. Only exception is esp pointing directly to the
6024 end of block of saved registers, where we may simplify addressing
6027 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6028 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6029 frame
.to_allocate
, style
== 2);
6031 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6032 offset
, style
== 2);
6034 /* eh_return epilogues need %ecx added to the stack pointer. */
6037 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6039 if (frame_pointer_needed
)
6041 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6042 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6043 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6045 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6046 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6048 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6053 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6054 tmp
= plus_constant (tmp
, (frame
.to_allocate
6055 + frame
.nregs
* UNITS_PER_WORD
));
6056 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6059 else if (!frame_pointer_needed
)
6060 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6061 GEN_INT (frame
.to_allocate
6062 + frame
.nregs
* UNITS_PER_WORD
),
6064 /* If not an i386, mov & pop is faster than "leave". */
6065 else if (TARGET_USE_LEAVE
|| optimize_size
6066 || !cfun
->machine
->use_fast_prologue_epilogue
)
6067 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6070 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6071 hard_frame_pointer_rtx
,
6074 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6076 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6081 /* First step is to deallocate the stack frame so that we can
6082 pop the registers. */
6085 gcc_assert (frame_pointer_needed
);
6086 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6087 hard_frame_pointer_rtx
,
6088 GEN_INT (offset
), style
);
6090 else if (frame
.to_allocate
)
6091 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6092 GEN_INT (frame
.to_allocate
), style
);
6094 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6095 if (ix86_save_reg (regno
, false))
6098 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6100 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6102 if (frame_pointer_needed
)
6104 /* Leave results in shorter dependency chains on CPUs that are
6105 able to grok it fast. */
6106 if (TARGET_USE_LEAVE
)
6107 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6108 else if (TARGET_64BIT
)
6109 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6111 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6115 if (cfun
->machine
->force_align_arg_pointer
)
6117 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6118 cfun
->machine
->force_align_arg_pointer
,
6122 /* Sibcall epilogues don't want a return instruction. */
6126 if (current_function_pops_args
&& current_function_args_size
)
6128 rtx popc
= GEN_INT (current_function_pops_args
);
6130 /* i386 can only pop 64K bytes. If asked to pop more, pop
6131 return address, do explicit add, and jump indirectly to the
6134 if (current_function_pops_args
>= 65536)
6136 rtx ecx
= gen_rtx_REG (SImode
, 2);
6138 /* There is no "pascal" calling convention in 64bit ABI. */
6139 gcc_assert (!TARGET_64BIT
);
6141 emit_insn (gen_popsi1 (ecx
));
6142 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6143 emit_jump_insn (gen_return_indirect_internal (ecx
));
6146 emit_jump_insn (gen_return_pop_internal (popc
));
6149 emit_jump_insn (gen_return_internal ());
6152 /* Reset from the function's potential modifications. */
6155 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6156 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6158 if (pic_offset_table_rtx
)
6159 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6161 /* Mach-O doesn't support labels at the end of objects, so if
6162 it looks like we might want one, insert a NOP. */
6164 rtx insn
= get_last_insn ();
6167 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6168 insn
= PREV_INSN (insn
);
6172 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6173 fputs ("\tnop\n", file
);
6179 /* Extract the parts of an RTL expression that is a valid memory address
6180 for an instruction. Return 0 if the structure of the address is
6181 grossly off. Return -1 if the address contains ASHIFT, so it is not
6182 strictly valid, but still used for computing length of lea instruction. */
6185 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6187 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6188 rtx base_reg
, index_reg
;
6189 HOST_WIDE_INT scale
= 1;
6190 rtx scale_rtx
= NULL_RTX
;
6192 enum ix86_address_seg seg
= SEG_DEFAULT
;
6194 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6196 else if (GET_CODE (addr
) == PLUS
)
6206 addends
[n
++] = XEXP (op
, 1);
6209 while (GET_CODE (op
) == PLUS
);
6214 for (i
= n
; i
>= 0; --i
)
6217 switch (GET_CODE (op
))
6222 index
= XEXP (op
, 0);
6223 scale_rtx
= XEXP (op
, 1);
6227 if (XINT (op
, 1) == UNSPEC_TP
6228 && TARGET_TLS_DIRECT_SEG_REFS
6229 && seg
== SEG_DEFAULT
)
6230 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6259 else if (GET_CODE (addr
) == MULT
)
6261 index
= XEXP (addr
, 0); /* index*scale */
6262 scale_rtx
= XEXP (addr
, 1);
6264 else if (GET_CODE (addr
) == ASHIFT
)
6268 /* We're called for lea too, which implements ashift on occasion. */
6269 index
= XEXP (addr
, 0);
6270 tmp
= XEXP (addr
, 1);
6271 if (!CONST_INT_P (tmp
))
6273 scale
= INTVAL (tmp
);
6274 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6280 disp
= addr
; /* displacement */
6282 /* Extract the integral value of scale. */
6285 if (!CONST_INT_P (scale_rtx
))
6287 scale
= INTVAL (scale_rtx
);
6290 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6291 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6293 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6294 if (base_reg
&& index_reg
&& scale
== 1
6295 && (index_reg
== arg_pointer_rtx
6296 || index_reg
== frame_pointer_rtx
6297 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6300 tmp
= base
, base
= index
, index
= tmp
;
6301 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6304 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6305 if ((base_reg
== hard_frame_pointer_rtx
6306 || base_reg
== frame_pointer_rtx
6307 || base_reg
== arg_pointer_rtx
) && !disp
)
6310 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6311 Avoid this by transforming to [%esi+0]. */
6312 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6313 && base_reg
&& !index_reg
&& !disp
6315 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6318 /* Special case: encode reg+reg instead of reg*2. */
6319 if (!base
&& index
&& scale
&& scale
== 2)
6320 base
= index
, base_reg
= index_reg
, scale
= 1;
6322 /* Special case: scaling cannot be encoded without base or displacement. */
6323 if (!base
&& !disp
&& index
&& scale
!= 1)
6335 /* Return cost of the memory address x.
6336 For i386, it is better to use a complex address than let gcc copy
6337 the address into a reg and make a new pseudo. But not if the address
6338 requires to two regs - that would mean more pseudos with longer
6341 ix86_address_cost (rtx x
)
6343 struct ix86_address parts
;
6345 int ok
= ix86_decompose_address (x
, &parts
);
6349 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6350 parts
.base
= SUBREG_REG (parts
.base
);
6351 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6352 parts
.index
= SUBREG_REG (parts
.index
);
6354 /* More complex memory references are better. */
6355 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6357 if (parts
.seg
!= SEG_DEFAULT
)
6360 /* Attempt to minimize number of registers in the address. */
6362 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6364 && (!REG_P (parts
.index
)
6365 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6369 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6371 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6372 && parts
.base
!= parts
.index
)
6375 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6376 since it's predecode logic can't detect the length of instructions
6377 and it degenerates to vector decoded. Increase cost of such
6378 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6379 to split such addresses or even refuse such addresses at all.
6381 Following addressing modes are affected:
6386 The first and last case may be avoidable by explicitly coding the zero in
6387 memory address, but I don't have AMD-K6 machine handy to check this
6391 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6392 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6393 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6399 /* If X is a machine specific address (i.e. a symbol or label being
6400 referenced as a displacement from the GOT implemented using an
6401 UNSPEC), then return the base term. Otherwise return X. */
6404 ix86_find_base_term (rtx x
)
6410 if (GET_CODE (x
) != CONST
)
6413 if (GET_CODE (term
) == PLUS
6414 && (CONST_INT_P (XEXP (term
, 1))
6415 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6416 term
= XEXP (term
, 0);
6417 if (GET_CODE (term
) != UNSPEC
6418 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6421 term
= XVECEXP (term
, 0, 0);
6423 if (GET_CODE (term
) != SYMBOL_REF
6424 && GET_CODE (term
) != LABEL_REF
)
6430 term
= ix86_delegitimize_address (x
);
6432 if (GET_CODE (term
) != SYMBOL_REF
6433 && GET_CODE (term
) != LABEL_REF
)
6439 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6440 this is used for to form addresses to local data when -fPIC is in
6444 darwin_local_data_pic (rtx disp
)
6446 if (GET_CODE (disp
) == MINUS
)
6448 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6449 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6450 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6452 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6453 if (! strcmp (sym_name
, "<pic base>"))
6461 /* Determine if a given RTX is a valid constant. We already know this
6462 satisfies CONSTANT_P. */
6465 legitimate_constant_p (rtx x
)
6467 switch (GET_CODE (x
))
6472 if (GET_CODE (x
) == PLUS
)
6474 if (!CONST_INT_P (XEXP (x
, 1)))
6479 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6482 /* Only some unspecs are valid as "constants". */
6483 if (GET_CODE (x
) == UNSPEC
)
6484 switch (XINT (x
, 1))
6487 return TARGET_64BIT
;
6490 x
= XVECEXP (x
, 0, 0);
6491 return (GET_CODE (x
) == SYMBOL_REF
6492 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6494 x
= XVECEXP (x
, 0, 0);
6495 return (GET_CODE (x
) == SYMBOL_REF
6496 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6501 /* We must have drilled down to a symbol. */
6502 if (GET_CODE (x
) == LABEL_REF
)
6504 if (GET_CODE (x
) != SYMBOL_REF
)
6509 /* TLS symbols are never valid. */
6510 if (SYMBOL_REF_TLS_MODEL (x
))
6515 if (GET_MODE (x
) == TImode
6516 && x
!= CONST0_RTX (TImode
)
6522 if (x
== CONST0_RTX (GET_MODE (x
)))
6530 /* Otherwise we handle everything else in the move patterns. */
6534 /* Determine if it's legal to put X into the constant pool. This
6535 is not possible for the address of thread-local symbols, which
6536 is checked above. */
6539 ix86_cannot_force_const_mem (rtx x
)
6541 /* We can always put integral constants and vectors in memory. */
6542 switch (GET_CODE (x
))
6552 return !legitimate_constant_p (x
);
6555 /* Determine if a given RTX is a valid constant address. */
6558 constant_address_p (rtx x
)
6560 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6563 /* Nonzero if the constant value X is a legitimate general operand
6564 when generating PIC code. It is given that flag_pic is on and
6565 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6568 legitimate_pic_operand_p (rtx x
)
6572 switch (GET_CODE (x
))
6575 inner
= XEXP (x
, 0);
6576 if (GET_CODE (inner
) == PLUS
6577 && CONST_INT_P (XEXP (inner
, 1)))
6578 inner
= XEXP (inner
, 0);
6580 /* Only some unspecs are valid as "constants". */
6581 if (GET_CODE (inner
) == UNSPEC
)
6582 switch (XINT (inner
, 1))
6585 return TARGET_64BIT
;
6587 x
= XVECEXP (inner
, 0, 0);
6588 return (GET_CODE (x
) == SYMBOL_REF
6589 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6597 return legitimate_pic_address_disp_p (x
);
6604 /* Determine if a given CONST RTX is a valid memory displacement
6608 legitimate_pic_address_disp_p (rtx disp
)
6612 /* In 64bit mode we can allow direct addresses of symbols and labels
6613 when they are not dynamic symbols. */
6616 rtx op0
= disp
, op1
;
6618 switch (GET_CODE (disp
))
6624 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6626 op0
= XEXP (XEXP (disp
, 0), 0);
6627 op1
= XEXP (XEXP (disp
, 0), 1);
6628 if (!CONST_INT_P (op1
)
6629 || INTVAL (op1
) >= 16*1024*1024
6630 || INTVAL (op1
) < -16*1024*1024)
6632 if (GET_CODE (op0
) == LABEL_REF
)
6634 if (GET_CODE (op0
) != SYMBOL_REF
)
6639 /* TLS references should always be enclosed in UNSPEC. */
6640 if (SYMBOL_REF_TLS_MODEL (op0
))
6642 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6650 if (GET_CODE (disp
) != CONST
)
6652 disp
= XEXP (disp
, 0);
6656 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6657 of GOT tables. We should not need these anyway. */
6658 if (GET_CODE (disp
) != UNSPEC
6659 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6660 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6663 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6664 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6670 if (GET_CODE (disp
) == PLUS
)
6672 if (!CONST_INT_P (XEXP (disp
, 1)))
6674 disp
= XEXP (disp
, 0);
6678 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6681 if (GET_CODE (disp
) != UNSPEC
)
6684 switch (XINT (disp
, 1))
6689 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6691 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6692 While ABI specify also 32bit relocation but we don't produce it in
6693 small PIC model at all. */
6694 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6695 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6697 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6699 case UNSPEC_GOTTPOFF
:
6700 case UNSPEC_GOTNTPOFF
:
6701 case UNSPEC_INDNTPOFF
:
6704 disp
= XVECEXP (disp
, 0, 0);
6705 return (GET_CODE (disp
) == SYMBOL_REF
6706 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6708 disp
= XVECEXP (disp
, 0, 0);
6709 return (GET_CODE (disp
) == SYMBOL_REF
6710 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6712 disp
= XVECEXP (disp
, 0, 0);
6713 return (GET_CODE (disp
) == SYMBOL_REF
6714 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6720 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6721 memory address for an instruction. The MODE argument is the machine mode
6722 for the MEM expression that wants to use this address.
6724 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6725 convert common non-canonical forms to canonical form so that they will
6729 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6731 struct ix86_address parts
;
6732 rtx base
, index
, disp
;
6733 HOST_WIDE_INT scale
;
6734 const char *reason
= NULL
;
6735 rtx reason_rtx
= NULL_RTX
;
6737 if (TARGET_DEBUG_ADDR
)
6740 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6741 GET_MODE_NAME (mode
), strict
);
6745 if (ix86_decompose_address (addr
, &parts
) <= 0)
6747 reason
= "decomposition failed";
6752 index
= parts
.index
;
6754 scale
= parts
.scale
;
6756 /* Validate base register.
6758 Don't allow SUBREG's that span more than a word here. It can lead to spill
6759 failures when the base is one word out of a two word structure, which is
6760 represented internally as a DImode int. */
6769 else if (GET_CODE (base
) == SUBREG
6770 && REG_P (SUBREG_REG (base
))
6771 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6773 reg
= SUBREG_REG (base
);
6776 reason
= "base is not a register";
6780 if (GET_MODE (base
) != Pmode
)
6782 reason
= "base is not in Pmode";
6786 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6787 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6789 reason
= "base is not valid";
6794 /* Validate index register.
6796 Don't allow SUBREG's that span more than a word here -- same as above. */
6805 else if (GET_CODE (index
) == SUBREG
6806 && REG_P (SUBREG_REG (index
))
6807 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6809 reg
= SUBREG_REG (index
);
6812 reason
= "index is not a register";
6816 if (GET_MODE (index
) != Pmode
)
6818 reason
= "index is not in Pmode";
6822 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6823 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6825 reason
= "index is not valid";
6830 /* Validate scale factor. */
6833 reason_rtx
= GEN_INT (scale
);
6836 reason
= "scale without index";
6840 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6842 reason
= "scale is not a valid multiplier";
6847 /* Validate displacement. */
6852 if (GET_CODE (disp
) == CONST
6853 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6854 switch (XINT (XEXP (disp
, 0), 1))
6856 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6857 used. While ABI specify also 32bit relocations, we don't produce
6858 them at all and use IP relative instead. */
6861 gcc_assert (flag_pic
);
6863 goto is_legitimate_pic
;
6864 reason
= "64bit address unspec";
6867 case UNSPEC_GOTPCREL
:
6868 gcc_assert (flag_pic
);
6869 goto is_legitimate_pic
;
6871 case UNSPEC_GOTTPOFF
:
6872 case UNSPEC_GOTNTPOFF
:
6873 case UNSPEC_INDNTPOFF
:
6879 reason
= "invalid address unspec";
6883 else if (SYMBOLIC_CONST (disp
)
6887 && MACHOPIC_INDIRECT
6888 && !machopic_operand_p (disp
)
6894 if (TARGET_64BIT
&& (index
|| base
))
6896 /* foo@dtpoff(%rX) is ok. */
6897 if (GET_CODE (disp
) != CONST
6898 || GET_CODE (XEXP (disp
, 0)) != PLUS
6899 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6900 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6901 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6902 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6904 reason
= "non-constant pic memory reference";
6908 else if (! legitimate_pic_address_disp_p (disp
))
6910 reason
= "displacement is an invalid pic construct";
6914 /* This code used to verify that a symbolic pic displacement
6915 includes the pic_offset_table_rtx register.
6917 While this is good idea, unfortunately these constructs may
6918 be created by "adds using lea" optimization for incorrect
6927 This code is nonsensical, but results in addressing
6928 GOT table with pic_offset_table_rtx base. We can't
6929 just refuse it easily, since it gets matched by
6930 "addsi3" pattern, that later gets split to lea in the
6931 case output register differs from input. While this
6932 can be handled by separate addsi pattern for this case
6933 that never results in lea, this seems to be easier and
6934 correct fix for crash to disable this test. */
6936 else if (GET_CODE (disp
) != LABEL_REF
6937 && !CONST_INT_P (disp
)
6938 && (GET_CODE (disp
) != CONST
6939 || !legitimate_constant_p (disp
))
6940 && (GET_CODE (disp
) != SYMBOL_REF
6941 || !legitimate_constant_p (disp
)))
6943 reason
= "displacement is not constant";
6946 else if (TARGET_64BIT
6947 && !x86_64_immediate_operand (disp
, VOIDmode
))
6949 reason
= "displacement is out of range";
6954 /* Everything looks valid. */
6955 if (TARGET_DEBUG_ADDR
)
6956 fprintf (stderr
, "Success.\n");
6960 if (TARGET_DEBUG_ADDR
)
6962 fprintf (stderr
, "Error: %s\n", reason
);
6963 debug_rtx (reason_rtx
);
6968 /* Return a unique alias set for the GOT. */
6970 static HOST_WIDE_INT
6971 ix86_GOT_alias_set (void)
6973 static HOST_WIDE_INT set
= -1;
6975 set
= new_alias_set ();
6979 /* Return a legitimate reference for ORIG (an address) using the
6980 register REG. If REG is 0, a new pseudo is generated.
6982 There are two types of references that must be handled:
6984 1. Global data references must load the address from the GOT, via
6985 the PIC reg. An insn is emitted to do this load, and the reg is
6988 2. Static data references, constant pool addresses, and code labels
6989 compute the address as an offset from the GOT, whose base is in
6990 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6991 differentiate them from global data objects. The returned
6992 address is the PIC reg + an unspec constant.
6994 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6995 reg also appears in the address. */
6998 legitimize_pic_address (rtx orig
, rtx reg
)
7005 if (TARGET_MACHO
&& !TARGET_64BIT
)
7008 reg
= gen_reg_rtx (Pmode
);
7009 /* Use the generic Mach-O PIC machinery. */
7010 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7014 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7016 else if (TARGET_64BIT
7017 && ix86_cmodel
!= CM_SMALL_PIC
7018 && local_symbolic_operand (addr
, Pmode
))
7021 /* This symbol may be referenced via a displacement from the PIC
7022 base address (@GOTOFF). */
7024 if (reload_in_progress
)
7025 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7026 if (GET_CODE (addr
) == CONST
)
7027 addr
= XEXP (addr
, 0);
7028 if (GET_CODE (addr
) == PLUS
)
7030 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7031 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7034 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7035 new = gen_rtx_CONST (Pmode
, new);
7037 tmpreg
= gen_reg_rtx (Pmode
);
7040 emit_move_insn (tmpreg
, new);
7044 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7045 tmpreg
, 1, OPTAB_DIRECT
);
7048 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7050 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
7052 /* This symbol may be referenced via a displacement from the PIC
7053 base address (@GOTOFF). */
7055 if (reload_in_progress
)
7056 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7057 if (GET_CODE (addr
) == CONST
)
7058 addr
= XEXP (addr
, 0);
7059 if (GET_CODE (addr
) == PLUS
)
7061 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7062 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7065 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7066 new = gen_rtx_CONST (Pmode
, new);
7067 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7071 emit_move_insn (reg
, new);
7075 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7079 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7080 new = gen_rtx_CONST (Pmode
, new);
7081 new = gen_const_mem (Pmode
, new);
7082 set_mem_alias_set (new, ix86_GOT_alias_set ());
7085 reg
= gen_reg_rtx (Pmode
);
7086 /* Use directly gen_movsi, otherwise the address is loaded
7087 into register for CSE. We don't want to CSE this addresses,
7088 instead we CSE addresses from the GOT table, so skip this. */
7089 emit_insn (gen_movsi (reg
, new));
7094 /* This symbol must be referenced via a load from the
7095 Global Offset Table (@GOT). */
7097 if (reload_in_progress
)
7098 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7099 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7100 new = gen_rtx_CONST (Pmode
, new);
7101 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7102 new = gen_const_mem (Pmode
, new);
7103 set_mem_alias_set (new, ix86_GOT_alias_set ());
7106 reg
= gen_reg_rtx (Pmode
);
7107 emit_move_insn (reg
, new);
7113 if (CONST_INT_P (addr
)
7114 && !x86_64_immediate_operand (addr
, VOIDmode
))
7118 emit_move_insn (reg
, addr
);
7122 new = force_reg (Pmode
, addr
);
7124 else if (GET_CODE (addr
) == CONST
)
7126 addr
= XEXP (addr
, 0);
7128 /* We must match stuff we generate before. Assume the only
7129 unspecs that can get here are ours. Not that we could do
7130 anything with them anyway.... */
7131 if (GET_CODE (addr
) == UNSPEC
7132 || (GET_CODE (addr
) == PLUS
7133 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7135 gcc_assert (GET_CODE (addr
) == PLUS
);
7137 if (GET_CODE (addr
) == PLUS
)
7139 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7141 /* Check first to see if this is a constant offset from a @GOTOFF
7142 symbol reference. */
7143 if (local_symbolic_operand (op0
, Pmode
)
7144 && CONST_INT_P (op1
))
7148 if (reload_in_progress
)
7149 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7150 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7152 new = gen_rtx_PLUS (Pmode
, new, op1
);
7153 new = gen_rtx_CONST (Pmode
, new);
7154 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7158 emit_move_insn (reg
, new);
7164 if (INTVAL (op1
) < -16*1024*1024
7165 || INTVAL (op1
) >= 16*1024*1024)
7167 if (!x86_64_immediate_operand (op1
, Pmode
))
7168 op1
= force_reg (Pmode
, op1
);
7169 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7175 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7176 new = legitimize_pic_address (XEXP (addr
, 1),
7177 base
== reg
? NULL_RTX
: reg
);
7179 if (CONST_INT_P (new))
7180 new = plus_constant (base
, INTVAL (new));
7183 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7185 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7186 new = XEXP (new, 1);
7188 new = gen_rtx_PLUS (Pmode
, base
, new);
7196 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7199 get_thread_pointer (int to_reg
)
7203 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7207 reg
= gen_reg_rtx (Pmode
);
7208 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7209 insn
= emit_insn (insn
);
7214 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7215 false if we expect this to be used for a memory address and true if
7216 we expect to load the address into a register. */
7219 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7221 rtx dest
, base
, off
, pic
, tp
;
7226 case TLS_MODEL_GLOBAL_DYNAMIC
:
7227 dest
= gen_reg_rtx (Pmode
);
7228 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7230 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7232 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7235 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7236 insns
= get_insns ();
7239 emit_libcall_block (insns
, dest
, rax
, x
);
7241 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7242 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7244 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7246 if (TARGET_GNU2_TLS
)
7248 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7250 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7254 case TLS_MODEL_LOCAL_DYNAMIC
:
7255 base
= gen_reg_rtx (Pmode
);
7256 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7258 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7260 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7263 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7264 insns
= get_insns ();
7267 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7268 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7269 emit_libcall_block (insns
, base
, rax
, note
);
7271 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7272 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7274 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7276 if (TARGET_GNU2_TLS
)
7278 rtx x
= ix86_tls_module_base ();
7280 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7281 gen_rtx_MINUS (Pmode
, x
, tp
));
7284 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7285 off
= gen_rtx_CONST (Pmode
, off
);
7287 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7289 if (TARGET_GNU2_TLS
)
7291 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7293 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7298 case TLS_MODEL_INITIAL_EXEC
:
7302 type
= UNSPEC_GOTNTPOFF
;
7306 if (reload_in_progress
)
7307 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7308 pic
= pic_offset_table_rtx
;
7309 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7311 else if (!TARGET_ANY_GNU_TLS
)
7313 pic
= gen_reg_rtx (Pmode
);
7314 emit_insn (gen_set_got (pic
));
7315 type
= UNSPEC_GOTTPOFF
;
7320 type
= UNSPEC_INDNTPOFF
;
7323 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7324 off
= gen_rtx_CONST (Pmode
, off
);
7326 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7327 off
= gen_const_mem (Pmode
, off
);
7328 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7330 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7332 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7333 off
= force_reg (Pmode
, off
);
7334 return gen_rtx_PLUS (Pmode
, base
, off
);
7338 base
= get_thread_pointer (true);
7339 dest
= gen_reg_rtx (Pmode
);
7340 emit_insn (gen_subsi3 (dest
, base
, off
));
7344 case TLS_MODEL_LOCAL_EXEC
:
7345 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7346 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7347 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7348 off
= gen_rtx_CONST (Pmode
, off
);
7350 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7352 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7353 return gen_rtx_PLUS (Pmode
, base
, off
);
7357 base
= get_thread_pointer (true);
7358 dest
= gen_reg_rtx (Pmode
);
7359 emit_insn (gen_subsi3 (dest
, base
, off
));
7370 /* Try machine-dependent ways of modifying an illegitimate address
7371 to be legitimate. If we find one, return the new, valid address.
7372 This macro is used in only one place: `memory_address' in explow.c.
7374 OLDX is the address as it was before break_out_memory_refs was called.
7375 In some cases it is useful to look at this to decide what needs to be done.
7377 MODE and WIN are passed so that this macro can use
7378 GO_IF_LEGITIMATE_ADDRESS.
7380 It is always safe for this macro to do nothing. It exists to recognize
7381 opportunities to optimize the output.
7383 For the 80386, we handle X+REG by loading X into a register R and
7384 using R+REG. R will go in a general reg and indexing will be used.
7385 However, if REG is a broken-out memory address or multiplication,
7386 nothing needs to be done because REG can certainly go in a general reg.
7388 When -fpic is used, special handling is needed for symbolic references.
7389 See comments by legitimize_pic_address in i386.c for details. */
7392 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7397 if (TARGET_DEBUG_ADDR
)
7399 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7400 GET_MODE_NAME (mode
));
7404 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7406 return legitimize_tls_address (x
, log
, false);
7407 if (GET_CODE (x
) == CONST
7408 && GET_CODE (XEXP (x
, 0)) == PLUS
7409 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7410 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7412 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7413 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7416 if (flag_pic
&& SYMBOLIC_CONST (x
))
7417 return legitimize_pic_address (x
, 0);
7419 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7420 if (GET_CODE (x
) == ASHIFT
7421 && CONST_INT_P (XEXP (x
, 1))
7422 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7425 log
= INTVAL (XEXP (x
, 1));
7426 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7427 GEN_INT (1 << log
));
7430 if (GET_CODE (x
) == PLUS
)
7432 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7434 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7435 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7436 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7439 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7440 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7441 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7442 GEN_INT (1 << log
));
7445 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7446 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7447 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7450 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7451 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7452 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7453 GEN_INT (1 << log
));
7456 /* Put multiply first if it isn't already. */
7457 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7459 rtx tmp
= XEXP (x
, 0);
7460 XEXP (x
, 0) = XEXP (x
, 1);
7465 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7466 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7467 created by virtual register instantiation, register elimination, and
7468 similar optimizations. */
7469 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7472 x
= gen_rtx_PLUS (Pmode
,
7473 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7474 XEXP (XEXP (x
, 1), 0)),
7475 XEXP (XEXP (x
, 1), 1));
7479 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7480 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7481 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7482 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7483 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7484 && CONSTANT_P (XEXP (x
, 1)))
7487 rtx other
= NULL_RTX
;
7489 if (CONST_INT_P (XEXP (x
, 1)))
7491 constant
= XEXP (x
, 1);
7492 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7494 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7496 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7497 other
= XEXP (x
, 1);
7505 x
= gen_rtx_PLUS (Pmode
,
7506 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7507 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7508 plus_constant (other
, INTVAL (constant
)));
7512 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7515 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7518 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7521 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7524 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7528 && REG_P (XEXP (x
, 1))
7529 && REG_P (XEXP (x
, 0)))
7532 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7535 x
= legitimize_pic_address (x
, 0);
7538 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7541 if (REG_P (XEXP (x
, 0)))
7543 rtx temp
= gen_reg_rtx (Pmode
);
7544 rtx val
= force_operand (XEXP (x
, 1), temp
);
7546 emit_move_insn (temp
, val
);
7552 else if (REG_P (XEXP (x
, 1)))
7554 rtx temp
= gen_reg_rtx (Pmode
);
7555 rtx val
= force_operand (XEXP (x
, 0), temp
);
7557 emit_move_insn (temp
, val
);
7567 /* Print an integer constant expression in assembler syntax. Addition
7568 and subtraction are the only arithmetic that may appear in these
7569 expressions. FILE is the stdio stream to write to, X is the rtx, and
7570 CODE is the operand print code from the output string. */
7573 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7577 switch (GET_CODE (x
))
7580 gcc_assert (flag_pic
);
7585 if (! TARGET_MACHO
|| TARGET_64BIT
)
7586 output_addr_const (file
, x
);
7589 const char *name
= XSTR (x
, 0);
7591 /* Mark the decl as referenced so that cgraph will output the function. */
7592 if (SYMBOL_REF_DECL (x
))
7593 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7596 if (MACHOPIC_INDIRECT
7597 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7598 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7600 assemble_name (file
, name
);
7602 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7603 fputs ("@PLT", file
);
7610 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7611 assemble_name (asm_out_file
, buf
);
7615 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7619 /* This used to output parentheses around the expression,
7620 but that does not work on the 386 (either ATT or BSD assembler). */
7621 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7625 if (GET_MODE (x
) == VOIDmode
)
7627 /* We can use %d if the number is <32 bits and positive. */
7628 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7629 fprintf (file
, "0x%lx%08lx",
7630 (unsigned long) CONST_DOUBLE_HIGH (x
),
7631 (unsigned long) CONST_DOUBLE_LOW (x
));
7633 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7636 /* We can't handle floating point constants;
7637 PRINT_OPERAND must handle them. */
7638 output_operand_lossage ("floating constant misused");
7642 /* Some assemblers need integer constants to appear first. */
7643 if (CONST_INT_P (XEXP (x
, 0)))
7645 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7647 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7651 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7652 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7654 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7660 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7661 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7663 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7665 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7669 gcc_assert (XVECLEN (x
, 0) == 1);
7670 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7671 switch (XINT (x
, 1))
7674 fputs ("@GOT", file
);
7677 fputs ("@GOTOFF", file
);
7679 case UNSPEC_GOTPCREL
:
7680 fputs ("@GOTPCREL(%rip)", file
);
7682 case UNSPEC_GOTTPOFF
:
7683 /* FIXME: This might be @TPOFF in Sun ld too. */
7684 fputs ("@GOTTPOFF", file
);
7687 fputs ("@TPOFF", file
);
7691 fputs ("@TPOFF", file
);
7693 fputs ("@NTPOFF", file
);
7696 fputs ("@DTPOFF", file
);
7698 case UNSPEC_GOTNTPOFF
:
7700 fputs ("@GOTTPOFF(%rip)", file
);
7702 fputs ("@GOTNTPOFF", file
);
7704 case UNSPEC_INDNTPOFF
:
7705 fputs ("@INDNTPOFF", file
);
7708 output_operand_lossage ("invalid UNSPEC as operand");
7714 output_operand_lossage ("invalid expression as operand");
7718 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7719 We need to emit DTP-relative relocations. */
7722 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7724 fputs (ASM_LONG
, file
);
7725 output_addr_const (file
, x
);
7726 fputs ("@DTPOFF", file
);
7732 fputs (", 0", file
);
7739 /* In the name of slightly smaller debug output, and to cater to
7740 general assembler lossage, recognize PIC+GOTOFF and turn it back
7741 into a direct symbol reference.
7743 On Darwin, this is necessary to avoid a crash, because Darwin
7744 has a different PIC label for each routine but the DWARF debugging
7745 information is not associated with any particular routine, so it's
7746 necessary to remove references to the PIC label from RTL stored by
7747 the DWARF output code. */
7750 ix86_delegitimize_address (rtx orig_x
)
7753 /* reg_addend is NULL or a multiple of some register. */
7754 rtx reg_addend
= NULL_RTX
;
7755 /* const_addend is NULL or a const_int. */
7756 rtx const_addend
= NULL_RTX
;
7757 /* This is the result, or NULL. */
7758 rtx result
= NULL_RTX
;
7765 if (GET_CODE (x
) != CONST
7766 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7767 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7770 return XVECEXP (XEXP (x
, 0), 0, 0);
7773 if (GET_CODE (x
) != PLUS
7774 || GET_CODE (XEXP (x
, 1)) != CONST
)
7777 if (REG_P (XEXP (x
, 0))
7778 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7779 /* %ebx + GOT/GOTOFF */
7781 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7783 /* %ebx + %reg * scale + GOT/GOTOFF */
7784 reg_addend
= XEXP (x
, 0);
7785 if (REG_P (XEXP (reg_addend
, 0))
7786 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7787 reg_addend
= XEXP (reg_addend
, 1);
7788 else if (REG_P (XEXP (reg_addend
, 1))
7789 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7790 reg_addend
= XEXP (reg_addend
, 0);
7793 if (!REG_P (reg_addend
)
7794 && GET_CODE (reg_addend
) != MULT
7795 && GET_CODE (reg_addend
) != ASHIFT
)
7801 x
= XEXP (XEXP (x
, 1), 0);
7802 if (GET_CODE (x
) == PLUS
7803 && CONST_INT_P (XEXP (x
, 1)))
7805 const_addend
= XEXP (x
, 1);
7809 if (GET_CODE (x
) == UNSPEC
7810 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7811 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7812 result
= XVECEXP (x
, 0, 0);
7814 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7816 result
= XEXP (x
, 0);
7822 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7824 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7829 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7834 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7836 enum rtx_code second_code
, bypass_code
;
7837 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7838 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7839 code
= ix86_fp_compare_code_to_integer (code
);
7843 code
= reverse_condition (code
);
7854 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7858 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7859 Those same assemblers have the same but opposite lossage on cmov. */
7860 gcc_assert (mode
== CCmode
);
7861 suffix
= fp
? "nbe" : "a";
7881 gcc_assert (mode
== CCmode
);
7903 gcc_assert (mode
== CCmode
);
7904 suffix
= fp
? "nb" : "ae";
7907 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7911 gcc_assert (mode
== CCmode
);
7915 suffix
= fp
? "u" : "p";
7918 suffix
= fp
? "nu" : "np";
7923 fputs (suffix
, file
);
7926 /* Print the name of register X to FILE based on its machine mode and number.
7927 If CODE is 'w', pretend the mode is HImode.
7928 If CODE is 'b', pretend the mode is QImode.
7929 If CODE is 'k', pretend the mode is SImode.
7930 If CODE is 'q', pretend the mode is DImode.
7931 If CODE is 'h', pretend the reg is the 'high' byte register.
7932 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7935 print_reg (rtx x
, int code
, FILE *file
)
7937 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7938 && REGNO (x
) != FRAME_POINTER_REGNUM
7939 && REGNO (x
) != FLAGS_REG
7940 && REGNO (x
) != FPSR_REG
7941 && REGNO (x
) != FPCR_REG
);
7943 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7946 if (code
== 'w' || MMX_REG_P (x
))
7948 else if (code
== 'b')
7950 else if (code
== 'k')
7952 else if (code
== 'q')
7954 else if (code
== 'y')
7956 else if (code
== 'h')
7959 code
= GET_MODE_SIZE (GET_MODE (x
));
7961 /* Irritatingly, AMD extended registers use different naming convention
7962 from the normal registers. */
7963 if (REX_INT_REG_P (x
))
7965 gcc_assert (TARGET_64BIT
);
7969 error ("extended registers have no high halves");
7972 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7975 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7978 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7981 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7984 error ("unsupported operand size for extended register");
7992 if (STACK_TOP_P (x
))
7994 fputs ("st(0)", file
);
8001 if (! ANY_FP_REG_P (x
))
8002 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8007 fputs (hi_reg_name
[REGNO (x
)], file
);
8010 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8012 fputs (qi_reg_name
[REGNO (x
)], file
);
8015 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8017 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8024 /* Locate some local-dynamic symbol still in use by this function
8025 so that we can print its name in some tls_local_dynamic_base
8029 get_some_local_dynamic_name (void)
8033 if (cfun
->machine
->some_ld_name
)
8034 return cfun
->machine
->some_ld_name
;
8036 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8038 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8039 return cfun
->machine
->some_ld_name
;
8045 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8049 if (GET_CODE (x
) == SYMBOL_REF
8050 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8052 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8060 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8061 C -- print opcode suffix for set/cmov insn.
8062 c -- like C, but print reversed condition
8063 F,f -- likewise, but for floating-point.
8064 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8066 R -- print the prefix for register names.
8067 z -- print the opcode suffix for the size of the current operand.
8068 * -- print a star (in certain assembler syntax)
8069 A -- print an absolute memory reference.
8070 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8071 s -- print a shift double count, followed by the assemblers argument
8073 b -- print the QImode name of the register for the indicated operand.
8074 %b0 would print %al if operands[0] is reg 0.
8075 w -- likewise, print the HImode name of the register.
8076 k -- likewise, print the SImode name of the register.
8077 q -- likewise, print the DImode name of the register.
8078 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8079 y -- print "st(0)" instead of "st" as a register.
8080 D -- print condition for SSE cmp instruction.
8081 P -- if PIC, print an @PLT suffix.
8082 X -- don't print any sort of PIC '@' suffix for a symbol.
8083 & -- print some in-use local-dynamic symbol name.
8084 H -- print a memory address offset by 8; used for sse high-parts
8088 print_operand (FILE *file
, rtx x
, int code
)
8095 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8100 assemble_name (file
, get_some_local_dynamic_name ());
8104 switch (ASSEMBLER_DIALECT
)
8111 /* Intel syntax. For absolute addresses, registers should not
8112 be surrounded by braces. */
8116 PRINT_OPERAND (file
, x
, 0);
8126 PRINT_OPERAND (file
, x
, 0);
8131 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8136 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8141 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8146 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8151 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8156 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8161 /* 387 opcodes don't get size suffixes if the operands are
8163 if (STACK_REG_P (x
))
8166 /* Likewise if using Intel opcodes. */
8167 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8170 /* This is the size of op from size of operand. */
8171 switch (GET_MODE_SIZE (GET_MODE (x
)))
8178 #ifdef HAVE_GAS_FILDS_FISTS
8184 if (GET_MODE (x
) == SFmode
)
8199 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8201 #ifdef GAS_MNEMONICS
8227 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8229 PRINT_OPERAND (file
, x
, 0);
8235 /* Little bit of braindamage here. The SSE compare instructions
8236 does use completely different names for the comparisons that the
8237 fp conditional moves. */
8238 switch (GET_CODE (x
))
8253 fputs ("unord", file
);
8257 fputs ("neq", file
);
8261 fputs ("nlt", file
);
8265 fputs ("nle", file
);
8268 fputs ("ord", file
);
8275 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8276 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8278 switch (GET_MODE (x
))
8280 case HImode
: putc ('w', file
); break;
8282 case SFmode
: putc ('l', file
); break;
8284 case DFmode
: putc ('q', file
); break;
8285 default: gcc_unreachable ();
8292 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8295 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8296 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8299 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8302 /* Like above, but reverse condition */
8304 /* Check to see if argument to %c is really a constant
8305 and not a condition code which needs to be reversed. */
8306 if (!COMPARISON_P (x
))
8308 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8311 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8314 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8315 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8318 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8322 /* It doesn't actually matter what mode we use here, as we're
8323 only going to use this for printing. */
8324 x
= adjust_address_nv (x
, DImode
, 8);
8331 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8334 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8337 int pred_val
= INTVAL (XEXP (x
, 0));
8339 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8340 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8342 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8343 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8345 /* Emit hints only in the case default branch prediction
8346 heuristics would fail. */
8347 if (taken
!= cputaken
)
8349 /* We use 3e (DS) prefix for taken branches and
8350 2e (CS) prefix for not taken branches. */
8352 fputs ("ds ; ", file
);
8354 fputs ("cs ; ", file
);
8361 output_operand_lossage ("invalid operand code '%c'", code
);
8366 print_reg (x
, code
, file
);
8370 /* No `byte ptr' prefix for call instructions. */
8371 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8374 switch (GET_MODE_SIZE (GET_MODE (x
)))
8376 case 1: size
= "BYTE"; break;
8377 case 2: size
= "WORD"; break;
8378 case 4: size
= "DWORD"; break;
8379 case 8: size
= "QWORD"; break;
8380 case 12: size
= "XWORD"; break;
8381 case 16: size
= "XMMWORD"; break;
8386 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8389 else if (code
== 'w')
8391 else if (code
== 'k')
8395 fputs (" PTR ", file
);
8399 /* Avoid (%rip) for call operands. */
8400 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8401 && !CONST_INT_P (x
))
8402 output_addr_const (file
, x
);
8403 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8404 output_operand_lossage ("invalid constraints for operand");
8409 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8414 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8415 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8417 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8419 fprintf (file
, "0x%08lx", l
);
8422 /* These float cases don't actually occur as immediate operands. */
8423 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8427 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8428 fprintf (file
, "%s", dstr
);
8431 else if (GET_CODE (x
) == CONST_DOUBLE
8432 && GET_MODE (x
) == XFmode
)
8436 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8437 fprintf (file
, "%s", dstr
);
8442 /* We have patterns that allow zero sets of memory, for instance.
8443 In 64-bit mode, we should probably support all 8-byte vectors,
8444 since we can in fact encode that into an immediate. */
8445 if (GET_CODE (x
) == CONST_VECTOR
)
8447 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8453 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8455 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8458 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8459 || GET_CODE (x
) == LABEL_REF
)
8461 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8464 fputs ("OFFSET FLAT:", file
);
8467 if (CONST_INT_P (x
))
8468 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8470 output_pic_addr_const (file
, x
, code
);
8472 output_addr_const (file
, x
);
8476 /* Print a memory operand whose address is ADDR. */
8479 print_operand_address (FILE *file
, rtx addr
)
8481 struct ix86_address parts
;
8482 rtx base
, index
, disp
;
8484 int ok
= ix86_decompose_address (addr
, &parts
);
8489 index
= parts
.index
;
8491 scale
= parts
.scale
;
8499 if (USER_LABEL_PREFIX
[0] == 0)
8501 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8507 if (!base
&& !index
)
8509 /* Displacement only requires special attention. */
8511 if (CONST_INT_P (disp
))
8513 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8515 if (USER_LABEL_PREFIX
[0] == 0)
8517 fputs ("ds:", file
);
8519 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8522 output_pic_addr_const (file
, disp
, 0);
8524 output_addr_const (file
, disp
);
8526 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8529 if (GET_CODE (disp
) == CONST
8530 && GET_CODE (XEXP (disp
, 0)) == PLUS
8531 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8532 disp
= XEXP (XEXP (disp
, 0), 0);
8533 if (GET_CODE (disp
) == LABEL_REF
8534 || (GET_CODE (disp
) == SYMBOL_REF
8535 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8536 fputs ("(%rip)", file
);
8541 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8546 output_pic_addr_const (file
, disp
, 0);
8547 else if (GET_CODE (disp
) == LABEL_REF
)
8548 output_asm_label (disp
);
8550 output_addr_const (file
, disp
);
8555 print_reg (base
, 0, file
);
8559 print_reg (index
, 0, file
);
8561 fprintf (file
, ",%d", scale
);
8567 rtx offset
= NULL_RTX
;
8571 /* Pull out the offset of a symbol; print any symbol itself. */
8572 if (GET_CODE (disp
) == CONST
8573 && GET_CODE (XEXP (disp
, 0)) == PLUS
8574 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8576 offset
= XEXP (XEXP (disp
, 0), 1);
8577 disp
= gen_rtx_CONST (VOIDmode
,
8578 XEXP (XEXP (disp
, 0), 0));
8582 output_pic_addr_const (file
, disp
, 0);
8583 else if (GET_CODE (disp
) == LABEL_REF
)
8584 output_asm_label (disp
);
8585 else if (CONST_INT_P (disp
))
8588 output_addr_const (file
, disp
);
8594 print_reg (base
, 0, file
);
8597 if (INTVAL (offset
) >= 0)
8599 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8603 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8610 print_reg (index
, 0, file
);
8612 fprintf (file
, "*%d", scale
);
8620 output_addr_const_extra (FILE *file
, rtx x
)
8624 if (GET_CODE (x
) != UNSPEC
)
8627 op
= XVECEXP (x
, 0, 0);
8628 switch (XINT (x
, 1))
8630 case UNSPEC_GOTTPOFF
:
8631 output_addr_const (file
, op
);
8632 /* FIXME: This might be @TPOFF in Sun ld. */
8633 fputs ("@GOTTPOFF", file
);
8636 output_addr_const (file
, op
);
8637 fputs ("@TPOFF", file
);
8640 output_addr_const (file
, op
);
8642 fputs ("@TPOFF", file
);
8644 fputs ("@NTPOFF", file
);
8647 output_addr_const (file
, op
);
8648 fputs ("@DTPOFF", file
);
8650 case UNSPEC_GOTNTPOFF
:
8651 output_addr_const (file
, op
);
8653 fputs ("@GOTTPOFF(%rip)", file
);
8655 fputs ("@GOTNTPOFF", file
);
8657 case UNSPEC_INDNTPOFF
:
8658 output_addr_const (file
, op
);
8659 fputs ("@INDNTPOFF", file
);
8669 /* Split one or more DImode RTL references into pairs of SImode
8670 references. The RTL can be REG, offsettable MEM, integer constant, or
8671 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8672 split and "num" is its length. lo_half and hi_half are output arrays
8673 that parallel "operands". */
8676 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8680 rtx op
= operands
[num
];
8682 /* simplify_subreg refuse to split volatile memory addresses,
8683 but we still have to handle it. */
8686 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8687 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8691 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8692 GET_MODE (op
) == VOIDmode
8693 ? DImode
: GET_MODE (op
), 0);
8694 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8695 GET_MODE (op
) == VOIDmode
8696 ? DImode
: GET_MODE (op
), 4);
8700 /* Split one or more TImode RTL references into pairs of DImode
8701 references. The RTL can be REG, offsettable MEM, integer constant, or
8702 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8703 split and "num" is its length. lo_half and hi_half are output arrays
8704 that parallel "operands". */
8707 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8711 rtx op
= operands
[num
];
8713 /* simplify_subreg refuse to split volatile memory addresses, but we
8714 still have to handle it. */
8717 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8718 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8722 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8723 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8728 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8729 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8730 is the expression of the binary operation. The output may either be
8731 emitted here, or returned to the caller, like all output_* functions.
8733 There is no guarantee that the operands are the same mode, as they
8734 might be within FLOAT or FLOAT_EXTEND expressions. */
8736 #ifndef SYSV386_COMPAT
8737 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8738 wants to fix the assemblers because that causes incompatibility
8739 with gcc. No-one wants to fix gcc because that causes
8740 incompatibility with assemblers... You can use the option of
8741 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8742 #define SYSV386_COMPAT 1
8746 output_387_binary_op (rtx insn
, rtx
*operands
)
8748 static char buf
[30];
8751 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8753 #ifdef ENABLE_CHECKING
8754 /* Even if we do not want to check the inputs, this documents input
8755 constraints. Which helps in understanding the following code. */
8756 if (STACK_REG_P (operands
[0])
8757 && ((REG_P (operands
[1])
8758 && REGNO (operands
[0]) == REGNO (operands
[1])
8759 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8760 || (REG_P (operands
[2])
8761 && REGNO (operands
[0]) == REGNO (operands
[2])
8762 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8763 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8766 gcc_assert (is_sse
);
8769 switch (GET_CODE (operands
[3]))
8772 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8773 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8781 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8782 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8790 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8791 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8799 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8800 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8814 if (GET_MODE (operands
[0]) == SFmode
)
8815 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8817 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8822 switch (GET_CODE (operands
[3]))
8826 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8828 rtx temp
= operands
[2];
8829 operands
[2] = operands
[1];
8833 /* know operands[0] == operands[1]. */
8835 if (MEM_P (operands
[2]))
8841 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8843 if (STACK_TOP_P (operands
[0]))
8844 /* How is it that we are storing to a dead operand[2]?
8845 Well, presumably operands[1] is dead too. We can't
8846 store the result to st(0) as st(0) gets popped on this
8847 instruction. Instead store to operands[2] (which I
8848 think has to be st(1)). st(1) will be popped later.
8849 gcc <= 2.8.1 didn't have this check and generated
8850 assembly code that the Unixware assembler rejected. */
8851 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8853 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8857 if (STACK_TOP_P (operands
[0]))
8858 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8860 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8865 if (MEM_P (operands
[1]))
8871 if (MEM_P (operands
[2]))
8877 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8880 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8881 derived assemblers, confusingly reverse the direction of
8882 the operation for fsub{r} and fdiv{r} when the
8883 destination register is not st(0). The Intel assembler
8884 doesn't have this brain damage. Read !SYSV386_COMPAT to
8885 figure out what the hardware really does. */
8886 if (STACK_TOP_P (operands
[0]))
8887 p
= "{p\t%0, %2|rp\t%2, %0}";
8889 p
= "{rp\t%2, %0|p\t%0, %2}";
8891 if (STACK_TOP_P (operands
[0]))
8892 /* As above for fmul/fadd, we can't store to st(0). */
8893 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8895 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8900 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8903 if (STACK_TOP_P (operands
[0]))
8904 p
= "{rp\t%0, %1|p\t%1, %0}";
8906 p
= "{p\t%1, %0|rp\t%0, %1}";
8908 if (STACK_TOP_P (operands
[0]))
8909 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8911 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8916 if (STACK_TOP_P (operands
[0]))
8918 if (STACK_TOP_P (operands
[1]))
8919 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8921 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8924 else if (STACK_TOP_P (operands
[1]))
8927 p
= "{\t%1, %0|r\t%0, %1}";
8929 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8935 p
= "{r\t%2, %0|\t%0, %2}";
8937 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8950 /* Return needed mode for entity in optimize_mode_switching pass. */
8953 ix86_mode_needed (int entity
, rtx insn
)
8955 enum attr_i387_cw mode
;
8957 /* The mode UNINITIALIZED is used to store control word after a
8958 function call or ASM pattern. The mode ANY specify that function
8959 has no requirements on the control word and make no changes in the
8960 bits we are interested in. */
8963 || (NONJUMP_INSN_P (insn
)
8964 && (asm_noperands (PATTERN (insn
)) >= 0
8965 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8966 return I387_CW_UNINITIALIZED
;
8968 if (recog_memoized (insn
) < 0)
8971 mode
= get_attr_i387_cw (insn
);
8976 if (mode
== I387_CW_TRUNC
)
8981 if (mode
== I387_CW_FLOOR
)
8986 if (mode
== I387_CW_CEIL
)
8991 if (mode
== I387_CW_MASK_PM
)
9002 /* Output code to initialize control word copies used by trunc?f?i and
9003 rounding patterns. CURRENT_MODE is set to current control word,
9004 while NEW_MODE is set to new control word. */
9007 emit_i387_cw_initialization (int mode
)
9009 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9014 rtx reg
= gen_reg_rtx (HImode
);
9016 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9017 emit_move_insn (reg
, copy_rtx (stored_mode
));
9019 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9024 /* round toward zero (truncate) */
9025 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9026 slot
= SLOT_CW_TRUNC
;
9030 /* round down toward -oo */
9031 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9032 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9033 slot
= SLOT_CW_FLOOR
;
9037 /* round up toward +oo */
9038 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9039 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9040 slot
= SLOT_CW_CEIL
;
9043 case I387_CW_MASK_PM
:
9044 /* mask precision exception for nearbyint() */
9045 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9046 slot
= SLOT_CW_MASK_PM
;
9058 /* round toward zero (truncate) */
9059 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9060 slot
= SLOT_CW_TRUNC
;
9064 /* round down toward -oo */
9065 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9066 slot
= SLOT_CW_FLOOR
;
9070 /* round up toward +oo */
9071 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9072 slot
= SLOT_CW_CEIL
;
9075 case I387_CW_MASK_PM
:
9076 /* mask precision exception for nearbyint() */
9077 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9078 slot
= SLOT_CW_MASK_PM
;
9086 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9088 new_mode
= assign_386_stack_local (HImode
, slot
);
9089 emit_move_insn (new_mode
, reg
);
9092 /* Output code for INSN to convert a float to a signed int. OPERANDS
9093 are the insn operands. The output may be [HSD]Imode and the input
9094 operand may be [SDX]Fmode. */
9097 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9099 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9100 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9101 int round_mode
= get_attr_i387_cw (insn
);
9103 /* Jump through a hoop or two for DImode, since the hardware has no
9104 non-popping instruction. We used to do this a different way, but
9105 that was somewhat fragile and broke with post-reload splitters. */
9106 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9107 output_asm_insn ("fld\t%y1", operands
);
9109 gcc_assert (STACK_TOP_P (operands
[1]));
9110 gcc_assert (MEM_P (operands
[0]));
9113 output_asm_insn ("fisttp%z0\t%0", operands
);
9116 if (round_mode
!= I387_CW_ANY
)
9117 output_asm_insn ("fldcw\t%3", operands
);
9118 if (stack_top_dies
|| dimode_p
)
9119 output_asm_insn ("fistp%z0\t%0", operands
);
9121 output_asm_insn ("fist%z0\t%0", operands
);
9122 if (round_mode
!= I387_CW_ANY
)
9123 output_asm_insn ("fldcw\t%2", operands
);
9129 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9130 have the values zero or one, indicates the ffreep insn's operand
9131 from the OPERANDS array. */
9134 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9136 if (TARGET_USE_FFREEP
)
9137 #if HAVE_AS_IX86_FFREEP
9138 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9141 static char retval
[] = ".word\t0xc_df";
9142 int regno
= REGNO (operands
[opno
]);
9144 gcc_assert (FP_REGNO_P (regno
));
9146 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9151 return opno
? "fstp\t%y1" : "fstp\t%y0";
9155 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9156 should be used. UNORDERED_P is true when fucom should be used. */
9159 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9162 rtx cmp_op0
, cmp_op1
;
9163 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9167 cmp_op0
= operands
[0];
9168 cmp_op1
= operands
[1];
9172 cmp_op0
= operands
[1];
9173 cmp_op1
= operands
[2];
9178 if (GET_MODE (operands
[0]) == SFmode
)
9180 return "ucomiss\t{%1, %0|%0, %1}";
9182 return "comiss\t{%1, %0|%0, %1}";
9185 return "ucomisd\t{%1, %0|%0, %1}";
9187 return "comisd\t{%1, %0|%0, %1}";
9190 gcc_assert (STACK_TOP_P (cmp_op0
));
9192 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9194 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9198 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9199 return output_387_ffreep (operands
, 1);
9202 return "ftst\n\tfnstsw\t%0";
9205 if (STACK_REG_P (cmp_op1
)
9207 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9208 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9210 /* If both the top of the 387 stack dies, and the other operand
9211 is also a stack register that dies, then this must be a
9212 `fcompp' float compare */
9216 /* There is no double popping fcomi variant. Fortunately,
9217 eflags is immune from the fstp's cc clobbering. */
9219 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9221 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9222 return output_387_ffreep (operands
, 0);
9227 return "fucompp\n\tfnstsw\t%0";
9229 return "fcompp\n\tfnstsw\t%0";
9234 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9236 static const char * const alt
[16] =
9238 "fcom%z2\t%y2\n\tfnstsw\t%0",
9239 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9240 "fucom%z2\t%y2\n\tfnstsw\t%0",
9241 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9243 "ficom%z2\t%y2\n\tfnstsw\t%0",
9244 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9248 "fcomi\t{%y1, %0|%0, %y1}",
9249 "fcomip\t{%y1, %0|%0, %y1}",
9250 "fucomi\t{%y1, %0|%0, %y1}",
9251 "fucomip\t{%y1, %0|%0, %y1}",
9262 mask
= eflags_p
<< 3;
9263 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9264 mask
|= unordered_p
<< 1;
9265 mask
|= stack_top_dies
;
9267 gcc_assert (mask
< 16);
9276 ix86_output_addr_vec_elt (FILE *file
, int value
)
9278 const char *directive
= ASM_LONG
;
9282 directive
= ASM_QUAD
;
9284 gcc_assert (!TARGET_64BIT
);
9287 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9291 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9294 fprintf (file
, "%s%s%d-%s%d\n",
9295 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9296 else if (HAVE_AS_GOTOFF_IN_DATA
)
9297 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9299 else if (TARGET_MACHO
)
9301 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9302 machopic_output_function_base_name (file
);
9303 fprintf(file
, "\n");
9307 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9308 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9311 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9315 ix86_expand_clear (rtx dest
)
9319 /* We play register width games, which are only valid after reload. */
9320 gcc_assert (reload_completed
);
9322 /* Avoid HImode and its attendant prefix byte. */
9323 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9324 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9326 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9328 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9329 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9331 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9332 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9338 /* X is an unchanging MEM. If it is a constant pool reference, return
9339 the constant pool rtx, else NULL. */
9342 maybe_get_pool_constant (rtx x
)
9344 x
= ix86_delegitimize_address (XEXP (x
, 0));
9346 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9347 return get_pool_constant (x
);
9353 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9355 int strict
= (reload_in_progress
|| reload_completed
);
9357 enum tls_model model
;
9362 if (GET_CODE (op1
) == SYMBOL_REF
)
9364 model
= SYMBOL_REF_TLS_MODEL (op1
);
9367 op1
= legitimize_tls_address (op1
, model
, true);
9368 op1
= force_operand (op1
, op0
);
9373 else if (GET_CODE (op1
) == CONST
9374 && GET_CODE (XEXP (op1
, 0)) == PLUS
9375 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9377 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9380 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9381 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9382 op1
= force_operand (op1
, NULL
);
9383 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9384 op0
, 1, OPTAB_DIRECT
);
9390 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9392 if (TARGET_MACHO
&& !TARGET_64BIT
)
9397 rtx temp
= ((reload_in_progress
9398 || ((op0
&& REG_P (op0
))
9400 ? op0
: gen_reg_rtx (Pmode
));
9401 op1
= machopic_indirect_data_reference (op1
, temp
);
9402 op1
= machopic_legitimize_pic_address (op1
, mode
,
9403 temp
== op1
? 0 : temp
);
9405 else if (MACHOPIC_INDIRECT
)
9406 op1
= machopic_indirect_data_reference (op1
, 0);
9414 op1
= force_reg (Pmode
, op1
);
9416 op1
= legitimize_address (op1
, op1
, Pmode
);
9422 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9423 || !push_operand (op0
, mode
))
9425 op1
= force_reg (mode
, op1
);
9427 if (push_operand (op0
, mode
)
9428 && ! general_no_elim_operand (op1
, mode
))
9429 op1
= copy_to_mode_reg (mode
, op1
);
9431 /* Force large constants in 64bit compilation into register
9432 to get them CSEed. */
9433 if (TARGET_64BIT
&& mode
== DImode
9434 && immediate_operand (op1
, mode
)
9435 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9436 && !register_operand (op0
, mode
)
9437 && optimize
&& !reload_completed
&& !reload_in_progress
)
9438 op1
= copy_to_mode_reg (mode
, op1
);
9440 if (FLOAT_MODE_P (mode
))
9442 /* If we are loading a floating point constant to a register,
9443 force the value to memory now, since we'll get better code
9444 out the back end. */
9448 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9450 op1
= validize_mem (force_const_mem (mode
, op1
));
9451 if (!register_operand (op0
, mode
))
9453 rtx temp
= gen_reg_rtx (mode
);
9454 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9455 emit_move_insn (op0
, temp
);
9462 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9466 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9468 rtx op0
= operands
[0], op1
= operands
[1];
9470 /* Force constants other than zero into memory. We do not know how
9471 the instructions used to build constants modify the upper 64 bits
9472 of the register, once we have that information we may be able
9473 to handle some of them more efficiently. */
9474 if ((reload_in_progress
| reload_completed
) == 0
9475 && register_operand (op0
, mode
)
9477 && standard_sse_constant_p (op1
) <= 0)
9478 op1
= validize_mem (force_const_mem (mode
, op1
));
9480 /* Make operand1 a register if it isn't already. */
9482 && !register_operand (op0
, mode
)
9483 && !register_operand (op1
, mode
))
9485 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9489 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9492 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9493 straight to ix86_expand_vector_move. */
9496 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9505 /* If we're optimizing for size, movups is the smallest. */
9508 op0
= gen_lowpart (V4SFmode
, op0
);
9509 op1
= gen_lowpart (V4SFmode
, op1
);
9510 emit_insn (gen_sse_movups (op0
, op1
));
9514 /* ??? If we have typed data, then it would appear that using
9515 movdqu is the only way to get unaligned data loaded with
9517 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9519 op0
= gen_lowpart (V16QImode
, op0
);
9520 op1
= gen_lowpart (V16QImode
, op1
);
9521 emit_insn (gen_sse2_movdqu (op0
, op1
));
9525 if (TARGET_SSE2
&& mode
== V2DFmode
)
9529 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9531 op0
= gen_lowpart (V2DFmode
, op0
);
9532 op1
= gen_lowpart (V2DFmode
, op1
);
9533 emit_insn (gen_sse2_movupd (op0
, op1
));
9537 /* When SSE registers are split into halves, we can avoid
9538 writing to the top half twice. */
9539 if (TARGET_SSE_SPLIT_REGS
)
9541 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9546 /* ??? Not sure about the best option for the Intel chips.
9547 The following would seem to satisfy; the register is
9548 entirely cleared, breaking the dependency chain. We
9549 then store to the upper half, with a dependency depth
9550 of one. A rumor has it that Intel recommends two movsd
9551 followed by an unpacklpd, but this is unconfirmed. And
9552 given that the dependency depth of the unpacklpd would
9553 still be one, I'm not sure why this would be better. */
9554 zero
= CONST0_RTX (V2DFmode
);
9557 m
= adjust_address (op1
, DFmode
, 0);
9558 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9559 m
= adjust_address (op1
, DFmode
, 8);
9560 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9564 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9566 op0
= gen_lowpart (V4SFmode
, op0
);
9567 op1
= gen_lowpart (V4SFmode
, op1
);
9568 emit_insn (gen_sse_movups (op0
, op1
));
9572 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9573 emit_move_insn (op0
, CONST0_RTX (mode
));
9575 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9577 if (mode
!= V4SFmode
)
9578 op0
= gen_lowpart (V4SFmode
, op0
);
9579 m
= adjust_address (op1
, V2SFmode
, 0);
9580 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9581 m
= adjust_address (op1
, V2SFmode
, 8);
9582 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9585 else if (MEM_P (op0
))
9587 /* If we're optimizing for size, movups is the smallest. */
9590 op0
= gen_lowpart (V4SFmode
, op0
);
9591 op1
= gen_lowpart (V4SFmode
, op1
);
9592 emit_insn (gen_sse_movups (op0
, op1
));
9596 /* ??? Similar to above, only less clear because of quote
9597 typeless stores unquote. */
9598 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9599 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9601 op0
= gen_lowpart (V16QImode
, op0
);
9602 op1
= gen_lowpart (V16QImode
, op1
);
9603 emit_insn (gen_sse2_movdqu (op0
, op1
));
9607 if (TARGET_SSE2
&& mode
== V2DFmode
)
9609 m
= adjust_address (op0
, DFmode
, 0);
9610 emit_insn (gen_sse2_storelpd (m
, op1
));
9611 m
= adjust_address (op0
, DFmode
, 8);
9612 emit_insn (gen_sse2_storehpd (m
, op1
));
9616 if (mode
!= V4SFmode
)
9617 op1
= gen_lowpart (V4SFmode
, op1
);
9618 m
= adjust_address (op0
, V2SFmode
, 0);
9619 emit_insn (gen_sse_storelps (m
, op1
));
9620 m
= adjust_address (op0
, V2SFmode
, 8);
9621 emit_insn (gen_sse_storehps (m
, op1
));
9628 /* Expand a push in MODE. This is some mode for which we do not support
9629 proper push instructions, at least from the registers that we expect
9630 the value to live in. */
9633 ix86_expand_push (enum machine_mode mode
, rtx x
)
9637 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9638 GEN_INT (-GET_MODE_SIZE (mode
)),
9639 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9640 if (tmp
!= stack_pointer_rtx
)
9641 emit_move_insn (stack_pointer_rtx
, tmp
);
9643 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9644 emit_move_insn (tmp
, x
);
9647 /* Helper function of ix86_fixup_binary_operands to canonicalize
9648 operand order. Returns true if the operands should be swapped. */
9651 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9654 rtx dst
= operands
[0];
9655 rtx src1
= operands
[1];
9656 rtx src2
= operands
[2];
9658 /* If the operation is not commutative, we can't do anything. */
9659 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9662 /* Highest priority is that src1 should match dst. */
9663 if (rtx_equal_p (dst
, src1
))
9665 if (rtx_equal_p (dst
, src2
))
9668 /* Next highest priority is that immediate constants come second. */
9669 if (immediate_operand (src2
, mode
))
9671 if (immediate_operand (src1
, mode
))
9674 /* Lowest priority is that memory references should come second. */
9684 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9685 destination to use for the operation. If different from the true
9686 destination in operands[0], a copy operation will be required. */
9689 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9692 rtx dst
= operands
[0];
9693 rtx src1
= operands
[1];
9694 rtx src2
= operands
[2];
9696 /* Canonicalize operand order. */
9697 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9704 /* Both source operands cannot be in memory. */
9705 if (MEM_P (src1
) && MEM_P (src2
))
9707 /* Optimization: Only read from memory once. */
9708 if (rtx_equal_p (src1
, src2
))
9710 src2
= force_reg (mode
, src2
);
9714 src2
= force_reg (mode
, src2
);
9717 /* If the destination is memory, and we do not have matching source
9718 operands, do things in registers. */
9719 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9720 dst
= gen_reg_rtx (mode
);
9722 /* Source 1 cannot be a constant. */
9723 if (CONSTANT_P (src1
))
9724 src1
= force_reg (mode
, src1
);
9726 /* Source 1 cannot be a non-matching memory. */
9727 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9728 src1
= force_reg (mode
, src1
);
9735 /* Similarly, but assume that the destination has already been
9739 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9740 enum machine_mode mode
, rtx operands
[])
9742 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9743 gcc_assert (dst
== operands
[0]);
9746 /* Attempt to expand a binary operator. Make the expansion closer to the
9747 actual machine, then just general_operand, which will allow 3 separate
9748 memory references (one output, two input) in a single insn. */
9751 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9754 rtx src1
, src2
, dst
, op
, clob
;
9756 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9760 /* Emit the instruction. */
9762 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9763 if (reload_in_progress
)
9765 /* Reload doesn't know about the flags register, and doesn't know that
9766 it doesn't want to clobber it. We can only do this with PLUS. */
9767 gcc_assert (code
== PLUS
);
9772 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9773 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9776 /* Fix up the destination if needed. */
9777 if (dst
!= operands
[0])
9778 emit_move_insn (operands
[0], dst
);
9781 /* Return TRUE or FALSE depending on whether the binary operator meets the
9782 appropriate constraints. */
9785 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9788 rtx dst
= operands
[0];
9789 rtx src1
= operands
[1];
9790 rtx src2
= operands
[2];
9792 /* Both source operands cannot be in memory. */
9793 if (MEM_P (src1
) && MEM_P (src2
))
9796 /* Canonicalize operand order for commutative operators. */
9797 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9804 /* If the destination is memory, we must have a matching source operand. */
9805 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9808 /* Source 1 cannot be a constant. */
9809 if (CONSTANT_P (src1
))
9812 /* Source 1 cannot be a non-matching memory. */
9813 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9819 /* Attempt to expand a unary operator. Make the expansion closer to the
9820 actual machine, then just general_operand, which will allow 2 separate
9821 memory references (one output, one input) in a single insn. */
9824 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9827 int matching_memory
;
9828 rtx src
, dst
, op
, clob
;
9833 /* If the destination is memory, and we do not have matching source
9834 operands, do things in registers. */
9835 matching_memory
= 0;
9838 if (rtx_equal_p (dst
, src
))
9839 matching_memory
= 1;
9841 dst
= gen_reg_rtx (mode
);
9844 /* When source operand is memory, destination must match. */
9845 if (MEM_P (src
) && !matching_memory
)
9846 src
= force_reg (mode
, src
);
9848 /* Emit the instruction. */
9850 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9851 if (reload_in_progress
|| code
== NOT
)
9853 /* Reload doesn't know about the flags register, and doesn't know that
9854 it doesn't want to clobber it. */
9855 gcc_assert (code
== NOT
);
9860 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9861 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9864 /* Fix up the destination if needed. */
9865 if (dst
!= operands
[0])
9866 emit_move_insn (operands
[0], dst
);
9869 /* Return TRUE or FALSE depending on whether the unary operator meets the
9870 appropriate constraints. */
9873 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9874 enum machine_mode mode ATTRIBUTE_UNUSED
,
9875 rtx operands
[2] ATTRIBUTE_UNUSED
)
9877 /* If one of operands is memory, source and destination must match. */
9878 if ((MEM_P (operands
[0])
9879 || MEM_P (operands
[1]))
9880 && ! rtx_equal_p (operands
[0], operands
[1]))
9885 /* Post-reload splitter for converting an SF or DFmode value in an
9886 SSE register into an unsigned SImode. */
9889 ix86_split_convert_uns_si_sse (rtx operands
[])
9891 enum machine_mode vecmode
;
9892 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
9894 large
= operands
[1];
9895 zero_or_two31
= operands
[2];
9896 input
= operands
[3];
9897 two31
= operands
[4];
9898 vecmode
= GET_MODE (large
);
9899 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
9901 /* Load up the value into the low element. We must ensure that the other
9902 elements are valid floats -- zero is the easiest such value. */
9905 if (vecmode
== V4SFmode
)
9906 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
9908 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
9912 input
= gen_rtx_REG (vecmode
, REGNO (input
));
9913 emit_move_insn (value
, CONST0_RTX (vecmode
));
9914 if (vecmode
== V4SFmode
)
9915 emit_insn (gen_sse_movss (value
, value
, input
));
9917 emit_insn (gen_sse2_movsd (value
, value
, input
));
9920 emit_move_insn (large
, two31
);
9921 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
9923 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
9924 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
9926 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
9927 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
9929 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
9930 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
9932 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
9933 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
9935 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
9936 if (vecmode
== V4SFmode
)
9937 emit_insn (gen_sse2_cvttps2dq (x
, value
));
9939 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
9942 emit_insn (gen_xorv4si3 (value
, value
, large
));
9945 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9946 Expects the 64-bit DImode to be supplied in a pair of integral
9947 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9948 -mfpmath=sse, !optimize_size only. */
9951 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
9953 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
9954 rtx int_xmm
, fp_xmm
;
9955 rtx biases
, exponents
;
9958 int_xmm
= gen_reg_rtx (V4SImode
);
9959 if (TARGET_INTER_UNIT_MOVES
)
9960 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
9961 else if (TARGET_SSE_SPLIT_REGS
)
9963 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
9964 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
9968 x
= gen_reg_rtx (V2DImode
);
9969 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
9970 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
9973 x
= gen_rtx_CONST_VECTOR (V4SImode
,
9974 gen_rtvec (4, GEN_INT (0x43300000UL
),
9975 GEN_INT (0x45300000UL
),
9976 const0_rtx
, const0_rtx
));
9977 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
9979 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9980 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
9982 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9983 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9984 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9985 (0x1.0p84 + double(fp_value_hi_xmm)).
9986 Note these exponents differ by 32. */
9988 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
9990 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9991 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9992 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
9993 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
9994 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
9995 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
9996 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
9997 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
9998 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10000 /* Add the upper and lower DFmode values together. */
10002 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10005 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10006 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10007 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10010 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10013 /* Convert an unsigned SImode value into a DFmode. Only currently used
10014 for SSE, but applicable anywhere. */
10017 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10019 REAL_VALUE_TYPE TWO31r
;
10022 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10023 NULL
, 1, OPTAB_DIRECT
);
10025 fp
= gen_reg_rtx (DFmode
);
10026 emit_insn (gen_floatsidf2 (fp
, x
));
10028 real_ldexp (&TWO31r
, &dconst1
, 31);
10029 x
= const_double_from_real_value (TWO31r
, DFmode
);
10031 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10033 emit_move_insn (target
, x
);
10036 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10037 32-bit mode; otherwise we have a direct convert instruction. */
10040 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10042 REAL_VALUE_TYPE TWO32r
;
10043 rtx fp_lo
, fp_hi
, x
;
10045 fp_lo
= gen_reg_rtx (DFmode
);
10046 fp_hi
= gen_reg_rtx (DFmode
);
10048 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10050 real_ldexp (&TWO32r
, &dconst1
, 32);
10051 x
= const_double_from_real_value (TWO32r
, DFmode
);
10052 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10054 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10056 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10059 emit_move_insn (target
, x
);
10062 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10063 For x86_32, -mfpmath=sse, !optimize_size only. */
10065 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10067 REAL_VALUE_TYPE ONE16r
;
10068 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10070 real_ldexp (&ONE16r
, &dconst1
, 16);
10071 x
= const_double_from_real_value (ONE16r
, SFmode
);
10072 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10073 NULL
, 0, OPTAB_DIRECT
);
10074 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10075 NULL
, 0, OPTAB_DIRECT
);
10076 fp_hi
= gen_reg_rtx (SFmode
);
10077 fp_lo
= gen_reg_rtx (SFmode
);
10078 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10079 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10080 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10082 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10084 if (!rtx_equal_p (target
, fp_hi
))
10085 emit_move_insn (target
, fp_hi
);
10088 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10089 then replicate the value for all elements of the vector
10093 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10100 v
= gen_rtvec (4, value
, value
, value
, value
);
10102 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10103 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10104 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10108 v
= gen_rtvec (2, value
, value
);
10110 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10111 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10114 gcc_unreachable ();
10118 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10119 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10120 true, then replicate the mask for all elements of the vector register.
10121 If INVERT is true, then create a mask excluding the sign bit. */
10124 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10126 enum machine_mode vec_mode
;
10127 HOST_WIDE_INT hi
, lo
;
10132 /* Find the sign bit, sign extended to 2*HWI. */
10133 if (mode
== SFmode
)
10134 lo
= 0x80000000, hi
= lo
< 0;
10135 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10136 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10138 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10141 lo
= ~lo
, hi
= ~hi
;
10143 /* Force this value into the low part of a fp vector constant. */
10144 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10145 mask
= gen_lowpart (mode
, mask
);
10147 v
= ix86_build_const_vector (mode
, vect
, mask
);
10148 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10149 return force_reg (vec_mode
, v
);
10152 /* Generate code for floating point ABS or NEG. */
10155 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10158 rtx mask
, set
, use
, clob
, dst
, src
;
10159 bool matching_memory
;
10160 bool use_sse
= false;
10161 bool vector_mode
= VECTOR_MODE_P (mode
);
10162 enum machine_mode elt_mode
= mode
;
10166 elt_mode
= GET_MODE_INNER (mode
);
10169 else if (TARGET_SSE_MATH
)
10170 use_sse
= SSE_FLOAT_MODE_P (mode
);
10172 /* NEG and ABS performed with SSE use bitwise mask operations.
10173 Create the appropriate mask now. */
10175 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10182 /* If the destination is memory, and we don't have matching source
10183 operands or we're using the x87, do things in registers. */
10184 matching_memory
= false;
10187 if (use_sse
&& rtx_equal_p (dst
, src
))
10188 matching_memory
= true;
10190 dst
= gen_reg_rtx (mode
);
10192 if (MEM_P (src
) && !matching_memory
)
10193 src
= force_reg (mode
, src
);
10197 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10198 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10203 set
= gen_rtx_fmt_e (code
, mode
, src
);
10204 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10207 use
= gen_rtx_USE (VOIDmode
, mask
);
10208 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10209 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10210 gen_rtvec (3, set
, use
, clob
)));
10216 if (dst
!= operands
[0])
10217 emit_move_insn (operands
[0], dst
);
10220 /* Expand a copysign operation. Special case operand 0 being a constant. */
10223 ix86_expand_copysign (rtx operands
[])
10225 enum machine_mode mode
, vmode
;
10226 rtx dest
, op0
, op1
, mask
, nmask
;
10228 dest
= operands
[0];
10232 mode
= GET_MODE (dest
);
10233 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10235 if (GET_CODE (op0
) == CONST_DOUBLE
)
10239 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10240 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10242 if (op0
== CONST0_RTX (mode
))
10243 op0
= CONST0_RTX (vmode
);
10246 if (mode
== SFmode
)
10247 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10248 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10250 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10251 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10254 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10256 if (mode
== SFmode
)
10257 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10259 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10263 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10264 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10266 if (mode
== SFmode
)
10267 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10269 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10273 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10274 be a constant, and so has already been expanded into a vector constant. */
10277 ix86_split_copysign_const (rtx operands
[])
10279 enum machine_mode mode
, vmode
;
10280 rtx dest
, op0
, op1
, mask
, x
;
10282 dest
= operands
[0];
10285 mask
= operands
[3];
10287 mode
= GET_MODE (dest
);
10288 vmode
= GET_MODE (mask
);
10290 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10291 x
= gen_rtx_AND (vmode
, dest
, mask
);
10292 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10294 if (op0
!= CONST0_RTX (vmode
))
10296 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10297 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10301 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10302 so we have to do two masks. */
10305 ix86_split_copysign_var (rtx operands
[])
10307 enum machine_mode mode
, vmode
;
10308 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10310 dest
= operands
[0];
10311 scratch
= operands
[1];
10314 nmask
= operands
[4];
10315 mask
= operands
[5];
10317 mode
= GET_MODE (dest
);
10318 vmode
= GET_MODE (mask
);
10320 if (rtx_equal_p (op0
, op1
))
10322 /* Shouldn't happen often (it's useless, obviously), but when it does
10323 we'd generate incorrect code if we continue below. */
10324 emit_move_insn (dest
, op0
);
10328 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10330 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10332 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10333 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10336 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10337 x
= gen_rtx_NOT (vmode
, dest
);
10338 x
= gen_rtx_AND (vmode
, x
, op0
);
10339 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10343 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10345 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10347 else /* alternative 2,4 */
10349 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10350 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10351 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10353 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10355 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10357 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10358 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10360 else /* alternative 3,4 */
10362 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10364 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10365 x
= gen_rtx_AND (vmode
, dest
, op0
);
10367 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10370 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10371 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10374 /* Return TRUE or FALSE depending on whether the first SET in INSN
10375 has source and destination with matching CC modes, and that the
10376 CC mode is at least as constrained as REQ_MODE. */
10379 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10382 enum machine_mode set_mode
;
10384 set
= PATTERN (insn
);
10385 if (GET_CODE (set
) == PARALLEL
)
10386 set
= XVECEXP (set
, 0, 0);
10387 gcc_assert (GET_CODE (set
) == SET
);
10388 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10390 set_mode
= GET_MODE (SET_DEST (set
));
10394 if (req_mode
!= CCNOmode
10395 && (req_mode
!= CCmode
10396 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10400 if (req_mode
== CCGCmode
)
10404 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10408 if (req_mode
== CCZmode
)
10415 gcc_unreachable ();
10418 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10421 /* Generate insn patterns to do an integer compare of OPERANDS. */
10424 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10426 enum machine_mode cmpmode
;
10429 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10430 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10432 /* This is very simple, but making the interface the same as in the
10433 FP case makes the rest of the code easier. */
10434 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10435 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10437 /* Return the test that should be put into the flags user, i.e.
10438 the bcc, scc, or cmov instruction. */
10439 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10442 /* Figure out whether to use ordered or unordered fp comparisons.
10443 Return the appropriate mode to use. */
10446 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10448 /* ??? In order to make all comparisons reversible, we do all comparisons
10449 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10450 all forms trapping and nontrapping comparisons, we can make inequality
10451 comparisons trapping again, since it results in better code when using
10452 FCOM based compares. */
10453 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10457 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10459 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10460 return ix86_fp_compare_mode (code
);
10463 /* Only zero flag is needed. */
10464 case EQ
: /* ZF=0 */
10465 case NE
: /* ZF!=0 */
10467 /* Codes needing carry flag. */
10468 case GEU
: /* CF=0 */
10469 case GTU
: /* CF=0 & ZF=0 */
10470 case LTU
: /* CF=1 */
10471 case LEU
: /* CF=1 | ZF=1 */
10473 /* Codes possibly doable only with sign flag when
10474 comparing against zero. */
10475 case GE
: /* SF=OF or SF=0 */
10476 case LT
: /* SF<>OF or SF=1 */
10477 if (op1
== const0_rtx
)
10480 /* For other cases Carry flag is not required. */
10482 /* Codes doable only with sign flag when comparing
10483 against zero, but we miss jump instruction for it
10484 so we need to use relational tests against overflow
10485 that thus needs to be zero. */
10486 case GT
: /* ZF=0 & SF=OF */
10487 case LE
: /* ZF=1 | SF<>OF */
10488 if (op1
== const0_rtx
)
10492 /* strcmp pattern do (use flags) and combine may ask us for proper
10497 gcc_unreachable ();
10501 /* Return the fixed registers used for condition codes. */
10504 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10511 /* If two condition code modes are compatible, return a condition code
10512 mode which is compatible with both. Otherwise, return
10515 static enum machine_mode
10516 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10521 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10524 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10525 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10531 gcc_unreachable ();
10553 /* These are only compatible with themselves, which we already
10559 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10562 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10564 enum rtx_code swapped_code
= swap_condition (code
);
10565 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10566 || (ix86_fp_comparison_cost (swapped_code
)
10567 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10570 /* Swap, force into registers, or otherwise massage the two operands
10571 to a fp comparison. The operands are updated in place; the new
10572 comparison code is returned. */
10574 static enum rtx_code
10575 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10577 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10578 rtx op0
= *pop0
, op1
= *pop1
;
10579 enum machine_mode op_mode
= GET_MODE (op0
);
10580 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10582 /* All of the unordered compare instructions only work on registers.
10583 The same is true of the fcomi compare instructions. The XFmode
10584 compare instructions require registers except when comparing
10585 against zero or when converting operand 1 from fixed point to
10589 && (fpcmp_mode
== CCFPUmode
10590 || (op_mode
== XFmode
10591 && ! (standard_80387_constant_p (op0
) == 1
10592 || standard_80387_constant_p (op1
) == 1)
10593 && GET_CODE (op1
) != FLOAT
)
10594 || ix86_use_fcomi_compare (code
)))
10596 op0
= force_reg (op_mode
, op0
);
10597 op1
= force_reg (op_mode
, op1
);
10601 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10602 things around if they appear profitable, otherwise force op0
10603 into a register. */
10605 if (standard_80387_constant_p (op0
) == 0
10607 && ! (standard_80387_constant_p (op1
) == 0
10611 tmp
= op0
, op0
= op1
, op1
= tmp
;
10612 code
= swap_condition (code
);
10616 op0
= force_reg (op_mode
, op0
);
10618 if (CONSTANT_P (op1
))
10620 int tmp
= standard_80387_constant_p (op1
);
10622 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10626 op1
= force_reg (op_mode
, op1
);
10629 op1
= force_reg (op_mode
, op1
);
10633 /* Try to rearrange the comparison to make it cheaper. */
10634 if (ix86_fp_comparison_cost (code
)
10635 > ix86_fp_comparison_cost (swap_condition (code
))
10636 && (REG_P (op1
) || !no_new_pseudos
))
10639 tmp
= op0
, op0
= op1
, op1
= tmp
;
10640 code
= swap_condition (code
);
10642 op0
= force_reg (op_mode
, op0
);
10650 /* Convert comparison codes we use to represent FP comparison to integer
10651 code that will result in proper branch. Return UNKNOWN if no such code
10655 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10684 /* Split comparison code CODE into comparisons we can do using branch
10685 instructions. BYPASS_CODE is comparison code for branch that will
10686 branch around FIRST_CODE and SECOND_CODE. If some of branches
10687 is not required, set value to UNKNOWN.
10688 We never require more than two branches. */
10691 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10692 enum rtx_code
*first_code
,
10693 enum rtx_code
*second_code
)
10695 *first_code
= code
;
10696 *bypass_code
= UNKNOWN
;
10697 *second_code
= UNKNOWN
;
10699 /* The fcomi comparison sets flags as follows:
10709 case GT
: /* GTU - CF=0 & ZF=0 */
10710 case GE
: /* GEU - CF=0 */
10711 case ORDERED
: /* PF=0 */
10712 case UNORDERED
: /* PF=1 */
10713 case UNEQ
: /* EQ - ZF=1 */
10714 case UNLT
: /* LTU - CF=1 */
10715 case UNLE
: /* LEU - CF=1 | ZF=1 */
10716 case LTGT
: /* EQ - ZF=0 */
10718 case LT
: /* LTU - CF=1 - fails on unordered */
10719 *first_code
= UNLT
;
10720 *bypass_code
= UNORDERED
;
10722 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10723 *first_code
= UNLE
;
10724 *bypass_code
= UNORDERED
;
10726 case EQ
: /* EQ - ZF=1 - fails on unordered */
10727 *first_code
= UNEQ
;
10728 *bypass_code
= UNORDERED
;
10730 case NE
: /* NE - ZF=0 - fails on unordered */
10731 *first_code
= LTGT
;
10732 *second_code
= UNORDERED
;
10734 case UNGE
: /* GEU - CF=0 - fails on unordered */
10736 *second_code
= UNORDERED
;
10738 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10740 *second_code
= UNORDERED
;
10743 gcc_unreachable ();
10745 if (!TARGET_IEEE_FP
)
10747 *second_code
= UNKNOWN
;
10748 *bypass_code
= UNKNOWN
;
10752 /* Return cost of comparison done fcom + arithmetics operations on AX.
10753 All following functions do use number of instructions as a cost metrics.
10754 In future this should be tweaked to compute bytes for optimize_size and
10755 take into account performance of various instructions on various CPUs. */
10757 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10759 if (!TARGET_IEEE_FP
)
10761 /* The cost of code output by ix86_expand_fp_compare. */
10785 gcc_unreachable ();
10789 /* Return cost of comparison done using fcomi operation.
10790 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10792 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10794 enum rtx_code bypass_code
, first_code
, second_code
;
10795 /* Return arbitrarily high cost when instruction is not supported - this
10796 prevents gcc from using it. */
10799 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10800 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10803 /* Return cost of comparison done using sahf operation.
10804 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10806 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10808 enum rtx_code bypass_code
, first_code
, second_code
;
10809 /* Return arbitrarily high cost when instruction is not preferred - this
10810 avoids gcc from using it. */
10811 if (!TARGET_USE_SAHF
&& !optimize_size
)
10813 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10814 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10817 /* Compute cost of the comparison done using any method.
10818 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10820 ix86_fp_comparison_cost (enum rtx_code code
)
10822 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10825 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10826 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10828 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10829 if (min
> sahf_cost
)
10831 if (min
> fcomi_cost
)
10836 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10839 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10840 rtx
*second_test
, rtx
*bypass_test
)
10842 enum machine_mode fpcmp_mode
, intcmp_mode
;
10844 int cost
= ix86_fp_comparison_cost (code
);
10845 enum rtx_code bypass_code
, first_code
, second_code
;
10847 fpcmp_mode
= ix86_fp_compare_mode (code
);
10848 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10851 *second_test
= NULL_RTX
;
10853 *bypass_test
= NULL_RTX
;
10855 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10857 /* Do fcomi/sahf based test when profitable. */
10858 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10859 && (second_code
== UNKNOWN
|| second_test
)
10860 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10864 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10865 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10871 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10872 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10874 scratch
= gen_reg_rtx (HImode
);
10875 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10876 emit_insn (gen_x86_sahf_1 (scratch
));
10879 /* The FP codes work out to act like unsigned. */
10880 intcmp_mode
= fpcmp_mode
;
10882 if (bypass_code
!= UNKNOWN
)
10883 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10884 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10886 if (second_code
!= UNKNOWN
)
10887 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10888 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10893 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10894 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10895 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10897 scratch
= gen_reg_rtx (HImode
);
10898 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10900 /* In the unordered case, we have to check C2 for NaN's, which
10901 doesn't happen to work out to anything nice combination-wise.
10902 So do some bit twiddling on the value we've got in AH to come
10903 up with an appropriate set of condition codes. */
10905 intcmp_mode
= CCNOmode
;
10910 if (code
== GT
|| !TARGET_IEEE_FP
)
10912 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10917 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10918 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10919 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10920 intcmp_mode
= CCmode
;
10926 if (code
== LT
&& TARGET_IEEE_FP
)
10928 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10929 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10930 intcmp_mode
= CCmode
;
10935 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10941 if (code
== GE
|| !TARGET_IEEE_FP
)
10943 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10948 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10949 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10956 if (code
== LE
&& TARGET_IEEE_FP
)
10958 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10959 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10960 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10961 intcmp_mode
= CCmode
;
10966 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10972 if (code
== EQ
&& TARGET_IEEE_FP
)
10974 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10975 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10976 intcmp_mode
= CCmode
;
10981 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10988 if (code
== NE
&& TARGET_IEEE_FP
)
10990 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10991 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10997 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11003 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11007 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11012 gcc_unreachable ();
11016 /* Return the test that should be put into the flags user, i.e.
11017 the bcc, scc, or cmov instruction. */
11018 return gen_rtx_fmt_ee (code
, VOIDmode
,
11019 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11024 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11027 op0
= ix86_compare_op0
;
11028 op1
= ix86_compare_op1
;
11031 *second_test
= NULL_RTX
;
11033 *bypass_test
= NULL_RTX
;
11035 if (ix86_compare_emitted
)
11037 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11038 ix86_compare_emitted
= NULL_RTX
;
11040 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11041 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11042 second_test
, bypass_test
);
11044 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11049 /* Return true if the CODE will result in nontrivial jump sequence. */
11051 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11053 enum rtx_code bypass_code
, first_code
, second_code
;
11056 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11057 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11061 ix86_expand_branch (enum rtx_code code
, rtx label
)
11065 /* If we have emitted a compare insn, go straight to simple.
11066 ix86_expand_compare won't emit anything if ix86_compare_emitted
11068 if (ix86_compare_emitted
)
11071 switch (GET_MODE (ix86_compare_op0
))
11077 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11078 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11079 gen_rtx_LABEL_REF (VOIDmode
, label
),
11081 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11090 enum rtx_code bypass_code
, first_code
, second_code
;
11092 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11093 &ix86_compare_op1
);
11095 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11097 /* Check whether we will use the natural sequence with one jump. If
11098 so, we can expand jump early. Otherwise delay expansion by
11099 creating compound insn to not confuse optimizers. */
11100 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11103 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11104 gen_rtx_LABEL_REF (VOIDmode
, label
),
11105 pc_rtx
, NULL_RTX
, NULL_RTX
);
11109 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11110 ix86_compare_op0
, ix86_compare_op1
);
11111 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11112 gen_rtx_LABEL_REF (VOIDmode
, label
),
11114 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11116 use_fcomi
= ix86_use_fcomi_compare (code
);
11117 vec
= rtvec_alloc (3 + !use_fcomi
);
11118 RTVEC_ELT (vec
, 0) = tmp
;
11120 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11122 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11125 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11127 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11136 /* Expand DImode branch into multiple compare+branch. */
11138 rtx lo
[2], hi
[2], label2
;
11139 enum rtx_code code1
, code2
, code3
;
11140 enum machine_mode submode
;
11142 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11144 tmp
= ix86_compare_op0
;
11145 ix86_compare_op0
= ix86_compare_op1
;
11146 ix86_compare_op1
= tmp
;
11147 code
= swap_condition (code
);
11149 if (GET_MODE (ix86_compare_op0
) == DImode
)
11151 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11152 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11157 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11158 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11162 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11163 avoid two branches. This costs one extra insn, so disable when
11164 optimizing for size. */
11166 if ((code
== EQ
|| code
== NE
)
11168 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11173 if (hi
[1] != const0_rtx
)
11174 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11175 NULL_RTX
, 0, OPTAB_WIDEN
);
11178 if (lo
[1] != const0_rtx
)
11179 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11180 NULL_RTX
, 0, OPTAB_WIDEN
);
11182 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11183 NULL_RTX
, 0, OPTAB_WIDEN
);
11185 ix86_compare_op0
= tmp
;
11186 ix86_compare_op1
= const0_rtx
;
11187 ix86_expand_branch (code
, label
);
11191 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11192 op1 is a constant and the low word is zero, then we can just
11193 examine the high word. */
11195 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11198 case LT
: case LTU
: case GE
: case GEU
:
11199 ix86_compare_op0
= hi
[0];
11200 ix86_compare_op1
= hi
[1];
11201 ix86_expand_branch (code
, label
);
11207 /* Otherwise, we need two or three jumps. */
11209 label2
= gen_label_rtx ();
11212 code2
= swap_condition (code
);
11213 code3
= unsigned_condition (code
);
11217 case LT
: case GT
: case LTU
: case GTU
:
11220 case LE
: code1
= LT
; code2
= GT
; break;
11221 case GE
: code1
= GT
; code2
= LT
; break;
11222 case LEU
: code1
= LTU
; code2
= GTU
; break;
11223 case GEU
: code1
= GTU
; code2
= LTU
; break;
11225 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11226 case NE
: code2
= UNKNOWN
; break;
11229 gcc_unreachable ();
11234 * if (hi(a) < hi(b)) goto true;
11235 * if (hi(a) > hi(b)) goto false;
11236 * if (lo(a) < lo(b)) goto true;
11240 ix86_compare_op0
= hi
[0];
11241 ix86_compare_op1
= hi
[1];
11243 if (code1
!= UNKNOWN
)
11244 ix86_expand_branch (code1
, label
);
11245 if (code2
!= UNKNOWN
)
11246 ix86_expand_branch (code2
, label2
);
11248 ix86_compare_op0
= lo
[0];
11249 ix86_compare_op1
= lo
[1];
11250 ix86_expand_branch (code3
, label
);
11252 if (code2
!= UNKNOWN
)
11253 emit_label (label2
);
11258 gcc_unreachable ();
11262 /* Split branch based on floating point condition. */
11264 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11265 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11267 rtx second
, bypass
;
11268 rtx label
= NULL_RTX
;
11270 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11273 if (target2
!= pc_rtx
)
11276 code
= reverse_condition_maybe_unordered (code
);
11281 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11282 tmp
, &second
, &bypass
);
11284 /* Remove pushed operand from stack. */
11286 ix86_free_from_memory (GET_MODE (pushed
));
11288 if (split_branch_probability
>= 0)
11290 /* Distribute the probabilities across the jumps.
11291 Assume the BYPASS and SECOND to be always test
11293 probability
= split_branch_probability
;
11295 /* Value of 1 is low enough to make no need for probability
11296 to be updated. Later we may run some experiments and see
11297 if unordered values are more frequent in practice. */
11299 bypass_probability
= 1;
11301 second_probability
= 1;
11303 if (bypass
!= NULL_RTX
)
11305 label
= gen_label_rtx ();
11306 i
= emit_jump_insn (gen_rtx_SET
11308 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11310 gen_rtx_LABEL_REF (VOIDmode
,
11313 if (bypass_probability
>= 0)
11315 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11316 GEN_INT (bypass_probability
),
11319 i
= emit_jump_insn (gen_rtx_SET
11321 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11322 condition
, target1
, target2
)));
11323 if (probability
>= 0)
11325 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11326 GEN_INT (probability
),
11328 if (second
!= NULL_RTX
)
11330 i
= emit_jump_insn (gen_rtx_SET
11332 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11334 if (second_probability
>= 0)
11336 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11337 GEN_INT (second_probability
),
11340 if (label
!= NULL_RTX
)
11341 emit_label (label
);
11345 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11347 rtx ret
, tmp
, tmpreg
, equiv
;
11348 rtx second_test
, bypass_test
;
11350 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11351 return 0; /* FAIL */
11353 gcc_assert (GET_MODE (dest
) == QImode
);
11355 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11356 PUT_MODE (ret
, QImode
);
11361 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11362 if (bypass_test
|| second_test
)
11364 rtx test
= second_test
;
11366 rtx tmp2
= gen_reg_rtx (QImode
);
11369 gcc_assert (!second_test
);
11370 test
= bypass_test
;
11372 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11374 PUT_MODE (test
, QImode
);
11375 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11378 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11380 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11383 /* Attach a REG_EQUAL note describing the comparison result. */
11384 if (ix86_compare_op0
&& ix86_compare_op1
)
11386 equiv
= simplify_gen_relational (code
, QImode
,
11387 GET_MODE (ix86_compare_op0
),
11388 ix86_compare_op0
, ix86_compare_op1
);
11389 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11392 return 1; /* DONE */
11395 /* Expand comparison setting or clearing carry flag. Return true when
11396 successful and set pop for the operation. */
11398 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11400 enum machine_mode mode
=
11401 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11403 /* Do not handle DImode compares that go through special path. Also we can't
11404 deal with FP compares yet. This is possible to add. */
11405 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11407 if (FLOAT_MODE_P (mode
))
11409 rtx second_test
= NULL
, bypass_test
= NULL
;
11410 rtx compare_op
, compare_seq
;
11412 /* Shortcut: following common codes never translate into carry flag compares. */
11413 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11414 || code
== ORDERED
|| code
== UNORDERED
)
11417 /* These comparisons require zero flag; swap operands so they won't. */
11418 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11419 && !TARGET_IEEE_FP
)
11424 code
= swap_condition (code
);
11427 /* Try to expand the comparison and verify that we end up with carry flag
11428 based comparison. This is fails to be true only when we decide to expand
11429 comparison using arithmetic that is not too common scenario. */
11431 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11432 &second_test
, &bypass_test
);
11433 compare_seq
= get_insns ();
11436 if (second_test
|| bypass_test
)
11438 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11439 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11440 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11442 code
= GET_CODE (compare_op
);
11443 if (code
!= LTU
&& code
!= GEU
)
11445 emit_insn (compare_seq
);
11449 if (!INTEGRAL_MODE_P (mode
))
11457 /* Convert a==0 into (unsigned)a<1. */
11460 if (op1
!= const0_rtx
)
11463 code
= (code
== EQ
? LTU
: GEU
);
11466 /* Convert a>b into b<a or a>=b-1. */
11469 if (CONST_INT_P (op1
))
11471 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11472 /* Bail out on overflow. We still can swap operands but that
11473 would force loading of the constant into register. */
11474 if (op1
== const0_rtx
11475 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11477 code
= (code
== GTU
? GEU
: LTU
);
11484 code
= (code
== GTU
? LTU
: GEU
);
11488 /* Convert a>=0 into (unsigned)a<0x80000000. */
11491 if (mode
== DImode
|| op1
!= const0_rtx
)
11493 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11494 code
= (code
== LT
? GEU
: LTU
);
11498 if (mode
== DImode
|| op1
!= constm1_rtx
)
11500 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11501 code
= (code
== LE
? GEU
: LTU
);
11507 /* Swapping operands may cause constant to appear as first operand. */
11508 if (!nonimmediate_operand (op0
, VOIDmode
))
11510 if (no_new_pseudos
)
11512 op0
= force_reg (mode
, op0
);
11514 ix86_compare_op0
= op0
;
11515 ix86_compare_op1
= op1
;
11516 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11517 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11522 ix86_expand_int_movcc (rtx operands
[])
11524 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11525 rtx compare_seq
, compare_op
;
11526 rtx second_test
, bypass_test
;
11527 enum machine_mode mode
= GET_MODE (operands
[0]);
11528 bool sign_bit_compare_p
= false;;
11531 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11532 compare_seq
= get_insns ();
11535 compare_code
= GET_CODE (compare_op
);
11537 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11538 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11539 sign_bit_compare_p
= true;
11541 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11542 HImode insns, we'd be swallowed in word prefix ops. */
11544 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11545 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11546 && CONST_INT_P (operands
[2])
11547 && CONST_INT_P (operands
[3]))
11549 rtx out
= operands
[0];
11550 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11551 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11552 HOST_WIDE_INT diff
;
11555 /* Sign bit compares are better done using shifts than we do by using
11557 if (sign_bit_compare_p
11558 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11559 ix86_compare_op1
, &compare_op
))
11561 /* Detect overlap between destination and compare sources. */
11564 if (!sign_bit_compare_p
)
11566 bool fpcmp
= false;
11568 compare_code
= GET_CODE (compare_op
);
11570 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11571 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11574 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11577 /* To simplify rest of code, restrict to the GEU case. */
11578 if (compare_code
== LTU
)
11580 HOST_WIDE_INT tmp
= ct
;
11583 compare_code
= reverse_condition (compare_code
);
11584 code
= reverse_condition (code
);
11589 PUT_CODE (compare_op
,
11590 reverse_condition_maybe_unordered
11591 (GET_CODE (compare_op
)));
11593 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11597 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11598 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11599 tmp
= gen_reg_rtx (mode
);
11601 if (mode
== DImode
)
11602 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11604 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11608 if (code
== GT
|| code
== GE
)
11609 code
= reverse_condition (code
);
11612 HOST_WIDE_INT tmp
= ct
;
11617 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11618 ix86_compare_op1
, VOIDmode
, 0, -1);
11631 tmp
= expand_simple_binop (mode
, PLUS
,
11633 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11644 tmp
= expand_simple_binop (mode
, IOR
,
11646 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11648 else if (diff
== -1 && ct
)
11658 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11660 tmp
= expand_simple_binop (mode
, PLUS
,
11661 copy_rtx (tmp
), GEN_INT (cf
),
11662 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11670 * andl cf - ct, dest
11680 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11683 tmp
= expand_simple_binop (mode
, AND
,
11685 gen_int_mode (cf
- ct
, mode
),
11686 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11688 tmp
= expand_simple_binop (mode
, PLUS
,
11689 copy_rtx (tmp
), GEN_INT (ct
),
11690 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11693 if (!rtx_equal_p (tmp
, out
))
11694 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11696 return 1; /* DONE */
11702 tmp
= ct
, ct
= cf
, cf
= tmp
;
11704 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11706 /* We may be reversing unordered compare to normal compare, that
11707 is not valid in general (we may convert non-trapping condition
11708 to trapping one), however on i386 we currently emit all
11709 comparisons unordered. */
11710 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11711 code
= reverse_condition_maybe_unordered (code
);
11715 compare_code
= reverse_condition (compare_code
);
11716 code
= reverse_condition (code
);
11720 compare_code
= UNKNOWN
;
11721 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11722 && CONST_INT_P (ix86_compare_op1
))
11724 if (ix86_compare_op1
== const0_rtx
11725 && (code
== LT
|| code
== GE
))
11726 compare_code
= code
;
11727 else if (ix86_compare_op1
== constm1_rtx
)
11731 else if (code
== GT
)
11736 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11737 if (compare_code
!= UNKNOWN
11738 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11739 && (cf
== -1 || ct
== -1))
11741 /* If lea code below could be used, only optimize
11742 if it results in a 2 insn sequence. */
11744 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11745 || diff
== 3 || diff
== 5 || diff
== 9)
11746 || (compare_code
== LT
&& ct
== -1)
11747 || (compare_code
== GE
&& cf
== -1))
11750 * notl op1 (if necessary)
11758 code
= reverse_condition (code
);
11761 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11762 ix86_compare_op1
, VOIDmode
, 0, -1);
11764 out
= expand_simple_binop (mode
, IOR
,
11766 out
, 1, OPTAB_DIRECT
);
11767 if (out
!= operands
[0])
11768 emit_move_insn (operands
[0], out
);
11770 return 1; /* DONE */
11775 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11776 || diff
== 3 || diff
== 5 || diff
== 9)
11777 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11779 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11785 * lea cf(dest*(ct-cf)),dest
11789 * This also catches the degenerate setcc-only case.
11795 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11796 ix86_compare_op1
, VOIDmode
, 0, 1);
11799 /* On x86_64 the lea instruction operates on Pmode, so we need
11800 to get arithmetics done in proper mode to match. */
11802 tmp
= copy_rtx (out
);
11806 out1
= copy_rtx (out
);
11807 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11811 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11817 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11820 if (!rtx_equal_p (tmp
, out
))
11823 out
= force_operand (tmp
, copy_rtx (out
));
11825 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11827 if (!rtx_equal_p (out
, operands
[0]))
11828 emit_move_insn (operands
[0], copy_rtx (out
));
11830 return 1; /* DONE */
11834 * General case: Jumpful:
11835 * xorl dest,dest cmpl op1, op2
11836 * cmpl op1, op2 movl ct, dest
11837 * setcc dest jcc 1f
11838 * decl dest movl cf, dest
11839 * andl (cf-ct),dest 1:
11842 * Size 20. Size 14.
11844 * This is reasonably steep, but branch mispredict costs are
11845 * high on modern cpus, so consider failing only if optimizing
11849 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11850 && BRANCH_COST
>= 2)
11856 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11857 /* We may be reversing unordered compare to normal compare,
11858 that is not valid in general (we may convert non-trapping
11859 condition to trapping one), however on i386 we currently
11860 emit all comparisons unordered. */
11861 code
= reverse_condition_maybe_unordered (code
);
11864 code
= reverse_condition (code
);
11865 if (compare_code
!= UNKNOWN
)
11866 compare_code
= reverse_condition (compare_code
);
11870 if (compare_code
!= UNKNOWN
)
11872 /* notl op1 (if needed)
11877 For x < 0 (resp. x <= -1) there will be no notl,
11878 so if possible swap the constants to get rid of the
11880 True/false will be -1/0 while code below (store flag
11881 followed by decrement) is 0/-1, so the constants need
11882 to be exchanged once more. */
11884 if (compare_code
== GE
|| !cf
)
11886 code
= reverse_condition (code
);
11891 HOST_WIDE_INT tmp
= cf
;
11896 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11897 ix86_compare_op1
, VOIDmode
, 0, -1);
11901 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11902 ix86_compare_op1
, VOIDmode
, 0, 1);
11904 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11905 copy_rtx (out
), 1, OPTAB_DIRECT
);
11908 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11909 gen_int_mode (cf
- ct
, mode
),
11910 copy_rtx (out
), 1, OPTAB_DIRECT
);
11912 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11913 copy_rtx (out
), 1, OPTAB_DIRECT
);
11914 if (!rtx_equal_p (out
, operands
[0]))
11915 emit_move_insn (operands
[0], copy_rtx (out
));
11917 return 1; /* DONE */
11921 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11923 /* Try a few things more with specific constants and a variable. */
11926 rtx var
, orig_out
, out
, tmp
;
11928 if (BRANCH_COST
<= 2)
11929 return 0; /* FAIL */
11931 /* If one of the two operands is an interesting constant, load a
11932 constant with the above and mask it in with a logical operation. */
11934 if (CONST_INT_P (operands
[2]))
11937 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11938 operands
[3] = constm1_rtx
, op
= and_optab
;
11939 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11940 operands
[3] = const0_rtx
, op
= ior_optab
;
11942 return 0; /* FAIL */
11944 else if (CONST_INT_P (operands
[3]))
11947 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11948 operands
[2] = constm1_rtx
, op
= and_optab
;
11949 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11950 operands
[2] = const0_rtx
, op
= ior_optab
;
11952 return 0; /* FAIL */
11955 return 0; /* FAIL */
11957 orig_out
= operands
[0];
11958 tmp
= gen_reg_rtx (mode
);
11961 /* Recurse to get the constant loaded. */
11962 if (ix86_expand_int_movcc (operands
) == 0)
11963 return 0; /* FAIL */
11965 /* Mask in the interesting variable. */
11966 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11968 if (!rtx_equal_p (out
, orig_out
))
11969 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11971 return 1; /* DONE */
11975 * For comparison with above,
11985 if (! nonimmediate_operand (operands
[2], mode
))
11986 operands
[2] = force_reg (mode
, operands
[2]);
11987 if (! nonimmediate_operand (operands
[3], mode
))
11988 operands
[3] = force_reg (mode
, operands
[3]);
11990 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11992 rtx tmp
= gen_reg_rtx (mode
);
11993 emit_move_insn (tmp
, operands
[3]);
11996 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11998 rtx tmp
= gen_reg_rtx (mode
);
11999 emit_move_insn (tmp
, operands
[2]);
12003 if (! register_operand (operands
[2], VOIDmode
)
12005 || ! register_operand (operands
[3], VOIDmode
)))
12006 operands
[2] = force_reg (mode
, operands
[2]);
12009 && ! register_operand (operands
[3], VOIDmode
))
12010 operands
[3] = force_reg (mode
, operands
[3]);
12012 emit_insn (compare_seq
);
12013 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12014 gen_rtx_IF_THEN_ELSE (mode
,
12015 compare_op
, operands
[2],
12018 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12019 gen_rtx_IF_THEN_ELSE (mode
,
12021 copy_rtx (operands
[3]),
12022 copy_rtx (operands
[0]))));
12024 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12025 gen_rtx_IF_THEN_ELSE (mode
,
12027 copy_rtx (operands
[2]),
12028 copy_rtx (operands
[0]))));
12030 return 1; /* DONE */
12033 /* Swap, force into registers, or otherwise massage the two operands
12034 to an sse comparison with a mask result. Thus we differ a bit from
12035 ix86_prepare_fp_compare_args which expects to produce a flags result.
12037 The DEST operand exists to help determine whether to commute commutative
12038 operators. The POP0/POP1 operands are updated in place. The new
12039 comparison code is returned, or UNKNOWN if not implementable. */
12041 static enum rtx_code
12042 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12043 rtx
*pop0
, rtx
*pop1
)
12051 /* We have no LTGT as an operator. We could implement it with
12052 NE & ORDERED, but this requires an extra temporary. It's
12053 not clear that it's worth it. */
12060 /* These are supported directly. */
12067 /* For commutative operators, try to canonicalize the destination
12068 operand to be first in the comparison - this helps reload to
12069 avoid extra moves. */
12070 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12078 /* These are not supported directly. Swap the comparison operands
12079 to transform into something that is supported. */
12083 code
= swap_condition (code
);
12087 gcc_unreachable ();
12093 /* Detect conditional moves that exactly match min/max operational
12094 semantics. Note that this is IEEE safe, as long as we don't
12095 interchange the operands.
12097 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12098 and TRUE if the operation is successful and instructions are emitted. */
12101 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12102 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12104 enum machine_mode mode
;
12110 else if (code
== UNGE
)
12113 if_true
= if_false
;
12119 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12121 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12126 mode
= GET_MODE (dest
);
12128 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12129 but MODE may be a vector mode and thus not appropriate. */
12130 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12132 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12135 if_true
= force_reg (mode
, if_true
);
12136 v
= gen_rtvec (2, if_true
, if_false
);
12137 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12141 code
= is_min
? SMIN
: SMAX
;
12142 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12145 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12149 /* Expand an sse vector comparison. Return the register with the result. */
12152 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12153 rtx op_true
, rtx op_false
)
12155 enum machine_mode mode
= GET_MODE (dest
);
12158 cmp_op0
= force_reg (mode
, cmp_op0
);
12159 if (!nonimmediate_operand (cmp_op1
, mode
))
12160 cmp_op1
= force_reg (mode
, cmp_op1
);
12163 || reg_overlap_mentioned_p (dest
, op_true
)
12164 || reg_overlap_mentioned_p (dest
, op_false
))
12165 dest
= gen_reg_rtx (mode
);
12167 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12168 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12173 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12174 operations. This is used for both scalar and vector conditional moves. */
12177 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12179 enum machine_mode mode
= GET_MODE (dest
);
12182 if (op_false
== CONST0_RTX (mode
))
12184 op_true
= force_reg (mode
, op_true
);
12185 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12186 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12188 else if (op_true
== CONST0_RTX (mode
))
12190 op_false
= force_reg (mode
, op_false
);
12191 x
= gen_rtx_NOT (mode
, cmp
);
12192 x
= gen_rtx_AND (mode
, x
, op_false
);
12193 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12197 op_true
= force_reg (mode
, op_true
);
12198 op_false
= force_reg (mode
, op_false
);
12200 t2
= gen_reg_rtx (mode
);
12202 t3
= gen_reg_rtx (mode
);
12206 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12207 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12209 x
= gen_rtx_NOT (mode
, cmp
);
12210 x
= gen_rtx_AND (mode
, x
, op_false
);
12211 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12213 x
= gen_rtx_IOR (mode
, t3
, t2
);
12214 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12218 /* Expand a floating-point conditional move. Return true if successful. */
12221 ix86_expand_fp_movcc (rtx operands
[])
12223 enum machine_mode mode
= GET_MODE (operands
[0]);
12224 enum rtx_code code
= GET_CODE (operands
[1]);
12225 rtx tmp
, compare_op
, second_test
, bypass_test
;
12227 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12229 enum machine_mode cmode
;
12231 /* Since we've no cmove for sse registers, don't force bad register
12232 allocation just to gain access to it. Deny movcc when the
12233 comparison mode doesn't match the move mode. */
12234 cmode
= GET_MODE (ix86_compare_op0
);
12235 if (cmode
== VOIDmode
)
12236 cmode
= GET_MODE (ix86_compare_op1
);
12240 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12242 &ix86_compare_op1
);
12243 if (code
== UNKNOWN
)
12246 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12247 ix86_compare_op1
, operands
[2],
12251 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12252 ix86_compare_op1
, operands
[2], operands
[3]);
12253 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12257 /* The floating point conditional move instructions don't directly
12258 support conditions resulting from a signed integer comparison. */
12260 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12262 /* The floating point conditional move instructions don't directly
12263 support signed integer comparisons. */
12265 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12267 gcc_assert (!second_test
&& !bypass_test
);
12268 tmp
= gen_reg_rtx (QImode
);
12269 ix86_expand_setcc (code
, tmp
);
12271 ix86_compare_op0
= tmp
;
12272 ix86_compare_op1
= const0_rtx
;
12273 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12275 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12277 tmp
= gen_reg_rtx (mode
);
12278 emit_move_insn (tmp
, operands
[3]);
12281 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12283 tmp
= gen_reg_rtx (mode
);
12284 emit_move_insn (tmp
, operands
[2]);
12288 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12289 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12290 operands
[2], operands
[3])));
12292 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12293 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12294 operands
[3], operands
[0])));
12296 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12297 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12298 operands
[2], operands
[0])));
12303 /* Expand a floating-point vector conditional move; a vcond operation
12304 rather than a movcc operation. */
12307 ix86_expand_fp_vcond (rtx operands
[])
12309 enum rtx_code code
= GET_CODE (operands
[3]);
12312 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12313 &operands
[4], &operands
[5]);
12314 if (code
== UNKNOWN
)
12317 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12318 operands
[5], operands
[1], operands
[2]))
12321 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12322 operands
[1], operands
[2]);
12323 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12327 /* Expand a signed integral vector conditional move. */
12330 ix86_expand_int_vcond (rtx operands
[])
12332 enum machine_mode mode
= GET_MODE (operands
[0]);
12333 enum rtx_code code
= GET_CODE (operands
[3]);
12334 bool negate
= false;
12337 cop0
= operands
[4];
12338 cop1
= operands
[5];
12340 /* Canonicalize the comparison to EQ, GT, GTU. */
12351 code
= reverse_condition (code
);
12357 code
= reverse_condition (code
);
12363 code
= swap_condition (code
);
12364 x
= cop0
, cop0
= cop1
, cop1
= x
;
12368 gcc_unreachable ();
12371 /* Unsigned parallel compare is not supported by the hardware. Play some
12372 tricks to turn this into a signed comparison against 0. */
12375 cop0
= force_reg (mode
, cop0
);
12383 /* Perform a parallel modulo subtraction. */
12384 t1
= gen_reg_rtx (mode
);
12385 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12387 /* Extract the original sign bit of op0. */
12388 mask
= GEN_INT (-0x80000000);
12389 mask
= gen_rtx_CONST_VECTOR (mode
,
12390 gen_rtvec (4, mask
, mask
, mask
, mask
));
12391 mask
= force_reg (mode
, mask
);
12392 t2
= gen_reg_rtx (mode
);
12393 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12395 /* XOR it back into the result of the subtraction. This results
12396 in the sign bit set iff we saw unsigned underflow. */
12397 x
= gen_reg_rtx (mode
);
12398 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12406 /* Perform a parallel unsigned saturating subtraction. */
12407 x
= gen_reg_rtx (mode
);
12408 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12409 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12416 gcc_unreachable ();
12420 cop1
= CONST0_RTX (mode
);
12423 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12424 operands
[1+negate
], operands
[2-negate
]);
12426 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12427 operands
[2-negate
]);
12431 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12432 true if we should do zero extension, else sign extension. HIGH_P is
12433 true if we want the N/2 high elements, else the low elements. */
12436 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12438 enum machine_mode imode
= GET_MODE (operands
[1]);
12439 rtx (*unpack
)(rtx
, rtx
, rtx
);
12446 unpack
= gen_vec_interleave_highv16qi
;
12448 unpack
= gen_vec_interleave_lowv16qi
;
12452 unpack
= gen_vec_interleave_highv8hi
;
12454 unpack
= gen_vec_interleave_lowv8hi
;
12458 unpack
= gen_vec_interleave_highv4si
;
12460 unpack
= gen_vec_interleave_lowv4si
;
12463 gcc_unreachable ();
12466 dest
= gen_lowpart (imode
, operands
[0]);
12469 se
= force_reg (imode
, CONST0_RTX (imode
));
12471 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12472 operands
[1], pc_rtx
, pc_rtx
);
12474 emit_insn (unpack (dest
, operands
[1], se
));
12477 /* Expand conditional increment or decrement using adb/sbb instructions.
12478 The default case using setcc followed by the conditional move can be
12479 done by generic code. */
12481 ix86_expand_int_addcc (rtx operands
[])
12483 enum rtx_code code
= GET_CODE (operands
[1]);
12485 rtx val
= const0_rtx
;
12486 bool fpcmp
= false;
12487 enum machine_mode mode
= GET_MODE (operands
[0]);
12489 if (operands
[3] != const1_rtx
12490 && operands
[3] != constm1_rtx
)
12492 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12493 ix86_compare_op1
, &compare_op
))
12495 code
= GET_CODE (compare_op
);
12497 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12498 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12501 code
= ix86_fp_compare_code_to_integer (code
);
12508 PUT_CODE (compare_op
,
12509 reverse_condition_maybe_unordered
12510 (GET_CODE (compare_op
)));
12512 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12514 PUT_MODE (compare_op
, mode
);
12516 /* Construct either adc or sbb insn. */
12517 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12519 switch (GET_MODE (operands
[0]))
12522 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12525 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12528 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12531 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12534 gcc_unreachable ();
12539 switch (GET_MODE (operands
[0]))
12542 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12545 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12548 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12551 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12554 gcc_unreachable ();
12557 return 1; /* DONE */
12561 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12562 works for floating pointer parameters and nonoffsetable memories.
12563 For pushes, it returns just stack offsets; the values will be saved
12564 in the right order. Maximally three parts are generated. */
12567 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12572 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12574 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12576 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12577 gcc_assert (size
>= 2 && size
<= 3);
12579 /* Optimize constant pool reference to immediates. This is used by fp
12580 moves, that force all constants to memory to allow combining. */
12581 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12583 rtx tmp
= maybe_get_pool_constant (operand
);
12588 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12590 /* The only non-offsetable memories we handle are pushes. */
12591 int ok
= push_operand (operand
, VOIDmode
);
12595 operand
= copy_rtx (operand
);
12596 PUT_MODE (operand
, Pmode
);
12597 parts
[0] = parts
[1] = parts
[2] = operand
;
12601 if (GET_CODE (operand
) == CONST_VECTOR
)
12603 enum machine_mode imode
= int_mode_for_mode (mode
);
12604 /* Caution: if we looked through a constant pool memory above,
12605 the operand may actually have a different mode now. That's
12606 ok, since we want to pun this all the way back to an integer. */
12607 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12608 gcc_assert (operand
!= NULL
);
12614 if (mode
== DImode
)
12615 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12618 if (REG_P (operand
))
12620 gcc_assert (reload_completed
);
12621 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12622 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12624 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12626 else if (offsettable_memref_p (operand
))
12628 operand
= adjust_address (operand
, SImode
, 0);
12629 parts
[0] = operand
;
12630 parts
[1] = adjust_address (operand
, SImode
, 4);
12632 parts
[2] = adjust_address (operand
, SImode
, 8);
12634 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12639 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12643 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12644 parts
[2] = gen_int_mode (l
[2], SImode
);
12647 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12650 gcc_unreachable ();
12652 parts
[1] = gen_int_mode (l
[1], SImode
);
12653 parts
[0] = gen_int_mode (l
[0], SImode
);
12656 gcc_unreachable ();
12661 if (mode
== TImode
)
12662 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12663 if (mode
== XFmode
|| mode
== TFmode
)
12665 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12666 if (REG_P (operand
))
12668 gcc_assert (reload_completed
);
12669 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12670 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12672 else if (offsettable_memref_p (operand
))
12674 operand
= adjust_address (operand
, DImode
, 0);
12675 parts
[0] = operand
;
12676 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12678 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12683 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12684 real_to_target (l
, &r
, mode
);
12686 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12687 if (HOST_BITS_PER_WIDE_INT
>= 64)
12690 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12691 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12694 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12696 if (upper_mode
== SImode
)
12697 parts
[1] = gen_int_mode (l
[2], SImode
);
12698 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12701 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12702 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12705 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12708 gcc_unreachable ();
12715 /* Emit insns to perform a move or push of DI, DF, and XF values.
12716 Return false when normal moves are needed; true when all required
12717 insns have been emitted. Operands 2-4 contain the input values
12718 int the correct order; operands 5-7 contain the output values. */
12721 ix86_split_long_move (rtx operands
[])
12726 int collisions
= 0;
12727 enum machine_mode mode
= GET_MODE (operands
[0]);
12729 /* The DFmode expanders may ask us to move double.
12730 For 64bit target this is single move. By hiding the fact
12731 here we simplify i386.md splitters. */
12732 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12734 /* Optimize constant pool reference to immediates. This is used by
12735 fp moves, that force all constants to memory to allow combining. */
12737 if (MEM_P (operands
[1])
12738 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12739 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12740 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12741 if (push_operand (operands
[0], VOIDmode
))
12743 operands
[0] = copy_rtx (operands
[0]);
12744 PUT_MODE (operands
[0], Pmode
);
12747 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12748 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12749 emit_move_insn (operands
[0], operands
[1]);
12753 /* The only non-offsettable memory we handle is push. */
12754 if (push_operand (operands
[0], VOIDmode
))
12757 gcc_assert (!MEM_P (operands
[0])
12758 || offsettable_memref_p (operands
[0]));
12760 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12761 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12763 /* When emitting push, take care for source operands on the stack. */
12764 if (push
&& MEM_P (operands
[1])
12765 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12768 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12769 XEXP (part
[1][2], 0));
12770 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12771 XEXP (part
[1][1], 0));
12774 /* We need to do copy in the right order in case an address register
12775 of the source overlaps the destination. */
12776 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12778 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12780 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12783 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12786 /* Collision in the middle part can be handled by reordering. */
12787 if (collisions
== 1 && nparts
== 3
12788 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12791 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12792 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12795 /* If there are more collisions, we can't handle it by reordering.
12796 Do an lea to the last part and use only one colliding move. */
12797 else if (collisions
> 1)
12803 base
= part
[0][nparts
- 1];
12805 /* Handle the case when the last part isn't valid for lea.
12806 Happens in 64-bit mode storing the 12-byte XFmode. */
12807 if (GET_MODE (base
) != Pmode
)
12808 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12810 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12811 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12812 part
[1][1] = replace_equiv_address (part
[1][1],
12813 plus_constant (base
, UNITS_PER_WORD
));
12815 part
[1][2] = replace_equiv_address (part
[1][2],
12816 plus_constant (base
, 8));
12826 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12827 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12828 emit_move_insn (part
[0][2], part
[1][2]);
12833 /* In 64bit mode we don't have 32bit push available. In case this is
12834 register, it is OK - we will just use larger counterpart. We also
12835 retype memory - these comes from attempt to avoid REX prefix on
12836 moving of second half of TFmode value. */
12837 if (GET_MODE (part
[1][1]) == SImode
)
12839 switch (GET_CODE (part
[1][1]))
12842 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12846 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12850 gcc_unreachable ();
12853 if (GET_MODE (part
[1][0]) == SImode
)
12854 part
[1][0] = part
[1][1];
12857 emit_move_insn (part
[0][1], part
[1][1]);
12858 emit_move_insn (part
[0][0], part
[1][0]);
12862 /* Choose correct order to not overwrite the source before it is copied. */
12863 if ((REG_P (part
[0][0])
12864 && REG_P (part
[1][1])
12865 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12867 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12869 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12873 operands
[2] = part
[0][2];
12874 operands
[3] = part
[0][1];
12875 operands
[4] = part
[0][0];
12876 operands
[5] = part
[1][2];
12877 operands
[6] = part
[1][1];
12878 operands
[7] = part
[1][0];
12882 operands
[2] = part
[0][1];
12883 operands
[3] = part
[0][0];
12884 operands
[5] = part
[1][1];
12885 operands
[6] = part
[1][0];
12892 operands
[2] = part
[0][0];
12893 operands
[3] = part
[0][1];
12894 operands
[4] = part
[0][2];
12895 operands
[5] = part
[1][0];
12896 operands
[6] = part
[1][1];
12897 operands
[7] = part
[1][2];
12901 operands
[2] = part
[0][0];
12902 operands
[3] = part
[0][1];
12903 operands
[5] = part
[1][0];
12904 operands
[6] = part
[1][1];
12908 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12911 if (CONST_INT_P (operands
[5])
12912 && operands
[5] != const0_rtx
12913 && REG_P (operands
[2]))
12915 if (CONST_INT_P (operands
[6])
12916 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12917 operands
[6] = operands
[2];
12920 && CONST_INT_P (operands
[7])
12921 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12922 operands
[7] = operands
[2];
12926 && CONST_INT_P (operands
[6])
12927 && operands
[6] != const0_rtx
12928 && REG_P (operands
[3])
12929 && CONST_INT_P (operands
[7])
12930 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12931 operands
[7] = operands
[3];
12934 emit_move_insn (operands
[2], operands
[5]);
12935 emit_move_insn (operands
[3], operands
[6]);
12937 emit_move_insn (operands
[4], operands
[7]);
12942 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12943 left shift by a constant, either using a single shift or
12944 a sequence of add instructions. */
12947 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12951 emit_insn ((mode
== DImode
12953 : gen_adddi3
) (operand
, operand
, operand
));
12955 else if (!optimize_size
12956 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12959 for (i
=0; i
<count
; i
++)
12961 emit_insn ((mode
== DImode
12963 : gen_adddi3
) (operand
, operand
, operand
));
12967 emit_insn ((mode
== DImode
12969 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12973 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12975 rtx low
[2], high
[2];
12977 const int single_width
= mode
== DImode
? 32 : 64;
12979 if (CONST_INT_P (operands
[2]))
12981 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12982 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12984 if (count
>= single_width
)
12986 emit_move_insn (high
[0], low
[1]);
12987 emit_move_insn (low
[0], const0_rtx
);
12989 if (count
> single_width
)
12990 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12994 if (!rtx_equal_p (operands
[0], operands
[1]))
12995 emit_move_insn (operands
[0], operands
[1]);
12996 emit_insn ((mode
== DImode
12998 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12999 ix86_expand_ashl_const (low
[0], count
, mode
);
13004 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13006 if (operands
[1] == const1_rtx
)
13008 /* Assuming we've chosen a QImode capable registers, then 1 << N
13009 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13010 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13012 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13014 ix86_expand_clear (low
[0]);
13015 ix86_expand_clear (high
[0]);
13016 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13018 d
= gen_lowpart (QImode
, low
[0]);
13019 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13020 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13021 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13023 d
= gen_lowpart (QImode
, high
[0]);
13024 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13025 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13026 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13029 /* Otherwise, we can get the same results by manually performing
13030 a bit extract operation on bit 5/6, and then performing the two
13031 shifts. The two methods of getting 0/1 into low/high are exactly
13032 the same size. Avoiding the shift in the bit extract case helps
13033 pentium4 a bit; no one else seems to care much either way. */
13038 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13039 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13041 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13042 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13044 emit_insn ((mode
== DImode
13046 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13047 emit_insn ((mode
== DImode
13049 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13050 emit_move_insn (low
[0], high
[0]);
13051 emit_insn ((mode
== DImode
13053 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13056 emit_insn ((mode
== DImode
13058 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13059 emit_insn ((mode
== DImode
13061 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13065 if (operands
[1] == constm1_rtx
)
13067 /* For -1 << N, we can avoid the shld instruction, because we
13068 know that we're shifting 0...31/63 ones into a -1. */
13069 emit_move_insn (low
[0], constm1_rtx
);
13071 emit_move_insn (high
[0], low
[0]);
13073 emit_move_insn (high
[0], constm1_rtx
);
13077 if (!rtx_equal_p (operands
[0], operands
[1]))
13078 emit_move_insn (operands
[0], operands
[1]);
13080 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13081 emit_insn ((mode
== DImode
13083 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13086 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13088 if (TARGET_CMOVE
&& scratch
)
13090 ix86_expand_clear (scratch
);
13091 emit_insn ((mode
== DImode
13092 ? gen_x86_shift_adj_1
13093 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13096 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13100 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13102 rtx low
[2], high
[2];
13104 const int single_width
= mode
== DImode
? 32 : 64;
13106 if (CONST_INT_P (operands
[2]))
13108 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13109 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13111 if (count
== single_width
* 2 - 1)
13113 emit_move_insn (high
[0], high
[1]);
13114 emit_insn ((mode
== DImode
13116 : gen_ashrdi3
) (high
[0], high
[0],
13117 GEN_INT (single_width
- 1)));
13118 emit_move_insn (low
[0], high
[0]);
13121 else if (count
>= single_width
)
13123 emit_move_insn (low
[0], high
[1]);
13124 emit_move_insn (high
[0], low
[0]);
13125 emit_insn ((mode
== DImode
13127 : gen_ashrdi3
) (high
[0], high
[0],
13128 GEN_INT (single_width
- 1)));
13129 if (count
> single_width
)
13130 emit_insn ((mode
== DImode
13132 : gen_ashrdi3
) (low
[0], low
[0],
13133 GEN_INT (count
- single_width
)));
13137 if (!rtx_equal_p (operands
[0], operands
[1]))
13138 emit_move_insn (operands
[0], operands
[1]);
13139 emit_insn ((mode
== DImode
13141 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13142 emit_insn ((mode
== DImode
13144 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13149 if (!rtx_equal_p (operands
[0], operands
[1]))
13150 emit_move_insn (operands
[0], operands
[1]);
13152 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13154 emit_insn ((mode
== DImode
13156 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13157 emit_insn ((mode
== DImode
13159 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13161 if (TARGET_CMOVE
&& scratch
)
13163 emit_move_insn (scratch
, high
[0]);
13164 emit_insn ((mode
== DImode
13166 : gen_ashrdi3
) (scratch
, scratch
,
13167 GEN_INT (single_width
- 1)));
13168 emit_insn ((mode
== DImode
13169 ? gen_x86_shift_adj_1
13170 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13174 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13179 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13181 rtx low
[2], high
[2];
13183 const int single_width
= mode
== DImode
? 32 : 64;
13185 if (CONST_INT_P (operands
[2]))
13187 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13188 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13190 if (count
>= single_width
)
13192 emit_move_insn (low
[0], high
[1]);
13193 ix86_expand_clear (high
[0]);
13195 if (count
> single_width
)
13196 emit_insn ((mode
== DImode
13198 : gen_lshrdi3
) (low
[0], low
[0],
13199 GEN_INT (count
- single_width
)));
13203 if (!rtx_equal_p (operands
[0], operands
[1]))
13204 emit_move_insn (operands
[0], operands
[1]);
13205 emit_insn ((mode
== DImode
13207 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13208 emit_insn ((mode
== DImode
13210 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13215 if (!rtx_equal_p (operands
[0], operands
[1]))
13216 emit_move_insn (operands
[0], operands
[1]);
13218 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13220 emit_insn ((mode
== DImode
13222 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13223 emit_insn ((mode
== DImode
13225 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13227 /* Heh. By reversing the arguments, we can reuse this pattern. */
13228 if (TARGET_CMOVE
&& scratch
)
13230 ix86_expand_clear (scratch
);
13231 emit_insn ((mode
== DImode
13232 ? gen_x86_shift_adj_1
13233 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13237 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13241 /* Predict just emitted jump instruction to be taken with probability PROB. */
13243 predict_jump (int prob
)
13245 rtx insn
= get_last_insn ();
13246 gcc_assert (JUMP_P (insn
));
13248 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13253 /* Helper function for the string operations below. Dest VARIABLE whether
13254 it is aligned to VALUE bytes. If true, jump to the label. */
13256 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13258 rtx label
= gen_label_rtx ();
13259 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13260 if (GET_MODE (variable
) == DImode
)
13261 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13263 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13264 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13267 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13269 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13273 /* Adjust COUNTER by the VALUE. */
13275 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13277 if (GET_MODE (countreg
) == DImode
)
13278 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13280 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13283 /* Zero extend possibly SImode EXP to Pmode register. */
13285 ix86_zero_extend_to_Pmode (rtx exp
)
13288 if (GET_MODE (exp
) == VOIDmode
)
13289 return force_reg (Pmode
, exp
);
13290 if (GET_MODE (exp
) == Pmode
)
13291 return copy_to_mode_reg (Pmode
, exp
);
13292 r
= gen_reg_rtx (Pmode
);
13293 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13297 /* Divide COUNTREG by SCALE. */
13299 scale_counter (rtx countreg
, int scale
)
13302 rtx piece_size_mask
;
13306 if (CONST_INT_P (countreg
))
13307 return GEN_INT (INTVAL (countreg
) / scale
);
13308 gcc_assert (REG_P (countreg
));
13310 piece_size_mask
= GEN_INT (scale
- 1);
13311 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13312 GEN_INT (exact_log2 (scale
)),
13313 NULL
, 1, OPTAB_DIRECT
);
13317 /* When SRCPTR is non-NULL, output simple loop to move memory
13318 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13319 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13320 equivalent loop to set memory by VALUE (supposed to be in MODE).
13322 The size is rounded down to whole number of chunk size moved at once.
13323 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13327 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13328 rtx destptr
, rtx srcptr
, rtx value
,
13329 rtx count
, enum machine_mode mode
, int unroll
,
13332 rtx out_label
, top_label
, iter
, tmp
;
13333 enum machine_mode iter_mode
;
13334 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13335 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13341 iter_mode
= GET_MODE (count
);
13342 if (iter_mode
== VOIDmode
)
13343 iter_mode
= word_mode
;
13345 top_label
= gen_label_rtx ();
13346 out_label
= gen_label_rtx ();
13347 iter
= gen_reg_rtx (iter_mode
);
13349 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13350 NULL
, 1, OPTAB_DIRECT
);
13351 /* Those two should combine. */
13352 if (piece_size
== const1_rtx
)
13354 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13356 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13358 emit_move_insn (iter
, const0_rtx
);
13360 emit_label (top_label
);
13362 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13363 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13364 destmem
= change_address (destmem
, mode
, x_addr
);
13368 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13369 srcmem
= change_address (srcmem
, mode
, y_addr
);
13371 /* When unrolling for chips that reorder memory reads and writes,
13372 we can save registers by using single temporary.
13373 Also using 4 temporaries is overkill in 32bit mode. */
13374 if (!TARGET_64BIT
&& 0)
13376 for (i
= 0; i
< unroll
; i
++)
13381 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13383 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13385 emit_move_insn (destmem
, srcmem
);
13391 gcc_assert (unroll
<= 4);
13392 for (i
= 0; i
< unroll
; i
++)
13394 tmpreg
[i
] = gen_reg_rtx (mode
);
13398 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13400 emit_move_insn (tmpreg
[i
], srcmem
);
13402 for (i
= 0; i
< unroll
; i
++)
13407 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13409 emit_move_insn (destmem
, tmpreg
[i
]);
13414 for (i
= 0; i
< unroll
; i
++)
13418 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13419 emit_move_insn (destmem
, value
);
13422 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13423 true, OPTAB_LIB_WIDEN
);
13425 emit_move_insn (iter
, tmp
);
13427 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13429 if (expected_size
!= -1)
13431 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13432 if (expected_size
== 0)
13434 else if (expected_size
> REG_BR_PROB_BASE
)
13435 predict_jump (REG_BR_PROB_BASE
- 1);
13437 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13440 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13441 iter
= ix86_zero_extend_to_Pmode (iter
);
13442 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13443 true, OPTAB_LIB_WIDEN
);
13444 if (tmp
!= destptr
)
13445 emit_move_insn (destptr
, tmp
);
13448 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13449 true, OPTAB_LIB_WIDEN
);
13451 emit_move_insn (srcptr
, tmp
);
13453 emit_label (out_label
);
13456 /* Output "rep; mov" instruction.
13457 Arguments have same meaning as for previous function */
13459 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13460 rtx destptr
, rtx srcptr
,
13462 enum machine_mode mode
)
13468 /* If the size is known, it is shorter to use rep movs. */
13469 if (mode
== QImode
&& CONST_INT_P (count
)
13470 && !(INTVAL (count
) & 3))
13473 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13474 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13475 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13476 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13477 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13478 if (mode
!= QImode
)
13480 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13481 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13482 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13483 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13484 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13485 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13489 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13490 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13492 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13496 /* Output "rep; stos" instruction.
13497 Arguments have same meaning as for previous function */
13499 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13501 enum machine_mode mode
)
13506 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13507 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13508 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13509 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13510 if (mode
!= QImode
)
13512 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13513 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13514 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13517 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13518 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13522 emit_strmov (rtx destmem
, rtx srcmem
,
13523 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13525 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13526 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13527 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13530 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13532 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13533 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13536 if (CONST_INT_P (count
))
13538 HOST_WIDE_INT countval
= INTVAL (count
);
13541 if ((countval
& 0x16) && max_size
> 16)
13545 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13546 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13549 gcc_unreachable ();
13552 if ((countval
& 0x08) && max_size
> 8)
13555 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13558 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13559 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 4);
13563 if ((countval
& 0x04) && max_size
> 4)
13565 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13568 if ((countval
& 0x02) && max_size
> 2)
13570 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13573 if ((countval
& 0x01) && max_size
> 1)
13575 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13582 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13583 count
, 1, OPTAB_DIRECT
);
13584 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13585 count
, QImode
, 1, 4);
13589 /* When there are stringops, we can cheaply increase dest and src pointers.
13590 Otherwise we save code size by maintaining offset (zero is readily
13591 available from preceding rep operation) and using x86 addressing modes.
13593 if (TARGET_SINGLE_STRINGOP
)
13597 rtx label
= ix86_expand_aligntest (count
, 4, true);
13598 src
= change_address (srcmem
, SImode
, srcptr
);
13599 dest
= change_address (destmem
, SImode
, destptr
);
13600 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13601 emit_label (label
);
13602 LABEL_NUSES (label
) = 1;
13606 rtx label
= ix86_expand_aligntest (count
, 2, true);
13607 src
= change_address (srcmem
, HImode
, srcptr
);
13608 dest
= change_address (destmem
, HImode
, destptr
);
13609 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13610 emit_label (label
);
13611 LABEL_NUSES (label
) = 1;
13615 rtx label
= ix86_expand_aligntest (count
, 1, true);
13616 src
= change_address (srcmem
, QImode
, srcptr
);
13617 dest
= change_address (destmem
, QImode
, destptr
);
13618 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13619 emit_label (label
);
13620 LABEL_NUSES (label
) = 1;
13625 rtx offset
= force_reg (Pmode
, const0_rtx
);
13630 rtx label
= ix86_expand_aligntest (count
, 4, true);
13631 src
= change_address (srcmem
, SImode
, srcptr
);
13632 dest
= change_address (destmem
, SImode
, destptr
);
13633 emit_move_insn (dest
, src
);
13634 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13635 true, OPTAB_LIB_WIDEN
);
13637 emit_move_insn (offset
, tmp
);
13638 emit_label (label
);
13639 LABEL_NUSES (label
) = 1;
13643 rtx label
= ix86_expand_aligntest (count
, 2, true);
13644 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13645 src
= change_address (srcmem
, HImode
, tmp
);
13646 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13647 dest
= change_address (destmem
, HImode
, tmp
);
13648 emit_move_insn (dest
, src
);
13649 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13650 true, OPTAB_LIB_WIDEN
);
13652 emit_move_insn (offset
, tmp
);
13653 emit_label (label
);
13654 LABEL_NUSES (label
) = 1;
13658 rtx label
= ix86_expand_aligntest (count
, 1, true);
13659 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13660 src
= change_address (srcmem
, QImode
, tmp
);
13661 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13662 dest
= change_address (destmem
, QImode
, tmp
);
13663 emit_move_insn (dest
, src
);
13664 emit_label (label
);
13665 LABEL_NUSES (label
) = 1;
13670 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13672 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13673 rtx count
, int max_size
)
13676 expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13677 count
, 1, OPTAB_DIRECT
);
13678 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13679 gen_lowpart (QImode
, value
), count
, QImode
,
13683 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13685 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13689 if (CONST_INT_P (count
))
13691 HOST_WIDE_INT countval
= INTVAL (count
);
13694 if ((countval
& 0x16) && max_size
> 16)
13698 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13699 emit_insn (gen_strset (destptr
, dest
, value
));
13700 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13701 emit_insn (gen_strset (destptr
, dest
, value
));
13704 gcc_unreachable ();
13707 if ((countval
& 0x08) && max_size
> 8)
13711 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13712 emit_insn (gen_strset (destptr
, dest
, value
));
13716 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13717 emit_insn (gen_strset (destptr
, dest
, value
));
13718 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13719 emit_insn (gen_strset (destptr
, dest
, value
));
13723 if ((countval
& 0x04) && max_size
> 4)
13725 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13726 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13729 if ((countval
& 0x02) && max_size
> 2)
13731 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13732 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13735 if ((countval
& 0x01) && max_size
> 1)
13737 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13738 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13745 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13750 rtx label
= ix86_expand_aligntest (count
, 16, true);
13753 dest
= change_address (destmem
, DImode
, destptr
);
13754 emit_insn (gen_strset (destptr
, dest
, value
));
13755 emit_insn (gen_strset (destptr
, dest
, value
));
13759 dest
= change_address (destmem
, SImode
, destptr
);
13760 emit_insn (gen_strset (destptr
, dest
, value
));
13761 emit_insn (gen_strset (destptr
, dest
, value
));
13762 emit_insn (gen_strset (destptr
, dest
, value
));
13763 emit_insn (gen_strset (destptr
, dest
, value
));
13765 emit_label (label
);
13766 LABEL_NUSES (label
) = 1;
13770 rtx label
= ix86_expand_aligntest (count
, 8, true);
13773 dest
= change_address (destmem
, DImode
, destptr
);
13774 emit_insn (gen_strset (destptr
, dest
, value
));
13778 dest
= change_address (destmem
, SImode
, destptr
);
13779 emit_insn (gen_strset (destptr
, dest
, value
));
13780 emit_insn (gen_strset (destptr
, dest
, value
));
13782 emit_label (label
);
13783 LABEL_NUSES (label
) = 1;
13787 rtx label
= ix86_expand_aligntest (count
, 4, true);
13788 dest
= change_address (destmem
, SImode
, destptr
);
13789 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13790 emit_label (label
);
13791 LABEL_NUSES (label
) = 1;
13795 rtx label
= ix86_expand_aligntest (count
, 2, true);
13796 dest
= change_address (destmem
, HImode
, destptr
);
13797 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13798 emit_label (label
);
13799 LABEL_NUSES (label
) = 1;
13803 rtx label
= ix86_expand_aligntest (count
, 1, true);
13804 dest
= change_address (destmem
, QImode
, destptr
);
13805 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13806 emit_label (label
);
13807 LABEL_NUSES (label
) = 1;
13811 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13812 DESIRED_ALIGNMENT. */
13814 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13815 rtx destptr
, rtx srcptr
, rtx count
,
13816 int align
, int desired_alignment
)
13818 if (align
<= 1 && desired_alignment
> 1)
13820 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13821 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13822 destmem
= change_address (destmem
, QImode
, destptr
);
13823 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13824 ix86_adjust_counter (count
, 1);
13825 emit_label (label
);
13826 LABEL_NUSES (label
) = 1;
13828 if (align
<= 2 && desired_alignment
> 2)
13830 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13831 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13832 destmem
= change_address (destmem
, HImode
, destptr
);
13833 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13834 ix86_adjust_counter (count
, 2);
13835 emit_label (label
);
13836 LABEL_NUSES (label
) = 1;
13838 if (align
<= 4 && desired_alignment
> 4)
13840 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13841 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13842 destmem
= change_address (destmem
, SImode
, destptr
);
13843 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13844 ix86_adjust_counter (count
, 4);
13845 emit_label (label
);
13846 LABEL_NUSES (label
) = 1;
13848 gcc_assert (desired_alignment
<= 8);
13851 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13852 DESIRED_ALIGNMENT. */
13854 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13855 int align
, int desired_alignment
)
13857 if (align
<= 1 && desired_alignment
> 1)
13859 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13860 destmem
= change_address (destmem
, QImode
, destptr
);
13861 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13862 ix86_adjust_counter (count
, 1);
13863 emit_label (label
);
13864 LABEL_NUSES (label
) = 1;
13866 if (align
<= 2 && desired_alignment
> 2)
13868 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13869 destmem
= change_address (destmem
, HImode
, destptr
);
13870 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13871 ix86_adjust_counter (count
, 2);
13872 emit_label (label
);
13873 LABEL_NUSES (label
) = 1;
13875 if (align
<= 4 && desired_alignment
> 4)
13877 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13878 destmem
= change_address (destmem
, SImode
, destptr
);
13879 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13880 ix86_adjust_counter (count
, 4);
13881 emit_label (label
);
13882 LABEL_NUSES (label
) = 1;
13884 gcc_assert (desired_alignment
<= 8);
13887 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13888 static enum stringop_alg
13889 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
13890 int *dynamic_check
)
13892 const struct stringop_algs
* algs
;
13894 *dynamic_check
= -1;
13896 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
13898 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
13899 if (stringop_alg
!= no_stringop
)
13900 return stringop_alg
;
13901 /* rep; movq or rep; movl is the smallest variant. */
13902 else if (optimize_size
)
13904 if (!count
|| (count
& 3))
13905 return rep_prefix_1_byte
;
13907 return rep_prefix_4_byte
;
13909 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13911 else if (expected_size
!= -1 && expected_size
< 4)
13912 return loop_1_byte
;
13913 else if (expected_size
!= -1)
13916 enum stringop_alg alg
= libcall
;
13917 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13919 gcc_assert (algs
->size
[i
].max
);
13920 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
13922 if (algs
->size
[i
].alg
!= libcall
)
13923 alg
= algs
->size
[i
].alg
;
13924 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13925 last non-libcall inline algorithm. */
13926 if (TARGET_INLINE_ALL_STRINGOPS
)
13928 /* When the current size is best to be copied by a libcall,
13929 but we are still forced to inline, run the heuristic bellow
13930 that will pick code for medium sized blocks. */
13931 if (alg
!= libcall
)
13936 return algs
->size
[i
].alg
;
13939 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
13941 /* When asked to inline the call anyway, try to pick meaningful choice.
13942 We look for maximal size of block that is faster to copy by hand and
13943 take blocks of at most of that size guessing that average size will
13944 be roughly half of the block.
13946 If this turns out to be bad, we might simply specify the preferred
13947 choice in ix86_costs. */
13948 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13949 && algs
->unknown_size
== libcall
)
13952 enum stringop_alg alg
;
13955 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13956 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
13957 max
= algs
->size
[i
].max
;
13960 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
13961 gcc_assert (*dynamic_check
== -1);
13962 gcc_assert (alg
!= libcall
);
13963 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13964 *dynamic_check
= max
;
13967 return algs
->unknown_size
;
13970 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13971 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13973 decide_alignment (int align
,
13974 enum stringop_alg alg
,
13977 int desired_align
= 0;
13981 gcc_unreachable ();
13983 case unrolled_loop
:
13984 desired_align
= GET_MODE_SIZE (Pmode
);
13986 case rep_prefix_8_byte
:
13989 case rep_prefix_4_byte
:
13990 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13991 copying whole cacheline at once. */
13992 if (TARGET_PENTIUMPRO
)
13997 case rep_prefix_1_byte
:
13998 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13999 copying whole cacheline at once. */
14000 if (TARGET_PENTIUMPRO
)
14014 if (desired_align
< align
)
14015 desired_align
= align
;
14016 if (expected_size
!= -1 && expected_size
< 4)
14017 desired_align
= align
;
14018 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  E.g. 7 -> 8, 8 -> 16.
   For VAL <= 0 the result is 1.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14031 /* Expand string move (memcpy) operation. Use i386 string operations when
14032 profitable. expand_clrmem contains similar code. The code depends upon
14033 architecture, block size and alignment, but always has the same
14036 1) Prologue guard: Conditional that jumps up to epilogues for small
14037 blocks that can be handled by epilogue alone. This is faster but
14038 also needed for correctness, since prologue assume the block is larger
14039 than the desired alignment.
14041 Optional dynamic check for size and libcall for large
14042 blocks is emitted here too, with -minline-stringops-dynamically.
14044 2) Prologue: copy first few bytes in order to get destination aligned
14045 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14046 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14047 We emit either a jump tree on power of two sized blocks, or a byte loop.
14049 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14050 with specified algorithm.
14052 4) Epilogue: code copying tail of the block that is too small to be
14053 handled by main body (or up to size guarded by prologue guard). */
14056 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14057 rtx expected_align_exp
, rtx expected_size_exp
)
14063 rtx jump_around_label
= NULL
;
14064 HOST_WIDE_INT align
= 1;
14065 unsigned HOST_WIDE_INT count
= 0;
14066 HOST_WIDE_INT expected_size
= -1;
14067 int size_needed
= 0, epilogue_size_needed
;
14068 int desired_align
= 0;
14069 enum stringop_alg alg
;
14072 if (CONST_INT_P (align_exp
))
14073 align
= INTVAL (align_exp
);
14074 /* i386 can do misaligned access on reasonably increased cost. */
14075 if (CONST_INT_P (expected_align_exp
)
14076 && INTVAL (expected_align_exp
) > align
)
14077 align
= INTVAL (expected_align_exp
);
14078 if (CONST_INT_P (count_exp
))
14079 count
= expected_size
= INTVAL (count_exp
);
14080 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14081 expected_size
= INTVAL (expected_size_exp
);
14083 /* Step 0: Decide on preferred algorithm, desired alignment and
14084 size of chunks to be copied by main loop. */
14086 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14087 desired_align
= decide_alignment (align
, alg
, expected_size
);
14089 if (!TARGET_ALIGN_STRINGOPS
)
14090 align
= desired_align
;
14092 if (alg
== libcall
)
14094 gcc_assert (alg
!= no_stringop
);
14096 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14097 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14098 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14103 gcc_unreachable ();
14105 size_needed
= GET_MODE_SIZE (Pmode
);
14107 case unrolled_loop
:
14108 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14110 case rep_prefix_8_byte
:
14113 case rep_prefix_4_byte
:
14116 case rep_prefix_1_byte
:
14122 epilogue_size_needed
= size_needed
;
14124 /* Step 1: Prologue guard. */
14126 /* Alignment code needs count to be in register. */
14127 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14129 enum machine_mode mode
= SImode
;
14130 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14132 count_exp
= force_reg (mode
, count_exp
);
14134 gcc_assert (desired_align
>= 1 && align
>= 1);
14136 /* Ensure that alignment prologue won't copy past end of block. */
14137 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14140 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14142 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14143 Make sure it is power of 2. */
14144 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14146 label
= gen_label_rtx ();
14147 emit_cmp_and_jump_insns (count_exp
,
14148 GEN_INT (epilogue_size_needed
),
14149 LTU
, 0, GET_MODE (count_exp
), 1, label
);
14150 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14151 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14153 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14155 /* Emit code to decide on runtime whether library call or inline should be
14157 if (dynamic_check
!= -1)
14159 rtx hot_label
= gen_label_rtx ();
14160 jump_around_label
= gen_label_rtx ();
14161 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14162 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14163 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14164 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14165 emit_jump (jump_around_label
);
14166 emit_label (hot_label
);
14169 /* Step 2: Alignment prologue. */
14171 if (desired_align
> align
)
14173 /* Except for the first move in epilogue, we no longer know
14174 constant offset in aliasing info. It don't seems to worth
14175 the pain to maintain it for the first move, so throw away
14177 src
= change_address (src
, BLKmode
, srcreg
);
14178 dst
= change_address (dst
, BLKmode
, destreg
);
14179 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14182 if (label
&& size_needed
== 1)
14184 emit_label (label
);
14185 LABEL_NUSES (label
) = 1;
14189 /* Step 3: Main loop. */
14195 gcc_unreachable ();
14197 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14198 count_exp
, QImode
, 1, expected_size
);
14201 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14202 count_exp
, Pmode
, 1, expected_size
);
14204 case unrolled_loop
:
14205 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14206 registers for 4 temporaries anyway. */
14207 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14208 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14211 case rep_prefix_8_byte
:
14212 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14215 case rep_prefix_4_byte
:
14216 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14219 case rep_prefix_1_byte
:
14220 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14224 /* Adjust properly the offset of src and dest memory for aliasing. */
14225 if (CONST_INT_P (count_exp
))
14227 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14228 (count
/ size_needed
) * size_needed
);
14229 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14230 (count
/ size_needed
) * size_needed
);
14234 src
= change_address (src
, BLKmode
, srcreg
);
14235 dst
= change_address (dst
, BLKmode
, destreg
);
14238 /* Step 4: Epilogue to copy the remaining bytes. */
14242 /* When the main loop is done, COUNT_EXP might hold original count,
14243 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14244 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14245 bytes. Compensate if needed. */
14247 if (size_needed
< epilogue_size_needed
)
14250 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14251 GEN_INT (size_needed
- 1), count_exp
, 1,
14253 if (tmp
!= count_exp
)
14254 emit_move_insn (count_exp
, tmp
);
14256 emit_label (label
);
14257 LABEL_NUSES (label
) = 1;
14260 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14261 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14262 epilogue_size_needed
);
14263 if (jump_around_label
)
14264 emit_label (jump_around_label
);
14268 /* Helper function for memcpy. For QImode value 0xXY produce
14269 0xXYXYXYXY of wide specified by MODE. This is essentially
14270 a * 0x10101010, but we can do slightly better than
14271 synth_mult by unwinding the sequence by hand on CPUs with
14274 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14276 enum machine_mode valmode
= GET_MODE (val
);
14278 int nops
= mode
== DImode
? 3 : 2;
14280 gcc_assert (mode
== SImode
|| mode
== DImode
);
14281 if (val
== const0_rtx
)
14282 return copy_to_mode_reg (mode
, const0_rtx
);
14283 if (CONST_INT_P (val
))
14285 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14289 if (mode
== DImode
)
14290 v
|= (v
<< 16) << 16;
14291 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14294 if (valmode
== VOIDmode
)
14296 if (valmode
!= QImode
)
14297 val
= gen_lowpart (QImode
, val
);
14298 if (mode
== QImode
)
14300 if (!TARGET_PARTIAL_REG_STALL
)
14302 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14303 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14304 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14305 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14307 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14308 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14309 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14314 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14316 if (!TARGET_PARTIAL_REG_STALL
)
14317 if (mode
== SImode
)
14318 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14320 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14323 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14324 NULL
, 1, OPTAB_DIRECT
);
14326 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14328 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14329 NULL
, 1, OPTAB_DIRECT
);
14330 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14331 if (mode
== SImode
)
14333 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14334 NULL
, 1, OPTAB_DIRECT
);
14335 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14340 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14341 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14342 alignment from ALIGN to DESIRED_ALIGN. */
14344 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14349 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14350 promoted_val
= promote_duplicated_reg (DImode
, val
);
14351 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14352 promoted_val
= promote_duplicated_reg (SImode
, val
);
14353 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14354 promoted_val
= promote_duplicated_reg (HImode
, val
);
14356 promoted_val
= val
;
14358 return promoted_val
;
14361 /* Expand string clear operation (bzero). Use i386 string operations when
14362 profitable. See expand_movmem comment for explanation of individual
14363 steps performed. */
14365 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14366 rtx expected_align_exp
, rtx expected_size_exp
)
14371 rtx jump_around_label
= NULL
;
14372 HOST_WIDE_INT align
= 1;
14373 unsigned HOST_WIDE_INT count
= 0;
14374 HOST_WIDE_INT expected_size
= -1;
14375 int size_needed
= 0, epilogue_size_needed
;
14376 int desired_align
= 0;
14377 enum stringop_alg alg
;
14378 rtx promoted_val
= NULL
;
14379 bool force_loopy_epilogue
= false;
14382 if (CONST_INT_P (align_exp
))
14383 align
= INTVAL (align_exp
);
14384 /* i386 can do misaligned access on reasonably increased cost. */
14385 if (CONST_INT_P (expected_align_exp
)
14386 && INTVAL (expected_align_exp
) > align
)
14387 align
= INTVAL (expected_align_exp
);
14388 if (CONST_INT_P (count_exp
))
14389 count
= expected_size
= INTVAL (count_exp
);
14390 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14391 expected_size
= INTVAL (expected_size_exp
);
14393 /* Step 0: Decide on preferred algorithm, desired alignment and
14394 size of chunks to be copied by main loop. */
14396 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14397 desired_align
= decide_alignment (align
, alg
, expected_size
);
14399 if (!TARGET_ALIGN_STRINGOPS
)
14400 align
= desired_align
;
14402 if (alg
== libcall
)
14404 gcc_assert (alg
!= no_stringop
);
14406 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14407 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14412 gcc_unreachable ();
14414 size_needed
= GET_MODE_SIZE (Pmode
);
14416 case unrolled_loop
:
14417 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14419 case rep_prefix_8_byte
:
14422 case rep_prefix_4_byte
:
14425 case rep_prefix_1_byte
:
14430 epilogue_size_needed
= size_needed
;
14432 /* Step 1: Prologue guard. */
14434 /* Alignment code needs count to be in register. */
14435 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14437 enum machine_mode mode
= SImode
;
14438 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14440 count_exp
= force_reg (mode
, count_exp
);
14442 /* Do the cheap promotion to allow better CSE across the
14443 main loop and epilogue (ie one load of the big constant in the
14444 front of all code. */
14445 if (CONST_INT_P (val_exp
))
14446 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14447 desired_align
, align
);
14448 /* Ensure that alignment prologue won't copy past end of block. */
14449 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14452 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14454 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14455 Make sure it is power of 2. */
14456 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14458 /* To improve performance of small blocks, we jump around the VAL
14459 promoting mode. This mean that if the promoted VAL is not constant,
14460 we might not use it in the epilogue and have to use byte
14462 if (epilogue_size_needed
> 2 && !promoted_val
)
14463 force_loopy_epilogue
= true;
14464 label
= gen_label_rtx ();
14465 emit_cmp_and_jump_insns (count_exp
,
14466 GEN_INT (epilogue_size_needed
),
14467 LTU
, 0, GET_MODE (count_exp
), 1, label
);
14468 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14469 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14471 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14473 if (dynamic_check
!= -1)
14475 rtx hot_label
= gen_label_rtx ();
14476 jump_around_label
= gen_label_rtx ();
14477 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14478 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14479 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14480 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14481 emit_jump (jump_around_label
);
14482 emit_label (hot_label
);
14485 /* Step 2: Alignment prologue. */
14487 /* Do the expensive promotion once we branched off the small blocks. */
14489 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14490 desired_align
, align
);
14491 gcc_assert (desired_align
>= 1 && align
>= 1);
14493 if (desired_align
> align
)
14495 /* Except for the first move in epilogue, we no longer know
14496 constant offset in aliasing info. It don't seems to worth
14497 the pain to maintain it for the first move, so throw away
14499 dst
= change_address (dst
, BLKmode
, destreg
);
14500 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14503 if (label
&& size_needed
== 1)
14505 emit_label (label
);
14506 LABEL_NUSES (label
) = 1;
14510 /* Step 3: Main loop. */
14516 gcc_unreachable ();
14518 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14519 count_exp
, QImode
, 1, expected_size
);
14522 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14523 count_exp
, Pmode
, 1, expected_size
);
14525 case unrolled_loop
:
14526 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14527 count_exp
, Pmode
, 4, expected_size
);
14529 case rep_prefix_8_byte
:
14530 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14533 case rep_prefix_4_byte
:
14534 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14537 case rep_prefix_1_byte
:
14538 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14542 /* Adjust properly the offset of src and dest memory for aliasing. */
14543 if (CONST_INT_P (count_exp
))
14544 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14545 (count
/ size_needed
) * size_needed
);
14547 dst
= change_address (dst
, BLKmode
, destreg
);
14549 /* Step 4: Epilogue to copy the remaining bytes. */
14553 /* When the main loop is done, COUNT_EXP might hold original count,
14554 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14555 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14556 bytes. Compensate if needed. */
14558 if (size_needed
< desired_align
- align
)
14561 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14562 GEN_INT (size_needed
- 1), count_exp
, 1,
14564 size_needed
= desired_align
- align
+ 1;
14565 if (tmp
!= count_exp
)
14566 emit_move_insn (count_exp
, tmp
);
14568 emit_label (label
);
14569 LABEL_NUSES (label
) = 1;
14571 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14573 if (force_loopy_epilogue
)
14574 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14577 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14580 if (jump_around_label
)
14581 emit_label (jump_around_label
);
14585 /* Expand strlen. */
14587 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14589 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14591 /* The generic case of strlen expander is long. Avoid it's
14592 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14594 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14595 && !TARGET_INLINE_ALL_STRINGOPS
14597 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14600 addr
= force_reg (Pmode
, XEXP (src
, 0));
14601 scratch1
= gen_reg_rtx (Pmode
);
14603 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14606 /* Well it seems that some optimizer does not combine a call like
14607 foo(strlen(bar), strlen(bar));
14608 when the move and the subtraction is done here. It does calculate
14609 the length just once when these instructions are done inside of
14610 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14611 often used and I use one fewer register for the lifetime of
14612 output_strlen_unroll() this is better. */
14614 emit_move_insn (out
, addr
);
14616 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14618 /* strlensi_unroll_1 returns the address of the zero at the end of
14619 the string, like memchr(), so compute the length by subtracting
14620 the start address. */
14622 emit_insn (gen_subdi3 (out
, out
, addr
));
14624 emit_insn (gen_subsi3 (out
, out
, addr
));
14629 scratch2
= gen_reg_rtx (Pmode
);
14630 scratch3
= gen_reg_rtx (Pmode
);
14631 scratch4
= force_reg (Pmode
, constm1_rtx
);
14633 emit_move_insn (scratch3
, addr
);
14634 eoschar
= force_reg (QImode
, eoschar
);
14636 src
= replace_equiv_address_nv (src
, scratch3
);
14638 /* If .md starts supporting :P, this can be done in .md. */
14639 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14640 scratch4
), UNSPEC_SCAS
);
14641 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14644 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14645 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14649 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14650 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14656 /* Expand the appropriate insns for doing strlen if not just doing
14659 out = result, initialized with the start address
14660 align_rtx = alignment of the address.
14661 scratch = scratch register, initialized with the startaddress when
14662 not aligned, otherwise undefined
14664 This is just the body. It needs the initializations mentioned above and
14665 some address computing at the end. These things are done in i386.md. */
14668 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14672 rtx align_2_label
= NULL_RTX
;
14673 rtx align_3_label
= NULL_RTX
;
14674 rtx align_4_label
= gen_label_rtx ();
14675 rtx end_0_label
= gen_label_rtx ();
14677 rtx tmpreg
= gen_reg_rtx (SImode
);
14678 rtx scratch
= gen_reg_rtx (SImode
);
14682 if (CONST_INT_P (align_rtx
))
14683 align
= INTVAL (align_rtx
);
14685 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14687 /* Is there a known alignment and is it less than 4? */
14690 rtx scratch1
= gen_reg_rtx (Pmode
);
14691 emit_move_insn (scratch1
, out
);
14692 /* Is there a known alignment and is it not 2? */
14695 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14696 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14698 /* Leave just the 3 lower bits. */
14699 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14700 NULL_RTX
, 0, OPTAB_WIDEN
);
14702 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14703 Pmode
, 1, align_4_label
);
14704 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14705 Pmode
, 1, align_2_label
);
14706 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14707 Pmode
, 1, align_3_label
);
14711 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14712 check if is aligned to 4 - byte. */
14714 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14715 NULL_RTX
, 0, OPTAB_WIDEN
);
14717 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14718 Pmode
, 1, align_4_label
);
14721 mem
= change_address (src
, QImode
, out
);
14723 /* Now compare the bytes. */
14725 /* Compare the first n unaligned byte on a byte per byte basis. */
14726 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14727 QImode
, 1, end_0_label
);
14729 /* Increment the address. */
14731 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14733 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14735 /* Not needed with an alignment of 2 */
14738 emit_label (align_2_label
);
14740 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14744 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14746 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14748 emit_label (align_3_label
);
14751 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14755 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14757 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14760 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14761 align this loop. It gives only huge programs, but does not help to
14763 emit_label (align_4_label
);
14765 mem
= change_address (src
, SImode
, out
);
14766 emit_move_insn (scratch
, mem
);
14768 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14770 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14772 /* This formula yields a nonzero result iff one of the bytes is zero.
14773 This saves three branches inside loop and many cycles. */
14775 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14776 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14777 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14778 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14779 gen_int_mode (0x80808080, SImode
)));
14780 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14785 rtx reg
= gen_reg_rtx (SImode
);
14786 rtx reg2
= gen_reg_rtx (Pmode
);
14787 emit_move_insn (reg
, tmpreg
);
14788 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14790 /* If zero is not in the first two bytes, move two bytes forward. */
14791 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14792 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14793 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14794 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14795 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14798 /* Emit lea manually to avoid clobbering of flags. */
14799 emit_insn (gen_rtx_SET (SImode
, reg2
,
14800 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14802 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14803 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14804 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14805 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14812 rtx end_2_label
= gen_label_rtx ();
14813 /* Is zero in the first two bytes? */
14815 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14816 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14817 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14818 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14819 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14821 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14822 JUMP_LABEL (tmp
) = end_2_label
;
14824 /* Not in the first two. Move two bytes forward. */
14825 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14827 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14829 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14831 emit_label (end_2_label
);
14835 /* Avoid branch in fixing the byte. */
14836 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14837 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
14838 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14840 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14842 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14844 emit_label (end_0_label
);
14848 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14849 rtx callarg2 ATTRIBUTE_UNUSED
,
14850 rtx pop
, int sibcall
)
14852 rtx use
= NULL
, call
;
14854 if (pop
== const0_rtx
)
14856 gcc_assert (!TARGET_64BIT
|| !pop
);
14858 if (TARGET_MACHO
&& !TARGET_64BIT
)
14861 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14862 fnaddr
= machopic_indirect_call_target (fnaddr
);
14867 /* Static functions and indirect calls don't need the pic register. */
14868 if (! TARGET_64BIT
&& flag_pic
14869 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14870 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14871 use_reg (&use
, pic_offset_table_rtx
);
14874 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14876 rtx al
= gen_rtx_REG (QImode
, 0);
14877 emit_move_insn (al
, callarg2
);
14878 use_reg (&use
, al
);
14881 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14883 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14884 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14886 if (sibcall
&& TARGET_64BIT
14887 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
14890 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14891 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
14892 emit_move_insn (fnaddr
, addr
);
14893 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14896 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
14898 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
14901 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
14902 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
14903 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
14906 call
= emit_call_insn (call
);
14908 CALL_INSN_FUNCTION_USAGE (call
) = use
;
/* NOTE(review): extraction-damaged fragment; braces and the final
   `return f;` (original lines 14918/14920/14924-14925) were lost.  Text
   kept byte-identical.  Allocates a zero-cleared per-function
   machine_function via ggc_alloc_cleared and initializes the two fields
   shown; -1 marks use_fast_prologue_epilogue_nregs as not-yet-computed.  */
14912 /* Clear stack slot assignments remembered from previous functions.
14913 This is called from INIT_EXPANDERS once before RTL is emitted for each
14916 static struct machine_function
*
14917 ix86_init_machine_status (void)
14919 struct machine_function
*f
;
14921 f
= ggc_alloc_cleared (sizeof (struct machine_function
))
;
14922 f
->use_fast_prologue_epilogue_nregs
= -1;
14923 f
->tls_descriptor_call_expanded_p
= 0;
/* NOTE(review): extraction-damaged fragment.  Original lines assigning
   `s->mode = mode;` / `s->n = n;` (presumably 14947-14948) and the final
   `return ...` (14954) are missing — confirm against upstream i386.c.
   Text kept byte-identical.  Behavior visible in the fragments: scan the
   ix86_stack_locals cache for a slot with matching mode and slot number
   and return a copy of its RTL; otherwise GC-allocate a new
   stack_local_entry, give it a fresh stack local of GET_MODE_SIZE (mode)
   bytes, and push it on the cache list.  */
14928 /* Return a MEM corresponding to a stack slot with mode MODE.
14929 Allocate a new slot if necessary.
14931 The RTL for a function can have several slots available: N is
14932 which slot to use. */
14935 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
14937 struct stack_local_entry
*s
;
14939 gcc_assert (n
< MAX_386_STACK_LOCALS
);
/* Cache hit: reuse the previously assigned slot.  */
14941 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
14942 if (s
->mode
== mode
&& s
->n
== n
)
14943 return copy_rtx (s
->rtl
);
/* Cache miss: allocate and link a new entry.  */
14945 s
= (struct stack_local_entry
*)
14946 ggc_alloc (sizeof (struct stack_local_entry
));
14949 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
14951 s
->next
= ix86_stack_locals
;
14952 ix86_stack_locals
= s
;
/* NOTE(review): extraction-damaged fragment; one line of the ternary
   condition (original 14967, presumably `&& !TARGET_64BIT)`) and the
   enclosing braces are missing — confirm against upstream i386.c.  Text
   kept byte-identical.  Lazily builds and caches the SYMBOL_REF used to
   call the TLS resolver: "___tls_get_addr" under GNU TLS (GNU ld calling
   convention), "__tls_get_addr" otherwise.  */
14956 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14958 static GTY(()) rtx ix86_tls_symbol
;
14960 ix86_tls_get_addr (void)
14963 if (!ix86_tls_symbol
)
14965 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14966 (TARGET_ANY_GNU_TLS
14968 ? "___tls_get_addr"
14969 : "__tls_get_addr");
14972 return ix86_tls_symbol
;
/* NOTE(review): extraction-damaged fragment (braces lost); text kept
   byte-identical.  Lazily builds and caches the SYMBOL_REF for
   "_TLS_MODULE_BASE_", tagging it with the global-dynamic TLS model in
   its SYMBOL_REF_FLAGS so later code treats it as a TLS symbol.  */
14975 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14977 static GTY(()) rtx ix86_tls_module_base_symbol
;
14979 ix86_tls_module_base (void)
14982 if (!ix86_tls_module_base_symbol
)
14984 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14985 "_TLS_MODULE_BASE_");
14986 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
14987 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
14990 return ix86_tls_module_base_symbol
;
14993 /* Calculate the length of the memory address in the instruction
14994 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14997 memory_address_length (rtx addr
)
14999 struct ix86_address parts
;
15000 rtx base
, index
, disp
;
15004 if (GET_CODE (addr
) == PRE_DEC
15005 || GET_CODE (addr
) == POST_INC
15006 || GET_CODE (addr
) == PRE_MODIFY
15007 || GET_CODE (addr
) == POST_MODIFY
)
15010 ok
= ix86_decompose_address (addr
, &parts
);
15013 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15014 parts
.base
= SUBREG_REG (parts
.base
);
15015 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15016 parts
.index
= SUBREG_REG (parts
.index
);
15019 index
= parts
.index
;
15024 - esp as the base always wants an index,
15025 - ebp as the base always wants a displacement. */
15027 /* Register Indirect. */
15028 if (base
&& !index
&& !disp
)
15030 /* esp (for its index) and ebp (for its displacement) need
15031 the two-byte modrm form. */
15032 if (addr
== stack_pointer_rtx
15033 || addr
== arg_pointer_rtx
15034 || addr
== frame_pointer_rtx
15035 || addr
== hard_frame_pointer_rtx
)
15039 /* Direct Addressing. */
15040 else if (disp
&& !base
&& !index
)
15045 /* Find the length of the displacement constant. */
15048 if (base
&& satisfies_constraint_K (disp
))
15053 /* ebp always wants a displacement. */
15054 else if (base
== hard_frame_pointer_rtx
)
15057 /* An index requires the two-byte modrm form.... */
15059 /* ...like esp, which always wants an index. */
15060 || base
== stack_pointer_rtx
15061 || base
== arg_pointer_rtx
15062 || base
== frame_pointer_rtx
)
15069 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15070 is set, expect that insn have 8bit immediate alternative. */
15072 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15076 extract_insn_cached (insn
);
15077 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15078 if (CONSTANT_P (recog_data
.operand
[i
]))
15081 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15085 switch (get_attr_mode (insn
))
15096 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15101 fatal_insn ("unknown insn mode", insn
);
/* NOTE(review): extraction-damaged fragment (braces, the declaration of
   `i`, and the trailing `return 0;` are missing); text kept
   byte-identical.  For a TYPE_LEA insn the "address" is the SET_SRC of
   its pattern (unwrapping a PARALLEL's first element); otherwise the
   first MEM operand found by scanning the extracted operands backwards
   supplies the address whose encoded length is returned.  */
15107 /* Compute default value for "length_address" attribute. */
15109 ix86_attr_length_address_default (rtx insn
)
15113 if (get_attr_type (insn
) == TYPE_LEA
)
15115 rtx set
= PATTERN (insn
);
15117 if (GET_CODE (set
) == PARALLEL
)
15118 set
= XVECEXP (set
, 0, 0);
15120 gcc_assert (GET_CODE (set
) == SET
);
/* For lea the length is that of the source address expression.  */
15122 return memory_address_length (SET_SRC (set
));
15125 extract_insn_cached (insn
);
15126 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15127 if (MEM_P (recog_data
.operand
[i
]))
15129 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15135 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): extraction-damaged fragment.  The switch head (presumably
   over ix86_tune), braces, and every per-case `return N;` were lost in
   extraction — only the case labels survive.  Do not infer the issue-rate
   values from this text; confirm against upstream i386.c.  Purpose: the
   TARGET_SCHED_ISSUE_RATE hook — maximum instructions issued per cycle
   for the tuned-for processor.  */
15138 ix86_issue_rate (void)
15142 case PROCESSOR_PENTIUM
:
15146 case PROCESSOR_PENTIUMPRO
:
15147 case PROCESSOR_PENTIUM4
:
15148 case PROCESSOR_ATHLON
:
15150 case PROCESSOR_AMDFAM10
:
15151 case PROCESSOR_NOCONA
:
15152 case PROCESSOR_GENERIC32
:
15153 case PROCESSOR_GENERIC64
:
15156 case PROCESSOR_CORE2
:
15164 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15165 by DEP_INSN and nothing set by DEP_INSN. */
15168 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15172 /* Simplify the test for uninteresting insns. */
15173 if (insn_type
!= TYPE_SETCC
15174 && insn_type
!= TYPE_ICMOV
15175 && insn_type
!= TYPE_FCMOV
15176 && insn_type
!= TYPE_IBR
)
15179 if ((set
= single_set (dep_insn
)) != 0)
15181 set
= SET_DEST (set
);
15184 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15185 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15186 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15187 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15189 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15190 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15195 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15198 /* This test is true if the dependent insn reads the flags but
15199 not any other potentially set register. */
15200 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15203 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
/* NOTE(review): extraction-damaged fragment (braces, declarations of
   `addr`/`i`, an `else` arm, and early `return 0;` paths are missing);
   text kept byte-identical.  Address-generation-interlock test: for a
   TYPE_LEA insn the address is its pattern's SET_SRC (unwrapping a
   PARALLEL); otherwise the first MEM operand's address is used.  Returns
   modified_in_p (addr, dep_insn), i.e. whether DEP_INSN writes a register
   the address computation reads.  */
15209 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15210 address with operands set by DEP_INSN. */
15213 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15217 if (insn_type
== TYPE_LEA
15220 addr
= PATTERN (insn
);
15222 if (GET_CODE (addr
) == PARALLEL
)
15223 addr
= XVECEXP (addr
, 0, 0);
15225 gcc_assert (GET_CODE (addr
) == SET
);
15227 addr
= SET_SRC (addr
);
/* Non-lea: locate the (first, scanning backwards) MEM operand.  */
15232 extract_insn_cached (insn
);
15233 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15234 if (MEM_P (recog_data
.operand
[i
]))
15236 addr
= XEXP (recog_data
.operand
[i
], 0);
15243 return modified_in_p (addr
, dep_insn
);
15247 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15249 enum attr_type insn_type
, dep_insn_type
;
15250 enum attr_memory memory
;
15252 int dep_insn_code_number
;
15254 /* Anti and output dependencies have zero cost on all CPUs. */
15255 if (REG_NOTE_KIND (link
) != 0)
15258 dep_insn_code_number
= recog_memoized (dep_insn
);
15260 /* If we can't recognize the insns, we can't really do anything. */
15261 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15264 insn_type
= get_attr_type (insn
);
15265 dep_insn_type
= get_attr_type (dep_insn
);
15269 case PROCESSOR_PENTIUM
:
15270 /* Address Generation Interlock adds a cycle of latency. */
15271 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15274 /* ??? Compares pair with jump/setcc. */
15275 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15278 /* Floating point stores require value to be ready one cycle earlier. */
15279 if (insn_type
== TYPE_FMOV
15280 && get_attr_memory (insn
) == MEMORY_STORE
15281 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15285 case PROCESSOR_PENTIUMPRO
:
15286 memory
= get_attr_memory (insn
);
15288 /* INT->FP conversion is expensive. */
15289 if (get_attr_fp_int_src (dep_insn
))
15292 /* There is one cycle extra latency between an FP op and a store. */
15293 if (insn_type
== TYPE_FMOV
15294 && (set
= single_set (dep_insn
)) != NULL_RTX
15295 && (set2
= single_set (insn
)) != NULL_RTX
15296 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15297 && MEM_P (SET_DEST (set2
)))
15300 /* Show ability of reorder buffer to hide latency of load by executing
15301 in parallel with previous instruction in case
15302 previous instruction is not needed to compute the address. */
15303 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15304 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15306 /* Claim moves to take one cycle, as core can issue one load
15307 at time and the next load can start cycle later. */
15308 if (dep_insn_type
== TYPE_IMOV
15309 || dep_insn_type
== TYPE_FMOV
)
15317 memory
= get_attr_memory (insn
);
15319 /* The esp dependency is resolved before the instruction is really
15321 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15322 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15325 /* INT->FP conversion is expensive. */
15326 if (get_attr_fp_int_src (dep_insn
))
15329 /* Show ability of reorder buffer to hide latency of load by executing
15330 in parallel with previous instruction in case
15331 previous instruction is not needed to compute the address. */
15332 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15333 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15335 /* Claim moves to take one cycle, as core can issue one load
15336 at time and the next load can start cycle later. */
15337 if (dep_insn_type
== TYPE_IMOV
15338 || dep_insn_type
== TYPE_FMOV
)
15347 case PROCESSOR_ATHLON
:
15349 case PROCESSOR_AMDFAM10
:
15350 case PROCESSOR_GENERIC32
:
15351 case PROCESSOR_GENERIC64
:
15352 memory
= get_attr_memory (insn
);
15354 /* Show ability of reorder buffer to hide latency of load by executing
15355 in parallel with previous instruction in case
15356 previous instruction is not needed to compute the address. */
15357 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15358 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15360 enum attr_unit unit
= get_attr_unit (insn
);
15363 /* Because of the difference between the length of integer and
15364 floating unit pipeline preparation stages, the memory operands
15365 for floating point are cheaper.
15367 ??? For Athlon it the difference is most probably 2. */
15368 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15371 loadcost
= TARGET_ATHLON
? 2 : 0;
15373 if (cost
>= loadcost
)
/* NOTE(review): extraction-damaged fragment — the `return` values for
   each tuning case and the default return were lost in extraction; only
   the ix86_tune comparisons survive.  Confirm the actual lookahead depths
   against upstream i386.c.  Text kept byte-identical.  */
15386 /* How many alternative schedules to try. This should be as wide as the
15387 scheduling freedom in the DFA, but no wider. Making this value too
15388 large results extra work for the scheduler. */
15391 ia32_multipass_dfa_lookahead (void)
15393 if (ix86_tune
== PROCESSOR_PENTIUM
)
15396 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15397 || ix86_tune
== PROCESSOR_K6
)
15405 /* Compute the alignment given to a constant that is being placed in memory.
15406 EXP is the constant and ALIGN is the alignment that the object would
15408 The value of this function is used instead of that alignment to align
15412 ix86_constant_alignment (tree exp
, int align
)
15414 if (TREE_CODE (exp
) == REAL_CST
)
15416 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15418 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15421 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15422 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15423 return BITS_PER_WORD
;
15428 /* Compute the alignment for a static variable.
15429 TYPE is the data type, and ALIGN is the alignment that
15430 the object would ordinarily have. The value of this function is used
15431 instead of that alignment to align the object. */
15434 ix86_data_alignment (tree type
, int align
)
15436 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15438 if (AGGREGATE_TYPE_P (type
)
15439 && TYPE_SIZE (type
)
15440 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15441 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15442 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15443 && align
< max_align
)
15446 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15447 to 16byte boundary. */
15450 if (AGGREGATE_TYPE_P (type
)
15451 && TYPE_SIZE (type
)
15452 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15453 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15454 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15458 if (TREE_CODE (type
) == ARRAY_TYPE
)
15460 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15462 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15465 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15468 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15470 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15473 else if ((TREE_CODE (type
) == RECORD_TYPE
15474 || TREE_CODE (type
) == UNION_TYPE
15475 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15476 && TYPE_FIELDS (type
))
15478 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15480 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15483 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15484 || TREE_CODE (type
) == INTEGER_TYPE
)
15486 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15488 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15495 /* Compute the alignment for a local variable.
15496 TYPE is the data type, and ALIGN is the alignment that
15497 the object would ordinarily have. The value of this macro is used
15498 instead of that alignment to align the object. */
15501 ix86_local_alignment (tree type
, int align
)
15503 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15504 to 16byte boundary. */
15507 if (AGGREGATE_TYPE_P (type
)
15508 && TYPE_SIZE (type
)
15509 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15510 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15511 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15514 if (TREE_CODE (type
) == ARRAY_TYPE
)
15516 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15518 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15521 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15523 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15525 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15528 else if ((TREE_CODE (type
) == RECORD_TYPE
15529 || TREE_CODE (type
) == UNION_TYPE
15530 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15531 && TYPE_FIELDS (type
))
15533 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15535 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15538 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15539 || TREE_CODE (type
) == INTEGER_TYPE
)
15542 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15544 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15550 /* Emit RTL insns to initialize the variable parts of a trampoline.
15551 FNADDR is an RTX for the address of the function's pure code.
15552 CXT is an RTX for the static chain value for the function. */
15554 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15558 /* Compute offset from the end of the jmp to the target function. */
15559 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15560 plus_constant (tramp
, 10),
15561 NULL_RTX
, 1, OPTAB_DIRECT
);
15562 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15563 gen_int_mode (0xb9, QImode
));
15564 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15565 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15566 gen_int_mode (0xe9, QImode
));
15567 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15572 /* Try to load address using shorter movl instead of movabs.
15573 We may want to support movq for kernel mode, but kernel does not use
15574 trampolines at the moment. */
15575 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15577 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15578 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15579 gen_int_mode (0xbb41, HImode
));
15580 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15581 gen_lowpart (SImode
, fnaddr
));
15586 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15587 gen_int_mode (0xbb49, HImode
));
15588 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15592 /* Load static chain using movabs to r10. */
15593 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15594 gen_int_mode (0xba49, HImode
));
15595 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15598 /* Jump to the r11 */
15599 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15600 gen_int_mode (0xff49, HImode
));
15601 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15602 gen_int_mode (0xe3, QImode
));
15604 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15607 #ifdef ENABLE_EXECUTE_STACK
15608 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15609 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  /* SSE */
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  /* Sentinel; must remain last so it sizes the ix86_builtins table.  */
  IX86_BUILTIN_MAX
};
16086 /* Table for the ix86 builtin decls. */
16087 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16089 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16090 * if the target_flags include one of MASK. Stores the function decl
16091 * in the ix86_builtins array.
16092 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16095 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16097 tree decl
= NULL_TREE
;
16099 if (mask
& target_flags
16100 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16102 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16104 ix86_builtins
[(int) code
] = decl
;
16110 /* Like def_builtin, but also marks the function decl "const". */
16113 def_builtin_const (int mask
, const char *name
, tree type
,
16114 enum ix86_builtins code
)
16116 tree decl
= def_builtin (mask
, name
, type
, code
);
16118 TREE_READONLY (decl
) = 1;
16122 /* Bits for builtin_description.flag. */
16124 /* Set when we don't support the comparison natively, and should
16125 swap_comparison in order to support it. */
16126 #define BUILTIN_DESC_SWAP_OPERANDS 1
16128 struct builtin_description
16130 const unsigned int mask
;
16131 const enum insn_code icode
;
16132 const char *const name
;
16133 const enum ix86_builtins code
;
16134 const enum rtx_code comparison
;
16135 const unsigned int flag
;
16138 static const struct builtin_description bdesc_comi
[] =
16140 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16141 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16142 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16143 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16144 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16145 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16146 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16147 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16148 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16149 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16150 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16151 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16152 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16153 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16154 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16155 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16156 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16157 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16158 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16159 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16160 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16161 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16162 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16163 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16166 static const struct builtin_description bdesc_2arg
[] =
16169 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16170 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16171 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16172 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16173 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16174 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16175 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16176 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16178 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16179 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16180 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16181 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16182 BUILTIN_DESC_SWAP_OPERANDS
},
16183 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16184 BUILTIN_DESC_SWAP_OPERANDS
},
16185 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16186 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16187 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16188 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16189 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16190 BUILTIN_DESC_SWAP_OPERANDS
},
16191 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16192 BUILTIN_DESC_SWAP_OPERANDS
},
16193 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16194 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16195 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16196 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16197 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16198 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16199 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16200 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16201 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16202 BUILTIN_DESC_SWAP_OPERANDS
},
16203 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16204 BUILTIN_DESC_SWAP_OPERANDS
},
16205 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16207 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16208 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16209 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16210 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16212 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16213 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16214 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16215 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16217 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16218 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16219 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16220 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16221 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16224 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16225 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16226 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16227 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16228 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16229 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16230 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16231 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16233 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16234 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16235 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16236 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16237 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16238 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16239 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16240 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16242 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16243 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16244 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16246 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16247 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16248 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16249 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16251 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16252 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16254 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16255 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16256 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16257 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16258 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16259 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16261 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16262 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16263 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16264 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16266 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16267 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16268 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16269 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16270 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16271 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16274 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16275 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16276 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16278 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16279 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16280 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16282 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16283 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16284 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16285 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16286 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16287 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16289 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16290 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16291 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16292 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16293 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16294 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16296 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16297 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16298 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16299 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16301 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16302 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16305 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16306 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16307 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16308 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16309 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16310 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16311 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16312 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16314 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16315 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16316 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16317 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16318 BUILTIN_DESC_SWAP_OPERANDS
},
16319 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16320 BUILTIN_DESC_SWAP_OPERANDS
},
16321 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16322 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16323 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16324 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16325 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16326 BUILTIN_DESC_SWAP_OPERANDS
},
16327 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16328 BUILTIN_DESC_SWAP_OPERANDS
},
16329 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16330 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16331 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16332 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16333 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16334 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16335 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16336 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16337 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16339 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16340 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16341 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16342 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16344 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16345 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16346 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16347 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16349 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16350 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16351 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16354 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16355 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16356 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16357 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16358 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16359 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16360 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16361 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16363 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16364 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16365 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16366 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16367 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16368 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16369 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16370 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16372 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16373 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16375 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16376 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16377 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16378 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16380 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16381 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16383 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16384 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16385 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16386 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16387 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16388 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16390 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16391 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16392 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16393 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16395 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16396 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16397 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16398 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16399 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16400 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16401 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16402 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16404 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16405 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16406 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16408 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16409 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16411 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16412 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16414 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16415 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16416 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16418 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16419 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16420 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16422 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16423 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16425 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16427 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16428 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16429 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16430 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16433 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16434 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16435 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16436 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16437 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16438 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16441 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16442 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16443 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16444 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16445 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16446 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16447 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16448 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16449 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16450 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16451 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16452 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16453 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16454 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16455 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16456 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16457 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16458 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16459 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16460 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16461 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16462 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16463 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16464 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16467 static const struct builtin_description bdesc_1arg
[] =
16469 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16470 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16472 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16473 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16474 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16476 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16477 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16478 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16479 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16480 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16481 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16483 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16484 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16486 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16488 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16489 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16491 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16492 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16493 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16494 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16495 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16497 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16499 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16500 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16501 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16502 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16504 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16505 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16506 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16509 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16510 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16513 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16514 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16515 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16516 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16517 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16518 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
/* Register the IA-32 target builtins.  Only the call registering the
   MMX/SSE builtin tables is visible here.  NOTE(review): the function's
   return type, braces, and the guard condition (the comment below says
   this path is skipped when TARGET_MMX is zero — presumably an
   `if (TARGET_MMX)` around the call) were lost in extraction; restore
   them from the full source before relying on this fragment.  */
16522 ix86_init_builtins (void)
16525   ix86_init_mmx_sse_builtins ();
16528 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16529    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   part of the builtins.  */
16532 ix86_init_mmx_sse_builtins (void)
16534 const struct builtin_description
* d
;
16537 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16538 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16539 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16540 tree V2DI_type_node
16541 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16542 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16543 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16544 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16545 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16546 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16547 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16549 tree pchar_type_node
= build_pointer_type (char_type_node
);
16550 tree pcchar_type_node
= build_pointer_type (
16551 build_type_variant (char_type_node
, 1, 0));
16552 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16553 tree pcfloat_type_node
= build_pointer_type (
16554 build_type_variant (float_type_node
, 1, 0));
16555 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16556 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16557 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16560 tree int_ftype_v4sf_v4sf
16561 = build_function_type_list (integer_type_node
,
16562 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16563 tree v4si_ftype_v4sf_v4sf
16564 = build_function_type_list (V4SI_type_node
,
16565 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16566 /* MMX/SSE/integer conversions. */
16567 tree int_ftype_v4sf
16568 = build_function_type_list (integer_type_node
,
16569 V4SF_type_node
, NULL_TREE
);
16570 tree int64_ftype_v4sf
16571 = build_function_type_list (long_long_integer_type_node
,
16572 V4SF_type_node
, NULL_TREE
);
16573 tree int_ftype_v8qi
16574 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16575 tree v4sf_ftype_v4sf_int
16576 = build_function_type_list (V4SF_type_node
,
16577 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16578 tree v4sf_ftype_v4sf_int64
16579 = build_function_type_list (V4SF_type_node
,
16580 V4SF_type_node
, long_long_integer_type_node
,
16582 tree v4sf_ftype_v4sf_v2si
16583 = build_function_type_list (V4SF_type_node
,
16584 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16586 /* Miscellaneous. */
16587 tree v8qi_ftype_v4hi_v4hi
16588 = build_function_type_list (V8QI_type_node
,
16589 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16590 tree v4hi_ftype_v2si_v2si
16591 = build_function_type_list (V4HI_type_node
,
16592 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16593 tree v4sf_ftype_v4sf_v4sf_int
16594 = build_function_type_list (V4SF_type_node
,
16595 V4SF_type_node
, V4SF_type_node
,
16596 integer_type_node
, NULL_TREE
);
16597 tree v2si_ftype_v4hi_v4hi
16598 = build_function_type_list (V2SI_type_node
,
16599 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16600 tree v4hi_ftype_v4hi_int
16601 = build_function_type_list (V4HI_type_node
,
16602 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16603 tree v4hi_ftype_v4hi_di
16604 = build_function_type_list (V4HI_type_node
,
16605 V4HI_type_node
, long_long_unsigned_type_node
,
16607 tree v2si_ftype_v2si_di
16608 = build_function_type_list (V2SI_type_node
,
16609 V2SI_type_node
, long_long_unsigned_type_node
,
16611 tree void_ftype_void
16612 = build_function_type (void_type_node
, void_list_node
);
16613 tree void_ftype_unsigned
16614 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16615 tree void_ftype_unsigned_unsigned
16616 = build_function_type_list (void_type_node
, unsigned_type_node
,
16617 unsigned_type_node
, NULL_TREE
);
16618 tree void_ftype_pcvoid_unsigned_unsigned
16619 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16620 unsigned_type_node
, unsigned_type_node
,
16622 tree unsigned_ftype_void
16623 = build_function_type (unsigned_type_node
, void_list_node
);
16624 tree v2si_ftype_v4sf
16625 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16626 /* Loads/stores. */
16627 tree void_ftype_v8qi_v8qi_pchar
16628 = build_function_type_list (void_type_node
,
16629 V8QI_type_node
, V8QI_type_node
,
16630 pchar_type_node
, NULL_TREE
);
16631 tree v4sf_ftype_pcfloat
16632 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16633 /* @@@ the type is bogus */
16634 tree v4sf_ftype_v4sf_pv2si
16635 = build_function_type_list (V4SF_type_node
,
16636 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16637 tree void_ftype_pv2si_v4sf
16638 = build_function_type_list (void_type_node
,
16639 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16640 tree void_ftype_pfloat_v4sf
16641 = build_function_type_list (void_type_node
,
16642 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16643 tree void_ftype_pdi_di
16644 = build_function_type_list (void_type_node
,
16645 pdi_type_node
, long_long_unsigned_type_node
,
16647 tree void_ftype_pv2di_v2di
16648 = build_function_type_list (void_type_node
,
16649 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16650 /* Normal vector unops. */
16651 tree v4sf_ftype_v4sf
16652 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16653 tree v16qi_ftype_v16qi
16654 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16655 tree v8hi_ftype_v8hi
16656 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16657 tree v4si_ftype_v4si
16658 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16659 tree v8qi_ftype_v8qi
16660 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16661 tree v4hi_ftype_v4hi
16662 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16664 /* Normal vector binops. */
16665 tree v4sf_ftype_v4sf_v4sf
16666 = build_function_type_list (V4SF_type_node
,
16667 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16668 tree v8qi_ftype_v8qi_v8qi
16669 = build_function_type_list (V8QI_type_node
,
16670 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16671 tree v4hi_ftype_v4hi_v4hi
16672 = build_function_type_list (V4HI_type_node
,
16673 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16674 tree v2si_ftype_v2si_v2si
16675 = build_function_type_list (V2SI_type_node
,
16676 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16677 tree di_ftype_di_di
16678 = build_function_type_list (long_long_unsigned_type_node
,
16679 long_long_unsigned_type_node
,
16680 long_long_unsigned_type_node
, NULL_TREE
);
16682 tree di_ftype_di_di_int
16683 = build_function_type_list (long_long_unsigned_type_node
,
16684 long_long_unsigned_type_node
,
16685 long_long_unsigned_type_node
,
16686 integer_type_node
, NULL_TREE
);
16688 tree v2si_ftype_v2sf
16689 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16690 tree v2sf_ftype_v2si
16691 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16692 tree v2si_ftype_v2si
16693 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16694 tree v2sf_ftype_v2sf
16695 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16696 tree v2sf_ftype_v2sf_v2sf
16697 = build_function_type_list (V2SF_type_node
,
16698 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16699 tree v2si_ftype_v2sf_v2sf
16700 = build_function_type_list (V2SI_type_node
,
16701 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16702 tree pint_type_node
= build_pointer_type (integer_type_node
);
16703 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16704 tree pcdouble_type_node
= build_pointer_type (
16705 build_type_variant (double_type_node
, 1, 0));
16706 tree int_ftype_v2df_v2df
16707 = build_function_type_list (integer_type_node
,
16708 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16710 tree void_ftype_pcvoid
16711 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16712 tree v4sf_ftype_v4si
16713 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16714 tree v4si_ftype_v4sf
16715 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16716 tree v2df_ftype_v4si
16717 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16718 tree v4si_ftype_v2df
16719 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16720 tree v2si_ftype_v2df
16721 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16722 tree v4sf_ftype_v2df
16723 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16724 tree v2df_ftype_v2si
16725 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16726 tree v2df_ftype_v4sf
16727 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16728 tree int_ftype_v2df
16729 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16730 tree int64_ftype_v2df
16731 = build_function_type_list (long_long_integer_type_node
,
16732 V2DF_type_node
, NULL_TREE
);
16733 tree v2df_ftype_v2df_int
16734 = build_function_type_list (V2DF_type_node
,
16735 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16736 tree v2df_ftype_v2df_int64
16737 = build_function_type_list (V2DF_type_node
,
16738 V2DF_type_node
, long_long_integer_type_node
,
16740 tree v4sf_ftype_v4sf_v2df
16741 = build_function_type_list (V4SF_type_node
,
16742 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16743 tree v2df_ftype_v2df_v4sf
16744 = build_function_type_list (V2DF_type_node
,
16745 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16746 tree v2df_ftype_v2df_v2df_int
16747 = build_function_type_list (V2DF_type_node
,
16748 V2DF_type_node
, V2DF_type_node
,
16751 tree v2df_ftype_v2df_pcdouble
16752 = build_function_type_list (V2DF_type_node
,
16753 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16754 tree void_ftype_pdouble_v2df
16755 = build_function_type_list (void_type_node
,
16756 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16757 tree void_ftype_pint_int
16758 = build_function_type_list (void_type_node
,
16759 pint_type_node
, integer_type_node
, NULL_TREE
);
16760 tree void_ftype_v16qi_v16qi_pchar
16761 = build_function_type_list (void_type_node
,
16762 V16QI_type_node
, V16QI_type_node
,
16763 pchar_type_node
, NULL_TREE
);
16764 tree v2df_ftype_pcdouble
16765 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16766 tree v2df_ftype_v2df_v2df
16767 = build_function_type_list (V2DF_type_node
,
16768 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16769 tree v16qi_ftype_v16qi_v16qi
16770 = build_function_type_list (V16QI_type_node
,
16771 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16772 tree v8hi_ftype_v8hi_v8hi
16773 = build_function_type_list (V8HI_type_node
,
16774 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16775 tree v4si_ftype_v4si_v4si
16776 = build_function_type_list (V4SI_type_node
,
16777 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16778 tree v2di_ftype_v2di_v2di
16779 = build_function_type_list (V2DI_type_node
,
16780 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16781 tree v2di_ftype_v2df_v2df
16782 = build_function_type_list (V2DI_type_node
,
16783 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16784 tree v2df_ftype_v2df
16785 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16786 tree v2di_ftype_v2di_int
16787 = build_function_type_list (V2DI_type_node
,
16788 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16789 tree v2di_ftype_v2di_v2di_int
16790 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16791 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16792 tree v4si_ftype_v4si_int
16793 = build_function_type_list (V4SI_type_node
,
16794 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16795 tree v8hi_ftype_v8hi_int
16796 = build_function_type_list (V8HI_type_node
,
16797 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16798 tree v8hi_ftype_v8hi_v2di
16799 = build_function_type_list (V8HI_type_node
,
16800 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
16801 tree v4si_ftype_v4si_v2di
16802 = build_function_type_list (V4SI_type_node
,
16803 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
16804 tree v4si_ftype_v8hi_v8hi
16805 = build_function_type_list (V4SI_type_node
,
16806 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16807 tree di_ftype_v8qi_v8qi
16808 = build_function_type_list (long_long_unsigned_type_node
,
16809 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16810 tree di_ftype_v2si_v2si
16811 = build_function_type_list (long_long_unsigned_type_node
,
16812 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16813 tree v2di_ftype_v16qi_v16qi
16814 = build_function_type_list (V2DI_type_node
,
16815 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16816 tree v2di_ftype_v4si_v4si
16817 = build_function_type_list (V2DI_type_node
,
16818 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16819 tree int_ftype_v16qi
16820 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16821 tree v16qi_ftype_pcchar
16822 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16823 tree void_ftype_pchar_v16qi
16824 = build_function_type_list (void_type_node
,
16825 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16827 tree v2di_ftype_v2di_unsigned_unsigned
16828 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16829 unsigned_type_node
, unsigned_type_node
,
16831 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16832 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
16833 unsigned_type_node
, unsigned_type_node
,
16835 tree v2di_ftype_v2di_v16qi
16836 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
16840 tree float128_type
;
16843 /* The __float80 type. */
16844 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16845 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16849 /* The __float80 type. */
16850 float80_type
= make_node (REAL_TYPE
);
16851 TYPE_PRECISION (float80_type
) = 80;
16852 layout_type (float80_type
);
16853 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16858 float128_type
= make_node (REAL_TYPE
);
16859 TYPE_PRECISION (float128_type
) = 128;
16860 layout_type (float128_type
);
16861 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16864 /* Add all builtins that are more or less simple operations on two
16866 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16868 /* Use one of the operands; the target can have a different mode for
16869 mask-generating compares. */
16870 enum machine_mode mode
;
16875 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16880 type
= v16qi_ftype_v16qi_v16qi
;
16883 type
= v8hi_ftype_v8hi_v8hi
;
16886 type
= v4si_ftype_v4si_v4si
;
16889 type
= v2di_ftype_v2di_v2di
;
16892 type
= v2df_ftype_v2df_v2df
;
16895 type
= v4sf_ftype_v4sf_v4sf
;
16898 type
= v8qi_ftype_v8qi_v8qi
;
16901 type
= v4hi_ftype_v4hi_v4hi
;
16904 type
= v2si_ftype_v2si_v2si
;
16907 type
= di_ftype_di_di
;
16911 gcc_unreachable ();
16914 /* Override for comparisons. */
16915 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16916 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
16917 type
= v4si_ftype_v4sf_v4sf
;
16919 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16920 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16921 type
= v2di_ftype_v2df_v2df
;
16923 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16926 /* Add all builtins that are more or less simple operations on 1 operand. */
16927 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16929 enum machine_mode mode
;
16934 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16939 type
= v16qi_ftype_v16qi
;
16942 type
= v8hi_ftype_v8hi
;
16945 type
= v4si_ftype_v4si
;
16948 type
= v2df_ftype_v2df
;
16951 type
= v4sf_ftype_v4sf
;
16954 type
= v8qi_ftype_v8qi
;
16957 type
= v4hi_ftype_v4hi
;
16960 type
= v2si_ftype_v2si
;
16967 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16970 /* Add the remaining MMX insns with somewhat more complicated types. */
16971 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
16972 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
16973 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
16974 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
16976 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
16977 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
16978 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
16980 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
16981 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
16983 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
16984 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
16986 /* comi/ucomi insns. */
16987 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16988 if (d
->mask
== MASK_SSE2
)
16989 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
16991 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
16993 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
16994 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
16995 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
16997 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
16998 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
16999 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17000 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17001 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17002 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17003 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17004 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17005 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17006 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17007 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17009 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17011 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17012 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17014 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17015 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17016 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17017 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17019 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17020 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17021 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17022 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17024 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17026 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17028 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17029 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17030 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17031 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17032 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17033 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17035 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17037 /* Original 3DNow! */
17038 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17039 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17040 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17041 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17042 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17043 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17044 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17045 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17046 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17047 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17048 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17049 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17050 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17051 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17052 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17053 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17054 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17055 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17056 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17057 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17059 /* 3DNow! extension as used in the Athlon CPU. */
17060 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17061 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17062 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17063 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17064 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17065 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17068 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17070 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17071 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17073 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17074 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17076 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17077 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17078 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17079 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17080 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17082 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17083 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17084 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17085 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17087 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17088 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17090 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17092 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17093 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17095 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17096 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17097 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17098 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17099 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17101 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17103 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17104 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17105 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17106 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17108 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17109 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17110 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17112 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17113 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17114 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17115 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17117 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17118 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17119 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17121 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17122 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17124 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17125 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17127 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17128 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17129 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17131 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17132 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17133 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17135 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17136 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17138 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17139 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17140 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17141 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17143 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17144 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17145 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17146 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17148 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17149 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17151 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17153 /* Prescott New Instructions. */
17154 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17155 void_ftype_pcvoid_unsigned_unsigned
,
17156 IX86_BUILTIN_MONITOR
);
17157 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17158 void_ftype_unsigned_unsigned
,
17159 IX86_BUILTIN_MWAIT
);
17160 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17161 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17164 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17165 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17166 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17167 IX86_BUILTIN_PALIGNR
);
17169 /* AMDFAM10 SSE4A New built-ins */
17170 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17171 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17172 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17173 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17174 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17175 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17176 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17177 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17178 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17179 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17180 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17181 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17183 /* Access to the vec_init patterns. */
17184 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17185 integer_type_node
, NULL_TREE
);
17186 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17187 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17189 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17190 short_integer_type_node
,
17191 short_integer_type_node
,
17192 short_integer_type_node
, NULL_TREE
);
17193 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17194 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17196 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17197 char_type_node
, char_type_node
,
17198 char_type_node
, char_type_node
,
17199 char_type_node
, char_type_node
,
17200 char_type_node
, NULL_TREE
);
17201 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17202 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17204 /* Access to the vec_extract patterns. */
17205 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17206 integer_type_node
, NULL_TREE
);
17207 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17208 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17210 ftype
= build_function_type_list (long_long_integer_type_node
,
17211 V2DI_type_node
, integer_type_node
,
17213 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17214 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17216 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17217 integer_type_node
, NULL_TREE
);
17218 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17219 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17221 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17222 integer_type_node
, NULL_TREE
);
17223 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17224 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17226 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17227 integer_type_node
, NULL_TREE
);
17228 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17229 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17231 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17232 integer_type_node
, NULL_TREE
);
17233 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17234 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17236 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17237 integer_type_node
, NULL_TREE
);
17238 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17239 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17241 /* Access to the vec_set patterns. */
17242 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17244 integer_type_node
, NULL_TREE
);
17245 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17246 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17248 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17250 integer_type_node
, NULL_TREE
);
17251 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17252 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17255 /* Errors in the source file can cause expand_expr to return const0_rtx
17256 where we expect a vector. To avoid crashing, use one of the vector
17257 clear instructions. */
17259 safe_vector_operand (rtx x
, enum machine_mode mode
)
17261 if (x
== const0_rtx
)
17262 x
= CONST0_RTX (mode
);
17266 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17269 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17272 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17273 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17274 rtx op0
= expand_normal (arg0
);
17275 rtx op1
= expand_normal (arg1
);
17276 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17277 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17278 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17280 if (VECTOR_MODE_P (mode0
))
17281 op0
= safe_vector_operand (op0
, mode0
);
17282 if (VECTOR_MODE_P (mode1
))
17283 op1
= safe_vector_operand (op1
, mode1
);
17285 if (optimize
|| !target
17286 || GET_MODE (target
) != tmode
17287 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17288 target
= gen_reg_rtx (tmode
);
17290 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17292 rtx x
= gen_reg_rtx (V4SImode
);
17293 emit_insn (gen_sse2_loadd (x
, op1
));
17294 op1
= gen_lowpart (TImode
, x
);
17297 /* The insn must want input operands in the same modes as the
17299 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17300 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17302 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17303 op0
= copy_to_mode_reg (mode0
, op0
);
17304 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17305 op1
= copy_to_mode_reg (mode1
, op1
);
17307 /* ??? Using ix86_fixup_binary_operands is problematic when
17308 we've got mismatched modes. Fake it. */
17314 if (tmode
== mode0
&& tmode
== mode1
)
17316 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17320 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17322 op0
= force_reg (mode0
, op0
);
17323 op1
= force_reg (mode1
, op1
);
17324 target
= gen_reg_rtx (tmode
);
17327 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17334 /* Subroutine of ix86_expand_builtin to take care of stores. */
17337 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17340 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17341 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17342 rtx op0
= expand_normal (arg0
);
17343 rtx op1
= expand_normal (arg1
);
17344 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17345 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17347 if (VECTOR_MODE_P (mode1
))
17348 op1
= safe_vector_operand (op1
, mode1
);
17350 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17351 op1
= copy_to_mode_reg (mode1
, op1
);
17353 pat
= GEN_FCN (icode
) (op0
, op1
);
17359 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17362 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17363 rtx target
, int do_load
)
17366 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17367 rtx op0
= expand_normal (arg0
);
17368 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17369 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17371 if (optimize
|| !target
17372 || GET_MODE (target
) != tmode
17373 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17374 target
= gen_reg_rtx (tmode
);
17376 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17379 if (VECTOR_MODE_P (mode0
))
17380 op0
= safe_vector_operand (op0
, mode0
);
17382 if ((optimize
&& !register_operand (op0
, mode0
))
17383 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17384 op0
= copy_to_mode_reg (mode0
, op0
);
17387 pat
= GEN_FCN (icode
) (target
, op0
);
17394 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17395 sqrtss, rsqrtss, rcpss. */
17398 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17401 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17402 rtx op1
, op0
= expand_normal (arg0
);
17403 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17404 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17406 if (optimize
|| !target
17407 || GET_MODE (target
) != tmode
17408 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17409 target
= gen_reg_rtx (tmode
);
17411 if (VECTOR_MODE_P (mode0
))
17412 op0
= safe_vector_operand (op0
, mode0
);
17414 if ((optimize
&& !register_operand (op0
, mode0
))
17415 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17416 op0
= copy_to_mode_reg (mode0
, op0
);
17419 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17420 op1
= copy_to_mode_reg (mode0
, op1
);
17422 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17429 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17432 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17436 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17437 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17438 rtx op0
= expand_normal (arg0
);
17439 rtx op1
= expand_normal (arg1
);
17441 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17442 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17443 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17444 enum rtx_code comparison
= d
->comparison
;
17446 if (VECTOR_MODE_P (mode0
))
17447 op0
= safe_vector_operand (op0
, mode0
);
17448 if (VECTOR_MODE_P (mode1
))
17449 op1
= safe_vector_operand (op1
, mode1
);
17451 /* Swap operands if we have a comparison that isn't available in
17453 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17455 rtx tmp
= gen_reg_rtx (mode1
);
17456 emit_move_insn (tmp
, op1
);
17461 if (optimize
|| !target
17462 || GET_MODE (target
) != tmode
17463 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17464 target
= gen_reg_rtx (tmode
);
17466 if ((optimize
&& !register_operand (op0
, mode0
))
17467 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17468 op0
= copy_to_mode_reg (mode0
, op0
);
17469 if ((optimize
&& !register_operand (op1
, mode1
))
17470 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17471 op1
= copy_to_mode_reg (mode1
, op1
);
17473 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17474 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17481 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17484 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17488 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17489 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17490 rtx op0
= expand_normal (arg0
);
17491 rtx op1
= expand_normal (arg1
);
17493 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17494 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17495 enum rtx_code comparison
= d
->comparison
;
17497 if (VECTOR_MODE_P (mode0
))
17498 op0
= safe_vector_operand (op0
, mode0
);
17499 if (VECTOR_MODE_P (mode1
))
17500 op1
= safe_vector_operand (op1
, mode1
);
17502 /* Swap operands if we have a comparison that isn't available in
17504 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17511 target
= gen_reg_rtx (SImode
);
17512 emit_move_insn (target
, const0_rtx
);
17513 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17515 if ((optimize
&& !register_operand (op0
, mode0
))
17516 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17517 op0
= copy_to_mode_reg (mode0
, op0
);
17518 if ((optimize
&& !register_operand (op1
, mode1
))
17519 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17520 op1
= copy_to_mode_reg (mode1
, op1
);
17522 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17523 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17527 emit_insn (gen_rtx_SET (VOIDmode
,
17528 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17529 gen_rtx_fmt_ee (comparison
, QImode
,
17533 return SUBREG_REG (target
);
17536 /* Return the integer constant in ARG. Constrain it to be in the range
17537 of the subparts of VEC_TYPE; issue an error if not. */
17540 get_element_number (tree vec_type
, tree arg
)
17542 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17544 if (!host_integerp (arg
, 1)
17545 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17547 error ("selector must be an integer constant in the range 0..%wi", max
);
17554 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17555 ix86_expand_vector_init. We DO have language-level syntax for this, in
17556 the form of (type){ init-list }. Except that since we can't place emms
17557 instructions from inside the compiler, we can't allow the use of MMX
17558 registers unless the user explicitly asks for it. So we do *not* define
17559 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17560 we have builtins invoked by mmintrin.h that gives us license to emit
17561 these sorts of instructions. */
17564 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17566 enum machine_mode tmode
= TYPE_MODE (type
);
17567 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17568 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17569 rtvec v
= rtvec_alloc (n_elt
);
17571 gcc_assert (VECTOR_MODE_P (tmode
));
17572 gcc_assert (call_expr_nargs (exp
) == n_elt
);
17574 for (i
= 0; i
< n_elt
; ++i
)
17576 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17577 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17580 if (!target
|| !register_operand (target
, tmode
))
17581 target
= gen_reg_rtx (tmode
);
17583 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17587 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17588 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17589 had a language-level syntax for referencing vector elements. */
17592 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17594 enum machine_mode tmode
, mode0
;
17599 arg0
= CALL_EXPR_ARG (exp
, 0);
17600 arg1
= CALL_EXPR_ARG (exp
, 1);
17602 op0
= expand_normal (arg0
);
17603 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17605 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17606 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17607 gcc_assert (VECTOR_MODE_P (mode0
));
17609 op0
= force_reg (mode0
, op0
);
17611 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17612 target
= gen_reg_rtx (tmode
);
17614 ix86_expand_vector_extract (true, target
, op0
, elt
);
17619 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17620 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17621 a language-level syntax for referencing vector elements. */
17624 ix86_expand_vec_set_builtin (tree exp
)
17626 enum machine_mode tmode
, mode1
;
17627 tree arg0
, arg1
, arg2
;
17631 arg0
= CALL_EXPR_ARG (exp
, 0);
17632 arg1
= CALL_EXPR_ARG (exp
, 1);
17633 arg2
= CALL_EXPR_ARG (exp
, 2);
17635 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17636 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17637 gcc_assert (VECTOR_MODE_P (tmode
));
17639 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17640 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17641 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17643 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17644 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17646 op0
= force_reg (tmode
, op0
);
17647 op1
= force_reg (mode1
, op1
);
17649 ix86_expand_vector_set (true, op0
, op1
, elt
);
17654 /* Expand an expression EXP that calls a built-in function,
17655 with result going to TARGET if that's convenient
17656 (and in mode MODE if that's convenient).
17657 SUBTARGET may be used as the target for computing one of EXP's operands.
17658 IGNORE is nonzero if the value is to be ignored. */
17661 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17662 enum machine_mode mode ATTRIBUTE_UNUSED
,
17663 int ignore ATTRIBUTE_UNUSED
)
17665 const struct builtin_description
*d
;
17667 enum insn_code icode
;
17668 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17669 tree arg0
, arg1
, arg2
, arg3
;
17670 rtx op0
, op1
, op2
, op3
, pat
;
17671 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17672 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17676 case IX86_BUILTIN_EMMS
:
17677 emit_insn (gen_mmx_emms ());
17680 case IX86_BUILTIN_SFENCE
:
17681 emit_insn (gen_sse_sfence ());
17684 case IX86_BUILTIN_MASKMOVQ
:
17685 case IX86_BUILTIN_MASKMOVDQU
:
17686 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17687 ? CODE_FOR_mmx_maskmovq
17688 : CODE_FOR_sse2_maskmovdqu
);
17689 /* Note the arg order is different from the operand order. */
17690 arg1
= CALL_EXPR_ARG (exp
, 0);
17691 arg2
= CALL_EXPR_ARG (exp
, 1);
17692 arg0
= CALL_EXPR_ARG (exp
, 2);
17693 op0
= expand_normal (arg0
);
17694 op1
= expand_normal (arg1
);
17695 op2
= expand_normal (arg2
);
17696 mode0
= insn_data
[icode
].operand
[0].mode
;
17697 mode1
= insn_data
[icode
].operand
[1].mode
;
17698 mode2
= insn_data
[icode
].operand
[2].mode
;
17700 op0
= force_reg (Pmode
, op0
);
17701 op0
= gen_rtx_MEM (mode1
, op0
);
17703 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17704 op0
= copy_to_mode_reg (mode0
, op0
);
17705 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17706 op1
= copy_to_mode_reg (mode1
, op1
);
17707 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17708 op2
= copy_to_mode_reg (mode2
, op2
);
17709 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17715 case IX86_BUILTIN_SQRTSS
:
17716 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17717 case IX86_BUILTIN_RSQRTSS
:
17718 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17719 case IX86_BUILTIN_RCPSS
:
17720 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17722 case IX86_BUILTIN_LOADUPS
:
17723 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17725 case IX86_BUILTIN_STOREUPS
:
17726 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17728 case IX86_BUILTIN_LOADHPS
:
17729 case IX86_BUILTIN_LOADLPS
:
17730 case IX86_BUILTIN_LOADHPD
:
17731 case IX86_BUILTIN_LOADLPD
:
17732 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17733 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17734 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17735 : CODE_FOR_sse2_loadlpd
);
17736 arg0
= CALL_EXPR_ARG (exp
, 0);
17737 arg1
= CALL_EXPR_ARG (exp
, 1);
17738 op0
= expand_normal (arg0
);
17739 op1
= expand_normal (arg1
);
17740 tmode
= insn_data
[icode
].operand
[0].mode
;
17741 mode0
= insn_data
[icode
].operand
[1].mode
;
17742 mode1
= insn_data
[icode
].operand
[2].mode
;
17744 op0
= force_reg (mode0
, op0
);
17745 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17746 if (optimize
|| target
== 0
17747 || GET_MODE (target
) != tmode
17748 || !register_operand (target
, tmode
))
17749 target
= gen_reg_rtx (tmode
);
17750 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17756 case IX86_BUILTIN_STOREHPS
:
17757 case IX86_BUILTIN_STORELPS
:
17758 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17759 : CODE_FOR_sse_storelps
);
17760 arg0
= CALL_EXPR_ARG (exp
, 0);
17761 arg1
= CALL_EXPR_ARG (exp
, 1);
17762 op0
= expand_normal (arg0
);
17763 op1
= expand_normal (arg1
);
17764 mode0
= insn_data
[icode
].operand
[0].mode
;
17765 mode1
= insn_data
[icode
].operand
[1].mode
;
17767 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17768 op1
= force_reg (mode1
, op1
);
17770 pat
= GEN_FCN (icode
) (op0
, op1
);
17776 case IX86_BUILTIN_MOVNTPS
:
17777 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
17778 case IX86_BUILTIN_MOVNTQ
:
17779 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
17781 case IX86_BUILTIN_LDMXCSR
:
17782 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
17783 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17784 emit_move_insn (target
, op0
);
17785 emit_insn (gen_sse_ldmxcsr (target
));
17788 case IX86_BUILTIN_STMXCSR
:
17789 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17790 emit_insn (gen_sse_stmxcsr (target
));
17791 return copy_to_mode_reg (SImode
, target
);
17793 case IX86_BUILTIN_SHUFPS
:
17794 case IX86_BUILTIN_SHUFPD
:
17795 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17796 ? CODE_FOR_sse_shufps
17797 : CODE_FOR_sse2_shufpd
);
17798 arg0
= CALL_EXPR_ARG (exp
, 0);
17799 arg1
= CALL_EXPR_ARG (exp
, 1);
17800 arg2
= CALL_EXPR_ARG (exp
, 2);
17801 op0
= expand_normal (arg0
);
17802 op1
= expand_normal (arg1
);
17803 op2
= expand_normal (arg2
);
17804 tmode
= insn_data
[icode
].operand
[0].mode
;
17805 mode0
= insn_data
[icode
].operand
[1].mode
;
17806 mode1
= insn_data
[icode
].operand
[2].mode
;
17807 mode2
= insn_data
[icode
].operand
[3].mode
;
17809 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17810 op0
= copy_to_mode_reg (mode0
, op0
);
17811 if ((optimize
&& !register_operand (op1
, mode1
))
17812 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17813 op1
= copy_to_mode_reg (mode1
, op1
);
17814 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17816 /* @@@ better error message */
17817 error ("mask must be an immediate");
17818 return gen_reg_rtx (tmode
);
17820 if (optimize
|| target
== 0
17821 || GET_MODE (target
) != tmode
17822 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17823 target
= gen_reg_rtx (tmode
);
17824 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17830 case IX86_BUILTIN_PSHUFW
:
17831 case IX86_BUILTIN_PSHUFD
:
17832 case IX86_BUILTIN_PSHUFHW
:
17833 case IX86_BUILTIN_PSHUFLW
:
17834 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17835 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17836 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17837 : CODE_FOR_mmx_pshufw
);
17838 arg0
= CALL_EXPR_ARG (exp
, 0);
17839 arg1
= CALL_EXPR_ARG (exp
, 1);
17840 op0
= expand_normal (arg0
);
17841 op1
= expand_normal (arg1
);
17842 tmode
= insn_data
[icode
].operand
[0].mode
;
17843 mode1
= insn_data
[icode
].operand
[1].mode
;
17844 mode2
= insn_data
[icode
].operand
[2].mode
;
17846 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17847 op0
= copy_to_mode_reg (mode1
, op0
);
17848 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17850 /* @@@ better error message */
17851 error ("mask must be an immediate");
17855 || GET_MODE (target
) != tmode
17856 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17857 target
= gen_reg_rtx (tmode
);
17858 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17864 case IX86_BUILTIN_PSLLDQI128
:
17865 case IX86_BUILTIN_PSRLDQI128
:
17866 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17867 : CODE_FOR_sse2_lshrti3
);
17868 arg0
= CALL_EXPR_ARG (exp
, 0);
17869 arg1
= CALL_EXPR_ARG (exp
, 1);
17870 op0
= expand_normal (arg0
);
17871 op1
= expand_normal (arg1
);
17872 tmode
= insn_data
[icode
].operand
[0].mode
;
17873 mode1
= insn_data
[icode
].operand
[1].mode
;
17874 mode2
= insn_data
[icode
].operand
[2].mode
;
17876 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17878 op0
= copy_to_reg (op0
);
17879 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17881 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17883 error ("shift must be an immediate");
17886 target
= gen_reg_rtx (V2DImode
);
17887 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
17893 case IX86_BUILTIN_FEMMS
:
17894 emit_insn (gen_mmx_femms ());
17897 case IX86_BUILTIN_PAVGUSB
:
17898 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
17900 case IX86_BUILTIN_PF2ID
:
17901 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
17903 case IX86_BUILTIN_PFACC
:
17904 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
17906 case IX86_BUILTIN_PFADD
:
17907 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
17909 case IX86_BUILTIN_PFCMPEQ
:
17910 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
17912 case IX86_BUILTIN_PFCMPGE
:
17913 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
17915 case IX86_BUILTIN_PFCMPGT
:
17916 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
17918 case IX86_BUILTIN_PFMAX
:
17919 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
17921 case IX86_BUILTIN_PFMIN
:
17922 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
17924 case IX86_BUILTIN_PFMUL
:
17925 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
17927 case IX86_BUILTIN_PFRCP
:
17928 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
17930 case IX86_BUILTIN_PFRCPIT1
:
17931 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
17933 case IX86_BUILTIN_PFRCPIT2
:
17934 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
17936 case IX86_BUILTIN_PFRSQIT1
:
17937 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
17939 case IX86_BUILTIN_PFRSQRT
:
17940 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
17942 case IX86_BUILTIN_PFSUB
:
17943 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
17945 case IX86_BUILTIN_PFSUBR
:
17946 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
17948 case IX86_BUILTIN_PI2FD
:
17949 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
17951 case IX86_BUILTIN_PMULHRW
:
17952 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
17954 case IX86_BUILTIN_PF2IW
:
17955 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
17957 case IX86_BUILTIN_PFNACC
:
17958 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
17960 case IX86_BUILTIN_PFPNACC
:
17961 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
17963 case IX86_BUILTIN_PI2FW
:
17964 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
17966 case IX86_BUILTIN_PSWAPDSI
:
17967 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
17969 case IX86_BUILTIN_PSWAPDSF
:
17970 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
17972 case IX86_BUILTIN_SQRTSD
:
17973 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
17974 case IX86_BUILTIN_LOADUPD
:
17975 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
17976 case IX86_BUILTIN_STOREUPD
:
17977 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
17979 case IX86_BUILTIN_MFENCE
:
17980 emit_insn (gen_sse2_mfence ());
17982 case IX86_BUILTIN_LFENCE
:
17983 emit_insn (gen_sse2_lfence ());
17986 case IX86_BUILTIN_CLFLUSH
:
17987 arg0
= CALL_EXPR_ARG (exp
, 0);
17988 op0
= expand_normal (arg0
);
17989 icode
= CODE_FOR_sse2_clflush
;
17990 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
17991 op0
= copy_to_mode_reg (Pmode
, op0
);
17993 emit_insn (gen_sse2_clflush (op0
));
17996 case IX86_BUILTIN_MOVNTPD
:
17997 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
17998 case IX86_BUILTIN_MOVNTDQ
:
17999 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18000 case IX86_BUILTIN_MOVNTI
:
18001 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18003 case IX86_BUILTIN_LOADDQU
:
18004 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18005 case IX86_BUILTIN_STOREDQU
:
18006 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18008 case IX86_BUILTIN_MONITOR
:
18009 arg0
= CALL_EXPR_ARG (exp
, 0);
18010 arg1
= CALL_EXPR_ARG (exp
, 1);
18011 arg2
= CALL_EXPR_ARG (exp
, 2);
18012 op0
= expand_normal (arg0
);
18013 op1
= expand_normal (arg1
);
18014 op2
= expand_normal (arg2
);
18016 op0
= copy_to_mode_reg (Pmode
, op0
);
18018 op1
= copy_to_mode_reg (SImode
, op1
);
18020 op2
= copy_to_mode_reg (SImode
, op2
);
18022 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18024 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18027 case IX86_BUILTIN_MWAIT
:
18028 arg0
= CALL_EXPR_ARG (exp
, 0);
18029 arg1
= CALL_EXPR_ARG (exp
, 1);
18030 op0
= expand_normal (arg0
);
18031 op1
= expand_normal (arg1
);
18033 op0
= copy_to_mode_reg (SImode
, op0
);
18035 op1
= copy_to_mode_reg (SImode
, op1
);
18036 emit_insn (gen_sse3_mwait (op0
, op1
));
18039 case IX86_BUILTIN_LDDQU
:
18040 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18043 case IX86_BUILTIN_PALIGNR
:
18044 case IX86_BUILTIN_PALIGNR128
:
18045 if (fcode
== IX86_BUILTIN_PALIGNR
)
18047 icode
= CODE_FOR_ssse3_palignrdi
;
18052 icode
= CODE_FOR_ssse3_palignrti
;
18055 arg0
= CALL_EXPR_ARG (exp
, 0);
18056 arg1
= CALL_EXPR_ARG (exp
, 1);
18057 arg2
= CALL_EXPR_ARG (exp
, 2);
18058 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18059 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18060 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18061 tmode
= insn_data
[icode
].operand
[0].mode
;
18062 mode1
= insn_data
[icode
].operand
[1].mode
;
18063 mode2
= insn_data
[icode
].operand
[2].mode
;
18064 mode3
= insn_data
[icode
].operand
[3].mode
;
18066 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18068 op0
= copy_to_reg (op0
);
18069 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18071 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18073 op1
= copy_to_reg (op1
);
18074 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18076 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18078 error ("shift must be an immediate");
18081 target
= gen_reg_rtx (mode
);
18082 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18089 case IX86_BUILTIN_MOVNTSD
:
18090 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18092 case IX86_BUILTIN_MOVNTSS
:
18093 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18095 case IX86_BUILTIN_INSERTQ
:
18096 case IX86_BUILTIN_EXTRQ
:
18097 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18098 ? CODE_FOR_sse4a_extrq
18099 : CODE_FOR_sse4a_insertq
);
18100 arg0
= CALL_EXPR_ARG (exp
, 0);
18101 arg1
= CALL_EXPR_ARG (exp
, 1);
18102 op0
= expand_normal (arg0
);
18103 op1
= expand_normal (arg1
);
18104 tmode
= insn_data
[icode
].operand
[0].mode
;
18105 mode1
= insn_data
[icode
].operand
[1].mode
;
18106 mode2
= insn_data
[icode
].operand
[2].mode
;
18107 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18108 op0
= copy_to_mode_reg (mode1
, op0
);
18109 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18110 op1
= copy_to_mode_reg (mode2
, op1
);
18111 if (optimize
|| target
== 0
18112 || GET_MODE (target
) != tmode
18113 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18114 target
= gen_reg_rtx (tmode
);
18115 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18121 case IX86_BUILTIN_EXTRQI
:
18122 icode
= CODE_FOR_sse4a_extrqi
;
18123 arg0
= CALL_EXPR_ARG (exp
, 0);
18124 arg1
= CALL_EXPR_ARG (exp
, 1);
18125 arg2
= CALL_EXPR_ARG (exp
, 2);
18126 op0
= expand_normal (arg0
);
18127 op1
= expand_normal (arg1
);
18128 op2
= expand_normal (arg2
);
18129 tmode
= insn_data
[icode
].operand
[0].mode
;
18130 mode1
= insn_data
[icode
].operand
[1].mode
;
18131 mode2
= insn_data
[icode
].operand
[2].mode
;
18132 mode3
= insn_data
[icode
].operand
[3].mode
;
18133 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18134 op0
= copy_to_mode_reg (mode1
, op0
);
18135 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18137 error ("index mask must be an immediate");
18138 return gen_reg_rtx (tmode
);
18140 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18142 error ("length mask must be an immediate");
18143 return gen_reg_rtx (tmode
);
18145 if (optimize
|| target
== 0
18146 || GET_MODE (target
) != tmode
18147 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18148 target
= gen_reg_rtx (tmode
);
18149 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18155 case IX86_BUILTIN_INSERTQI
:
18156 icode
= CODE_FOR_sse4a_insertqi
;
18157 arg0
= CALL_EXPR_ARG (exp
, 0);
18158 arg1
= CALL_EXPR_ARG (exp
, 1);
18159 arg2
= CALL_EXPR_ARG (exp
, 2);
18160 arg3
= CALL_EXPR_ARG (exp
, 3);
18161 op0
= expand_normal (arg0
);
18162 op1
= expand_normal (arg1
);
18163 op2
= expand_normal (arg2
);
18164 op3
= expand_normal (arg3
);
18165 tmode
= insn_data
[icode
].operand
[0].mode
;
18166 mode1
= insn_data
[icode
].operand
[1].mode
;
18167 mode2
= insn_data
[icode
].operand
[2].mode
;
18168 mode3
= insn_data
[icode
].operand
[3].mode
;
18169 mode4
= insn_data
[icode
].operand
[4].mode
;
18171 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18172 op0
= copy_to_mode_reg (mode1
, op0
);
18174 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18175 op1
= copy_to_mode_reg (mode2
, op1
);
18177 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18179 error ("index mask must be an immediate");
18180 return gen_reg_rtx (tmode
);
18182 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18184 error ("length mask must be an immediate");
18185 return gen_reg_rtx (tmode
);
18187 if (optimize
|| target
== 0
18188 || GET_MODE (target
) != tmode
18189 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18190 target
= gen_reg_rtx (tmode
);
18191 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18197 case IX86_BUILTIN_VEC_INIT_V2SI
:
18198 case IX86_BUILTIN_VEC_INIT_V4HI
:
18199 case IX86_BUILTIN_VEC_INIT_V8QI
:
18200 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18202 case IX86_BUILTIN_VEC_EXT_V2DF
:
18203 case IX86_BUILTIN_VEC_EXT_V2DI
:
18204 case IX86_BUILTIN_VEC_EXT_V4SF
:
18205 case IX86_BUILTIN_VEC_EXT_V4SI
:
18206 case IX86_BUILTIN_VEC_EXT_V8HI
:
18207 case IX86_BUILTIN_VEC_EXT_V2SI
:
18208 case IX86_BUILTIN_VEC_EXT_V4HI
:
18209 return ix86_expand_vec_ext_builtin (exp
, target
);
18211 case IX86_BUILTIN_VEC_SET_V8HI
:
18212 case IX86_BUILTIN_VEC_SET_V4HI
:
18213 return ix86_expand_vec_set_builtin (exp
);
18219 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18220 if (d
->code
== fcode
)
18222 /* Compares are treated specially. */
18223 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18224 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18225 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18226 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18227 return ix86_expand_sse_compare (d
, exp
, target
);
18229 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18232 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18233 if (d
->code
== fcode
)
18234 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18236 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18237 if (d
->code
== fcode
)
18238 return ix86_expand_sse_comi (d
, exp
, target
);
18240 gcc_unreachable ();
/* NOTE(review): this extraction is fragmented — original source lines are
   split across rows and several structural lines (the `switch (fn)` header,
   `break;` statements, closing braces, and the trailing `return NULL_TREE;`)
   appear to be missing.  Do not edit logic here without the pristine file.  */
18243 /* Returns a function decl for a vectorized version of the builtin function
18244 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18245 if it is not available. */
18248 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18251 enum machine_mode in_mode
, out_mode
;
/* Only vector-to-vector mappings are supported; bail out otherwise.  */
18254 if (TREE_CODE (type_out
) != VECTOR_TYPE
18255 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18258 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18259 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18260 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18261 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
/* Map scalar builtins onto their SSE/SSE2 vector counterparts when the
   element mode and lane count match the hardware pattern.  */
18265 case BUILT_IN_SQRT
:
18266 if (out_mode
== DFmode
&& out_n
== 2
18267 && in_mode
== DFmode
&& in_n
== 2)
18268 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18271 case BUILT_IN_SQRTF
:
18272 if (out_mode
== SFmode
&& out_n
== 4
18273 && in_mode
== SFmode
&& in_n
== 4)
18274 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18277 case BUILT_IN_LRINTF
:
18278 if (out_mode
== SImode
&& out_n
== 4
18279 && in_mode
== SFmode
&& in_n
== 4)
18280 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
/* NOTE(review): fragmented extraction — the `switch (code)` header, the
   FLOAT_EXPR case label, mode case labels, and default/return lines are
   missing from this view.  Comments below describe only what is visible.  */
18290 /* Returns a decl of a function that implements conversion of the
18291 input vector of type TYPE, or NULL_TREE if it is not available. */
18294 ix86_builtin_conversion (enum tree_code code
, tree type
)
/* Conversions are only defined for vector types.  */
18296 if (TREE_CODE (type
) != VECTOR_TYPE
)
18302 switch (TYPE_MODE (type
))
/* int vector -> float vector (presumably the FLOAT_EXPR arm).  */
18305 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
/* float vector -> int vector with truncation.  */
18310 case FIX_TRUNC_EXPR
:
18311 switch (TYPE_MODE (type
))
18314 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
/* NOTE(review): fragmented extraction — push-insn bodies, the HImode/QImode
   case labels and the final `return result;` are missing pieces here.
   The visible structure: red-zone store below the stack pointer, otherwise
   push onto the stack and return a MEM at the new stack pointer.  */
18324 /* Store OPERAND to the memory after reload is completed. This means
18325 that we can't easily use assign_stack_local. */
18327 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
/* Only legitimate after reload; stack layout is fixed by then.  */
18331 gcc_assert (reload_completed
);
18332 if (TARGET_RED_ZONE
)
/* With a red zone we can store below the stack pointer without
   adjusting it (x86-64 ABI guarantees 128 bytes).  */
18334 result
= gen_rtx_MEM (mode
,
18335 gen_rtx_PLUS (Pmode
,
18337 GEN_INT (-RED_ZONE_SIZE
)));
18338 emit_move_insn (result
, operand
);
18340 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
/* 64-bit without red zone: push the value as DImode.  */
18346 operand
= gen_lowpart (DImode
, operand
);
18350 gen_rtx_SET (VOIDmode
,
18351 gen_rtx_MEM (DImode
,
18352 gen_rtx_PRE_DEC (DImode
,
18353 stack_pointer_rtx
)),
18357 gcc_unreachable ();
18359 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* 32-bit DImode values are pushed as two SImode halves.  */
18368 split_di (&operand
, 1, operands
, operands
+ 1);
18370 gen_rtx_SET (VOIDmode
,
18371 gen_rtx_MEM (SImode
,
18372 gen_rtx_PRE_DEC (Pmode
,
18373 stack_pointer_rtx
)),
18376 gen_rtx_SET (VOIDmode
,
18377 gen_rtx_MEM (SImode
,
18378 gen_rtx_PRE_DEC (Pmode
,
18379 stack_pointer_rtx
)),
18384 /* Store HImodes as SImodes. */
18385 operand
= gen_lowpart (SImode
, operand
);
18389 gen_rtx_SET (VOIDmode
,
18390 gen_rtx_MEM (GET_MODE (operand
),
18391 gen_rtx_PRE_DEC (SImode
,
18392 stack_pointer_rtx
)),
18396 gcc_unreachable ();
18398 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* NOTE(review): fragmented extraction — the size computation and the
   GEN_INT operand of the PLUS are missing from this view.  */
18403 /* Free operand from the memory. */
18405 ix86_free_from_memory (enum machine_mode mode
)
/* With a red zone nothing was pushed, so nothing needs popping.  */
18407 if (!TARGET_RED_ZONE
)
/* Presumably the slot size: 8 bytes for DImode/64-bit, else 4 —
   TODO confirm against the pristine source.  */
18411 if (mode
== DImode
|| TARGET_64BIT
)
18415 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18416 to pop or add instruction if registers are available. */
18417 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18418 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* NOTE(review): fragmented extraction — several `return` lines (GENERAL_REGS,
   class, NO_REGS fall-throughs) and closing braces are missing here.  */
18423 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18424 QImode must go into class Q_REGS.
18425 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18426 movdf to do mem-to-mem moves through integer regs. */
18428 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18430 enum machine_mode mode
= GET_MODE (x
);
18432 /* We're only allowed to return a subclass of CLASS. Many of the
18433 following checks fail for NO_REGS, so eliminate that early. */
18434 if (class == NO_REGS
)
18437 /* All classes can load zeros. */
18438 if (x
== CONST0_RTX (mode
))
18441 /* Force constants into memory if we are loading a (nonzero) constant into
18442 an MMX or SSE register. This is because there are no MMX/SSE instructions
18443 to load from a constant. */
18445 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18448 /* Prefer SSE regs only, if we can use them for math. */
18449 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18450 return SSE_CLASS_P (class) ? class : NO_REGS
;
18452 /* Floating-point constants need more complex checks. */
18453 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18455 /* General regs can load everything. */
18456 if (reg_class_subset_p (class, GENERAL_REGS
))
18459 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18460 zero above. We only want to wind up preferring 80387 registers if
18461 we plan on doing computation with them. */
18463 && standard_80387_constant_p (x
))
18465 /* Limit class to non-sse.  Strip the SSE component from mixed
   x87/SSE classes so the constant stays out of SSE regs.  */
18466 if (class == FLOAT_SSE_REGS
)
18468 if (class == FP_TOP_SSE_REGS
)
18470 if (class == FP_SECOND_SSE_REGS
)
18471 return FP_SECOND_REG
;
18472 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18479 /* Generally when we see PLUS here, it's the function invariant
18480 (plus soft-fp const_int). Which can only be computed into general
18482 if (GET_CODE (x
) == PLUS
)
18483 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18485 /* QImode constants are easy to load, but non-constant QImode data
18486 must go into Q_REGS. */
18487 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18489 if (reg_class_subset_p (class, Q_REGS
))
18491 if (reg_class_subset_p (Q_REGS
, class))
/* NOTE(review): fragmented extraction — some return statements inside the
   TARGET_80387 arm are missing from this view.  */
18499 /* Discourage putting floating-point values in SSE registers unless
18500 SSE math is being used, and likewise for the 387 registers. */
18502 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18504 enum machine_mode mode
= GET_MODE (x
);
18506 /* Restrict the output reload class to the register bank that we are doing
18507 math on. If we would like not to return a subset of CLASS, reject this
18508 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): this re-reads GET_MODE (x) although mode was already
   initialized above — redundant but harmless; confirm against pristine
   source before removing.  */
18509 mode
= GET_MODE (x
);
18510 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18511 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18513 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
/* Strip the SSE component from mixed x87/SSE classes.  */
18515 if (class == FP_TOP_SSE_REGS
)
18517 else if (class == FP_SECOND_SSE_REGS
)
18518 return FP_SECOND_REG
;
18520 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
/* NOTE(review): fragmented extraction — the `return true/false` lines after
   each test and the function's closing are missing from this view.  */
18526 /* If we are copying between general and FP registers, we need a memory
18527 location. The same is true for SSE and MMX registers.
18529 The macro can't work reliably when one of the CLASSES is class containing
18530 registers from multiple units (SSE, MMX, integer). We avoid this by never
18531 combining those units in single alternative in the machine description.
18532 Ensure that this constraint holds to avoid unexpected surprises.
18534 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18535 enforce these sanity checks. */
18538 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18539 enum machine_mode mode
, int strict
)
/* Sanity check: neither class may mix units (FP/SSE/MMX vs. other).  */
18541 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18542 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18543 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18544 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18545 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18546 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18548 gcc_assert (!strict
);
/* x87 <-> non-x87 always goes through memory.  */
18552 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18555 /* ??? This is a lie. We do have moves between mmx/general, and for
18556 mmx/sse2. But by saying we need secondary memory we discourage the
18557 register allocator from using the mmx registers unless needed. */
18558 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18561 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18563 /* SSE1 doesn't have any direct moves from other classes. */
18567 /* If the target says that inter-unit moves are more expensive
18568 than moving through memory, then don't generate them. */
18569 if (!TARGET_INTER_UNIT_MOVES
)
18572 /* Between SSE and general, we have moves no larger than word size. */
18573 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
/* NOTE(review): fragmented extraction — the `return` values for each test
   and the final default return are missing from this view.  */
18580 /* Return true if the registers in CLASS cannot represent the change from
18581 modes FROM to TO. */
18584 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18585 enum reg_class
class)
18590 /* x87 registers can't do subreg at all, as all values are reformatted
18591 to extended precision. */
18592 if (MAYBE_FLOAT_CLASS_P (class))
18595 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18597 /* Vector registers do not support QI or HImode loads. If we don't
18598 disallow a change to these modes, reload will assume it's ok to
18599 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18600 the vec_dupv4hi pattern. */
18601 if (GET_MODE_SIZE (from
) < 4)
18604 /* Vector registers do not support subreg with nonzero offsets, which
18605 are otherwise valid for integer registers. Since we can't see
18606 whether we have a nonzero offset from here, prohibit all
18607 nonparadoxical subregs changing size. */
18608 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
/* NOTE(review): fragmented extraction — the `cost` declaration, the
   `return cost;`/`return 20;` lines and the final default return are
   missing from this view.  */
18615 /* Return the cost of moving data from a register in class CLASS1 to
18616 one in class CLASS2.
18618 It is not required that the cost always equal 2 when FROM is the same as TO;
18619 on some machines it is expensive to move between registers if they are not
18620 general registers. */
18623 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18624 enum reg_class class2
)
18626 /* In case we require secondary memory, compute cost of the store followed
18627 by load. In order to avoid bad register allocation choices, we need
18628 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18630 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* Take the dearer of load/store for each side so the round trip is
   never underestimated relative to MEMORY_MOVE_COST.  */
18634 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18635 MEMORY_MOVE_COST (mode
, class1
, 1));
18636 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18637 MEMORY_MOVE_COST (mode
, class2
, 1));
18639 /* In case of copying from general_purpose_register we may emit multiple
18640 stores followed by single load causing memory size mismatch stall.
18641 Count this as arbitrarily high cost of 20. */
18642 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18645 /* In the case of FP/MMX moves, the registers actually overlap, and we
18646 have to switch modes in order to treat them differently. */
18647 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18648 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18654 /* Moves between SSE/MMX and integer unit are expensive. */
18655 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18656 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18657 return ix86_cost
->mmxsse_to_integer
;
18658 if (MAYBE_FLOAT_CLASS_P (class1
))
18659 return ix86_cost
->fp_move
;
18660 if (MAYBE_SSE_CLASS_P (class1
))
18661 return ix86_cost
->sse_move
;
18662 if (MAYBE_MMX_CLASS_P (class1
))
18663 return ix86_cost
->mmx_move
;
/* NOTE(review): fragmented extraction — several `return 0/1` lines and
   closing braces are missing from this view.  */
18667 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18670 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18672 /* Flags and only flags can only hold CCmode values. */
18673 if (CC_REGNO_P (regno
))
18674 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Conversely, CC/random/partial-int modes fit in no other register.  */
18675 if (GET_MODE_CLASS (mode
) == MODE_CC
18676 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18677 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18679 if (FP_REGNO_P (regno
))
18680 return VALID_FP_MODE_P (mode
);
18681 if (SSE_REGNO_P (regno
))
18683 /* We implement the move patterns for all vector modes into and
18684 out of SSE registers, even when no operation instructions
18686 return (VALID_SSE_REG_MODE (mode
)
18687 || VALID_SSE2_REG_MODE (mode
)
18688 || VALID_MMX_REG_MODE (mode
)
18689 || VALID_MMX_REG_MODE_3DNOW (mode
));
18691 if (MMX_REGNO_P (regno
))
18693 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18694 so if the register is available at all, then we can move data of
18695 the given mode into or out of it. */
18696 return (VALID_MMX_REG_MODE (mode
)
18697 || VALID_MMX_REG_MODE_3DNOW (mode
));
/* Remaining cases concern the general-purpose registers.  */
18700 if (mode
== QImode
)
18702 /* Take care for QImode values - they can be in non-QI regs,
18703 but then they do cause partial register stalls. */
18704 if (regno
< 4 || TARGET_64BIT
)
18706 if (!TARGET_PARTIAL_REG_STALL
)
/* During/after reload we accept the stall rather than fail.  */
18708 return reload_in_progress
|| reload_completed
;
18710 /* We handle both integer and floats in the general purpose registers. */
18711 else if (VALID_INT_MODE_P (mode
))
18713 else if (VALID_FP_MODE_P (mode
))
18715 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18716 on to use that value in smaller contexts, this can easily force a
18717 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18718 supporting DImode, allow it. */
18719 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
/* NOTE(review): fragmented extraction — the `switch (mode)` header and the
   case labels (presumably HImode/QImode vs. SI/DImode groups) are missing;
   only the return expressions survive.  */
18725 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18726 tieable integer mode. */
18729 ix86_tieable_integer_mode_p (enum machine_mode mode
)
/* Narrow modes tie freely unless partial-register stalls matter.  */
18738 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
/* Presumably the DImode arm: only tieable on 64-bit — TODO confirm.  */
18741 return TARGET_64BIT
;
/* NOTE(review): fragmented extraction — `return true` lines and the final
   default `return false` are missing from this view.  */
18748 /* Return true if MODE1 is accessible in a register that can hold MODE2
18749 without copying. That is, all register classes that can hold MODE2
18750 can also hold MODE1. */
18753 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
/* Identical modes trivially tie.  */
18755 if (mode1
== mode2
)
/* Two tieable integer modes tie with each other.  */
18758 if (ix86_tieable_integer_mode_p (mode1
)
18759 && ix86_tieable_integer_mode_p (mode2
))
18762 /* MODE2 being XFmode implies fp stack or general regs, which means we
18763 can tie any smaller floating point modes to it. Note that we do not
18764 tie this with TFmode. */
18765 if (mode2
== XFmode
)
18766 return mode1
== SFmode
|| mode1
== DFmode
;
18768 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18769 that we can tie it with SFmode. */
18770 if (mode2
== DFmode
)
18771 return mode1
== SFmode
;
18773 /* If MODE2 is only appropriate for an SSE register, then tie with
18774 any other mode acceptable to SSE registers. */
18775 if (GET_MODE_SIZE (mode2
) >= 8
18776 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18777 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
18779 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18780 with any other mode acceptable to MMX registers. */
18781 if (GET_MODE_SIZE (mode2
) == 8
18782 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18783 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
/* NOTE(review): fragmented extraction — the `index` declarations, the
   switch-case size labels that compute `index`, and several case labels of
   the integer-size switch are missing from this view.  */
18788 /* Return the cost of moving data of mode M between a
18789 register and memory. A value of 2 is the default; this cost is
18790 relative to those in `REGISTER_MOVE_COST'.
18792 If moving between registers and memory is more expensive than
18793 between two registers, you should define this macro to express the
18796 Model also increased moving costs of QImode registers in non
18800 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
/* x87 classes: cost table indexed by operand size (index computed in
   missing lines above the return).  */
18802 if (FLOAT_CLASS_P (class))
18819 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
18821 if (SSE_CLASS_P (class))
18824 switch (GET_MODE_SIZE (mode
))
18838 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
18840 if (MMX_CLASS_P (class))
18843 switch (GET_MODE_SIZE (mode
))
18854 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* General-purpose registers, dispatched on operand size.  */
18856 switch (GET_MODE_SIZE (mode
))
/* Byte loads into non-Q regs go through movzbl, hence the split.  */
18860 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18861 : ix86_cost
->movzbl_load
);
/* Byte stores to non-Q regs are penalized (+4).  */
18863 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18864 : ix86_cost
->int_store
[0] + 4);
18867 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18869 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18870 if (mode
== TFmode
)
/* Word-size (or larger) moves scale by the number of words.  */
18872 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18873 * (((int) GET_MODE_SIZE (mode
)
18874 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18878 /* Compute a (partial) cost for rtx X. Return true if the complete
18879 cost has been computed, and false if subexpressions should be
18880 scanned. In either case, *TOTAL contains the cost result. */
18883 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
18885 enum machine_mode mode
= GET_MODE (x
);
18893 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
18895 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
18897 else if (flag_pic
&& SYMBOLIC_CONST (x
)
18899 || (!GET_CODE (x
) != LABEL_REF
18900 && (GET_CODE (x
) != SYMBOL_REF
18901 || !SYMBOL_REF_LOCAL_P (x
)))))
18908 if (mode
== VOIDmode
)
18911 switch (standard_80387_constant_p (x
))
18916 default: /* Other constants */
18921 /* Start with (MEM (SYMBOL_REF)), since that's where
18922 it'll probably end up. Add a penalty for size. */
18923 *total
= (COSTS_N_INSNS (1)
18924 + (flag_pic
!= 0 && !TARGET_64BIT
)
18925 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
18931 /* The zero extensions is often completely free on x86_64, so make
18932 it as cheap as possible. */
18933 if (TARGET_64BIT
&& mode
== DImode
18934 && GET_MODE (XEXP (x
, 0)) == SImode
)
18936 else if (TARGET_ZERO_EXTEND_WITH_AND
)
18937 *total
= ix86_cost
->add
;
18939 *total
= ix86_cost
->movzx
;
18943 *total
= ix86_cost
->movsx
;
18947 if (CONST_INT_P (XEXP (x
, 1))
18948 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
18950 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18953 *total
= ix86_cost
->add
;
18956 if ((value
== 2 || value
== 3)
18957 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
18959 *total
= ix86_cost
->lea
;
18969 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
18971 if (CONST_INT_P (XEXP (x
, 1)))
18973 if (INTVAL (XEXP (x
, 1)) > 32)
18974 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
18976 *total
= ix86_cost
->shift_const
* 2;
18980 if (GET_CODE (XEXP (x
, 1)) == AND
)
18981 *total
= ix86_cost
->shift_var
* 2;
18983 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
18988 if (CONST_INT_P (XEXP (x
, 1)))
18989 *total
= ix86_cost
->shift_const
;
18991 *total
= ix86_cost
->shift_var
;
18996 if (FLOAT_MODE_P (mode
))
18998 *total
= ix86_cost
->fmul
;
19003 rtx op0
= XEXP (x
, 0);
19004 rtx op1
= XEXP (x
, 1);
19006 if (CONST_INT_P (XEXP (x
, 1)))
19008 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19009 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19013 /* This is arbitrary. */
19016 /* Compute costs correctly for widening multiplication. */
19017 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19018 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19019 == GET_MODE_SIZE (mode
))
19021 int is_mulwiden
= 0;
19022 enum machine_mode inner_mode
= GET_MODE (op0
);
19024 if (GET_CODE (op0
) == GET_CODE (op1
))
19025 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19026 else if (CONST_INT_P (op1
))
19028 if (GET_CODE (op0
) == SIGN_EXTEND
)
19029 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19032 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19036 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19039 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19040 + nbits
* ix86_cost
->mult_bit
19041 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19050 if (FLOAT_MODE_P (mode
))
19051 *total
= ix86_cost
->fdiv
;
19053 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19057 if (FLOAT_MODE_P (mode
))
19058 *total
= ix86_cost
->fadd
;
19059 else if (GET_MODE_CLASS (mode
) == MODE_INT
19060 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19062 if (GET_CODE (XEXP (x
, 0)) == PLUS
19063 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19064 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19065 && CONSTANT_P (XEXP (x
, 1)))
19067 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19068 if (val
== 2 || val
== 4 || val
== 8)
19070 *total
= ix86_cost
->lea
;
19071 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19072 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19074 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19078 else if (GET_CODE (XEXP (x
, 0)) == MULT
19079 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19081 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19082 if (val
== 2 || val
== 4 || val
== 8)
19084 *total
= ix86_cost
->lea
;
19085 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19086 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19090 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19092 *total
= ix86_cost
->lea
;
19093 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19094 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19095 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19102 if (FLOAT_MODE_P (mode
))
19104 *total
= ix86_cost
->fadd
;
19112 if (!TARGET_64BIT
&& mode
== DImode
)
19114 *total
= (ix86_cost
->add
* 2
19115 + (rtx_cost (XEXP (x
, 0), outer_code
)
19116 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19117 + (rtx_cost (XEXP (x
, 1), outer_code
)
19118 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19124 if (FLOAT_MODE_P (mode
))
19126 *total
= ix86_cost
->fchs
;
19132 if (!TARGET_64BIT
&& mode
== DImode
)
19133 *total
= ix86_cost
->add
* 2;
19135 *total
= ix86_cost
->add
;
19139 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19140 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19141 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19142 && XEXP (x
, 1) == const0_rtx
)
19144 /* This kind of construct is implemented using test[bwl].
19145 Treat it as if we had an AND. */
19146 *total
= (ix86_cost
->add
19147 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19148 + rtx_cost (const1_rtx
, outer_code
));
19154 if (!TARGET_SSE_MATH
19156 || (mode
== DFmode
&& !TARGET_SSE2
))
19161 if (FLOAT_MODE_P (mode
))
19162 *total
= ix86_cost
->fabs
;
19166 if (FLOAT_MODE_P (mode
))
19167 *total
= ix86_cost
->fsqrt
;
19171 if (XINT (x
, 1) == UNSPEC_TP
)
19182 static int current_machopic_label_num
;
19184 /* Given a symbol name and its associated stub, write out the
19185 definition of the stub. */
19188 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19190 unsigned int length
;
19191 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19192 int label
= ++current_machopic_label_num
;
19194 /* For 64-bit we shouldn't get here. */
19195 gcc_assert (!TARGET_64BIT
);
19197 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19198 symb
= (*targetm
.strip_name_encoding
) (symb
);
19200 length
= strlen (stub
);
19201 binder_name
= alloca (length
+ 32);
19202 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19204 length
= strlen (symb
);
19205 symbol_name
= alloca (length
+ 32);
19206 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19208 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19211 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19213 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19215 fprintf (file
, "%s:\n", stub
);
19216 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19220 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19221 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19222 fprintf (file
, "\tjmp\t*%%edx\n");
19225 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19227 fprintf (file
, "%s:\n", binder_name
);
19231 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19232 fprintf (file
, "\tpushl\t%%eax\n");
19235 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19237 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19239 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19240 fprintf (file
, "%s:\n", lazy_ptr_name
);
19241 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19242 fprintf (file
, "\t.long %s\n", binder_name
);
19246 darwin_x86_file_end (void)
19248 darwin_file_end ();
19251 #endif /* TARGET_MACHO */
19253 /* Order the registers for register allocator. */
19256 x86_order_regs_for_local_alloc (void)
19261 /* First allocate the local general purpose registers. */
19262 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19263 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19264 reg_alloc_order
[pos
++] = i
;
19266 /* Global general purpose registers. */
19267 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19268 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19269 reg_alloc_order
[pos
++] = i
;
19271 /* x87 registers come first in case we are doing FP math
19273 if (!TARGET_SSE_MATH
)
19274 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19275 reg_alloc_order
[pos
++] = i
;
19277 /* SSE registers. */
19278 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19279 reg_alloc_order
[pos
++] = i
;
19280 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19281 reg_alloc_order
[pos
++] = i
;
19283 /* x87 registers. */
19284 if (TARGET_SSE_MATH
)
19285 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19286 reg_alloc_order
[pos
++] = i
;
19288 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19289 reg_alloc_order
[pos
++] = i
;
19291 /* Initialize the rest of array as we do not allocate some registers
19293 while (pos
< FIRST_PSEUDO_REGISTER
)
19294 reg_alloc_order
[pos
++] = 0;
19297 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19298 struct attribute_spec.handler. */
19300 ix86_handle_struct_attribute (tree
*node
, tree name
,
19301 tree args ATTRIBUTE_UNUSED
,
19302 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19305 if (DECL_P (*node
))
19307 if (TREE_CODE (*node
) == TYPE_DECL
)
19308 type
= &TREE_TYPE (*node
);
19313 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19314 || TREE_CODE (*type
) == UNION_TYPE
)))
19316 warning (OPT_Wattributes
, "%qs attribute ignored",
19317 IDENTIFIER_POINTER (name
));
19318 *no_add_attrs
= true;
19321 else if ((is_attribute_p ("ms_struct", name
)
19322 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19323 || ((is_attribute_p ("gcc_struct", name
)
19324 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19326 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19327 IDENTIFIER_POINTER (name
));
19328 *no_add_attrs
= true;
19335 ix86_ms_bitfield_layout_p (tree record_type
)
19337 return (TARGET_MS_BITFIELD_LAYOUT
&&
19338 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19339 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19342 /* Returns an expression indicating where the this parameter is
19343 located on entry to the FUNCTION. */
19346 x86_this_parameter (tree function
)
19348 tree type
= TREE_TYPE (function
);
19352 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19353 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19356 if (ix86_function_regparm (type
, function
) > 0)
19360 parm
= TYPE_ARG_TYPES (type
);
19361 /* Figure out whether or not the function has a variable number of
19363 for (; parm
; parm
= TREE_CHAIN (parm
))
19364 if (TREE_VALUE (parm
) == void_type_node
)
19366 /* If not, the this parameter is in the first argument. */
19370 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19372 return gen_rtx_REG (SImode
, regno
);
19376 if (aggregate_value_p (TREE_TYPE (type
), type
))
19377 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19379 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19382 /* Determine whether x86_output_mi_thunk can succeed. */
19385 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19386 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19387 HOST_WIDE_INT vcall_offset
, tree function
)
19389 /* 64-bit can handle anything. */
19393 /* For 32-bit, everything's fine if we have one free register. */
19394 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19397 /* Need a free register for vcall_offset. */
19401 /* Need a free register for GOT references. */
19402 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19405 /* Otherwise ok. */
19409 /* Output the assembler code for a thunk function. THUNK_DECL is the
19410 declaration for the thunk function itself, FUNCTION is the decl for
19411 the target function. DELTA is an immediate constant offset to be
19412 added to THIS. If VCALL_OFFSET is nonzero, the word at
19413 *(*this + vcall_offset) should be added to THIS. */
19416 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19417 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19418 HOST_WIDE_INT vcall_offset
, tree function
)
19421 rtx
this = x86_this_parameter (function
);
19424 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19425 pull it in now and let DELTA benefit. */
19428 else if (vcall_offset
)
19430 /* Put the this parameter into %eax. */
19432 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19433 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19436 this_reg
= NULL_RTX
;
19438 /* Adjust the this parameter by a fixed constant. */
19441 xops
[0] = GEN_INT (delta
);
19442 xops
[1] = this_reg
? this_reg
: this;
19445 if (!x86_64_general_operand (xops
[0], DImode
))
19447 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19449 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19453 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19456 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19459 /* Adjust the this parameter by a value stored in the vtable. */
19463 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19466 int tmp_regno
= 2 /* ECX */;
19467 if (lookup_attribute ("fastcall",
19468 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19469 tmp_regno
= 0 /* EAX */;
19470 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19473 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19476 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19478 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19480 /* Adjust the this parameter. */
19481 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19482 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19484 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19485 xops
[0] = GEN_INT (vcall_offset
);
19487 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19488 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19490 xops
[1] = this_reg
;
19492 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19494 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19497 /* If necessary, drop THIS back to its stack slot. */
19498 if (this_reg
&& this_reg
!= this)
19500 xops
[0] = this_reg
;
19502 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19505 xops
[0] = XEXP (DECL_RTL (function
), 0);
19508 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19509 output_asm_insn ("jmp\t%P0", xops
);
19512 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19513 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19514 tmp
= gen_rtx_MEM (QImode
, tmp
);
19516 output_asm_insn ("jmp\t%A0", xops
);
19521 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19522 output_asm_insn ("jmp\t%P0", xops
);
19527 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19528 tmp
= (gen_rtx_SYMBOL_REF
19530 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19531 tmp
= gen_rtx_MEM (QImode
, tmp
);
19533 output_asm_insn ("jmp\t%0", xops
);
19536 #endif /* TARGET_MACHO */
19538 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19539 output_set_got (tmp
, NULL_RTX
);
19542 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19543 output_asm_insn ("jmp\t{*}%1", xops
);
19549 x86_file_start (void)
19551 default_file_start ();
19553 darwin_file_start ();
19555 if (X86_FILE_START_VERSION_DIRECTIVE
)
19556 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19557 if (X86_FILE_START_FLTUSED
)
19558 fputs ("\t.global\t__fltused\n", asm_out_file
);
19559 if (ix86_asm_dialect
== ASM_INTEL
)
19560 fputs ("\t.intel_syntax\n", asm_out_file
);
19564 x86_field_alignment (tree field
, int computed
)
19566 enum machine_mode mode
;
19567 tree type
= TREE_TYPE (field
);
19569 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19571 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19572 ? get_inner_array_type (type
) : type
);
19573 if (mode
== DFmode
|| mode
== DCmode
19574 || GET_MODE_CLASS (mode
) == MODE_INT
19575 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19576 return MIN (32, computed
);
19580 /* Output assembler code to FILE to increment profiler label # LABELNO
19581 for profiling a function entry. */
19583 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19588 #ifndef NO_PROFILE_COUNTERS
19589 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19591 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19595 #ifndef NO_PROFILE_COUNTERS
19596 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19598 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19602 #ifndef NO_PROFILE_COUNTERS
19603 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19604 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19606 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19610 #ifndef NO_PROFILE_COUNTERS
19611 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19612 PROFILE_COUNT_REGISTER
);
19614 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19618 /* We don't have exact information about the insn sizes, but we may assume
19619 quite safely that we are informed about all 1 byte insns and memory
19620 address sizes. This is enough to eliminate unnecessary padding in
19624 min_insn_size (rtx insn
)
19628 if (!INSN_P (insn
) || !active_insn_p (insn
))
19631 /* Discard alignments we've emit and jump instructions. */
19632 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19633 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19636 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19637 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19640 /* Important case - calls are always 5 bytes.
19641 It is common to have many calls in the row. */
19643 && symbolic_reference_mentioned_p (PATTERN (insn
))
19644 && !SIBLING_CALL_P (insn
))
19646 if (get_attr_length (insn
) <= 1)
19649 /* For normal instructions we may rely on the sizes of addresses
19650 and the presence of symbol to require 4 bytes of encoding.
19651 This is not the case for jumps where references are PC relative. */
19652 if (!JUMP_P (insn
))
19654 l
= get_attr_length_address (insn
);
19655 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19664 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19668 ix86_avoid_jump_misspredicts (void)
19670 rtx insn
, start
= get_insns ();
19671 int nbytes
= 0, njumps
= 0;
19674 /* Look for all minimal intervals of instructions containing 4 jumps.
19675 The intervals are bounded by START and INSN. NBYTES is the total
19676 size of instructions in the interval including INSN and not including
19677 START. When the NBYTES is smaller than 16 bytes, it is possible
19678 that the end of START and INSN ends up in the same 16byte page.
19680 The smallest offset in the page INSN can start is the case where START
19681 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19682 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19684 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19687 nbytes
+= min_insn_size (insn
);
19689 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19690 INSN_UID (insn
), min_insn_size (insn
));
19692 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19693 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19701 start
= NEXT_INSN (start
);
19702 if ((JUMP_P (start
)
19703 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19704 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19706 njumps
--, isjump
= 1;
19709 nbytes
-= min_insn_size (start
);
19711 gcc_assert (njumps
>= 0);
19713 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19714 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19716 if (njumps
== 3 && isjump
&& nbytes
< 16)
19718 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19721 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19722 INSN_UID (insn
), padsize
);
19723 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19728 /* AMD Athlon works faster
19729 when RET is not destination of conditional jump or directly preceded
19730 by other jump instruction. We avoid the penalty by inserting NOP just
19731 before the RET instructions in such cases. */
19733 ix86_pad_returns (void)
19738 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19740 basic_block bb
= e
->src
;
19741 rtx ret
= BB_END (bb
);
19743 bool replace
= false;
19745 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19746 || !maybe_hot_bb_p (bb
))
19748 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19749 if (active_insn_p (prev
) || LABEL_P (prev
))
19751 if (prev
&& LABEL_P (prev
))
19756 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19757 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19758 && !(e
->flags
& EDGE_FALLTHRU
))
19763 prev
= prev_active_insn (ret
);
19765 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19768 /* Empty functions get branch mispredict even when the jump destination
19769 is not visible to us. */
19770 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19775 emit_insn_before (gen_return_internal_long (), ret
);
19781 /* Implement machine specific optimizations. We implement padding of returns
19782 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19786 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19787 ix86_pad_returns ();
19788 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19789 ix86_avoid_jump_misspredicts ();
19792 /* Return nonzero when QImode register that must be represented via REX prefix
19795 x86_extended_QIreg_mentioned_p (rtx insn
)
19798 extract_insn_cached (insn
);
19799 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19800 if (REG_P (recog_data
.operand
[i
])
19801 && REGNO (recog_data
.operand
[i
]) >= 4)
19806 /* Return nonzero when P points to register encoded via REX prefix.
19807 Called via for_each_rtx. */
19809 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19811 unsigned int regno
;
19814 regno
= REGNO (*p
);
19815 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19818 /* Return true when INSN mentions register that must be encoded using REX
19821 x86_extended_reg_mentioned_p (rtx insn
)
19823 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19826 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19827 optabs would emit if we didn't have TFmode patterns. */
19830 x86_emit_floatuns (rtx operands
[2])
19832 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19833 enum machine_mode mode
, inmode
;
19835 inmode
= GET_MODE (operands
[1]);
19836 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19839 in
= force_reg (inmode
, operands
[1]);
19840 mode
= GET_MODE (out
);
19841 neglab
= gen_label_rtx ();
19842 donelab
= gen_label_rtx ();
19843 f0
= gen_reg_rtx (mode
);
19845 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
19847 expand_float (out
, in
, 0);
19849 emit_jump_insn (gen_jump (donelab
));
19852 emit_label (neglab
);
19854 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
19856 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
19858 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19860 expand_float (f0
, i0
, 0);
19862 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19864 emit_label (donelab
);
19867 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19868 with all elements equal to VAR. Return true if successful. */
19871 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19872 rtx target
, rtx val
)
19874 enum machine_mode smode
, wsmode
, wvmode
;
19889 val
= force_reg (GET_MODE_INNER (mode
), val
);
19890 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19891 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19897 if (TARGET_SSE
|| TARGET_3DNOW_A
)
19899 val
= gen_lowpart (SImode
, val
);
19900 x
= gen_rtx_TRUNCATE (HImode
, val
);
19901 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
19902 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19924 /* Extend HImode to SImode using a paradoxical SUBREG. */
19925 tmp1
= gen_reg_rtx (SImode
);
19926 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19927 /* Insert the SImode value as low element of V4SImode vector. */
19928 tmp2
= gen_reg_rtx (V4SImode
);
19929 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19930 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19931 CONST0_RTX (V4SImode
),
19933 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19934 /* Cast the V4SImode vector back to a V8HImode vector. */
19935 tmp1
= gen_reg_rtx (V8HImode
);
19936 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
19937 /* Duplicate the low short through the whole low SImode word. */
19938 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
19939 /* Cast the V8HImode vector back to a V4SImode vector. */
19940 tmp2
= gen_reg_rtx (V4SImode
);
19941 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19942 /* Replicate the low element of the V4SImode vector. */
19943 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19944 /* Cast the V2SImode back to V8HImode, and store in target. */
19945 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
19956 /* Extend QImode to SImode using a paradoxical SUBREG. */
19957 tmp1
= gen_reg_rtx (SImode
);
19958 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19959 /* Insert the SImode value as low element of V4SImode vector. */
19960 tmp2
= gen_reg_rtx (V4SImode
);
19961 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19962 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19963 CONST0_RTX (V4SImode
),
19965 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19966 /* Cast the V4SImode vector back to a V16QImode vector. */
19967 tmp1
= gen_reg_rtx (V16QImode
);
19968 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
19969 /* Duplicate the low byte through the whole low SImode word. */
19970 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19971 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19972 /* Cast the V16QImode vector back to a V4SImode vector. */
19973 tmp2
= gen_reg_rtx (V4SImode
);
19974 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19975 /* Replicate the low element of the V4SImode vector. */
19976 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19977 /* Cast the V2SImode back to V16QImode, and store in target. */
19978 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
19986 /* Replicate the value once into the next wider mode and recurse. */
19987 val
= convert_modes (wsmode
, smode
, val
, true);
19988 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
19989 GEN_INT (GET_MODE_BITSIZE (smode
)),
19990 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19991 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
19993 x
= gen_reg_rtx (wvmode
);
19994 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
19995 gcc_unreachable ();
19996 emit_move_insn (target
, gen_lowpart (mode
, x
));
20004 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20005 whose ONE_VAR element is VAR, and other elements are zero. Return true
20009 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20010 rtx target
, rtx var
, int one_var
)
20012 enum machine_mode vsimode
;
20028 var
= force_reg (GET_MODE_INNER (mode
), var
);
20029 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20030 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20035 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20036 new_target
= gen_reg_rtx (mode
);
20038 new_target
= target
;
20039 var
= force_reg (GET_MODE_INNER (mode
), var
);
20040 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20041 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20042 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20045 /* We need to shuffle the value to the correct position, so
20046 create a new pseudo to store the intermediate result. */
20048 /* With SSE2, we can use the integer shuffle insns. */
20049 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20051 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20053 GEN_INT (one_var
== 1 ? 0 : 1),
20054 GEN_INT (one_var
== 2 ? 0 : 1),
20055 GEN_INT (one_var
== 3 ? 0 : 1)));
20056 if (target
!= new_target
)
20057 emit_move_insn (target
, new_target
);
20061 /* Otherwise convert the intermediate result to V4SFmode and
20062 use the SSE1 shuffle instructions. */
20063 if (mode
!= V4SFmode
)
20065 tmp
= gen_reg_rtx (V4SFmode
);
20066 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20071 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20073 GEN_INT (one_var
== 1 ? 0 : 1),
20074 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20075 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20077 if (mode
!= V4SFmode
)
20078 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20079 else if (tmp
!= target
)
20080 emit_move_insn (target
, tmp
);
20082 else if (target
!= new_target
)
20083 emit_move_insn (target
, new_target
);
20088 vsimode
= V4SImode
;
20094 vsimode
= V2SImode
;
20100 /* Zero extend the variable element to SImode and recurse. */
20101 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20103 x
= gen_reg_rtx (vsimode
);
20104 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20106 gcc_unreachable ();
20108 emit_move_insn (target
, gen_lowpart (mode
, x
));
20116 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20117 consisting of the values in VALS. It is known that all elements
20118 except ONE_VAR are constants. Return true if successful. */
20121 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20122 rtx target
, rtx vals
, int one_var
)
20124 rtx var
= XVECEXP (vals
, 0, one_var
);
20125 enum machine_mode wmode
;
20128 const_vec
= copy_rtx (vals
);
20129 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20130 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20138 /* For the two element vectors, it's just as easy to use
20139 the general case. */
20155 /* There's no way to set one QImode entry easily. Combine
20156 the variable value with its adjacent constant value, and
20157 promote to an HImode set. */
20158 x
= XVECEXP (vals
, 0, one_var
^ 1);
20161 var
= convert_modes (HImode
, QImode
, var
, true);
20162 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20163 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20164 x
= GEN_INT (INTVAL (x
) & 0xff);
20168 var
= convert_modes (HImode
, QImode
, var
, true);
20169 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20171 if (x
!= const0_rtx
)
20172 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20173 1, OPTAB_LIB_WIDEN
);
20175 x
= gen_reg_rtx (wmode
);
20176 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20177 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20179 emit_move_insn (target
, gen_lowpart (mode
, x
));
20186 emit_move_insn (target
, const_vec
);
20187 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
/* NOTE(review): this chunk is a line-mangled extraction (statements split
   across lines; the leading 5-digit numbers are original source line
   numbers, and gaps in them show that several lines — switch heads, case
   labels, braces — were dropped by the extraction).  Code text below is
   preserved verbatim; only comments are added.
   Purpose (from visible code): build a vector TARGET from fully-variable
   VALS either via VEC_CONCAT of two register halves, or by packing the
   elements into word_mode registers with shift/ior and moving those into
   TARGET.  */
20191 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20192 all values variable, and none identical. */
20195 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20196 rtx target
, rtx vals
)
20198 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20199 rtx op0
= NULL
, op1
= NULL
;
20200 bool use_vec_concat
= false;
/* Without MMX/SSE there is no concat path; dispatch on mode follows
   (switch/case lines missing from this extraction).  */
20206 if (!mmx_ok
&& !TARGET_SSE
)
20212 /* For the two element vectors, we always implement VEC_CONCAT. */
20213 op0
= XVECEXP (vals
, 0, 0);
20214 op1
= XVECEXP (vals
, 0, 1);
20215 use_vec_concat
= true;
20219 half_mode
= V2SFmode
;
20222 half_mode
= V2SImode
;
20228 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20229 Recurse to load the two halves. */
20231 op0
= gen_reg_rtx (half_mode
);
20232 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20233 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20235 op1
= gen_reg_rtx (half_mode
);
20236 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20237 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20239 use_vec_concat
= true;
20250 gcc_unreachable ();
/* Concat path: force both halves into registers and emit one
   VEC_CONCAT set.  */
20253 if (use_vec_concat
)
20255 if (!register_operand (op0
, half_mode
))
20256 op0
= force_reg (half_mode
, op0
);
20257 if (!register_operand (op1
, half_mode
))
20258 op1
= force_reg (half_mode
, op1
);
20260 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20261 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
/* Fallback path: pack elements into up to four word_mode values.
   Elements within a word are accumulated MSB-first (note the
   (i+1)*n_elt_per_word - j - 1 index) via shift-left + ior.  */
20265 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20266 enum machine_mode inner_mode
;
20267 rtx words
[4], shift
;
20269 inner_mode
= GET_MODE_INNER (mode
);
20270 n_elts
= GET_MODE_NUNITS (mode
);
20271 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20272 n_elt_per_word
= n_elts
/ n_words
;
20273 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20275 for (i
= 0; i
< n_words
; ++i
)
20277 rtx word
= NULL_RTX
;
20279 for (j
= 0; j
< n_elt_per_word
; ++j
)
20281 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20282 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20288 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20289 word
, 1, OPTAB_LIB_WIDEN
);
20290 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20291 word
, 1, OPTAB_LIB_WIDEN
);
/* Move the assembled words into TARGET; strategy depends on how many
   words the vector spans (1, 2, or 4).  */
20299 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20300 else if (n_words
== 2)
20302 rtx tmp
= gen_reg_rtx (mode
);
20303 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20304 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20305 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20306 emit_move_insn (target
, tmp
);
20308 else if (n_words
== 4)
20310 rtx tmp
= gen_reg_rtx (V4SImode
);
20311 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20312 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20313 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20316 gcc_unreachable ();
/* NOTE(review): line-mangled extraction; code preserved verbatim, several
   original lines are missing (per the embedded line numbers).  This is the
   top-level dispatcher: it scans VALS, classifies the initializer
   (all-constant / all-same / one-variable), and tries progressively more
   general strategies, ending with ix86_expand_vector_init_general.  */
20320 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20321 instructions unless MMX_OK is true. */
20324 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20326 enum machine_mode mode
= GET_MODE (target
);
20327 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20328 int n_elts
= GET_MODE_NUNITS (mode
);
20329 int n_var
= 0, one_var
= -1;
20330 bool all_same
= true, all_const_zero
= true;
/* Classification scan: count non-constant elements (remembering the
   last one in one_var) and check for all-zero / all-identical.  */
20334 for (i
= 0; i
< n_elts
; ++i
)
20336 x
= XVECEXP (vals
, 0, i
);
20337 if (!CONSTANT_P (x
))
20338 n_var
++, one_var
= i
;
20339 else if (x
!= CONST0_RTX (inner_mode
))
20340 all_const_zero
= false;
20341 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20345 /* Constants are best loaded from the constant pool. */
20348 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20352 /* If all values are identical, broadcast the value. */
20354 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20355 XVECEXP (vals
, 0, 0)))
20358 /* Values where only one field is non-constant are best loaded from
20359 the pool and overwritten via move later. */
20363 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20364 XVECEXP (vals
, 0, one_var
),
20368 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
/* Most general fallback: all values variable and distinct.  */
20372 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, the
   switch head and case labels for the per-mode dispatch are missing.
   Purpose (from visible code): store scalar VAL into element ELT of
   vector TARGET, choosing among VEC_CONCAT (2-element vectors),
   shufps/pshufd shuffle sequences (V4SF/V4SI), vec_merge, or a
   stack-temporary memory fallback.  */
20376 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20378 enum machine_mode mode
= GET_MODE (target
);
20379 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20380 bool use_vec_merge
= false;
/* 2-element case: extract the untouched element, then concat it with
   VAL in the correct order (elt == 0 vs elt == 1).  */
20389 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20390 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20392 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20394 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20395 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20405 /* For the two element vectors, we implement a VEC_CONCAT with
20406 the extraction of the other element. */
20408 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20409 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20412 op0
= val
, op1
= tmp
;
20414 op0
= tmp
, op1
= val
;
20416 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20417 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20425 use_vec_merge
= true;
/* V4SF shuffle sequences: insert into lane 0 of a copy, then shufps
   the lanes back into place (the existing A B C D comments track the
   lane contents).  */
20429 /* tmp = target = A B C D */
20430 tmp
= copy_to_reg (target
);
20431 /* target = A A B B */
20432 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20433 /* target = X A B B */
20434 ix86_expand_vector_set (false, target
, val
, 0);
20435 /* target = A X C D */
20436 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20437 GEN_INT (1), GEN_INT (0),
20438 GEN_INT (2+4), GEN_INT (3+4)));
20442 /* tmp = target = A B C D */
20443 tmp
= copy_to_reg (target
);
20444 /* tmp = X B C D */
20445 ix86_expand_vector_set (false, tmp
, val
, 0);
20446 /* target = A B X D */
20447 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20448 GEN_INT (0), GEN_INT (1),
20449 GEN_INT (0+4), GEN_INT (3+4)));
20453 /* tmp = target = A B C D */
20454 tmp
= copy_to_reg (target
);
20455 /* tmp = X B C D */
20456 ix86_expand_vector_set (false, tmp
, val
, 0);
20457 /* target = A B X D */
20458 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20459 GEN_INT (0), GEN_INT (1),
20460 GEN_INT (2+4), GEN_INT (0+4)));
20464 gcc_unreachable ();
20469 /* Element 0 handled by vec_merge below. */
20472 use_vec_merge
= true;
20478 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20479 store into element 0, then shuffle them back. */
20483 order
[0] = GEN_INT (elt
);
20484 order
[1] = const1_rtx
;
20485 order
[2] = const2_rtx
;
20486 order
[3] = GEN_INT (3);
20487 order
[elt
] = const0_rtx
;
20489 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20490 order
[1], order
[2], order
[3]));
20492 ix86_expand_vector_set (false, target
, val
, 0);
/* The same pshufd order swaps 0 and ELT back again.  */
20494 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20495 order
[1], order
[2], order
[3]));
20499 /* For SSE1, we have to reuse the V4SF code. */
20500 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20501 gen_lowpart (SFmode
, val
), elt
);
20506 use_vec_merge
= TARGET_SSE2
;
20509 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
/* vec_merge path: duplicate VAL across all lanes, merge lane ELT.  */
20520 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20521 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20522 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
/* Memory fallback: spill TARGET, overwrite one element, reload.  */
20526 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20528 emit_move_insn (mem
, target
);
20530 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20531 emit_move_insn (tmp
, val
);
20533 emit_move_insn (target
, mem
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, the
   per-mode switch/case lines are missing.  Purpose (from visible code):
   extract element ELT of vector VEC into scalar TARGET, via shuffle +
   vec_select where the ISA allows it, otherwise through a stack
   temporary.  */
20538 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20540 enum machine_mode mode
= GET_MODE (vec
);
20541 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20542 bool use_vec_extr
= false;
20555 use_vec_extr
= true;
/* V4SF: shuffle the wanted lane into position 0, then vec_select.  */
20567 tmp
= gen_reg_rtx (mode
);
20568 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20569 GEN_INT (elt
), GEN_INT (elt
),
20570 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20574 tmp
= gen_reg_rtx (mode
);
20575 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20579 gcc_unreachable ();
20582 use_vec_extr
= true;
/* V4SI (SSE2): pshufd broadcasts lane ELT, then vec_select lane 0.  */
20597 tmp
= gen_reg_rtx (mode
);
20598 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20599 GEN_INT (elt
), GEN_INT (elt
),
20600 GEN_INT (elt
), GEN_INT (elt
)));
20604 tmp
= gen_reg_rtx (mode
);
20605 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20609 gcc_unreachable ();
20612 use_vec_extr
= true;
20617 /* For SSE1, we have to reuse the V4SF code. */
20618 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20619 gen_lowpart (V4SFmode
, vec
), elt
);
20625 use_vec_extr
= TARGET_SSE2
;
20628 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20633 /* ??? Could extract the appropriate HImode element and shift. */
20640 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20641 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20643 /* Let the rtl optimizers know about the zero extension performed. */
20644 if (inner_mode
== HImode
)
20646 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20647 target
= gen_lowpart (SImode
, target
);
20650 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
/* Memory fallback: spill VEC to the stack and load one element.  */
20654 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20656 emit_move_insn (mem
, vec
);
20658 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20659 emit_move_insn (target
, tmp
);
20663 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20664 pattern to reduce; DEST is the destination; IN is the input vector. */
20667 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20669 rtx tmp1
, tmp2
, tmp3
;
20671 tmp1
= gen_reg_rtx (V4SFmode
);
20672 tmp2
= gen_reg_rtx (V4SFmode
);
20673 tmp3
= gen_reg_rtx (V4SFmode
);
20675 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20676 emit_insn (fn (tmp2
, tmp1
, in
));
20678 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20679 GEN_INT (1), GEN_INT (1),
20680 GEN_INT (1+4), GEN_INT (1+4)));
20681 emit_insn (fn (dest
, tmp2
, tmp3
));
20684 /* Target hook for scalar_mode_supported_p. */
20686 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20688 if (DECIMAL_FLOAT_MODE_P (mode
))
20691 return default_scalar_mode_supported_p (mode
);
20694 /* Implements target hook vector_mode_supported_p. */
20696 ix86_vector_mode_supported_p (enum machine_mode mode
)
20698 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20700 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20702 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20704 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   third parameter declaration and the final return (presumably returning
   the augmented clobber list) are among the missing lines.  Visible
   behavior: prepend "flags" and "fpsr" to the asm clobber list so every
   inline asm is treated as clobbering the flags and x87 status word.  */
20709 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20711 We do this in the new i386 backend to maintain source compatibility
20712 with the old cc0-based compiler. */
20715 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20716 tree inputs ATTRIBUTE_UNUSED
,
20719 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20721 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   function name and body test for *large* data (medium code models,
   ".ldata"/".lbss" sections, size above ix86_section_threshold) — the
   header comment saying "small data/bss" looks stale; confirm against
   upstream before relying on it.  Return statements are among the
   missing lines.  */
20726 /* Return true if this goes in small data/bss. */
20729 ix86_in_large_data_p (tree exp
)
/* Only the medium code models have a large data area at all.  */
20731 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20734 /* Functions are never large data. */
20735 if (TREE_CODE (exp
) == FUNCTION_DECL
)
/* Explicit section placement decides directly.  */
20738 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20740 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20741 if (strcmp (section
, ".ldata") == 0
20742 || strcmp (section
, ".lbss") == 0)
/* Otherwise decide by object size against the threshold.  */
20748 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20750 /* If this is an incomplete type with size 0, then we can't put it
20751 in data because it might be too big when completed. */
20752 if (!size
|| size
> ix86_section_threshold
)
20759 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20761 default_encode_section_info (decl
, rtl
, first
);
20763 if (TREE_CODE (decl
) == VAR_DECL
20764 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20765 && ix86_in_large_data_p (decl
))
20766 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20769 /* Worker function for REVERSE_CONDITION. */
20772 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20774 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20775 ? reverse_condition (code
)
20776 : reverse_condition_maybe_unordered (code
));
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   final fallback return (the non-pop store template) is among the
   missing lines.  Emits the assembler template for an x87 register
   move: a popping store when the source dies, a load when the
   destination is the stack top.  */
20779 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20783 output_387_reg_move (rtx insn
, rtx
*operands
)
/* If the source register dies here we can pop it off the x87 stack.  */
20785 if (REG_P (operands
[1])
20786 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20788 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20789 return output_387_ffreep (operands
, 0);
20790 return "fstp\t%y0";
20792 if (STACK_TOP_P (operands
[0]))
20793 return "fld%z1\t%y1";
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  Emits: fnstsw to read the FP status word, then either
   sahf + UNORDERED test on CCmode flags (TARGET_USE_SAHF) or a test of
   the C2 bit (0x04) with a CCNOmode NE test, followed by a conditional
   jump to LABEL.  */
20797 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20798 FP status register is set. */
20801 ix86_emit_fp_unordered_jump (rtx label
)
20803 rtx reg
= gen_reg_rtx (HImode
);
20806 emit_insn (gen_x86_fnstsw_1 (reg
));
20808 if (TARGET_USE_SAHF
)
/* sahf copies the status into EFLAGS; test UNORDERED there.  */
20810 emit_insn (gen_x86_sahf_1 (reg
));
20812 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20813 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
/* No sahf: test the C2 bit of the status word directly.  */
20817 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20819 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20820 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20823 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20824 gen_rtx_LABEL_REF (VOIDmode
, label
),
20826 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20827 emit_jump_insn (temp
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing (e.g. the tail of the gen_cmpxf call).  Computes
   log1p(op1) on the x87: uses fyl2xp1 when |op1| is below the constant
   1 - sqrt(2)/2 (where fyl2xp1's accuracy holds), otherwise falls back
   to fyl2x on 1 + op1.  */
20830 /* Output code to perform a log1p XFmode calculation. */
20832 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20834 rtx label1
= gen_label_rtx ();
20835 rtx label2
= gen_label_rtx ();
20837 rtx tmp
= gen_reg_rtx (XFmode
);
20838 rtx tmp2
= gen_reg_rtx (XFmode
);
/* Compare |op1| against the fyl2xp1 validity bound.  */
20840 emit_insn (gen_absxf2 (tmp
, op1
));
20841 emit_insn (gen_cmpxf (tmp
,
20842 CONST_DOUBLE_FROM_REAL_VALUE (
20843 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20845 emit_jump_insn (gen_bge (label1
));
/* Small-argument path: op0 = ln2 * log2(1 + op1) via fyl2xp1.  */
20847 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20848 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20849 emit_jump (label2
);
/* General path: op0 = ln2 * log2(1 + op1) via an explicit add and
   fyl2x.  */
20851 emit_label (label1
);
20852 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20853 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20854 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20855 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20857 emit_label (label2
);
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   third parameter (presumably the decl) and the guarding condition
   before the strcmp are among the missing lines.  Emits the Solaris
   ".eh_frame" section directive with the "@unwind" marker; all other
   sections defer to the default ELF handler.  */
20860 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20863 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20866 /* With Binutils 2.15, the "@unwind" marker must be specified on
20867 every occurrence of the ".eh_frame" section, not just the first
20870 && strcmp (name
, ".eh_frame") == 0)
20872 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20873 flags
& SECTION_WRITE
? "aw" : "a");
20876 default_elf_asm_named_section (name
, flags
, decl
);
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   case labels and return statements of the switch (returning "g" for
   __float128, "e" for long double/__float80, and presumably NULL by
   default) are among the missing lines.  */
20879 /* Return the mangling of TYPE if it is an extended fundamental type. */
20881 static const char *
20882 ix86_mangle_fundamental_type (tree type
)
20884 switch (TYPE_MODE (type
))
20887 /* __float128 is "g". */
20890 /* "long double" or __float80 is "e". */
20897 /* For 32-bit code we can save PIC register setup by using
20898 __stack_chk_fail_local hidden function instead of calling
20899 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20900 register, so it is better to call __stack_chk_fail directly. */
20903 ix86_stack_protect_fail (void)
20905 return TARGET_64BIT
20906 ? default_external_stack_protect_fail ()
20907 : default_hidden_stack_protect_fail ();
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   PIC guard and the first operand of the condition choosing sdata4 over
   sdata8 are among the missing lines.  Picks a DWARF EH pointer
   encoding based on PIC-ness, code model, and whether the symbol is
   global.  */
20910 /* Select a format to encode pointers in exception handling data. CODE
20911 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20912 true if the symbol may be affected by dynamic relocations.
20914 ??? All x86 object file formats are capable of representing this.
20915 After all, the relocation needed is the same as for the call insn.
20916 Whether or not a particular assembler allows us to enter such, I
20917 guess we'll have to see. */
20919 asm_preferred_eh_data_format (int code
, int global
)
/* PIC branch: pc-relative data, indirect for global symbols, narrowed
   to sdata4 for small/medium PIC code models.  */
20923 int type
= DW_EH_PE_sdata8
;
20925 || ix86_cmodel
== CM_SMALL_PIC
20926 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
20927 type
= DW_EH_PE_sdata4
;
20928 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
/* Non-PIC branch: udata4 when the code model guarantees 32-bit
   addresses, absolute pointers otherwise.  */
20930 if (ix86_cmodel
== CM_SMALL
20931 || (ix86_cmodel
== CM_MEDIUM
&& code
))
20932 return DW_EH_PE_udata4
;
20933 return DW_EH_PE_absptr
;
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  Computes RESULT = ABS_VALUE | (SIGN & sign-bit mask):
   builds a sign-bit mask when MASK is null (reducing it to scalar mode
   with a vec_select when needed), isolates SIGN's sign bit, and ors it
   into the known-positive ABS_VALUE.  */
20936 /* Expand copysign from SIGN to the positive value ABS_VALUE
20937 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20940 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
20942 enum machine_mode mode
= GET_MODE (sign
);
20943 rtx sgn
= gen_reg_rtx (mode
);
20944 if (mask
== NULL_RTX
)
20946 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
20947 if (!VECTOR_MODE_P (mode
))
20949 /* We need to generate a scalar mode mask in this case. */
20950 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20951 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20952 mask
= gen_reg_rtx (mode
);
20953 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* NOTE(review): a caller-supplied MASK appears to be inverted here
   before use — presumably it masks out the sign bit; confirm against
   callers.  */
20957 mask
= gen_rtx_NOT (mode
, mask
);
20958 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
20959 gen_rtx_AND (mode
, mask
, sign
)));
20960 emit_insn (gen_rtx_SET (VOIDmode
, result
,
20961 gen_rtx_IOR (mode
, abs_value
, sgn
)));
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   local declarations of xa/mask and the final return (presumably
   returning xa and storing the mask through *smask) are among the
   missing lines.  Computes |op0| by anding with an inverted sign-bit
   mask.  */
20964 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20965 mask for masking out the sign-bit is stored in *SMASK, if that is
20968 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
20970 enum machine_mode mode
= GET_MODE (op0
);
20973 xa
= gen_reg_rtx (mode
);
20974 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
20975 if (!VECTOR_MODE_P (mode
))
20977 /* We need to generate a scalar mode mask in this case. */
20978 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20979 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20980 mask
= gen_reg_rtx (mode
);
20981 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* xa = op0 & mask clears the sign bit.  */
20983 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
20984 gen_rtx_AND (mode
, op0
, mask
)));
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   local declarations, the operand-swap code, and the final return of
   LABEL are among the missing lines.  Emits an unordered FP compare
   (CCFPUmode) of op0/op1 and a conditional forward jump on CODE.  */
20992 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20993 swapping the operands if SWAP_OPERANDS is true. The expanded
20994 code is a forward jump to a newly created label in case the
20995 comparison is true. The generated label rtx is returned. */
20997 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
20998 bool swap_operands
)
21009 label
= gen_label_rtx ();
21010 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21011 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21012 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21013 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21014 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21015 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21016 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21017 JUMP_LABEL (tmp
) = label
;
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   operand-swap code and the final return of MASK are among the missing
   lines.  Emits cmpsd/cmpss (maskcmp) producing an all-ones/all-zeros
   mask for CODE over op0/op1, selecting the DF or SF pattern by mode.  */
21022 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21023 using comparison code CODE. Operands are swapped for the comparison if
21024 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21026 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21027 bool swap_operands
)
21029 enum machine_mode mode
= GET_MODE (op0
);
21030 rtx mask
= gen_reg_rtx (mode
);
21039 if (mode
== DFmode
)
21040 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21041 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21043 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21044 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
/* NOTE(review): line-mangled extraction; code preserved verbatim.  The
   declaration of TWO52 and the final return are among the missing
   lines.  Materializes 2**52 (DFmode) or 2**23 (SFmode) in a register —
   the magic constant whose ULP equals 1.0, used by the rounding
   expanders below.  */
21049 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21050 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21052 ix86_gen_TWO52 (enum machine_mode mode
)
21054 REAL_VALUE_TYPE TWO52r
;
21057 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21058 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21059 TWO52
= force_reg (mode
, TWO52
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines (including the adj declaration) missing.  Computes
   lround(op1) as (integer)(op1 + copysign(nextafter(0.5, 0.0), op1));
   the pred-half constant avoids double-rounding at exact .5 ties.  */
21064 /* Expand SSE sequence for computing lround from OP1 storing
21067 ix86_expand_lround (rtx op0
, rtx op1
)
21069 /* C code for the stuff we're doing below:
21070 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21073 enum machine_mode mode
= GET_MODE (op1
);
21074 const struct real_format
*fmt
;
21075 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21078 /* load nextafter (0.5, 0.0) */
21079 fmt
= REAL_MODE_FORMAT (mode
);
21080 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21081 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21083 /* adj = copysign (0.5, op1) */
21084 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21085 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21087 /* adj = op1 + adj */
21088 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21090 /* op0 = (imode)adj */
21091 expand_fix (op0
, adj
, 0);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  Computes lfloor/lceil: truncate to integer, convert
   back, and adjust by +-1 when the round-trip moved past op1; the
   compare-and-jump skips the adjustment when unnecessary.  */
21094 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21097 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21099 /* C code for the stuff we're doing below (for do_floor):
21101 xi -= (double)xi > op1 ? 1 : 0;
21104 enum machine_mode fmode
= GET_MODE (op1
);
21105 enum machine_mode imode
= GET_MODE (op0
);
21106 rtx ireg
, freg
, label
, tmp
;
21108 /* reg = (long)op1 */
21109 ireg
= gen_reg_rtx (imode
);
21110 expand_fix (ireg
, op1
, 0);
21112 /* freg = (double)reg */
21113 freg
= gen_reg_rtx (fmode
);
21114 expand_float (freg
, ireg
, 0);
21116 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21117 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21118 freg
, op1
, !do_floor
);
21119 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21120 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21121 emit_move_insn (ireg
, tmp
);
21123 emit_label (label
);
21124 LABEL_NUSES (label
) = 1;
21126 emit_move_insn (op0
, ireg
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  rint via the add-then-subtract 2**52 trick on |x|
   (values >= 2**52 are already integral and skip the adjustment),
   restoring the sign with copysign.  */
21129 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21130 result in OPERAND0. */
21132 ix86_expand_rint (rtx operand0
, rtx operand1
)
21134 /* C code for the stuff we're doing below:
21135 xa = fabs (operand1);
21136 if (!isless (xa, 2**52))
21138 xa = xa + 2**52 - 2**52;
21139 return copysign (xa, operand1);
21141 enum machine_mode mode
= GET_MODE (operand0
);
21142 rtx res
, xa
, label
, TWO52
, mask
;
21144 res
= gen_reg_rtx (mode
);
21145 emit_move_insn (res
, operand1
);
21147 /* xa = abs (operand1) */
21148 xa
= ix86_expand_sse_fabs (res
, &mask
);
21150 /* if (!isless (xa, TWO52)) goto label; */
21151 TWO52
= ix86_gen_TWO52 (mode
);
21152 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21154 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21155 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21157 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21159 emit_label (label
);
21160 LABEL_NUSES (label
) = 1;
21162 emit_move_insn (operand0
, res
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  floor/ceil without DImode cvttsd2siq (32-bit
   targets): round |x| via the 2**52 trick, restore the sign, then
   subtract/add a masked 1.0 to compensate when rounding crossed x.  */
21165 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21168 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21170 /* C code for the stuff we expand below.
21171 double xa = fabs (x), x2;
21172 if (!isless (xa, TWO52))
21174 xa = xa + TWO52 - TWO52;
21175 x2 = copysign (xa, x);
21184 enum machine_mode mode
= GET_MODE (operand0
);
21185 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21187 TWO52
= ix86_gen_TWO52 (mode
);
21189 /* Temporary for holding the result, initialized to the input
21190 operand to ease control flow. */
21191 res
= gen_reg_rtx (mode
);
21192 emit_move_insn (res
, operand1
);
21194 /* xa = abs (operand1) */
21195 xa
= ix86_expand_sse_fabs (res
, &mask
);
21197 /* if (!isless (xa, TWO52)) goto label; */
21198 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21200 /* xa = xa + TWO52 - TWO52; */
21201 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21202 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21204 /* xa = copysign (xa, operand1) */
21205 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21207 /* generate 1.0 or -1.0 */
21208 one
= force_reg (mode
,
21209 const_double_from_real_value (do_floor
21210 ? dconst1
: dconstm1
, mode
));
21212 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21213 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21214 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21215 gen_rtx_AND (mode
, one
, tmp
)));
21216 /* We always need to subtract here to preserve signed zero. */
21217 tmp
= expand_simple_binop (mode
, MINUS
,
21218 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21219 emit_move_insn (res
, tmp
);
21221 emit_label (label
);
21222 LABEL_NUSES (label
) = 1;
21224 emit_move_insn (operand0
, res
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  floor/ceil via truncation (fix/float round trip)
   plus a masked +-1 compensation; copysign restores signed zero when
   the mode honors it.  */
21227 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21230 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21232 /* C code for the stuff we expand below.
21233 double xa = fabs (x), x2;
21234 if (!isless (xa, TWO52))
21236 x2 = (double)(long)x;
21243 if (HONOR_SIGNED_ZEROS (mode))
21244 return copysign (x2, x);
21247 enum machine_mode mode
= GET_MODE (operand0
);
21248 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21250 TWO52
= ix86_gen_TWO52 (mode
);
21252 /* Temporary for holding the result, initialized to the input
21253 operand to ease control flow. */
21254 res
= gen_reg_rtx (mode
);
21255 emit_move_insn (res
, operand1
);
21257 /* xa = abs (operand1) */
21258 xa
= ix86_expand_sse_fabs (res
, &mask
);
21260 /* if (!isless (xa, TWO52)) goto label; */
21261 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21263 /* xa = (double)(long)x */
21264 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21265 expand_fix (xi
, res
, 0);
21266 expand_float (xa
, xi
, 0);
21269 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21271 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21272 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21273 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21274 gen_rtx_AND (mode
, one
, tmp
)));
21275 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21276 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21277 emit_move_insn (res
, tmp
);
21279 if (HONOR_SIGNED_ZEROS (mode
))
21280 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21282 emit_label (label
);
21283 LABEL_NUSES (label
) = 1;
21285 emit_move_insn (operand0
, res
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  round-to-nearest-away-from-zero without DImode
   truncation: nearest-int via the 2**52 trick, then fix up ties by
   comparing the rounding delta dxa against +-0.5, finally restore the
   sign with copysign.  */
21288 /* Expand SSE sequence for computing round from OPERAND1 storing
21289 into OPERAND0. Sequence that works without relying on DImode truncation
21290 via cvttsd2siq that is only available on 64bit targets. */
21292 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21294 /* C code for the stuff we expand below.
21295 double xa = fabs (x), xa2, x2;
21296 if (!isless (xa, TWO52))
21298 Using the absolute value and copying back sign makes
21299 -0.0 -> -0.0 correct.
21300 xa2 = xa + TWO52 - TWO52;
21305 else if (dxa > 0.5)
21307 x2 = copysign (xa2, x);
21310 enum machine_mode mode
= GET_MODE (operand0
);
21311 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21313 TWO52
= ix86_gen_TWO52 (mode
);
21315 /* Temporary for holding the result, initialized to the input
21316 operand to ease control flow. */
21317 res
= gen_reg_rtx (mode
);
21318 emit_move_insn (res
, operand1
);
21320 /* xa = abs (operand1) */
21321 xa
= ix86_expand_sse_fabs (res
, &mask
);
21323 /* if (!isless (xa, TWO52)) goto label; */
21324 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21326 /* xa2 = xa + TWO52 - TWO52; */
21327 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21328 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21330 /* dxa = xa2 - xa; */
21331 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21333 /* generate 0.5, 1.0 and -0.5 */
21334 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21335 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21336 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21340 tmp
= gen_reg_rtx (mode
);
21341 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21342 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21343 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21344 gen_rtx_AND (mode
, one
, tmp
)));
21345 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21346 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21347 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21348 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21349 gen_rtx_AND (mode
, one
, tmp
)));
21350 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21352 /* res = copysign (xa2, operand1) */
21353 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21355 emit_label (label
);
21356 LABEL_NUSES (label
) = 1;
21358 emit_move_insn (operand0
, res
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  trunc via fix/float round trip (cvttsd2si +
   cvtsi2sd), with copysign to preserve -0.0 when honored; inputs
   >= 2**52 are already integral and bypass the conversion.  */
21361 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21364 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21366 /* C code for SSE variant we expand below.
21367 double xa = fabs (x), x2;
21368 if (!isless (xa, TWO52))
21370 x2 = (double)(long)x;
21371 if (HONOR_SIGNED_ZEROS (mode))
21372 return copysign (x2, x);
21375 enum machine_mode mode
= GET_MODE (operand0
);
21376 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21378 TWO52
= ix86_gen_TWO52 (mode
);
21380 /* Temporary for holding the result, initialized to the input
21381 operand to ease control flow. */
21382 res
= gen_reg_rtx (mode
);
21383 emit_move_insn (res
, operand1
);
21385 /* xa = abs (operand1) */
21386 xa
= ix86_expand_sse_fabs (res
, &mask
);
21388 /* if (!isless (xa, TWO52)) goto label; */
21389 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21391 /* x = (double)(long)x */
21392 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21393 expand_fix (xi
, res
, 0);
21394 expand_float (res
, xi
, 0);
21396 if (HONOR_SIGNED_ZEROS (mode
))
21397 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21399 emit_label (label
);
21400 LABEL_NUSES (label
) = 1;
21402 emit_move_insn (operand0
, res
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  trunc for 32-bit targets (no DImode cvttsd2siq):
   nearest-int via the 2**52 trick on |x|, subtract a masked 1.0 when
   that rounded up past |x|, then restore the sign.  */
21405 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21408 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21410 enum machine_mode mode
= GET_MODE (operand0
);
21411 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21413 /* C code for SSE variant we expand below.
21414 double xa = fabs (x), x2;
21415 if (!isless (xa, TWO52))
21417 xa2 = xa + TWO52 - TWO52;
21421 x2 = copysign (xa2, x);
21425 TWO52
= ix86_gen_TWO52 (mode
);
21427 /* Temporary for holding the result, initialized to the input
21428 operand to ease control flow. */
21429 res
= gen_reg_rtx (mode
);
21430 emit_move_insn (res
, operand1
);
21432 /* xa = abs (operand1) */
21433 xa
= ix86_expand_sse_fabs (res
, &smask
);
21435 /* if (!isless (xa, TWO52)) goto label; */
21436 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21438 /* res = xa + TWO52 - TWO52; */
21439 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21440 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21441 emit_move_insn (res
, tmp
);
21444 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21446 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21447 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21448 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21449 gen_rtx_AND (mode
, mask
, one
)));
21450 tmp
= expand_simple_binop (mode
, MINUS
,
21451 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21452 emit_move_insn (res
, tmp
);
21454 /* res = copysign (res, operand1) */
21455 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21457 emit_label (label
);
21458 LABEL_NUSES (label
) = 1;
21460 emit_move_insn (operand0
, res
);
/* NOTE(review): line-mangled extraction; code preserved verbatim, some
   lines missing.  round-half-away-from-zero: add nextafter(0.5, 0.0)
   to |x| (the predecessor of 0.5 avoids double rounding), truncate via
   fix/float, and restore the sign with copysign.  */
21463 /* Expand SSE sequence for computing round from OPERAND1 storing
21466 ix86_expand_round (rtx operand0
, rtx operand1
)
21468 /* C code for the stuff we're doing below:
21469 double xa = fabs (x);
21470 if (!isless (xa, TWO52))
21472 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21473 return copysign (xa, x);
21475 enum machine_mode mode
= GET_MODE (operand0
);
21476 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21477 const struct real_format
*fmt
;
21478 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21480 /* Temporary for holding the result, initialized to the input
21481 operand to ease control flow. */
21482 res
= gen_reg_rtx (mode
);
21483 emit_move_insn (res
, operand1
);
21485 TWO52
= ix86_gen_TWO52 (mode
);
21486 xa
= ix86_expand_sse_fabs (res
, &mask
);
21487 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21489 /* load nextafter (0.5, 0.0) */
21490 fmt
= REAL_MODE_FORMAT (mode
);
21491 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21492 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21494 /* xa = xa + 0.5 */
21495 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21496 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21498 /* xa = (double)(int64_t)xa */
21499 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21500 expand_fix (xi
, xa
, 0);
21501 expand_float (xa
, xi
, 0);
21503 /* res = copysign (xa, operand1) */
21504 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21506 emit_label (label
);
21507 LABEL_NUSES (label
) = 1;
21509 emit_move_insn (operand0
, res
);
21512 #include "gt-i386.h"