1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default the stack-probe limit to "no limit" (-1) when the target
   configuration headers have not already provided one.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DI map to slots 0-3; every other mode falls through to the
   "other" slot, index 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add).  */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a cost in bytes of code size on the
   same scale; it is used by the size-tuned cost table below.  */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy used where a cost table has no separate
   variant (e.g. no 64-bit-specific strategy): a single libcall entry
   with the -1 "any size" terminator.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for code size rather than speed:
   entries are measured in instruction bytes via COSTS_N_BYTES instead
   of cycles.
   NOTE(review): this initializer appears truncated by extraction —
   fields present in the other tables (e.g. "large" insn) and the
   closing "};" are not visible here; verify against the upstream
   struct processor_costs layout.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy and memset stringop strategies: rep movsb/stosb for any size,
   for both the 32-bit and 64-bit descriptor.  */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* Cost table tuned for the original Intel 386 (cycle costs relative
   to an add).
   NOTE(review): initializer appears truncated by extraction — no
   closing "};" is visible; verify against the upstream file.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsb/stosb for any size; 64-bit slot unused.  */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Cost table tuned for the Intel 486 (cycle costs relative to an add).
   NOTE(review): initializer appears truncated by extraction — the
   final memset DUMMY_STRINGOP_ALGS entry and closing "};" are not
   visible; verify against the upstream file.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl/stosl for any size; 64-bit slot unused.  */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Cost table tuned for the Intel Pentium (P5) — cycle costs relative
   to an add.
   NOTE(review): initializer appears truncated by extraction — the
   final memset entries and closing "};" are not visible; verify
   against the upstream file.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: libcall by default, rep movsl up to 256 bytes.  */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Cost table tuned for the Intel Pentium Pro / P6 family — cycle costs
   relative to an add.
   NOTE(review): initializer appears truncated by extraction — the
   final memset entries and closing "};" are not visible; verify
   against the upstream file.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise. */
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table tuned for the AMD Geode — cycle costs relative to an add.
   NOTE(review): initializer appears truncated by extraction — the
   final memset entries and closing "};" are not visible; verify
   against the upstream file.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy: libcall by default, rep movsl up to 256 bytes.  */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table tuned for the AMD K6 — cycle costs relative to an add.
   NOTE(review): initializer appears truncated by extraction — the
   final memset entries and closing "};" are not visible; verify
   against the upstream file.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy: libcall by default, rep movsl up to 256 bytes.  */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table tuned for the AMD Athlon — cycle costs relative to an add.
   NOTE(review): initializer appears truncated by extraction — the
   final memset entries and closing "};" are not visible; verify
   against the upstream file.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table tuned for the AMD K8 / Opteron — cycle costs relative to
   an add.  Unlike older tables, this one provides distinct 32-bit and
   64-bit stringop strategies.
   NOTE(review): initializer appears truncated by extraction — no
   closing "};" is visible; verify against the upstream file.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
time). */
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table tuned for AMD Family 10h (Barcelona) — cycle costs
   relative to an add.  Provides distinct 32-bit and 64-bit stringop
   strategies, like k8_cost.
   NOTE(review): initializer appears truncated by extraction — no
   closing "};" is visible; verify against the upstream file.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
/* NOTE(review): the following lines appear to be remnants of a MOVD
   latency comparison comment whose delimiters were lost during
   extraction — kept verbatim inside a comment:
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
*/
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
time). */
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table tuned for the Intel Pentium 4 (NetBurst) — cycle costs
   relative to an add.
   NOTE(review): initializer appears truncated by extraction — the
   memset descriptor is cut off mid-list before its DUMMY entry, and
   no closing "};" is visible; verify against the upstream file.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Cost table tuned for the Intel Nocona (64-bit Pentium 4 / Xeon) —
   cycle costs relative to an add.  Provides separate 32-bit and
   64-bit stringop strategies.
   NOTE(review): initializer appears truncated by extraction — the
   32-bit memset descriptor is cut off mid-list, and no closing "};"
   is visible; verify against the upstream file.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table tuned for the Intel Core 2 — cycle costs relative to an
   add.  Note the lea cost is deliberately COSTS_N_INSNS (1) + 1 to
   bias against lea slightly without reaching a full extra insn.
   NOTE(review): initializer appears truncated by extraction — no
   closing "};" is visible; verify against the upstream file.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks: one bit per processor,
   indexed by the PROCESSOR_* enum values.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
/* m_K6_GEODE references m_K6 before it is defined; macro bodies are only
   expanded at use sites, so the forward reference is harmless.  */
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1002 /* Generic instruction choice should be common subset of supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
1006 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
1007 generic because it is not working well with PPro base chips. */
1008 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1010 const int x86_push_memory
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1011 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1012 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
1013 /* Enable to zero extend integer registers to avoid partial dependencies */
1014 const int x86_movx
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1015 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */;
1016 const int x86_double_with_add
= ~m_386
;
1017 const int x86_use_bit_test
= m_386
;
1018 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
1019 | m_K6
| m_CORE2
| m_GENERIC
;
1020 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1022 const int x86_3dnow_a
= m_ATHLON_K8_AMDFAM10
;
1023 const int x86_deep_branch
= m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
1024 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1025 /* Branch hints were put in P4 based on simulation result. But
1026 after P4 was made, no performance benefit was observed with
1027 branch hints. It also increases the code size. As the result,
1028 icc never generates branch hints. */
1029 const int x86_branch_hints
= 0;
1030 const int x86_use_sahf
= m_PPRO
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_GENERIC32
;
1031 /*m_GENERIC | m_ATHLON_K8 ? */
1032 /* We probably ought to watch for partial register stalls on Generic32
1033 compilation setting as well. However in current implementation the
1034 partial register stalls are not eliminated very well - they can
1035 be introduced via subregs synthesized by combine and can happen
1036 in caller/callee saving sequences.
1037 Because this option pays back little on PPro based chips and is in conflict
1038 with partial reg. dependencies used by Athlon/P4 based chips, it is better
1039 to leave it off for generic32 for now. */
1040 const int x86_partial_reg_stall
= m_PPRO
;
1041 const int x86_partial_flag_reg_stall
= m_CORE2
| m_GENERIC
;
1042 const int x86_use_himode_fiop
= m_386
| m_486
| m_K6_GEODE
;
1043 const int x86_use_simode_fiop
= ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
1044 | m_CORE2
| m_GENERIC
);
1045 const int x86_use_mov0
= m_K6
;
1046 const int x86_use_cltd
= ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
);
1047 const int x86_read_modify_write
= ~m_PENT
;
1048 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
1049 const int x86_split_long_moves
= m_PPRO
;
1050 const int x86_promote_QImode
= m_K6_GEODE
| m_PENT
| m_386
| m_486
1051 | m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
;
1053 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
1054 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
1055 const int x86_qimode_math
= ~(0);
1056 const int x86_promote_qi_regs
= 0;
1057 /* On PPro this flag is meant to avoid partial register stalls. Just like
1058 the x86_partial_reg_stall this option might be considered for Generic32
1059 if our scheme for avoiding partial stalls was more effective. */
1060 const int x86_himode_math
= ~(m_PPRO
);
1061 const int x86_promote_hi_regs
= m_PPRO
;
1062 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1063 const int x86_sub_esp_4
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1064 | m_CORE2
| m_GENERIC
;
1065 const int x86_sub_esp_8
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1066 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1067 const int x86_add_esp_4
= m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
1068 | m_CORE2
| m_GENERIC
;
1069 const int x86_add_esp_8
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1070 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1071 /* Enable if integer moves are preferred for DFmode copies */
1072 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1073 | m_PPRO
| m_CORE2
| m_GENERIC
| m_GEODE
);
1074 const int x86_partial_reg_dependency
= m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1075 | m_CORE2
| m_GENERIC
;
1076 const int x86_memory_mismatch_stall
= m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1077 | m_CORE2
| m_GENERIC
;
1078 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1079 for outgoing arguments will be computed and placed into the variable
1080 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1081 for each call; instead, the function prologue should increase the stack frame
1082 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1084 const int x86_accumulate_outgoing_args
= m_ATHLON_K8_AMDFAM10
| m_PENT4
1085 | m_NOCONA
| m_PPRO
| m_CORE2
1087 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
1088 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
1089 const int x86_shift1
= ~m_486
;
1090 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
1091 | m_ATHLON_K8_AMDFAM10
| m_PENT4
1092 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1093 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
1094 that thread 128bit SSE registers as single units versus K8 based chips that
1095 divide SSE registers to two 64bit halves.
1096 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
1097 to allow register renaming on 128bit SSE units, but usually results in one
1098 extra microop on 64bit SSE units. Experimental results shows that disabling
1099 this option on P4 brings over 20% SPECfp regression, while enabling it on
1100 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
1102 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1103 | m_GENERIC
| m_AMDFAM10
;
1104 /* Set for machines where the type and dependencies are resolved on SSE
1105 register parts instead of whole registers, so we may maintain just
1106 lower part of scalar values in proper format leaving the upper part
1108 const int x86_sse_split_regs
= m_ATHLON_K8
;
1109 /* Code generation for scalar reg-reg moves of single and double precision data:
1110 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1114 if (x86_sse_partial_reg_dependency == true)
1119 Code generation for scalar loads of double precision data:
1120 if (x86_sse_split_regs == true)
1121 movlpd mem, reg (gas syntax)
1125 Code generation for unaligned packed loads of single precision data
1126 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1127 if (x86_sse_unaligned_move_optimal)
1130 if (x86_sse_partial_reg_dependency == true)
1142 Code generation for unaligned packed loads of double precision data
1143 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1144 if (x86_sse_unaligned_move_optimal)
1147 if (x86_sse_split_regs == true)
1158 const int x86_sse_unaligned_move_optimal
= m_AMDFAM10
;
1159 const int x86_sse_typeless_stores
= m_ATHLON_K8_AMDFAM10
;
1160 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
1161 const int x86_use_ffreep
= m_ATHLON_K8_AMDFAM10
;
1162 const int x86_use_incdec
= ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
);
1164 const int x86_inter_unit_moves
= ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
);
1166 const int x86_ext_80387_constants
= m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
1167 | m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
1168 /* Some CPU cores are not able to predict more than 4 branch instructions in
1169 the 16 byte window. */
1170 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1171 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1172 const int x86_schedule
= m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
1173 | m_CORE2
| m_GENERIC
;
1174 const int x86_use_bt
= m_ATHLON_K8_AMDFAM10
;
1175 /* Compare and exchange was added for 80486. */
1176 const int x86_cmpxchg
= ~m_386
;
1177 /* Compare and exchange 8 bytes was added for pentium. */
1178 const int x86_cmpxchg8b
= ~(m_386
| m_486
);
1179 /* Exchange and add was added for 80486. */
1180 const int x86_xadd
= ~m_386
;
1181 /* Byteswap was added for 80486. */
1182 const int x86_bswap
= ~m_386
;
1183 const int x86_pad_returns
= m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
;
1185 static enum stringop_alg stringop_alg
= no_stringop
;
1187 /* In case the average insn count for single function invocation is
1188 lower than this constant, emit fast (but longer) prologue and
1190 #define FAST_PROLOGUE_INSN_COUNT 20
1192 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1193 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1194 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1195 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1197 /* Array of the smallest class containing reg number REGNO, indexed by
1198 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1200 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1202 /* ax, dx, cx, bx */
1203 AREG
, DREG
, CREG
, BREG
,
1204 /* si, di, bp, sp */
1205 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1207 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1208 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1211 /* flags, fpsr, fpcr, frame */
1212 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1213 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1215 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1217 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1218 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1219 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1223 /* The "default" register map used in 32bit mode. */
1225 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1227 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1228 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1229 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1230 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1231 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1232 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1233 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1236 static int const x86_64_int_parameter_registers
[6] =
1238 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1239 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Hard register numbers usable for returning integer values.
   Fixed: hard reg 1 is dx/RDX (see the regclass_map comment
   "ax, dx, cx, bx" above), so its annotation was corrected from the
   erroneous RDI.  Braces restored (lost in this copy).  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
1247 /* The "default" register map used in 64bit mode. */
1248 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1250 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1251 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1252 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1253 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1254 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1255 8,9,10,11,12,13,14,15, /* extended integer registers */
1256 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1259 /* Define the register numbers to be used in Dwarf debugging information.
1260 The SVR4 reference port C compiler uses the following register numbers
1261 in its Dwarf output code:
1262 0 for %eax (gcc regno = 0)
1263 1 for %ecx (gcc regno = 2)
1264 2 for %edx (gcc regno = 1)
1265 3 for %ebx (gcc regno = 3)
1266 4 for %esp (gcc regno = 7)
1267 5 for %ebp (gcc regno = 6)
1268 6 for %esi (gcc regno = 4)
1269 7 for %edi (gcc regno = 5)
1270 The following three DWARF register numbers are never generated by
1271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1272 believes these numbers have these meanings.
1273 8 for %eip (no gcc equivalent)
1274 9 for %eflags (gcc regno = 17)
1275 10 for %trapno (no gcc equivalent)
1276 It is not at all clear how we should number the FP stack registers
1277 for the x86 architecture. If the version of SDB on x86/svr4 were
1278 a bit less brain dead with respect to floating-point then we would
1279 have a precedent to follow with respect to DWARF register numbers
1280 for x86 FP registers, but the SDB on x86/svr4 is so completely
1281 broken with respect to FP registers that it is hardly worth thinking
1282 of it as something to strive for compatibility with.
1283 The version of x86/svr4 SDB I have at the moment does (partially)
1284 seem to believe that DWARF register number 11 is associated with
1285 the x86 register %st(0), but that's about all. Higher DWARF
1286 register numbers don't seem to be associated with anything in
1287 particular, and even for DWARF regno 11, SDB only seems to under-
1288 stand that it should say that a variable lives in %st(0) (when
1289 asked via an `=' command) if we said it was in DWARF regno 11,
1290 but SDB still prints garbage when asked for the value of the
1291 variable in question (via a `/' command).
1292 (Also note that the labels SDB prints for various FP stack regs
1293 when doing an `x' command are all wrong.)
1294 Note that these problems generally don't affect the native SVR4
1295 C compiler because it doesn't allow the use of -O with -g and
1296 because when it is *not* optimizing, it allocates a memory
1297 location for each floating-point variable, and the memory
1298 location is what gets described in the DWARF AT_location
1299 attribute for the variable in question.
1300 Regardless of the severe mental illness of the x86/svr4 SDB, we
1301 do something sensible here and we use the following DWARF
1302 register numbers. Note that these are all stack-top-relative
1304 11 for %st(0) (gcc regno = 8)
1305 12 for %st(1) (gcc regno = 9)
1306 13 for %st(2) (gcc regno = 10)
1307 14 for %st(3) (gcc regno = 11)
1308 15 for %st(4) (gcc regno = 12)
1309 16 for %st(5) (gcc regno = 13)
1310 17 for %st(6) (gcc regno = 14)
1311 18 for %st(7) (gcc regno = 15)
1313 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1317 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1320 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1321 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1324 /* Test and compare insns in i386.md store the information needed to
1325 generate branch and scc insns here. */
1327 rtx ix86_compare_op0
= NULL_RTX
;
1328 rtx ix86_compare_op1
= NULL_RTX
;
1329 rtx ix86_compare_emitted
= NULL_RTX
;
1331 /* Size of the register save area. */
1332 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1334 /* Define the structure for the machine field in struct function. */
1336 struct stack_local_entry
GTY(())
1338 unsigned short mode
;
1341 struct stack_local_entry
*next
;
1344 /* Structure describing stack frame layout.
1345 Stack grows downward:
1351 saved frame pointer if frame_pointer_needed
1352 <- HARD_FRAME_POINTER
1357 [va_arg registers] (
1358 > to_allocate <- FRAME_POINTER
1368 HOST_WIDE_INT frame
;
1370 int outgoing_arguments_size
;
1373 HOST_WIDE_INT to_allocate
;
1374 /* The offsets relative to ARG_POINTER. */
1375 HOST_WIDE_INT frame_pointer_offset
;
1376 HOST_WIDE_INT hard_frame_pointer_offset
;
1377 HOST_WIDE_INT stack_pointer_offset
;
1379 /* When save_regs_using_mov is set, emit prologue using
1380 move instead of push instructions. */
1381 bool save_regs_using_mov
;
1384 /* Code model option. */
1385 enum cmodel ix86_cmodel
;
1387 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1389 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1391 /* Which unit we are generating floating point math for. */
1392 enum fpmath_unit ix86_fpmath
;
1394 /* Which cpu are we scheduling for. */
1395 enum processor_type ix86_tune
;
1396 /* Which instruction set architecture to use. */
1397 enum processor_type ix86_arch
;
1399 /* true if sse prefetch instruction is not NOOP. */
1400 int x86_prefetch_sse
;
1402 /* true if cmpxchg16b is supported. */
1405 /* ix86_regparm_string as a number */
1406 static int ix86_regparm
;
1408 /* -mstackrealign option */
1409 extern int ix86_force_align_arg_pointer
;
1410 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1412 /* Preferred alignment for stack boundary in bits. */
1413 unsigned int ix86_preferred_stack_boundary
;
1415 /* Values 1-5: see jump.c */
1416 int ix86_branch_cost
;
1418 /* Variables which are this size or smaller are put in the data/bss
1419 or ldata/lbss sections. */
1421 int ix86_section_threshold
= 65536;
1423 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1424 char internal_label_prefix
[16];
1425 int internal_label_prefix_len
;
1427 static bool ix86_handle_option (size_t, const char *, int);
1428 static void output_pic_addr_const (FILE *, rtx
, int);
1429 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1431 static const char *get_some_local_dynamic_name (void);
1432 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1433 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1434 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1436 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1437 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1439 static rtx
get_thread_pointer (int);
1440 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1441 static void get_pc_thunk_name (char [32], unsigned int);
1442 static rtx
gen_push (rtx
);
1443 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1444 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1445 static struct machine_function
* ix86_init_machine_status (void);
1446 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1447 static int ix86_nsaved_regs (void);
1448 static void ix86_emit_save_regs (void);
1449 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1450 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1451 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1452 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1453 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1454 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1455 static int ix86_issue_rate (void);
1456 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1457 static int ia32_multipass_dfa_lookahead (void);
1458 static void ix86_init_mmx_sse_builtins (void);
1459 static rtx
x86_this_parameter (tree
);
1460 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1461 HOST_WIDE_INT
, tree
);
1462 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1463 static void x86_file_start (void);
1464 static void ix86_reorg (void);
1465 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1466 static tree
ix86_build_builtin_va_list (void);
1467 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1469 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1470 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1471 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1473 static int ix86_address_cost (rtx
);
1474 static bool ix86_cannot_force_const_mem (rtx
);
1475 static rtx
ix86_delegitimize_address (rtx
);
1477 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1479 struct builtin_description
;
1480 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1482 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1484 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1485 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1486 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1487 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1488 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1489 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1490 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1491 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1492 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1493 static int ix86_fp_comparison_cost (enum rtx_code code
);
1494 static unsigned int ix86_select_alt_pic_regnum (void);
1495 static int ix86_save_reg (unsigned int, int);
1496 static void ix86_compute_frame_layout (struct ix86_frame
*);
1497 static int ix86_comp_type_attributes (tree
, tree
);
1498 static int ix86_function_regparm (tree
, tree
);
1499 const struct attribute_spec ix86_attribute_table
[];
1500 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1501 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1502 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1503 static bool contains_128bit_aligned_vector_p (tree
);
1504 static rtx
ix86_struct_value_rtx (tree
, int);
1505 static bool ix86_ms_bitfield_layout_p (tree
);
1506 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1507 static int extended_reg_mentioned_1 (rtx
*, void *);
1508 static bool ix86_rtx_costs (rtx
, int, int, int *);
1509 static int min_insn_size (rtx
);
1510 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1511 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1512 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1514 static void ix86_init_builtins (void);
1515 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1516 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1517 static tree
ix86_builtin_conversion (enum tree_code
, tree
);
1518 static const char *ix86_mangle_fundamental_type (tree
);
1519 static tree
ix86_stack_protect_fail (void);
1520 static rtx
ix86_internal_arg_pointer (void);
1521 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1522 static rtx
ix86_build_const_vector (enum machine_mode
, bool, rtx
);
1523 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1526 /* This function is only used on Solaris. */
1527 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1530 /* Register class used for passing given 64bit part of the argument.
1531 These represent classes as documented by the PS ABI, with the exception
1532 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1533 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1535 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1536 whenever possible (upper half does contain padding).
1538 enum x86_64_reg_class
1541 X86_64_INTEGER_CLASS
,
1542 X86_64_INTEGERSI_CLASS
,
1549 X86_64_COMPLEX_X87_CLASS
,
/* Printable names for the x86_64_reg_class values, indexed by that enum.
   NOTE(review): the closing brace was lost in this copy and is restored.  */
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
#define MAX_CLASSES 4
1559 /* Table of constants used by fldpi, fldln2, etc.... */
1560 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1561 static bool ext_80387_constants_init
= 0;
1562 static void init_ext_80387_constants (void);
1563 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1564 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1565 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1566 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1567 unsigned HOST_WIDE_INT align
)
/* Initialize the GCC target structure.
   NOTE(review): several `#endif' / `#else' lines of the conditional
   groups were lost in this copy, leaving the preprocessor conditionals
   unbalanced; restored from upstream — confirm.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1639 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1640 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1642 #undef TARGET_DELEGITIMIZE_ADDRESS
1643 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1645 #undef TARGET_MS_BITFIELD_LAYOUT_P
1646 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1649 #undef TARGET_BINDS_LOCAL_P
1650 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1653 #undef TARGET_ASM_OUTPUT_MI_THUNK
1654 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1655 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1656 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1658 #undef TARGET_ASM_FILE_START
1659 #define TARGET_ASM_FILE_START x86_file_start
1661 #undef TARGET_DEFAULT_TARGET_FLAGS
1662 #define TARGET_DEFAULT_TARGET_FLAGS \
1664 | TARGET_64BIT_DEFAULT \
1665 | TARGET_SUBTARGET_DEFAULT \
1666 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1668 #undef TARGET_HANDLE_OPTION
1669 #define TARGET_HANDLE_OPTION ix86_handle_option
1671 #undef TARGET_RTX_COSTS
1672 #define TARGET_RTX_COSTS ix86_rtx_costs
1673 #undef TARGET_ADDRESS_COST
1674 #define TARGET_ADDRESS_COST ix86_address_cost
1676 #undef TARGET_FIXED_CONDITION_CODE_REGS
1677 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1678 #undef TARGET_CC_MODES_COMPATIBLE
1679 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1681 #undef TARGET_MACHINE_DEPENDENT_REORG
1682 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1684 #undef TARGET_BUILD_BUILTIN_VA_LIST
1685 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1687 #undef TARGET_MD_ASM_CLOBBERS
1688 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1690 #undef TARGET_PROMOTE_PROTOTYPES
1691 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1692 #undef TARGET_STRUCT_VALUE_RTX
1693 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1694 #undef TARGET_SETUP_INCOMING_VARARGS
1695 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1696 #undef TARGET_MUST_PASS_IN_STACK
1697 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1698 #undef TARGET_PASS_BY_REFERENCE
1699 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1700 #undef TARGET_INTERNAL_ARG_POINTER
1701 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1702 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1703 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1705 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1706 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1708 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1709 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1711 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1712 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1715 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1716 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1719 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1720 #undef TARGET_INSERT_ATTRIBUTES
1721 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1724 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1725 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1727 #undef TARGET_STACK_PROTECT_FAIL
1728 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1730 #undef TARGET_FUNCTION_VALUE
1731 #define TARGET_FUNCTION_VALUE ix86_function_value
1733 struct gcc_target targetm
= TARGET_INITIALIZER
;
1736 /* The svr4 ABI for the i386 says that records and unions are returned
1738 #ifndef DEFAULT_PCC_STRUCT_RETURN
1739 #define DEFAULT_PCC_STRUCT_RETURN 1
1742 /* Implement TARGET_HANDLE_OPTION. */
1745 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1752 target_flags
&= ~MASK_3DNOW_A
;
1753 target_flags_explicit
|= MASK_3DNOW_A
;
1760 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1761 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1768 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1769 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1776 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1777 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1784 target_flags
&= ~MASK_SSE4A
;
1785 target_flags_explicit
|= MASK_SSE4A
;
1794 /* Sometimes certain combinations of command options do not make
1795 sense on a particular target machine. You can define a macro
1796 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1797 defined, is executed once just after all the command options have
1800 Don't use this macro to turn on various extra optimizations for
1801 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1804 override_options (void)
1807 int ix86_tune_defaulted
= 0;
1809 /* Comes from final.c -- no real reason to change it. */
1810 #define MAX_CODE_ALIGN 16
1814 const struct processor_costs
*cost
; /* Processor costs */
1815 const int target_enable
; /* Target flags to enable. */
1816 const int target_disable
; /* Target flags to disable. */
1817 const int align_loop
; /* Default alignments. */
1818 const int align_loop_max_skip
;
1819 const int align_jump
;
1820 const int align_jump_max_skip
;
1821 const int align_func
;
1823 const processor_target_table
[PROCESSOR_max
] =
1825 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1826 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1827 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1828 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1829 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1830 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1831 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1832 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1833 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1834 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1835 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1836 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1837 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1838 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1841 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1844 const char *const name
; /* processor name or nickname. */
1845 const enum processor_type processor
;
1846 const enum pta_flags
1852 PTA_PREFETCH_SSE
= 16,
1863 const processor_alias_table
[] =
1865 {"i386", PROCESSOR_I386
, 0},
1866 {"i486", PROCESSOR_I486
, 0},
1867 {"i586", PROCESSOR_PENTIUM
, 0},
1868 {"pentium", PROCESSOR_PENTIUM
, 0},
1869 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1870 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1871 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1872 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1873 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1874 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1875 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1876 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1877 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1878 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1879 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1880 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1881 | PTA_MMX
| PTA_PREFETCH_SSE
},
1882 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1883 | PTA_MMX
| PTA_PREFETCH_SSE
},
1884 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1885 | PTA_MMX
| PTA_PREFETCH_SSE
},
1886 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1887 | PTA_MMX
| PTA_PREFETCH_SSE
| PTA_CX16
},
1888 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1889 | PTA_64BIT
| PTA_MMX
1890 | PTA_PREFETCH_SSE
| PTA_CX16
},
1891 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1893 {"k6", PROCESSOR_K6
, PTA_MMX
},
1894 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1895 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1896 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1898 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1899 | PTA_3DNOW
| PTA_3DNOW_A
},
1900 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1901 | PTA_3DNOW_A
| PTA_SSE
},
1902 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1903 | PTA_3DNOW_A
| PTA_SSE
},
1904 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1905 | PTA_3DNOW_A
| PTA_SSE
},
1906 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1907 | PTA_SSE
| PTA_SSE2
},
1908 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1909 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1910 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1911 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1912 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1913 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1914 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1915 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1916 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1917 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1918 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1919 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1920 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1921 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1924 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1926 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1927 SUBTARGET_OVERRIDE_OPTIONS
;
1930 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1931 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1934 /* -fPIC is the default for x86_64. */
1935 if (TARGET_MACHO
&& TARGET_64BIT
)
1938 /* Set the default values for switches whose default depends on TARGET_64BIT
1939 in case they weren't overwritten by command line options. */
1942 /* Mach-O doesn't support omitting the frame pointer for now. */
1943 if (flag_omit_frame_pointer
== 2)
1944 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1945 if (flag_asynchronous_unwind_tables
== 2)
1946 flag_asynchronous_unwind_tables
= 1;
1947 if (flag_pcc_struct_return
== 2)
1948 flag_pcc_struct_return
= 0;
1952 if (flag_omit_frame_pointer
== 2)
1953 flag_omit_frame_pointer
= 0;
1954 if (flag_asynchronous_unwind_tables
== 2)
1955 flag_asynchronous_unwind_tables
= 0;
1956 if (flag_pcc_struct_return
== 2)
1957 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1960 /* Need to check -mtune=generic first. */
1961 if (ix86_tune_string
)
1963 if (!strcmp (ix86_tune_string
, "generic")
1964 || !strcmp (ix86_tune_string
, "i686")
1965 /* As special support for cross compilers we read -mtune=native
1966 as -mtune=generic. With native compilers we won't see the
1967 -mtune=native, as it was changed by the driver. */
1968 || !strcmp (ix86_tune_string
, "native"))
1971 ix86_tune_string
= "generic64";
1973 ix86_tune_string
= "generic32";
1975 else if (!strncmp (ix86_tune_string
, "generic", 7))
1976 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1980 if (ix86_arch_string
)
1981 ix86_tune_string
= ix86_arch_string
;
1982 if (!ix86_tune_string
)
1984 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1985 ix86_tune_defaulted
= 1;
1988 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1989 need to use a sensible tune option. */
1990 if (!strcmp (ix86_tune_string
, "generic")
1991 || !strcmp (ix86_tune_string
, "x86-64")
1992 || !strcmp (ix86_tune_string
, "i686"))
1995 ix86_tune_string
= "generic64";
1997 ix86_tune_string
= "generic32";
2000 if (ix86_stringop_string
)
2002 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2003 stringop_alg
= rep_prefix_1_byte
;
2004 else if (!strcmp (ix86_stringop_string
, "libcall"))
2005 stringop_alg
= libcall
;
2006 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2007 stringop_alg
= rep_prefix_4_byte
;
2008 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2009 stringop_alg
= rep_prefix_8_byte
;
2010 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2011 stringop_alg
= loop_1_byte
;
2012 else if (!strcmp (ix86_stringop_string
, "loop"))
2013 stringop_alg
= loop
;
2014 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2015 stringop_alg
= unrolled_loop
;
2017 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2019 if (!strcmp (ix86_tune_string
, "x86-64"))
2020 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2021 "-mtune=generic instead as appropriate.");
2023 if (!ix86_arch_string
)
2024 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2025 if (!strcmp (ix86_arch_string
, "generic"))
2026 error ("generic CPU can be used only for -mtune= switch");
2027 if (!strncmp (ix86_arch_string
, "generic", 7))
2028 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2030 if (ix86_cmodel_string
!= 0)
2032 if (!strcmp (ix86_cmodel_string
, "small"))
2033 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2034 else if (!strcmp (ix86_cmodel_string
, "medium"))
2035 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2037 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
2038 else if (!strcmp (ix86_cmodel_string
, "32"))
2039 ix86_cmodel
= CM_32
;
2040 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2041 ix86_cmodel
= CM_KERNEL
;
2042 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
2043 ix86_cmodel
= CM_LARGE
;
2045 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2049 ix86_cmodel
= CM_32
;
2051 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2053 if (ix86_asm_string
!= 0)
2056 && !strcmp (ix86_asm_string
, "intel"))
2057 ix86_asm_dialect
= ASM_INTEL
;
2058 else if (!strcmp (ix86_asm_string
, "att"))
2059 ix86_asm_dialect
= ASM_ATT
;
2061 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2063 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2064 error ("code model %qs not supported in the %s bit mode",
2065 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2066 if (ix86_cmodel
== CM_LARGE
)
2067 sorry ("code model %<large%> not supported yet");
2068 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2069 sorry ("%i-bit mode not compiled in",
2070 (target_flags
& MASK_64BIT
) ? 64 : 32);
2072 for (i
= 0; i
< pta_size
; i
++)
2073 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2075 ix86_arch
= processor_alias_table
[i
].processor
;
2076 /* Default cpu tuning to the architecture. */
2077 ix86_tune
= ix86_arch
;
2078 if (processor_alias_table
[i
].flags
& PTA_MMX
2079 && !(target_flags_explicit
& MASK_MMX
))
2080 target_flags
|= MASK_MMX
;
2081 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2082 && !(target_flags_explicit
& MASK_3DNOW
))
2083 target_flags
|= MASK_3DNOW
;
2084 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2085 && !(target_flags_explicit
& MASK_3DNOW_A
))
2086 target_flags
|= MASK_3DNOW_A
;
2087 if (processor_alias_table
[i
].flags
& PTA_SSE
2088 && !(target_flags_explicit
& MASK_SSE
))
2089 target_flags
|= MASK_SSE
;
2090 if (processor_alias_table
[i
].flags
& PTA_SSE2
2091 && !(target_flags_explicit
& MASK_SSE2
))
2092 target_flags
|= MASK_SSE2
;
2093 if (processor_alias_table
[i
].flags
& PTA_SSE3
2094 && !(target_flags_explicit
& MASK_SSE3
))
2095 target_flags
|= MASK_SSE3
;
2096 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2097 && !(target_flags_explicit
& MASK_SSSE3
))
2098 target_flags
|= MASK_SSSE3
;
2099 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2100 x86_prefetch_sse
= true;
2101 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2102 x86_cmpxchg16b
= true;
2103 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2104 && !(target_flags_explicit
& MASK_POPCNT
))
2105 target_flags
|= MASK_POPCNT
;
2106 if (processor_alias_table
[i
].flags
& PTA_ABM
2107 && !(target_flags_explicit
& MASK_ABM
))
2108 target_flags
|= MASK_ABM
;
2109 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2110 && !(target_flags_explicit
& MASK_SSE4A
))
2111 target_flags
|= MASK_SSE4A
;
2112 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2113 error ("CPU you selected does not support x86-64 "
2119 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2121 for (i
= 0; i
< pta_size
; i
++)
2122 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2124 ix86_tune
= processor_alias_table
[i
].processor
;
2125 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2127 if (ix86_tune_defaulted
)
2129 ix86_tune_string
= "x86-64";
2130 for (i
= 0; i
< pta_size
; i
++)
2131 if (! strcmp (ix86_tune_string
,
2132 processor_alias_table
[i
].name
))
2134 ix86_tune
= processor_alias_table
[i
].processor
;
2137 error ("CPU you selected does not support x86-64 "
2140 /* Intel CPUs have always interpreted SSE prefetch instructions as
2141 NOPs; so, we can enable SSE prefetch instructions even when
2142 -mtune (rather than -march) points us to a processor that has them.
2143 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2144 higher processors. */
2145 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2146 x86_prefetch_sse
= true;
2150 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2153 ix86_cost
= &size_cost
;
2155 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2156 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2157 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2159 /* Arrange to set up i386_stack_locals for all functions. */
2160 init_machine_status
= ix86_init_machine_status
;
2162 /* Validate -mregparm= value. */
2163 if (ix86_regparm_string
)
2165 i
= atoi (ix86_regparm_string
);
2166 if (i
< 0 || i
> REGPARM_MAX
)
2167 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2173 ix86_regparm
= REGPARM_MAX
;
2175 /* If the user has provided any of the -malign-* options,
2176 warn and use that value only if -falign-* is not set.
2177 Remove this code in GCC 3.2 or later. */
2178 if (ix86_align_loops_string
)
2180 warning (0, "-malign-loops is obsolete, use -falign-loops");
2181 if (align_loops
== 0)
2183 i
= atoi (ix86_align_loops_string
);
2184 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2185 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2187 align_loops
= 1 << i
;
2191 if (ix86_align_jumps_string
)
2193 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2194 if (align_jumps
== 0)
2196 i
= atoi (ix86_align_jumps_string
);
2197 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2198 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2200 align_jumps
= 1 << i
;
2204 if (ix86_align_funcs_string
)
2206 warning (0, "-malign-functions is obsolete, use -falign-functions");
2207 if (align_functions
== 0)
2209 i
= atoi (ix86_align_funcs_string
);
2210 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2211 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2213 align_functions
= 1 << i
;
2217 /* Default align_* from the processor table. */
2218 if (align_loops
== 0)
2220 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2221 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2223 if (align_jumps
== 0)
2225 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2226 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2228 if (align_functions
== 0)
2230 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2233 /* Validate -mbranch-cost= value, or provide default. */
2234 ix86_branch_cost
= ix86_cost
->branch_cost
;
2235 if (ix86_branch_cost_string
)
2237 i
= atoi (ix86_branch_cost_string
);
2239 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2241 ix86_branch_cost
= i
;
2243 if (ix86_section_threshold_string
)
2245 i
= atoi (ix86_section_threshold_string
);
2247 error ("-mlarge-data-threshold=%d is negative", i
);
2249 ix86_section_threshold
= i
;
2252 if (ix86_tls_dialect_string
)
2254 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2255 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2256 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2257 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2258 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2259 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2261 error ("bad value (%s) for -mtls-dialect= switch",
2262 ix86_tls_dialect_string
);
2265 /* Keep nonleaf frame pointers. */
2266 if (flag_omit_frame_pointer
)
2267 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2268 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2269 flag_omit_frame_pointer
= 1;
2271 /* If we're doing fast math, we don't care about comparison order
2272 wrt NaNs. This lets us use a shorter comparison sequence. */
2273 if (flag_finite_math_only
)
2274 target_flags
&= ~MASK_IEEE_FP
;
2276 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2277 since the insns won't need emulation. */
2278 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
2279 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2281 /* Likewise, if the target doesn't have a 387, or we've specified
2282 software floating point, don't use 387 inline intrinsics. */
2284 target_flags
|= MASK_NO_FANCY_MATH_387
;
2286 /* Turn on SSE3 builtins for -mssse3. */
2288 target_flags
|= MASK_SSE3
;
2290 /* Turn on SSE3 builtins for -msse4a. */
2292 target_flags
|= MASK_SSE3
;
2294 /* Turn on SSE2 builtins for -msse3. */
2296 target_flags
|= MASK_SSE2
;
2298 /* Turn on SSE builtins for -msse2. */
2300 target_flags
|= MASK_SSE
;
2302 /* Turn on MMX builtins for -msse. */
2305 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2306 x86_prefetch_sse
= true;
2309 /* Turn on MMX builtins for 3Dnow. */
2311 target_flags
|= MASK_MMX
;
2313 /* Turn on POPCNT builtins for -mabm. */
2315 target_flags
|= MASK_POPCNT
;
2319 if (TARGET_ALIGN_DOUBLE
)
2320 error ("-malign-double makes no sense in the 64bit mode");
2322 error ("-mrtd calling convention not supported in the 64bit mode");
2324 /* Enable by default the SSE and MMX builtins. Do allow the user to
2325 explicitly disable any of these. In particular, disabling SSE and
2326 MMX for kernel code is extremely useful. */
2328 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2329 & ~target_flags_explicit
);
2333 /* i386 ABI does not specify red zone. It still makes sense to use it
2334 when programmer takes care to stack from being destroyed. */
2335 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2336 target_flags
|= MASK_NO_RED_ZONE
;
2339 /* Validate -mpreferred-stack-boundary= value, or provide default.
2340 The default of 128 bits is for Pentium III's SSE __m128. We can't
2341 change it because of optimize_size. Otherwise, we can't mix object
2342 files compiled with -Os and -On. */
2343 ix86_preferred_stack_boundary
= 128;
2344 if (ix86_preferred_stack_boundary_string
)
2346 i
= atoi (ix86_preferred_stack_boundary_string
);
2347 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2348 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2349 TARGET_64BIT
? 4 : 2);
2351 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2354 /* Accept -msseregparm only if at least SSE support is enabled. */
2355 if (TARGET_SSEREGPARM
2357 error ("-msseregparm used without SSE enabled");
2359 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2361 if (ix86_fpmath_string
!= 0)
2363 if (! strcmp (ix86_fpmath_string
, "387"))
2364 ix86_fpmath
= FPMATH_387
;
2365 else if (! strcmp (ix86_fpmath_string
, "sse"))
2369 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2370 ix86_fpmath
= FPMATH_387
;
2373 ix86_fpmath
= FPMATH_SSE
;
2375 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2376 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2380 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2381 ix86_fpmath
= FPMATH_387
;
2383 else if (!TARGET_80387
)
2385 warning (0, "387 instruction set disabled, using SSE arithmetics");
2386 ix86_fpmath
= FPMATH_SSE
;
2389 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2392 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2395 /* If the i387 is disabled, then do not return values in it. */
2397 target_flags
&= ~MASK_FLOAT_RETURNS
;
2399 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2400 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2402 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2404 /* ??? Unwind info is not correct around the CFG unless either a frame
2405 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2406 unwind info generation to be aware of the CFG and propagating states
2408 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2409 || flag_exceptions
|| flag_non_call_exceptions
)
2410 && flag_omit_frame_pointer
2411 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2413 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2414 warning (0, "unwind tables currently require either a frame pointer "
2415 "or -maccumulate-outgoing-args for correctness");
2416 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2419 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2422 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2423 p
= strchr (internal_label_prefix
, 'X');
2424 internal_label_prefix_len
= p
- internal_label_prefix
;
2428 /* When scheduling description is not available, disable scheduler pass
2429 so it won't slow down the compilation and make x87 code slower. */
2430 if (!TARGET_SCHEDULE
)
2431 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2433 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2434 set_param_value ("simultaneous-prefetches",
2435 ix86_cost
->simultaneous_prefetches
);
2436 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2437 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2440 /* switch to the appropriate section for output of DECL.
2441 DECL is either a `VAR_DECL' node or a constant of some sort.
2442 RELOC indicates whether forming the initial value of DECL requires
2443 link-time relocations. */
2446 x86_64_elf_select_section (tree decl
, int reloc
,
2447 unsigned HOST_WIDE_INT align
)
2449 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2450 && ix86_in_large_data_p (decl
))
2452 const char *sname
= NULL
;
2453 unsigned int flags
= SECTION_WRITE
;
2454 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2459 case SECCAT_DATA_REL
:
2460 sname
= ".ldata.rel";
2462 case SECCAT_DATA_REL_LOCAL
:
2463 sname
= ".ldata.rel.local";
2465 case SECCAT_DATA_REL_RO
:
2466 sname
= ".ldata.rel.ro";
2468 case SECCAT_DATA_REL_RO_LOCAL
:
2469 sname
= ".ldata.rel.ro.local";
2473 flags
|= SECTION_BSS
;
2476 case SECCAT_RODATA_MERGE_STR
:
2477 case SECCAT_RODATA_MERGE_STR_INIT
:
2478 case SECCAT_RODATA_MERGE_CONST
:
2482 case SECCAT_SRODATA
:
2489 /* We don't split these for medium model. Place them into
2490 default sections and hope for best. */
2495 /* We might get called with string constants, but get_named_section
2496 doesn't like them as they are not DECLs. Also, we need to set
2497 flags in that case. */
2499 return get_section (sname
, flags
, NULL
);
2500 return get_named_section (decl
, sname
, reloc
);
2503 return default_elf_select_section (decl
, reloc
, align
);
2506 /* Build up a unique section name, expressed as a
2507 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2508 RELOC indicates whether the initial value of EXP requires
2509 link-time relocations. */
2512 x86_64_elf_unique_section (tree decl
, int reloc
)
2514 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2515 && ix86_in_large_data_p (decl
))
2517 const char *prefix
= NULL
;
2518 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2519 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2521 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2524 case SECCAT_DATA_REL
:
2525 case SECCAT_DATA_REL_LOCAL
:
2526 case SECCAT_DATA_REL_RO
:
2527 case SECCAT_DATA_REL_RO_LOCAL
:
2528 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2531 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2534 case SECCAT_RODATA_MERGE_STR
:
2535 case SECCAT_RODATA_MERGE_STR_INIT
:
2536 case SECCAT_RODATA_MERGE_CONST
:
2537 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2539 case SECCAT_SRODATA
:
2546 /* We don't split these for medium model. Place them into
2547 default sections and hope for best. */
2555 plen
= strlen (prefix
);
2557 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2558 name
= targetm
.strip_name_encoding (name
);
2559 nlen
= strlen (name
);
2561 string
= alloca (nlen
+ plen
+ 1);
2562 memcpy (string
, prefix
, plen
);
2563 memcpy (string
+ plen
, name
, nlen
+ 1);
2565 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2569 default_unique_section (decl
, reloc
);
2572 #ifdef COMMON_ASM_OP
2573 /* This says how to output assembler code to declare an
2574 uninitialized external linkage data object.
2576 For medium model x86-64 we need to use .largecomm opcode for
2579 x86_elf_aligned_common (FILE *file
,
2580 const char *name
, unsigned HOST_WIDE_INT size
,
2583 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2584 && size
> (unsigned int)ix86_section_threshold
)
2585 fprintf (file
, ".largecomm\t");
2587 fprintf (file
, "%s", COMMON_ASM_OP
);
2588 assemble_name (file
, name
);
2589 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2590 size
, align
/ BITS_PER_UNIT
);
2593 /* Utility function for targets to use in implementing
2594 ASM_OUTPUT_ALIGNED_BSS. */
2597 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2598 const char *name
, unsigned HOST_WIDE_INT size
,
2601 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2602 && size
> (unsigned int)ix86_section_threshold
)
2603 switch_to_section (get_named_section (decl
, ".lbss", 0));
2605 switch_to_section (bss_section
);
2606 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2607 #ifdef ASM_DECLARE_OBJECT_NAME
2608 last_assemble_variable_decl
= decl
;
2609 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2611 /* Standard thing is just output label for the object. */
2612 ASM_OUTPUT_LABEL (file
, name
);
2613 #endif /* ASM_DECLARE_OBJECT_NAME */
2614 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2618 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2620 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2621 make the problem with not enough registers even worse. */
2622 #ifdef INSN_SCHEDULING
2624 flag_schedule_insns
= 0;
2628 /* The Darwin libraries never set errno, so we might as well
2629 avoid calling them when that's the only reason we would. */
2630 flag_errno_math
= 0;
2632 /* The default values of these switches depend on the TARGET_64BIT
2633 that is not known at this moment. Mark these values with 2 and
2634 let user the to override these. In case there is no command line option
2635 specifying them, we will set the defaults in override_options. */
2637 flag_omit_frame_pointer
= 2;
2638 flag_pcc_struct_return
= 2;
2639 flag_asynchronous_unwind_tables
= 2;
2640 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2641 SUBTARGET_OPTIMIZATION_OPTIONS
;
2645 /* Table of valid machine attributes. */
2646 const struct attribute_spec ix86_attribute_table
[] =
2648 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2649 /* Stdcall attribute says callee is responsible for popping arguments
2650 if they are not variable. */
2651 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2652 /* Fastcall attribute says callee is responsible for popping arguments
2653 if they are not variable. */
2654 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2655 /* Cdecl attribute says the callee is a normal C declaration */
2656 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2657 /* Regparm attribute specifies how many integer arguments are to be
2658 passed in registers. */
2659 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2660 /* Sseregparm attribute says we are using x86_64 calling conventions
2661 for FP arguments. */
2662 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2663 /* force_align_arg_pointer says this function realigns the stack at entry. */
2664 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2665 false, true, true, ix86_handle_cconv_attribute
},
2666 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2667 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2668 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2669 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2671 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2672 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2673 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2674 SUBTARGET_ATTRIBUTE_TABLE
,
2676 { NULL
, 0, 0, false, false, false, NULL
}
2679 /* Decide whether we can make a sibling call to a function. DECL is the
2680 declaration of the function being targeted by the call and EXP is the
2681 CALL_EXPR representing the call. */
2684 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2689 /* If we are generating position-independent code, we cannot sibcall
2690 optimize any indirect call, or a direct call to a global function,
2691 as the PLT requires %ebx be live. */
2692 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2699 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2700 if (POINTER_TYPE_P (func
))
2701 func
= TREE_TYPE (func
);
2704 /* Check that the return value locations are the same. Like
2705 if we are returning floats on the 80387 register stack, we cannot
2706 make a sibcall from a function that doesn't return a float to a
2707 function that does or, conversely, from a function that does return
2708 a float to a function that doesn't; the necessary stack adjustment
2709 would not be executed. This is also the place we notice
2710 differences in the return value ABI. Note that it is ok for one
2711 of the functions to have void return type as long as the return
2712 value of the other is passed in a register. */
2713 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2714 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2716 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2718 if (!rtx_equal_p (a
, b
))
2721 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2723 else if (!rtx_equal_p (a
, b
))
2726 /* If this call is indirect, we'll need to be able to use a call-clobbered
2727 register for the address of the target function. Make sure that all
2728 such registers are not used for passing parameters. */
2729 if (!decl
&& !TARGET_64BIT
)
2733 /* We're looking at the CALL_EXPR, we need the type of the function. */
2734 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2735 type
= TREE_TYPE (type
); /* pointer type */
2736 type
= TREE_TYPE (type
); /* function type */
2738 if (ix86_function_regparm (type
, NULL
) >= 3)
2740 /* ??? Need to count the actual number of registers to be used,
2741 not the possible number of registers. Fix later. */
2746 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2747 /* Dllimport'd functions are also called indirectly. */
2748 if (decl
&& DECL_DLLIMPORT_P (decl
)
2749 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2753 /* If we forced aligned the stack, then sibcalling would unalign the
2754 stack, which may break the called function. */
2755 if (cfun
->machine
->force_align_arg_pointer
)
2758 /* Otherwise okay. That also includes certain types of indirect calls. */
2762 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2763 calling convention attributes;
2764 arguments as in struct attribute_spec.handler. */
2767 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2769 int flags ATTRIBUTE_UNUSED
,
2772 if (TREE_CODE (*node
) != FUNCTION_TYPE
2773 && TREE_CODE (*node
) != METHOD_TYPE
2774 && TREE_CODE (*node
) != FIELD_DECL
2775 && TREE_CODE (*node
) != TYPE_DECL
)
2777 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2778 IDENTIFIER_POINTER (name
));
2779 *no_add_attrs
= true;
2783 /* Can combine regparm with all attributes but fastcall. */
2784 if (is_attribute_p ("regparm", name
))
2788 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2790 error ("fastcall and regparm attributes are not compatible");
2793 cst
= TREE_VALUE (args
);
2794 if (TREE_CODE (cst
) != INTEGER_CST
)
2796 warning (OPT_Wattributes
,
2797 "%qs attribute requires an integer constant argument",
2798 IDENTIFIER_POINTER (name
));
2799 *no_add_attrs
= true;
2801 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2803 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2804 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2805 *no_add_attrs
= true;
2809 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2810 TYPE_ATTRIBUTES (*node
))
2811 && compare_tree_int (cst
, REGPARM_MAX
-1))
2813 error ("%s functions limited to %d register parameters",
2814 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2822 warning (OPT_Wattributes
, "%qs attribute ignored",
2823 IDENTIFIER_POINTER (name
));
2824 *no_add_attrs
= true;
2828 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2829 if (is_attribute_p ("fastcall", name
))
2831 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2833 error ("fastcall and cdecl attributes are not compatible");
2835 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2837 error ("fastcall and stdcall attributes are not compatible");
2839 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2841 error ("fastcall and regparm attributes are not compatible");
2845 /* Can combine stdcall with fastcall (redundant), regparm and
2847 else if (is_attribute_p ("stdcall", name
))
2849 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2851 error ("stdcall and cdecl attributes are not compatible");
2853 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2855 error ("stdcall and fastcall attributes are not compatible");
2859 /* Can combine cdecl with regparm and sseregparm. */
2860 else if (is_attribute_p ("cdecl", name
))
2862 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2864 error ("stdcall and cdecl attributes are not compatible");
2866 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2868 error ("fastcall and cdecl attributes are not compatible");
2872 /* Can combine sseregparm with all attributes. */
2877 /* Return 0 if the attributes for two types are incompatible, 1 if they
2878 are compatible, and 2 if they are nearly compatible (which causes a
2879 warning to be generated). */
2882 ix86_comp_type_attributes (tree type1
, tree type2
)
2884 /* Check for mismatch of non-default calling convention. */
2885 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2887 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2890 /* Check for mismatched fastcall/regparm types. */
2891 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2892 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2893 || (ix86_function_regparm (type1
, NULL
)
2894 != ix86_function_regparm (type2
, NULL
)))
2897 /* Check for mismatched sseregparm types. */
2898 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2899 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2902 /* Check for mismatched return types (cdecl vs stdcall). */
2903 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2904 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2910 /* Return the regparm value for a function with the indicated TYPE and DECL.
2911 DECL may be NULL when calling function indirectly
2912 or considering a libcall. */
2915 ix86_function_regparm (tree type
, tree decl
)
2918 int regparm
= ix86_regparm
;
2919 bool user_convention
= false;
2923 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2926 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2927 user_convention
= true;
2930 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2933 user_convention
= true;
2936 /* Use register calling convention for local functions when possible. */
2937 if (!TARGET_64BIT
&& !user_convention
&& decl
2938 && flag_unit_at_a_time
&& !profile_flag
)
2940 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2943 int local_regparm
, globals
= 0, regno
;
2945 /* Make sure no regparm register is taken by a global register
2947 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2948 if (global_regs
[local_regparm
])
2950 /* We can't use regparm(3) for nested functions as these use
2951 static chain pointer in third argument. */
2952 if (local_regparm
== 3
2953 && decl_function_context (decl
)
2954 && !DECL_NO_STATIC_CHAIN (decl
))
2956 /* If the function realigns its stackpointer, the
2957 prologue will clobber %ecx. If we've already
2958 generated code for the callee, the callee
2959 DECL_STRUCT_FUNCTION is gone, so we fall back to
2960 scanning the attributes for the self-realigning
2962 if ((DECL_STRUCT_FUNCTION (decl
)
2963 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2964 || (!DECL_STRUCT_FUNCTION (decl
)
2965 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2966 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2968 /* Each global register variable increases register preassure,
2969 so the more global reg vars there are, the smaller regparm
2970 optimization use, unless requested by the user explicitly. */
2971 for (regno
= 0; regno
< 6; regno
++)
2972 if (global_regs
[regno
])
2975 = globals
< local_regparm
? local_regparm
- globals
: 0;
2977 if (local_regparm
> regparm
)
2978 regparm
= local_regparm
;
2985 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2986 DFmode (2) arguments in SSE registers for a function with the
2987 indicated TYPE and DECL. DECL may be NULL when calling function
2988 indirectly or considering a libcall. Otherwise return 0. */
2991 ix86_function_sseregparm (tree type
, tree decl
)
2993 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2994 by the sseregparm attribute. */
2995 if (TARGET_SSEREGPARM
2997 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3002 error ("Calling %qD with attribute sseregparm without "
3003 "SSE/SSE2 enabled", decl
);
3005 error ("Calling %qT with attribute sseregparm without "
3006 "SSE/SSE2 enabled", type
);
3013 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3014 (and DFmode for SSE2) arguments in SSE registers,
3015 even for 32-bit targets. */
3016 if (!TARGET_64BIT
&& decl
3017 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3019 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3021 return TARGET_SSE2
? 2 : 1;
3027 /* Return true if EAX is live at the start of the function. Used by
3028 ix86_expand_prologue to determine if we need special help before
3029 calling allocate_stack_worker. */
3032 ix86_eax_live_at_start_p (void)
3034 /* Cheat. Don't bother working forward from ix86_function_regparm
3035 to the function type to whether an actual argument is located in
3036 eax. Instead just look at cfg info, which is still close enough
3037 to correct at this point. This gives false positives for broken
3038 functions that might use uninitialized data that happens to be
3039 allocated in eax, but who cares? */
3040 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3043 /* Value is the number of bytes of arguments automatically
3044 popped when returning from a subroutine call.
3045 FUNDECL is the declaration node of the function (as a tree),
3046 FUNTYPE is the data type of the function (as a tree),
3047 or for a library call it is an identifier node for the subroutine name.
3048 SIZE is the number of bytes of arguments passed on the stack.
3050 On the 80386, the RTD insn may be used to pop them if the number
3051 of args is fixed, but if the number is variable then the caller
3052 must pop them all. RTD can't be used for library calls now
3053 because the library is compiled with the Unix compiler.
3054 Use of RTD is a selectable option, since it is incompatible with
3055 standard Unix calling sequences. If the option is not selected,
3056 the caller must always pop the args.
3058 The attribute stdcall is equivalent to RTD on a per module basis. */
3061 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3063 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3065 /* Cdecl functions override -mrtd, and never pop the stack. */
3066 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3068 /* Stdcall and fastcall functions will pop the stack if not
3070 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3071 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3075 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3076 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3077 == void_type_node
)))
3081 /* Lose any fake structure return argument if it is passed on the stack. */
3082 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3084 && !KEEP_AGGREGATE_RETURN_POINTER
)
3086 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3089 return GET_MODE_SIZE (Pmode
);
3095 /* Argument support functions. */
3097 /* Return true when register may be used to pass function parameters. */
3099 ix86_function_arg_regno_p (int regno
)
3105 return (regno
< REGPARM_MAX
3106 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3108 return (regno
< REGPARM_MAX
3109 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3110 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3111 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3112 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3117 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3122 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3123 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3126 /* RAX is used as hidden argument to va_arg functions. */
3129 for (i
= 0; i
< REGPARM_MAX
; i
++)
3130 if (regno
== x86_64_int_parameter_registers
[i
])
3135 /* Return if we do not know how to pass TYPE solely in registers. */
3138 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3140 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3143 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3144 The layout_type routine is crafty and tries to trick us into passing
3145 currently unsupported vector types on the stack by using TImode. */
3146 return (!TARGET_64BIT
&& mode
== TImode
3147 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3150 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3151 for a call to a function whose data type is FNTYPE.
3152 For a library call, FNTYPE is 0. */
3155 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3156 tree fntype
, /* tree ptr for function decl */
3157 rtx libname
, /* SYMBOL_REF of library name or 0 */
3160 static CUMULATIVE_ARGS zero_cum
;
3161 tree param
, next_param
;
3163 if (TARGET_DEBUG_ARG
)
3165 fprintf (stderr
, "\ninit_cumulative_args (");
3167 fprintf (stderr
, "fntype code = %s, ret code = %s",
3168 tree_code_name
[(int) TREE_CODE (fntype
)],
3169 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3171 fprintf (stderr
, "no fntype");
3174 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3179 /* Set up the number of registers to use for passing arguments. */
3180 cum
->nregs
= ix86_regparm
;
3182 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3184 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3185 cum
->warn_sse
= true;
3186 cum
->warn_mmx
= true;
3187 cum
->maybe_vaarg
= false;
3189 /* Use ecx and edx registers if function has fastcall attribute,
3190 else look for regparm information. */
3191 if (fntype
&& !TARGET_64BIT
)
3193 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3199 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3202 /* Set up the number of SSE registers used for passing SFmode
3203 and DFmode arguments. Warn for mismatching ABI. */
3204 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3206 /* Determine if this function has variable arguments. This is
3207 indicated by the last argument being 'void_type_mode' if there
3208 are no variable arguments. If there are variable arguments, then
3209 we won't pass anything in registers in 32-bit mode. */
3211 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3213 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3214 param
!= 0; param
= next_param
)
3216 next_param
= TREE_CHAIN (param
);
3217 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3227 cum
->float_in_sse
= 0;
3229 cum
->maybe_vaarg
= true;
3233 if ((!fntype
&& !libname
)
3234 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3235 cum
->maybe_vaarg
= true;
3237 if (TARGET_DEBUG_ARG
)
3238 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3243 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3244 But in the case of vector types, it is some vector mode.
3246 When we have only some of our vector isa extensions enabled, then there
3247 are some modes for which vector_mode_supported_p is false. For these
3248 modes, the generic vector support in gcc will choose some non-vector mode
3249 in order to implement the type. By computing the natural mode, we'll
3250 select the proper ABI location for the operand and not depend on whatever
3251 the middle-end decides to do with these vector types. */
3253 static enum machine_mode
3254 type_natural_mode (tree type
)
3256 enum machine_mode mode
= TYPE_MODE (type
);
3258 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3260 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3261 if ((size
== 8 || size
== 16)
3262 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3263 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3265 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3267 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3268 mode
= MIN_MODE_VECTOR_FLOAT
;
3270 mode
= MIN_MODE_VECTOR_INT
;
3272 /* Get the mode which has this inner mode and number of units. */
3273 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3274 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3275 && GET_MODE_INNER (mode
) == innermode
)
3285 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3286 this may not agree with the mode that the type system has chosen for the
3287 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3288 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3291 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3296 if (orig_mode
!= BLKmode
)
3297 tmp
= gen_rtx_REG (orig_mode
, regno
);
3300 tmp
= gen_rtx_REG (mode
, regno
);
3301 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3302 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3308 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3309 of this code is to classify each 8bytes of incoming argument by the register
3310 class and assign registers accordingly. */
3312 /* Return the union class of CLASS1 and CLASS2.
3313 See the x86-64 PS ABI for details. */
3315 static enum x86_64_reg_class
3316 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3318 /* Rule #1: If both classes are equal, this is the resulting class. */
3319 if (class1
== class2
)
3322 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3324 if (class1
== X86_64_NO_CLASS
)
3326 if (class2
== X86_64_NO_CLASS
)
3329 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3330 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3331 return X86_64_MEMORY_CLASS
;
3333 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3334 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3335 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3336 return X86_64_INTEGERSI_CLASS
;
3337 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3338 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3339 return X86_64_INTEGER_CLASS
;
3341 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3343 if (class1
== X86_64_X87_CLASS
3344 || class1
== X86_64_X87UP_CLASS
3345 || class1
== X86_64_COMPLEX_X87_CLASS
3346 || class2
== X86_64_X87_CLASS
3347 || class2
== X86_64_X87UP_CLASS
3348 || class2
== X86_64_COMPLEX_X87_CLASS
)
3349 return X86_64_MEMORY_CLASS
;
3351 /* Rule #6: Otherwise class SSE is used. */
3352 return X86_64_SSE_CLASS
;
3355 /* Classify the argument of type TYPE and mode MODE.
3356 CLASSES will be filled by the register class used to pass each word
3357 of the operand. The number of words is returned. In case the parameter
3358 should be passed in memory, 0 is returned. As a special case for zero
3359 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3361 BIT_OFFSET is used internally for handling records and specifies offset
3362 of the offset in bits modulo 256 to avoid overflow cases.
3364 See the x86-64 PS ABI for details.
3368 classify_argument (enum machine_mode mode
, tree type
,
3369 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3371 HOST_WIDE_INT bytes
=
3372 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3373 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3375 /* Variable sized entities are always passed/returned in memory. */
3379 if (mode
!= VOIDmode
3380 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3383 if (type
&& AGGREGATE_TYPE_P (type
))
3387 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3389 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3393 for (i
= 0; i
< words
; i
++)
3394 classes
[i
] = X86_64_NO_CLASS
;
3396 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3397 signalize memory class, so handle it as special case. */
3400 classes
[0] = X86_64_NO_CLASS
;
3404 /* Classify each field of record and merge classes. */
3405 switch (TREE_CODE (type
))
3408 /* And now merge the fields of structure. */
3409 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3411 if (TREE_CODE (field
) == FIELD_DECL
)
3415 if (TREE_TYPE (field
) == error_mark_node
)
3418 /* Bitfields are always classified as integer. Handle them
3419 early, since later code would consider them to be
3420 misaligned integers. */
3421 if (DECL_BIT_FIELD (field
))
3423 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3424 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3425 + tree_low_cst (DECL_SIZE (field
), 0)
3428 merge_classes (X86_64_INTEGER_CLASS
,
3433 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3434 TREE_TYPE (field
), subclasses
,
3435 (int_bit_position (field
)
3436 + bit_offset
) % 256);
3439 for (i
= 0; i
< num
; i
++)
3442 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3444 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3452 /* Arrays are handled as small records. */
3455 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3456 TREE_TYPE (type
), subclasses
, bit_offset
);
3460 /* The partial classes are now full classes. */
3461 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3462 subclasses
[0] = X86_64_SSE_CLASS
;
3463 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3464 subclasses
[0] = X86_64_INTEGER_CLASS
;
3466 for (i
= 0; i
< words
; i
++)
3467 classes
[i
] = subclasses
[i
% num
];
3472 case QUAL_UNION_TYPE
:
3473 /* Unions are similar to RECORD_TYPE but offset is always 0.
3475 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3477 if (TREE_CODE (field
) == FIELD_DECL
)
3481 if (TREE_TYPE (field
) == error_mark_node
)
3484 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3485 TREE_TYPE (field
), subclasses
,
3489 for (i
= 0; i
< num
; i
++)
3490 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3499 /* Final merger cleanup. */
3500 for (i
= 0; i
< words
; i
++)
3502 /* If one class is MEMORY, everything should be passed in
3504 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3507 /* The X86_64_SSEUP_CLASS should be always preceded by
3508 X86_64_SSE_CLASS. */
3509 if (classes
[i
] == X86_64_SSEUP_CLASS
3510 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3511 classes
[i
] = X86_64_SSE_CLASS
;
3513 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3514 if (classes
[i
] == X86_64_X87UP_CLASS
3515 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3516 classes
[i
] = X86_64_SSE_CLASS
;
3521 /* Compute alignment needed. We align all types to natural boundaries with
3522 exception of XFmode that is aligned to 64bits. */
3523 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3525 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3528 mode_alignment
= 128;
3529 else if (mode
== XCmode
)
3530 mode_alignment
= 256;
3531 if (COMPLEX_MODE_P (mode
))
3532 mode_alignment
/= 2;
3533 /* Misaligned fields are always returned in memory. */
3534 if (bit_offset
% mode_alignment
)
3538 /* for V1xx modes, just use the base mode */
3539 if (VECTOR_MODE_P (mode
)
3540 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3541 mode
= GET_MODE_INNER (mode
);
3543 /* Classification of atomic types. */
3548 classes
[0] = X86_64_SSE_CLASS
;
3551 classes
[0] = X86_64_SSE_CLASS
;
3552 classes
[1] = X86_64_SSEUP_CLASS
;
3561 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3562 classes
[0] = X86_64_INTEGERSI_CLASS
;
3564 classes
[0] = X86_64_INTEGER_CLASS
;
3568 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3573 if (!(bit_offset
% 64))
3574 classes
[0] = X86_64_SSESF_CLASS
;
3576 classes
[0] = X86_64_SSE_CLASS
;
3579 classes
[0] = X86_64_SSEDF_CLASS
;
3582 classes
[0] = X86_64_X87_CLASS
;
3583 classes
[1] = X86_64_X87UP_CLASS
;
3586 classes
[0] = X86_64_SSE_CLASS
;
3587 classes
[1] = X86_64_SSEUP_CLASS
;
3590 classes
[0] = X86_64_SSE_CLASS
;
3593 classes
[0] = X86_64_SSEDF_CLASS
;
3594 classes
[1] = X86_64_SSEDF_CLASS
;
3597 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3600 /* This modes is larger than 16 bytes. */
3608 classes
[0] = X86_64_SSE_CLASS
;
3609 classes
[1] = X86_64_SSEUP_CLASS
;
3615 classes
[0] = X86_64_SSE_CLASS
;
3621 gcc_assert (VECTOR_MODE_P (mode
));
3626 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3628 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3629 classes
[0] = X86_64_INTEGERSI_CLASS
;
3631 classes
[0] = X86_64_INTEGER_CLASS
;
3632 classes
[1] = X86_64_INTEGER_CLASS
;
3633 return 1 + (bytes
> 8);
3637 /* Examine the argument and return set number of register required in each
3638 class. Return 0 iff parameter should be passed in memory. */
3640 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3641 int *int_nregs
, int *sse_nregs
)
3643 enum x86_64_reg_class
class[MAX_CLASSES
];
3644 int n
= classify_argument (mode
, type
, class, 0);
3650 for (n
--; n
>= 0; n
--)
3653 case X86_64_INTEGER_CLASS
:
3654 case X86_64_INTEGERSI_CLASS
:
3657 case X86_64_SSE_CLASS
:
3658 case X86_64_SSESF_CLASS
:
3659 case X86_64_SSEDF_CLASS
:
3662 case X86_64_NO_CLASS
:
3663 case X86_64_SSEUP_CLASS
:
3665 case X86_64_X87_CLASS
:
3666 case X86_64_X87UP_CLASS
:
3670 case X86_64_COMPLEX_X87_CLASS
:
3671 return in_return
? 2 : 0;
3672 case X86_64_MEMORY_CLASS
:
3678 /* Construct container for the argument used by GCC interface. See
3679 FUNCTION_ARG for the detailed description. */
3682 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3683 tree type
, int in_return
, int nintregs
, int nsseregs
,
3684 const int *intreg
, int sse_regno
)
3686 /* The following variables hold the static issued_error state. */
3687 static bool issued_sse_arg_error
;
3688 static bool issued_sse_ret_error
;
3689 static bool issued_x87_ret_error
;
3691 enum machine_mode tmpmode
;
3693 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3694 enum x86_64_reg_class
class[MAX_CLASSES
];
3698 int needed_sseregs
, needed_intregs
;
3699 rtx exp
[MAX_CLASSES
];
3702 n
= classify_argument (mode
, type
, class, 0);
3703 if (TARGET_DEBUG_ARG
)
3706 fprintf (stderr
, "Memory class\n");
3709 fprintf (stderr
, "Classes:");
3710 for (i
= 0; i
< n
; i
++)
3712 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3714 fprintf (stderr
, "\n");
3719 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3722 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3725 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3726 some less clueful developer tries to use floating-point anyway. */
3727 if (needed_sseregs
&& !TARGET_SSE
)
3731 if (!issued_sse_ret_error
)
3733 error ("SSE register return with SSE disabled");
3734 issued_sse_ret_error
= true;
3737 else if (!issued_sse_arg_error
)
3739 error ("SSE register argument with SSE disabled");
3740 issued_sse_arg_error
= true;
3745 /* Likewise, error if the ABI requires us to return values in the
3746 x87 registers and the user specified -mno-80387. */
3747 if (!TARGET_80387
&& in_return
)
3748 for (i
= 0; i
< n
; i
++)
3749 if (class[i
] == X86_64_X87_CLASS
3750 || class[i
] == X86_64_X87UP_CLASS
3751 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3753 if (!issued_x87_ret_error
)
3755 error ("x87 register return with x87 disabled");
3756 issued_x87_ret_error
= true;
3761 /* First construct simple cases. Avoid SCmode, since we want to use
3762 single register to pass this type. */
3763 if (n
== 1 && mode
!= SCmode
)
3766 case X86_64_INTEGER_CLASS
:
3767 case X86_64_INTEGERSI_CLASS
:
3768 return gen_rtx_REG (mode
, intreg
[0]);
3769 case X86_64_SSE_CLASS
:
3770 case X86_64_SSESF_CLASS
:
3771 case X86_64_SSEDF_CLASS
:
3772 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3773 case X86_64_X87_CLASS
:
3774 case X86_64_COMPLEX_X87_CLASS
:
3775 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3776 case X86_64_NO_CLASS
:
3777 /* Zero sized array, struct or class. */
3782 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3784 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3786 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3787 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3788 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3789 && class[1] == X86_64_INTEGER_CLASS
3790 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3791 && intreg
[0] + 1 == intreg
[1])
3792 return gen_rtx_REG (mode
, intreg
[0]);
3794 /* Otherwise figure out the entries of the PARALLEL. */
3795 for (i
= 0; i
< n
; i
++)
3799 case X86_64_NO_CLASS
:
3801 case X86_64_INTEGER_CLASS
:
3802 case X86_64_INTEGERSI_CLASS
:
3803 /* Merge TImodes on aligned occasions here too. */
3804 if (i
* 8 + 8 > bytes
)
3805 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3806 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3810 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3811 if (tmpmode
== BLKmode
)
3813 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3814 gen_rtx_REG (tmpmode
, *intreg
),
3818 case X86_64_SSESF_CLASS
:
3819 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3820 gen_rtx_REG (SFmode
,
3821 SSE_REGNO (sse_regno
)),
3825 case X86_64_SSEDF_CLASS
:
3826 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3827 gen_rtx_REG (DFmode
,
3828 SSE_REGNO (sse_regno
)),
3832 case X86_64_SSE_CLASS
:
3833 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3837 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3838 gen_rtx_REG (tmpmode
,
3839 SSE_REGNO (sse_regno
)),
3841 if (tmpmode
== TImode
)
3850 /* Empty aligned struct, union or class. */
3854 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3855 for (i
= 0; i
< nexps
; i
++)
3856 XVECEXP (ret
, 0, i
) = exp
[i
];
3860 /* Update the data in CUM to advance over an argument
3861 of mode MODE and data type TYPE.
3862 (TYPE is null for libcalls where that information may not be available.) */
3865 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3866 tree type
, int named
)
3869 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3870 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3873 mode
= type_natural_mode (type
);
3875 if (TARGET_DEBUG_ARG
)
3876 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3877 "mode=%s, named=%d)\n\n",
3878 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3879 GET_MODE_NAME (mode
), named
);
3883 int int_nregs
, sse_nregs
;
3884 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3885 cum
->words
+= words
;
3886 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3888 cum
->nregs
-= int_nregs
;
3889 cum
->sse_nregs
-= sse_nregs
;
3890 cum
->regno
+= int_nregs
;
3891 cum
->sse_regno
+= sse_nregs
;
3894 cum
->words
+= words
;
3912 cum
->words
+= words
;
3913 cum
->nregs
-= words
;
3914 cum
->regno
+= words
;
3916 if (cum
->nregs
<= 0)
3924 if (cum
->float_in_sse
< 2)
3927 if (cum
->float_in_sse
< 1)
3938 if (!type
|| !AGGREGATE_TYPE_P (type
))
3940 cum
->sse_words
+= words
;
3941 cum
->sse_nregs
-= 1;
3942 cum
->sse_regno
+= 1;
3943 if (cum
->sse_nregs
<= 0)
3955 if (!type
|| !AGGREGATE_TYPE_P (type
))
3957 cum
->mmx_words
+= words
;
3958 cum
->mmx_nregs
-= 1;
3959 cum
->mmx_regno
+= 1;
3960 if (cum
->mmx_nregs
<= 0)
3971 /* Define where to put the arguments to a function.
3972 Value is zero to push the argument on the stack,
3973 or a hard register in which to store the argument.
3975 MODE is the argument's machine mode.
3976 TYPE is the data type of the argument (as a tree).
3977 This is null for libcalls where that information may
3979 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3980 the preceding args and about the function being called.
3981 NAMED is nonzero if this argument is a named parameter
3982 (otherwise it is an extra parameter matching an ellipsis). */
3985 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3986 tree type
, int named
)
3988 enum machine_mode mode
= orig_mode
;
3991 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3992 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3993 static bool warnedsse
, warnedmmx
;
3995 /* To simplify the code below, represent vector types with a vector mode
3996 even if MMX/SSE are not active. */
3997 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3998 mode
= type_natural_mode (type
);
4000 /* Handle a hidden AL argument containing number of registers for varargs
4001 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4003 if (mode
== VOIDmode
)
4006 return GEN_INT (cum
->maybe_vaarg
4007 ? (cum
->sse_nregs
< 0
4015 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4017 &x86_64_int_parameter_registers
[cum
->regno
],
4022 /* For now, pass fp/complex values on the stack. */
4034 if (words
<= cum
->nregs
)
4036 int regno
= cum
->regno
;
4038 /* Fastcall allocates the first two DWORD (SImode) or
4039 smaller arguments to ECX and EDX. */
4042 if (mode
== BLKmode
|| mode
== DImode
)
4045 /* ECX not EAX is the first allocated register. */
4049 ret
= gen_rtx_REG (mode
, regno
);
4053 if (cum
->float_in_sse
< 2)
4056 if (cum
->float_in_sse
< 1)
4066 if (!type
|| !AGGREGATE_TYPE_P (type
))
4068 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4071 warning (0, "SSE vector argument without SSE enabled "
4075 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4076 cum
->sse_regno
+ FIRST_SSE_REG
);
4083 if (!type
|| !AGGREGATE_TYPE_P (type
))
4085 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4088 warning (0, "MMX vector argument without MMX enabled "
4092 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4093 cum
->mmx_regno
+ FIRST_MMX_REG
);
4098 if (TARGET_DEBUG_ARG
)
4101 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4102 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4105 print_simple_rtl (stderr
, ret
);
4107 fprintf (stderr
, ", stack");
4109 fprintf (stderr
, " )\n");
4115 /* A C expression that indicates when an argument must be passed by
4116 reference. If nonzero for an argument, a copy of that argument is
4117 made in memory and a pointer to the argument is passed instead of
4118 the argument itself. The pointer is passed in whatever way is
4119 appropriate for passing a pointer to that type. */
4122 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4123 enum machine_mode mode ATTRIBUTE_UNUSED
,
4124 tree type
, bool named ATTRIBUTE_UNUSED
)
4129 if (type
&& int_size_in_bytes (type
) == -1)
4131 if (TARGET_DEBUG_ARG
)
4132 fprintf (stderr
, "function_arg_pass_by_reference\n");
4139 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4140 ABI. Only called if TARGET_SSE. */
4142 contains_128bit_aligned_vector_p (tree type
)
4144 enum machine_mode mode
= TYPE_MODE (type
);
4145 if (SSE_REG_MODE_P (mode
)
4146 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4148 if (TYPE_ALIGN (type
) < 128)
4151 if (AGGREGATE_TYPE_P (type
))
4153 /* Walk the aggregates recursively. */
4154 switch (TREE_CODE (type
))
4158 case QUAL_UNION_TYPE
:
4162 /* Walk all the structure fields. */
4163 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4165 if (TREE_CODE (field
) == FIELD_DECL
4166 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4173 /* Just for use if some languages passes arrays by value. */
4174 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4185 /* Gives the alignment boundary, in bits, of an argument with the
4186 specified mode and type. */
4189 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4193 align
= TYPE_ALIGN (type
);
4195 align
= GET_MODE_ALIGNMENT (mode
);
4196 if (align
< PARM_BOUNDARY
)
4197 align
= PARM_BOUNDARY
;
4200 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4201 make an exception for SSE modes since these require 128bit
4204 The handling here differs from field_alignment. ICC aligns MMX
4205 arguments to 4 byte boundaries, while structure fields are aligned
4206 to 8 byte boundaries. */
4208 align
= PARM_BOUNDARY
;
4211 if (!SSE_REG_MODE_P (mode
))
4212 align
= PARM_BOUNDARY
;
4216 if (!contains_128bit_aligned_vector_p (type
))
4217 align
= PARM_BOUNDARY
;
4225 /* Return true if N is a possible register number of function value. */
4227 ix86_function_value_regno_p (int regno
)
4233 return ((regno
) == 0
4234 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4235 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4237 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4238 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4239 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4244 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4245 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4249 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4256 /* Define how to find the value returned by a function.
4257 VALTYPE is the data type of the value (as a tree).
4258 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4259 otherwise, FUNC is 0. */
4261 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4262 bool outgoing ATTRIBUTE_UNUSED
)
4264 enum machine_mode natmode
= type_natural_mode (valtype
);
4268 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4269 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4270 x86_64_int_return_registers
, 0);
4271 /* For zero sized structures, construct_container return NULL, but we
4272 need to keep rest of compiler happy by returning meaningful value. */
4274 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4279 tree fn
= NULL_TREE
, fntype
;
4281 && DECL_P (fntype_or_decl
))
4282 fn
= fntype_or_decl
;
4283 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4284 return gen_rtx_REG (TYPE_MODE (valtype
),
4285 ix86_value_regno (natmode
, fn
, fntype
));
4289 /* Return true iff type is returned in memory. */
4291 ix86_return_in_memory (tree type
)
4293 int needed_intregs
, needed_sseregs
, size
;
4294 enum machine_mode mode
= type_natural_mode (type
);
4297 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4299 if (mode
== BLKmode
)
4302 size
= int_size_in_bytes (type
);
4304 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4307 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4309 /* User-created vectors small enough to fit in EAX. */
4313 /* MMX/3dNow values are returned in MM0,
4314 except when it doesn't exits. */
4316 return (TARGET_MMX
? 0 : 1);
4318 /* SSE values are returned in XMM0, except when it doesn't exist. */
4320 return (TARGET_SSE
? 0 : 1);
4334 /* When returning SSE vector types, we have a choice of either
4335 (1) being abi incompatible with a -march switch, or
4336 (2) generating an error.
4337 Given no good solution, I think the safest thing is one warning.
4338 The user won't be able to use -Werror, but....
4340 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4341 called in response to actually generating a caller or callee that
4342 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4343 via aggregate_value_p for general type probing from tree-ssa. */
4346 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4348 static bool warnedsse
, warnedmmx
;
4352 /* Look at the return type of the function, not the function type. */
4353 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4355 if (!TARGET_SSE
&& !warnedsse
)
4358 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4361 warning (0, "SSE vector return without SSE enabled "
4366 if (!TARGET_MMX
&& !warnedmmx
)
4368 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4371 warning (0, "MMX vector return without MMX enabled "
4380 /* Define how to find the value returned by a library function
4381 assuming the value has mode MODE. */
4383 ix86_libcall_value (enum machine_mode mode
)
4397 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4400 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4404 return gen_rtx_REG (mode
, 0);
4408 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4411 /* Given a mode, return the register to use for a return value. */
4414 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4416 gcc_assert (!TARGET_64BIT
);
4418 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4419 we normally prevent this case when mmx is not available. However
4420 some ABIs may require the result to be returned like DImode. */
4421 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4422 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4424 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4425 we prevent this case when sse is not available. However some ABIs
4426 may require the result to be returned like integer TImode. */
4427 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4428 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4430 /* Decimal floating point values can go in %eax, unlike other float modes. */
4431 if (DECIMAL_FLOAT_MODE_P (mode
))
4434 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4435 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4438 /* Floating point return values in %st(0), except for local functions when
4439 SSE math is enabled or for functions with sseregparm attribute. */
4440 if ((func
|| fntype
)
4441 && (mode
== SFmode
|| mode
== DFmode
))
4443 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4444 if ((sse_level
>= 1 && mode
== SFmode
)
4445 || (sse_level
== 2 && mode
== DFmode
))
4446 return FIRST_SSE_REG
;
4449 return FIRST_FLOAT_REG
;
4452 /* Create the va_list data type. */
4455 ix86_build_builtin_va_list (void)
4457 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4459 /* For i386 we use plain pointer to argument area. */
4461 return build_pointer_type (char_type_node
);
4463 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4464 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4466 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4467 unsigned_type_node
);
4468 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4469 unsigned_type_node
);
4470 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4472 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4475 va_list_gpr_counter_field
= f_gpr
;
4476 va_list_fpr_counter_field
= f_fpr
;
4478 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4479 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4480 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4481 DECL_FIELD_CONTEXT (f_sav
) = record
;
4483 TREE_CHAIN (record
) = type_decl
;
4484 TYPE_NAME (record
) = type_decl
;
4485 TYPE_FIELDS (record
) = f_gpr
;
4486 TREE_CHAIN (f_gpr
) = f_fpr
;
4487 TREE_CHAIN (f_fpr
) = f_ovf
;
4488 TREE_CHAIN (f_ovf
) = f_sav
;
4490 layout_type (record
);
4492 /* The correct type is an array type of one element. */
4493 return build_array_type (record
, build_index_type (size_zero_node
));
4496 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4499 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4500 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4503 CUMULATIVE_ARGS next_cum
;
4504 rtx save_area
= NULL_RTX
, mem
;
4517 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4520 /* Indicate to allocate space on the stack for varargs save area. */
4521 ix86_save_varrargs_registers
= 1;
4523 cfun
->stack_alignment_needed
= 128;
4525 fntype
= TREE_TYPE (current_function_decl
);
4526 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4527 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4528 != void_type_node
));
4530 /* For varargs, we do not want to skip the dummy va_dcl argument.
4531 For stdargs, we do want to skip the last named argument. */
4534 function_arg_advance (&next_cum
, mode
, type
, 1);
4537 save_area
= frame_pointer_rtx
;
4539 set
= get_varargs_alias_set ();
4541 for (i
= next_cum
.regno
;
4543 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4546 mem
= gen_rtx_MEM (Pmode
,
4547 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4548 MEM_NOTRAP_P (mem
) = 1;
4549 set_mem_alias_set (mem
, set
);
4550 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4551 x86_64_int_parameter_registers
[i
]));
4554 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4556 /* Now emit code to save SSE registers. The AX parameter contains number
4557 of SSE parameter registers used to call this function. We use
4558 sse_prologue_save insn template that produces computed jump across
4559 SSE saves. We need some preparation work to get this working. */
4561 label
= gen_label_rtx ();
4562 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4564 /* Compute address to jump to :
4565 label - 5*eax + nnamed_sse_arguments*5 */
4566 tmp_reg
= gen_reg_rtx (Pmode
);
4567 nsse_reg
= gen_reg_rtx (Pmode
);
4568 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4569 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4570 gen_rtx_MULT (Pmode
, nsse_reg
,
4572 if (next_cum
.sse_regno
)
4575 gen_rtx_CONST (DImode
,
4576 gen_rtx_PLUS (DImode
,
4578 GEN_INT (next_cum
.sse_regno
* 4))));
4580 emit_move_insn (nsse_reg
, label_ref
);
4581 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4583 /* Compute address of memory block we save into. We always use pointer
4584 pointing 127 bytes after first byte to store - this is needed to keep
4585 instruction size limited by 4 bytes. */
4586 tmp_reg
= gen_reg_rtx (Pmode
);
4587 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4588 plus_constant (save_area
,
4589 8 * REGPARM_MAX
+ 127)));
4590 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4591 MEM_NOTRAP_P (mem
) = 1;
4592 set_mem_alias_set (mem
, set
);
4593 set_mem_align (mem
, BITS_PER_WORD
);
4595 /* And finally do the dirty job! */
4596 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4597 GEN_INT (next_cum
.sse_regno
), label
));
4602 /* Implement va_start. */
4605 ix86_va_start (tree valist
, rtx nextarg
)
4607 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4608 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4609 tree gpr
, fpr
, ovf
, sav
, t
;
4612 /* Only 64bit target needs something special. */
4615 std_expand_builtin_va_start (valist
, nextarg
);
4619 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4620 f_fpr
= TREE_CHAIN (f_gpr
);
4621 f_ovf
= TREE_CHAIN (f_fpr
);
4622 f_sav
= TREE_CHAIN (f_ovf
);
4624 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4625 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4626 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4627 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4628 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4630 /* Count number of gp and fp argument registers used. */
4631 words
= current_function_args_info
.words
;
4632 n_gpr
= current_function_args_info
.regno
;
4633 n_fpr
= current_function_args_info
.sse_regno
;
4635 if (TARGET_DEBUG_ARG
)
4636 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4637 (int) words
, (int) n_gpr
, (int) n_fpr
);
4639 if (cfun
->va_list_gpr_size
)
4641 type
= TREE_TYPE (gpr
);
4642 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4643 build_int_cst (type
, n_gpr
* 8));
4644 TREE_SIDE_EFFECTS (t
) = 1;
4645 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4648 if (cfun
->va_list_fpr_size
)
4650 type
= TREE_TYPE (fpr
);
4651 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4652 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4653 TREE_SIDE_EFFECTS (t
) = 1;
4654 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4657 /* Find the overflow area. */
4658 type
= TREE_TYPE (ovf
);
4659 t
= make_tree (type
, virtual_incoming_args_rtx
);
4661 t
= build2 (PLUS_EXPR
, type
, t
,
4662 build_int_cst (type
, words
* UNITS_PER_WORD
));
4663 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4664 TREE_SIDE_EFFECTS (t
) = 1;
4665 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4667 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4669 /* Find the register save area.
4670 Prologue of the function save it right above stack frame. */
4671 type
= TREE_TYPE (sav
);
4672 t
= make_tree (type
, frame_pointer_rtx
);
4673 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4674 TREE_SIDE_EFFECTS (t
) = 1;
4675 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4679 /* Implement va_arg. */
4682 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4684 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4685 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4686 tree gpr
, fpr
, ovf
, sav
, t
;
4688 tree lab_false
, lab_over
= NULL_TREE
;
4693 enum machine_mode nat_mode
;
4695 /* Only 64bit target needs something special. */
4697 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4699 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4700 f_fpr
= TREE_CHAIN (f_gpr
);
4701 f_ovf
= TREE_CHAIN (f_fpr
);
4702 f_sav
= TREE_CHAIN (f_ovf
);
4704 valist
= build_va_arg_indirect_ref (valist
);
4705 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4706 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4707 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4708 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4710 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4712 type
= build_pointer_type (type
);
4713 size
= int_size_in_bytes (type
);
4714 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4716 nat_mode
= type_natural_mode (type
);
4717 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4718 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4720 /* Pull the value out of the saved registers. */
4722 addr
= create_tmp_var (ptr_type_node
, "addr");
4723 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4727 int needed_intregs
, needed_sseregs
;
4729 tree int_addr
, sse_addr
;
4731 lab_false
= create_artificial_label ();
4732 lab_over
= create_artificial_label ();
4734 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4736 need_temp
= (!REG_P (container
)
4737 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4738 || TYPE_ALIGN (type
) > 128));
4740 /* In case we are passing structure, verify that it is consecutive block
4741 on the register save area. If not we need to do moves. */
4742 if (!need_temp
&& !REG_P (container
))
4744 /* Verify that all registers are strictly consecutive */
4745 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4749 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4751 rtx slot
= XVECEXP (container
, 0, i
);
4752 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4753 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4761 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4763 rtx slot
= XVECEXP (container
, 0, i
);
4764 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4765 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4777 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4778 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4779 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4780 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4783 /* First ensure that we fit completely in registers. */
4786 t
= build_int_cst (TREE_TYPE (gpr
),
4787 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4788 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4789 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4790 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4791 gimplify_and_add (t
, pre_p
);
4795 t
= build_int_cst (TREE_TYPE (fpr
),
4796 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4798 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4799 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4800 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4801 gimplify_and_add (t
, pre_p
);
4804 /* Compute index to start of area used for integer regs. */
4807 /* int_addr = gpr + sav; */
4808 t
= fold_convert (ptr_type_node
, gpr
);
4809 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4810 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4811 gimplify_and_add (t
, pre_p
);
4815 /* sse_addr = fpr + sav; */
4816 t
= fold_convert (ptr_type_node
, fpr
);
4817 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4818 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4819 gimplify_and_add (t
, pre_p
);
4824 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4827 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4828 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4829 gimplify_and_add (t
, pre_p
);
4831 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4833 rtx slot
= XVECEXP (container
, 0, i
);
4834 rtx reg
= XEXP (slot
, 0);
4835 enum machine_mode mode
= GET_MODE (reg
);
4836 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4837 tree addr_type
= build_pointer_type (piece_type
);
4840 tree dest_addr
, dest
;
4842 if (SSE_REGNO_P (REGNO (reg
)))
4844 src_addr
= sse_addr
;
4845 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4849 src_addr
= int_addr
;
4850 src_offset
= REGNO (reg
) * 8;
4852 src_addr
= fold_convert (addr_type
, src_addr
);
4853 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4854 size_int (src_offset
)));
4855 src
= build_va_arg_indirect_ref (src_addr
);
4857 dest_addr
= fold_convert (addr_type
, addr
);
4858 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4859 size_int (INTVAL (XEXP (slot
, 1)))));
4860 dest
= build_va_arg_indirect_ref (dest_addr
);
4862 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4863 gimplify_and_add (t
, pre_p
);
4869 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4870 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4871 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4872 gimplify_and_add (t
, pre_p
);
4876 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4877 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4878 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4879 gimplify_and_add (t
, pre_p
);
4882 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4883 gimplify_and_add (t
, pre_p
);
4885 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4886 append_to_statement_list (t
, pre_p
);
4889 /* ... otherwise out of the overflow area. */
4891 /* Care for on-stack alignment if needed. */
4892 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4893 || integer_zerop (TYPE_SIZE (type
)))
4897 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4898 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4899 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4900 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4901 build_int_cst (TREE_TYPE (t
), -align
));
4903 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4905 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4906 gimplify_and_add (t2
, pre_p
);
4908 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4909 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4910 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4911 gimplify_and_add (t
, pre_p
);
4915 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4916 append_to_statement_list (t
, pre_p
);
4919 ptrtype
= build_pointer_type (type
);
4920 addr
= fold_convert (ptrtype
, addr
);
4923 addr
= build_va_arg_indirect_ref (addr
);
4924 return build_va_arg_indirect_ref (addr
);
4927 /* Return nonzero if OPNUM's MEM should be matched
4928 in movabs* patterns. */
4931 ix86_check_movabs (rtx insn
, int opnum
)
4935 set
= PATTERN (insn
);
4936 if (GET_CODE (set
) == PARALLEL
)
4937 set
= XVECEXP (set
, 0, 0);
4938 gcc_assert (GET_CODE (set
) == SET
);
4939 mem
= XEXP (set
, opnum
);
4940 while (GET_CODE (mem
) == SUBREG
)
4941 mem
= SUBREG_REG (mem
);
4942 gcc_assert (MEM_P (mem
));
4943 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4946 /* Initialize the table of extra 80387 mathematical constants. */
4949 init_ext_80387_constants (void)
4951 static const char * cst
[5] =
4953 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4954 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4955 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4956 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4957 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4961 for (i
= 0; i
< 5; i
++)
4963 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4964 /* Ensure each constant is rounded to XFmode precision. */
4965 real_convert (&ext_80387_constants_table
[i
],
4966 XFmode
, &ext_80387_constants_table
[i
]);
4969 ext_80387_constants_init
= 1;
4972 /* Return true if the constant is something that can be loaded with
4973 a special instruction. */
4976 standard_80387_constant_p (rtx x
)
4980 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4983 if (x
== CONST0_RTX (GET_MODE (x
)))
4985 if (x
== CONST1_RTX (GET_MODE (x
)))
4988 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4990 /* For XFmode constants, try to find a special 80387 instruction when
4991 optimizing for size or on those CPUs that benefit from them. */
4992 if (GET_MODE (x
) == XFmode
4993 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4997 if (! ext_80387_constants_init
)
4998 init_ext_80387_constants ();
5000 for (i
= 0; i
< 5; i
++)
5001 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5005 /* Load of the constant -0.0 or -1.0 will be split as
5006 fldz;fchs or fld1;fchs sequence. */
5007 if (real_isnegzero (&r
))
5009 if (real_identical (&r
, &dconstm1
))
5015 /* Return the opcode of the special instruction to be used to load
5019 standard_80387_constant_opcode (rtx x
)
5021 switch (standard_80387_constant_p (x
))
5045 /* Return the CONST_DOUBLE representing the 80387 constant that is
5046 loaded by the specified special instruction. The argument IDX
5047 matches the return value from standard_80387_constant_p. */
5050 standard_80387_constant_rtx (int idx
)
5054 if (! ext_80387_constants_init
)
5055 init_ext_80387_constants ();
5071 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5075 /* Return 1 if mode is a valid mode for sse. */
5077 standard_sse_mode_p (enum machine_mode mode
)
5094 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5097 standard_sse_constant_p (rtx x
)
5099 enum machine_mode mode
= GET_MODE (x
);
5101 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5103 if (vector_all_ones_operand (x
, mode
)
5104 && standard_sse_mode_p (mode
))
5105 return TARGET_SSE2
? 2 : -1;
5110 /* Return the opcode of the special instruction to be used to load
5114 standard_sse_constant_opcode (rtx insn
, rtx x
)
5116 switch (standard_sse_constant_p (x
))
5119 if (get_attr_mode (insn
) == MODE_V4SF
)
5120 return "xorps\t%0, %0";
5121 else if (get_attr_mode (insn
) == MODE_V2DF
)
5122 return "xorpd\t%0, %0";
5124 return "pxor\t%0, %0";
5126 return "pcmpeqd\t%0, %0";
5131 /* Returns 1 if OP contains a symbol reference */
5134 symbolic_reference_mentioned_p (rtx op
)
5139 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5142 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5143 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5149 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5150 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5154 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5161 /* Return 1 if it is appropriate to emit `ret' instructions in the
5162 body of a function. Do this only if the epilogue is simple, needing a
5163 couple of insns. Prior to reloading, we can't tell how many registers
5164 must be saved, so return 0 then. Return 0 if there is no frame
5165 marker to de-allocate. */
5168 ix86_can_use_return_insn_p (void)
5170 struct ix86_frame frame
;
5172 if (! reload_completed
|| frame_pointer_needed
)
5175 /* Don't allow more than 32 pop, since that's all we can do
5176 with one instruction. */
5177 if (current_function_pops_args
5178 && current_function_args_size
>= 32768)
5181 ix86_compute_frame_layout (&frame
);
5182 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5185 /* Value should be nonzero if functions must have frame pointers.
5186 Zero means the frame pointer need not be set up (and parms may
5187 be accessed via the stack pointer) in functions that seem suitable. */
5190 ix86_frame_pointer_required (void)
5192 /* If we accessed previous frames, then the generated code expects
5193 to be able to access the saved ebp value in our frame. */
5194 if (cfun
->machine
->accesses_prev_frame
)
5197 /* Several x86 os'es need a frame pointer for other reasons,
5198 usually pertaining to setjmp. */
5199 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5202 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5203 the frame pointer by default. Turn it back on now if we've not
5204 got a leaf function. */
5205 if (TARGET_OMIT_LEAF_FRAME_POINTER
5206 && (!current_function_is_leaf
5207 || ix86_current_function_calls_tls_descriptor
))
5210 if (current_function_profile
)
5216 /* Record that the current function accesses previous call frames. */
5219 ix86_setup_frame_addresses (void)
5221 cfun
->machine
->accesses_prev_frame
= 1;
5224 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5225 # define USE_HIDDEN_LINKONCE 1
5227 # define USE_HIDDEN_LINKONCE 0
5230 static int pic_labels_used
;
5232 /* Fills in the label name that should be used for a pc thunk for
5233 the given register. */
5236 get_pc_thunk_name (char name
[32], unsigned int regno
)
5238 gcc_assert (!TARGET_64BIT
);
5240 if (USE_HIDDEN_LINKONCE
)
5241 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5243 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5247 /* This function generates code for -fpic that loads %ebx with
5248 the return address of the caller and then returns. */
5251 ix86_file_end (void)
5256 for (regno
= 0; regno
< 8; ++regno
)
5260 if (! ((pic_labels_used
>> regno
) & 1))
5263 get_pc_thunk_name (name
, regno
);
5268 switch_to_section (darwin_sections
[text_coal_section
]);
5269 fputs ("\t.weak_definition\t", asm_out_file
);
5270 assemble_name (asm_out_file
, name
);
5271 fputs ("\n\t.private_extern\t", asm_out_file
);
5272 assemble_name (asm_out_file
, name
);
5273 fputs ("\n", asm_out_file
);
5274 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5278 if (USE_HIDDEN_LINKONCE
)
5282 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5284 TREE_PUBLIC (decl
) = 1;
5285 TREE_STATIC (decl
) = 1;
5286 DECL_ONE_ONLY (decl
) = 1;
5288 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5289 switch_to_section (get_named_section (decl
, NULL
, 0));
5291 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5292 fputs ("\t.hidden\t", asm_out_file
);
5293 assemble_name (asm_out_file
, name
);
5294 fputc ('\n', asm_out_file
);
5295 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5299 switch_to_section (text_section
);
5300 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5303 xops
[0] = gen_rtx_REG (SImode
, regno
);
5304 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5305 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5306 output_asm_insn ("ret", xops
);
5309 if (NEED_INDICATE_EXEC_STACK
)
5310 file_end_indicate_exec_stack ();
5313 /* Emit code for the SET_GOT patterns. */
5316 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5321 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5323 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5325 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5328 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5330 output_asm_insn ("call\t%a2", xops
);
5333 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5334 is what will be referenced by the Mach-O PIC subsystem. */
5336 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5339 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5340 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5343 output_asm_insn ("pop{l}\t%0", xops
);
5348 get_pc_thunk_name (name
, REGNO (dest
));
5349 pic_labels_used
|= 1 << REGNO (dest
);
5351 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5352 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5353 output_asm_insn ("call\t%X2", xops
);
5354 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5355 is what will be referenced by the Mach-O PIC subsystem. */
5358 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5360 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5361 CODE_LABEL_NUMBER (label
));
5368 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5369 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5371 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5376 /* Generate an "push" pattern for input ARG. */
5381 return gen_rtx_SET (VOIDmode
,
5383 gen_rtx_PRE_DEC (Pmode
,
5384 stack_pointer_rtx
)),
5388 /* Return >= 0 if there is an unused call-clobbered register available
5389 for the entire function. */
5392 ix86_select_alt_pic_regnum (void)
5394 if (current_function_is_leaf
&& !current_function_profile
5395 && !ix86_current_function_calls_tls_descriptor
)
5398 for (i
= 2; i
>= 0; --i
)
5399 if (!regs_ever_live
[i
])
5403 return INVALID_REGNUM
;
5406 /* Return 1 if we need to save REGNO. */
5408 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5410 if (pic_offset_table_rtx
5411 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5412 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5413 || current_function_profile
5414 || current_function_calls_eh_return
5415 || current_function_uses_const_pool
))
5417 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5422 if (current_function_calls_eh_return
&& maybe_eh_return
)
5427 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5428 if (test
== INVALID_REGNUM
)
5435 if (cfun
->machine
->force_align_arg_pointer
5436 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5439 return (regs_ever_live
[regno
]
5440 && !call_used_regs
[regno
]
5441 && !fixed_regs
[regno
]
5442 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5445 /* Return number of registers to be saved on the stack. */
5448 ix86_nsaved_regs (void)
5453 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5454 if (ix86_save_reg (regno
, true))
5459 /* Return the offset between two registers, one to be eliminated, and the other
5460 its replacement, at the start of a routine. */
5463 ix86_initial_elimination_offset (int from
, int to
)
5465 struct ix86_frame frame
;
5466 ix86_compute_frame_layout (&frame
);
5468 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5469 return frame
.hard_frame_pointer_offset
;
5470 else if (from
== FRAME_POINTER_REGNUM
5471 && to
== HARD_FRAME_POINTER_REGNUM
)
5472 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5475 gcc_assert (to
== STACK_POINTER_REGNUM
);
5477 if (from
== ARG_POINTER_REGNUM
)
5478 return frame
.stack_pointer_offset
;
5480 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5481 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5485 /* Fill structure ix86_frame about frame of currently computed function. */
5488 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5490 HOST_WIDE_INT total_size
;
5491 unsigned int stack_alignment_needed
;
5492 HOST_WIDE_INT offset
;
5493 unsigned int preferred_alignment
;
5494 HOST_WIDE_INT size
= get_frame_size ();
5496 frame
->nregs
= ix86_nsaved_regs ();
5499 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5500 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5502 /* During reload iteration the amount of registers saved can change.
5503 Recompute the value as needed. Do not recompute when amount of registers
5504 didn't change as reload does multiple calls to the function and does not
5505 expect the decision to change within single iteration. */
5507 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5509 int count
= frame
->nregs
;
5511 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5512 /* The fast prologue uses move instead of push to save registers. This
5513 is significantly longer, but also executes faster as modern hardware
5514 can execute the moves in parallel, but can't do that for push/pop.
5516 Be careful about choosing what prologue to emit: When function takes
5517 many instructions to execute we may use slow version as well as in
5518 case function is known to be outside hot spot (this is known with
5519 feedback only). Weight the size of function by number of registers
5520 to save as it is cheap to use one or two push instructions but very
5521 slow to use many of them. */
5523 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5524 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5525 || (flag_branch_probabilities
5526 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5527 cfun
->machine
->use_fast_prologue_epilogue
= false;
5529 cfun
->machine
->use_fast_prologue_epilogue
5530 = !expensive_function_p (count
);
5532 if (TARGET_PROLOGUE_USING_MOVE
5533 && cfun
->machine
->use_fast_prologue_epilogue
)
5534 frame
->save_regs_using_mov
= true;
5536 frame
->save_regs_using_mov
= false;
5539 /* Skip return address and saved base pointer. */
5540 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5542 frame
->hard_frame_pointer_offset
= offset
;
5544 /* Do some sanity checking of stack_alignment_needed and
5545 preferred_alignment, since i386 port is the only using those features
5546 that may break easily. */
5548 gcc_assert (!size
|| stack_alignment_needed
);
5549 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5550 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5551 gcc_assert (stack_alignment_needed
5552 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5554 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5555 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5557 /* Register save area */
5558 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5561 if (ix86_save_varrargs_registers
)
5563 offset
+= X86_64_VARARGS_SIZE
;
5564 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5567 frame
->va_arg_size
= 0;
5569 /* Align start of frame for local function. */
5570 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5571 & -stack_alignment_needed
) - offset
;
5573 offset
+= frame
->padding1
;
5575 /* Frame pointer points here. */
5576 frame
->frame_pointer_offset
= offset
;
5580 /* Add outgoing arguments area. Can be skipped if we eliminated
5581 all the function calls as dead code.
5582 Skipping is however impossible when function calls alloca. Alloca
5583 expander assumes that last current_function_outgoing_args_size
5584 of stack frame are unused. */
5585 if (ACCUMULATE_OUTGOING_ARGS
5586 && (!current_function_is_leaf
|| current_function_calls_alloca
5587 || ix86_current_function_calls_tls_descriptor
))
5589 offset
+= current_function_outgoing_args_size
;
5590 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5593 frame
->outgoing_arguments_size
= 0;
5595 /* Align stack boundary. Only needed if we're calling another function
5597 if (!current_function_is_leaf
|| current_function_calls_alloca
5598 || ix86_current_function_calls_tls_descriptor
)
5599 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5600 & -preferred_alignment
) - offset
;
5602 frame
->padding2
= 0;
5604 offset
+= frame
->padding2
;
5606 /* We've reached end of stack frame. */
5607 frame
->stack_pointer_offset
= offset
;
5609 /* Size prologue needs to allocate. */
5610 frame
->to_allocate
=
5611 (size
+ frame
->padding1
+ frame
->padding2
5612 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5614 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5615 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5616 frame
->save_regs_using_mov
= false;
5618 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5619 && current_function_is_leaf
5620 && !ix86_current_function_calls_tls_descriptor
)
5622 frame
->red_zone_size
= frame
->to_allocate
;
5623 if (frame
->save_regs_using_mov
)
5624 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5625 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5626 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5629 frame
->red_zone_size
= 0;
5630 frame
->to_allocate
-= frame
->red_zone_size
;
5631 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5633 fprintf (stderr
, "\n");
5634 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5635 fprintf (stderr
, "size: %ld\n", (long)size
);
5636 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5637 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5638 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5639 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5640 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5641 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5642 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5643 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5644 (long)frame
->hard_frame_pointer_offset
);
5645 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5646 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5647 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5648 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5652 /* Emit code to save registers in the prologue. */
5655 ix86_emit_save_regs (void)
5660 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5661 if (ix86_save_reg (regno
, true))
5663 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5664 RTX_FRAME_RELATED_P (insn
) = 1;
5668 /* Emit code to save registers using MOV insns. First register
5669 is restored from POINTER + OFFSET. */
5671 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5676 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5677 if (ix86_save_reg (regno
, true))
5679 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5681 gen_rtx_REG (Pmode
, regno
));
5682 RTX_FRAME_RELATED_P (insn
) = 1;
5683 offset
+= UNITS_PER_WORD
;
5687 /* Expand prologue or epilogue stack adjustment.
5688 The pattern exist to put a dependency on all ebp-based memory accesses.
5689 STYLE should be negative if instructions should be marked as frame related,
5690 zero if %r11 register is live and cannot be freely used and positive
5694 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5699 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5700 else if (x86_64_immediate_operand (offset
, DImode
))
5701 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5705 /* r11 is used by indirect sibcall return as well, set before the
5706 epilogue and used after the epilogue. ATM indirect sibcall
5707 shouldn't be used together with huge frame sizes in one
5708 function because of the frame_size check in sibcall.c. */
5710 r11
= gen_rtx_REG (DImode
, R11_REG
);
5711 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5713 RTX_FRAME_RELATED_P (insn
) = 1;
5714 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5718 RTX_FRAME_RELATED_P (insn
) = 1;
5721 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5724 ix86_internal_arg_pointer (void)
5726 bool has_force_align_arg_pointer
=
5727 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5728 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5729 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5730 && DECL_NAME (current_function_decl
)
5731 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5732 && DECL_FILE_SCOPE_P (current_function_decl
))
5733 || ix86_force_align_arg_pointer
5734 || has_force_align_arg_pointer
)
5736 /* Nested functions can't realign the stack due to a register
5738 if (DECL_CONTEXT (current_function_decl
)
5739 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5741 if (ix86_force_align_arg_pointer
)
5742 warning (0, "-mstackrealign ignored for nested functions");
5743 if (has_force_align_arg_pointer
)
5744 error ("%s not supported for nested functions",
5745 ix86_force_align_arg_pointer_string
);
5746 return virtual_incoming_args_rtx
;
5748 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5749 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5752 return virtual_incoming_args_rtx
;
5755 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5756 This is called from dwarf2out.c to emit call frame instructions
5757 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5759 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5761 rtx unspec
= SET_SRC (pattern
);
5762 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5766 case UNSPEC_REG_SAVE
:
5767 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5768 SET_DEST (pattern
));
5770 case UNSPEC_DEF_CFA
:
5771 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5772 INTVAL (XVECEXP (unspec
, 0, 0)));
5779 /* Expand the prologue into a bunch of separate insns. */
5782 ix86_expand_prologue (void)
5786 struct ix86_frame frame
;
5787 HOST_WIDE_INT allocate
;
5789 ix86_compute_frame_layout (&frame
);
5791 if (cfun
->machine
->force_align_arg_pointer
)
5795 /* Grab the argument pointer. */
5796 x
= plus_constant (stack_pointer_rtx
, 4);
5797 y
= cfun
->machine
->force_align_arg_pointer
;
5798 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5799 RTX_FRAME_RELATED_P (insn
) = 1;
5801 /* The unwind info consists of two parts: install the fafp as the cfa,
5802 and record the fafp as the "save register" of the stack pointer.
5803 The later is there in order that the unwinder can see where it
5804 should restore the stack pointer across the and insn. */
5805 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5806 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5807 RTX_FRAME_RELATED_P (x
) = 1;
5808 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5810 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5811 RTX_FRAME_RELATED_P (y
) = 1;
5812 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5813 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5814 REG_NOTES (insn
) = x
;
5816 /* Align the stack. */
5817 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5820 /* And here we cheat like madmen with the unwind info. We force the
5821 cfa register back to sp+4, which is exactly what it was at the
5822 start of the function. Re-pushing the return address results in
5823 the return at the same spot relative to the cfa, and thus is
5824 correct wrt the unwind info. */
5825 x
= cfun
->machine
->force_align_arg_pointer
;
5826 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5827 insn
= emit_insn (gen_push (x
));
5828 RTX_FRAME_RELATED_P (insn
) = 1;
5831 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5832 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5833 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5834 REG_NOTES (insn
) = x
;
5837 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5838 slower on all targets. Also sdb doesn't like it. */
5840 if (frame_pointer_needed
)
5842 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5843 RTX_FRAME_RELATED_P (insn
) = 1;
5845 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5846 RTX_FRAME_RELATED_P (insn
) = 1;
5849 allocate
= frame
.to_allocate
;
5851 if (!frame
.save_regs_using_mov
)
5852 ix86_emit_save_regs ();
5854 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5856 /* When using red zone we may start register saving before allocating
5857 the stack frame saving one cycle of the prologue. */
5858 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5859 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5860 : stack_pointer_rtx
,
5861 -frame
.nregs
* UNITS_PER_WORD
);
5865 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5866 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5867 GEN_INT (-allocate
), -1);
5870 /* Only valid for Win32. */
5871 rtx eax
= gen_rtx_REG (SImode
, 0);
5872 bool eax_live
= ix86_eax_live_at_start_p ();
5875 gcc_assert (!TARGET_64BIT
);
5879 emit_insn (gen_push (eax
));
5883 emit_move_insn (eax
, GEN_INT (allocate
));
5885 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5886 RTX_FRAME_RELATED_P (insn
) = 1;
5887 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5888 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5889 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5890 t
, REG_NOTES (insn
));
5894 if (frame_pointer_needed
)
5895 t
= plus_constant (hard_frame_pointer_rtx
,
5898 - frame
.nregs
* UNITS_PER_WORD
);
5900 t
= plus_constant (stack_pointer_rtx
, allocate
);
5901 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5905 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5907 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5908 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5910 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5911 -frame
.nregs
* UNITS_PER_WORD
);
5914 pic_reg_used
= false;
5915 if (pic_offset_table_rtx
5916 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5917 || current_function_profile
))
5919 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5921 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5922 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5924 pic_reg_used
= true;
5930 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5932 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5934 /* Even with accurate pre-reload life analysis, we can wind up
5935 deleting all references to the pic register after reload.
5936 Consider if cross-jumping unifies two sides of a branch
5937 controlled by a comparison vs the only read from a global.
5938 In which case, allow the set_got to be deleted, though we're
5939 too late to do anything about the ebx save in the prologue. */
5940 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5943 /* Prevent function calls from be scheduled before the call to mcount.
5944 In the pic_reg_used case, make sure that the got load isn't deleted. */
5945 if (current_function_profile
)
5946 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5949 /* Emit code to restore saved registers using MOV insns. First register
5950 is restored from POINTER + OFFSET. */
5952 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5953 int maybe_eh_return
)
5956 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5958 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5959 if (ix86_save_reg (regno
, maybe_eh_return
))
5961 /* Ensure that adjust_address won't be forced to produce pointer
5962 out of range allowed by x86-64 instruction set. */
5963 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5967 r11
= gen_rtx_REG (DImode
, R11_REG
);
5968 emit_move_insn (r11
, GEN_INT (offset
));
5969 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5970 base_address
= gen_rtx_MEM (Pmode
, r11
);
5973 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5974 adjust_address (base_address
, Pmode
, offset
));
5975 offset
+= UNITS_PER_WORD
;
5979 /* Restore function stack, frame, and registers. */
5982 ix86_expand_epilogue (int style
)
5985 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5986 struct ix86_frame frame
;
5987 HOST_WIDE_INT offset
;
5989 ix86_compute_frame_layout (&frame
);
5991 /* Calculate start of saved registers relative to ebp. Special care
5992 must be taken for the normal return case of a function using
5993 eh_return: the eax and edx registers are marked as saved, but not
5994 restored along this path. */
5995 offset
= frame
.nregs
;
5996 if (current_function_calls_eh_return
&& style
!= 2)
5998 offset
*= -UNITS_PER_WORD
;
6000 /* If we're only restoring one register and sp is not valid then
6001 using a move instruction to restore the register since it's
6002 less work than reloading sp and popping the register.
6004 The default code result in stack adjustment using add/lea instruction,
6005 while this code results in LEAVE instruction (or discrete equivalent),
6006 so it is profitable in some other cases as well. Especially when there
6007 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6008 and there is exactly one register to pop. This heuristic may need some
6009 tuning in future. */
6010 if ((!sp_valid
&& frame
.nregs
<= 1)
6011 || (TARGET_EPILOGUE_USING_MOVE
6012 && cfun
->machine
->use_fast_prologue_epilogue
6013 && (frame
.nregs
> 1 || frame
.to_allocate
))
6014 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6015 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6016 && cfun
->machine
->use_fast_prologue_epilogue
6017 && frame
.nregs
== 1)
6018 || current_function_calls_eh_return
)
6020 /* Restore registers. We can use ebp or esp to address the memory
6021 locations. If both are available, default to ebp, since offsets
6022 are known to be small. Only exception is esp pointing directly to the
6023 end of block of saved registers, where we may simplify addressing
6026 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6027 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6028 frame
.to_allocate
, style
== 2);
6030 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6031 offset
, style
== 2);
6033 /* eh_return epilogues need %ecx added to the stack pointer. */
6036 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6038 if (frame_pointer_needed
)
6040 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6041 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6042 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6044 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6045 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6047 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6052 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6053 tmp
= plus_constant (tmp
, (frame
.to_allocate
6054 + frame
.nregs
* UNITS_PER_WORD
));
6055 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6058 else if (!frame_pointer_needed
)
6059 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6060 GEN_INT (frame
.to_allocate
6061 + frame
.nregs
* UNITS_PER_WORD
),
6063 /* If not an i386, mov & pop is faster than "leave". */
6064 else if (TARGET_USE_LEAVE
|| optimize_size
6065 || !cfun
->machine
->use_fast_prologue_epilogue
)
6066 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6069 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6070 hard_frame_pointer_rtx
,
6073 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6075 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6080 /* First step is to deallocate the stack frame so that we can
6081 pop the registers. */
6084 gcc_assert (frame_pointer_needed
);
6085 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6086 hard_frame_pointer_rtx
,
6087 GEN_INT (offset
), style
);
6089 else if (frame
.to_allocate
)
6090 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6091 GEN_INT (frame
.to_allocate
), style
);
6093 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6094 if (ix86_save_reg (regno
, false))
6097 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6099 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6101 if (frame_pointer_needed
)
6103 /* Leave results in shorter dependency chains on CPUs that are
6104 able to grok it fast. */
6105 if (TARGET_USE_LEAVE
)
6106 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6107 else if (TARGET_64BIT
)
6108 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6110 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6114 if (cfun
->machine
->force_align_arg_pointer
)
6116 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6117 cfun
->machine
->force_align_arg_pointer
,
6121 /* Sibcall epilogues don't want a return instruction. */
6125 if (current_function_pops_args
&& current_function_args_size
)
6127 rtx popc
= GEN_INT (current_function_pops_args
);
6129 /* i386 can only pop 64K bytes. If asked to pop more, pop
6130 return address, do explicit add, and jump indirectly to the
6133 if (current_function_pops_args
>= 65536)
6135 rtx ecx
= gen_rtx_REG (SImode
, 2);
6137 /* There is no "pascal" calling convention in 64bit ABI. */
6138 gcc_assert (!TARGET_64BIT
);
6140 emit_insn (gen_popsi1 (ecx
));
6141 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6142 emit_jump_insn (gen_return_indirect_internal (ecx
));
6145 emit_jump_insn (gen_return_pop_internal (popc
));
6148 emit_jump_insn (gen_return_internal ());
6151 /* Reset from the function's potential modifications. */
6154 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6155 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6157 if (pic_offset_table_rtx
)
6158 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6160 /* Mach-O doesn't support labels at the end of objects, so if
6161 it looks like we might want one, insert a NOP. */
6163 rtx insn
= get_last_insn ();
6166 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6167 insn
= PREV_INSN (insn
);
6171 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6172 fputs ("\tnop\n", file
);
6178 /* Extract the parts of an RTL expression that is a valid memory address
6179 for an instruction. Return 0 if the structure of the address is
6180 grossly off. Return -1 if the address contains ASHIFT, so it is not
6181 strictly valid, but still used for computing length of lea instruction. */
6184 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6186 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6187 rtx base_reg
, index_reg
;
6188 HOST_WIDE_INT scale
= 1;
6189 rtx scale_rtx
= NULL_RTX
;
6191 enum ix86_address_seg seg
= SEG_DEFAULT
;
6193 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6195 else if (GET_CODE (addr
) == PLUS
)
6205 addends
[n
++] = XEXP (op
, 1);
6208 while (GET_CODE (op
) == PLUS
);
6213 for (i
= n
; i
>= 0; --i
)
6216 switch (GET_CODE (op
))
6221 index
= XEXP (op
, 0);
6222 scale_rtx
= XEXP (op
, 1);
6226 if (XINT (op
, 1) == UNSPEC_TP
6227 && TARGET_TLS_DIRECT_SEG_REFS
6228 && seg
== SEG_DEFAULT
)
6229 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6258 else if (GET_CODE (addr
) == MULT
)
6260 index
= XEXP (addr
, 0); /* index*scale */
6261 scale_rtx
= XEXP (addr
, 1);
6263 else if (GET_CODE (addr
) == ASHIFT
)
6267 /* We're called for lea too, which implements ashift on occasion. */
6268 index
= XEXP (addr
, 0);
6269 tmp
= XEXP (addr
, 1);
6270 if (!CONST_INT_P (tmp
))
6272 scale
= INTVAL (tmp
);
6273 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6279 disp
= addr
; /* displacement */
6281 /* Extract the integral value of scale. */
6284 if (!CONST_INT_P (scale_rtx
))
6286 scale
= INTVAL (scale_rtx
);
6289 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6290 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6292 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6293 if (base_reg
&& index_reg
&& scale
== 1
6294 && (index_reg
== arg_pointer_rtx
6295 || index_reg
== frame_pointer_rtx
6296 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6299 tmp
= base
, base
= index
, index
= tmp
;
6300 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6303 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6304 if ((base_reg
== hard_frame_pointer_rtx
6305 || base_reg
== frame_pointer_rtx
6306 || base_reg
== arg_pointer_rtx
) && !disp
)
6309 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6310 Avoid this by transforming to [%esi+0]. */
6311 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6312 && base_reg
&& !index_reg
&& !disp
6314 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6317 /* Special case: encode reg+reg instead of reg*2. */
6318 if (!base
&& index
&& scale
&& scale
== 2)
6319 base
= index
, base_reg
= index_reg
, scale
= 1;
6321 /* Special case: scaling cannot be encoded without base or displacement. */
6322 if (!base
&& !disp
&& index
&& scale
!= 1)
6334 /* Return cost of the memory address x.
6335 For i386, it is better to use a complex address than let gcc copy
6336 the address into a reg and make a new pseudo. But not if the address
6337 requires to two regs - that would mean more pseudos with longer
6340 ix86_address_cost (rtx x
)
6342 struct ix86_address parts
;
6344 int ok
= ix86_decompose_address (x
, &parts
);
6348 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6349 parts
.base
= SUBREG_REG (parts
.base
);
6350 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6351 parts
.index
= SUBREG_REG (parts
.index
);
6353 /* More complex memory references are better. */
6354 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6356 if (parts
.seg
!= SEG_DEFAULT
)
6359 /* Attempt to minimize number of registers in the address. */
6361 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6363 && (!REG_P (parts
.index
)
6364 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6368 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6370 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6371 && parts
.base
!= parts
.index
)
6374 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6375 since it's predecode logic can't detect the length of instructions
6376 and it degenerates to vector decoded. Increase cost of such
6377 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6378 to split such addresses or even refuse such addresses at all.
6380 Following addressing modes are affected:
6385 The first and last case may be avoidable by explicitly coding the zero in
6386 memory address, but I don't have AMD-K6 machine handy to check this
6390 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6391 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6392 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6398 /* If X is a machine specific address (i.e. a symbol or label being
6399 referenced as a displacement from the GOT implemented using an
6400 UNSPEC), then return the base term. Otherwise return X. */
6403 ix86_find_base_term (rtx x
)
6409 if (GET_CODE (x
) != CONST
)
6412 if (GET_CODE (term
) == PLUS
6413 && (CONST_INT_P (XEXP (term
, 1))
6414 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6415 term
= XEXP (term
, 0);
6416 if (GET_CODE (term
) != UNSPEC
6417 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6420 term
= XVECEXP (term
, 0, 0);
6422 if (GET_CODE (term
) != SYMBOL_REF
6423 && GET_CODE (term
) != LABEL_REF
)
6429 term
= ix86_delegitimize_address (x
);
6431 if (GET_CODE (term
) != SYMBOL_REF
6432 && GET_CODE (term
) != LABEL_REF
)
6438 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6439 this is used for to form addresses to local data when -fPIC is in
6443 darwin_local_data_pic (rtx disp
)
6445 if (GET_CODE (disp
) == MINUS
)
6447 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6448 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6449 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6451 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6452 if (! strcmp (sym_name
, "<pic base>"))
6460 /* Determine if a given RTX is a valid constant. We already know this
6461 satisfies CONSTANT_P. */
6464 legitimate_constant_p (rtx x
)
6466 switch (GET_CODE (x
))
6471 if (GET_CODE (x
) == PLUS
)
6473 if (!CONST_INT_P (XEXP (x
, 1)))
6478 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6481 /* Only some unspecs are valid as "constants". */
6482 if (GET_CODE (x
) == UNSPEC
)
6483 switch (XINT (x
, 1))
6486 return TARGET_64BIT
;
6489 x
= XVECEXP (x
, 0, 0);
6490 return (GET_CODE (x
) == SYMBOL_REF
6491 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6493 x
= XVECEXP (x
, 0, 0);
6494 return (GET_CODE (x
) == SYMBOL_REF
6495 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6500 /* We must have drilled down to a symbol. */
6501 if (GET_CODE (x
) == LABEL_REF
)
6503 if (GET_CODE (x
) != SYMBOL_REF
)
6508 /* TLS symbols are never valid. */
6509 if (SYMBOL_REF_TLS_MODEL (x
))
6514 if (GET_MODE (x
) == TImode
6515 && x
!= CONST0_RTX (TImode
)
6521 if (x
== CONST0_RTX (GET_MODE (x
)))
6529 /* Otherwise we handle everything else in the move patterns. */
6533 /* Determine if it's legal to put X into the constant pool. This
6534 is not possible for the address of thread-local symbols, which
6535 is checked above. */
6538 ix86_cannot_force_const_mem (rtx x
)
6540 /* We can always put integral constants and vectors in memory. */
6541 switch (GET_CODE (x
))
6551 return !legitimate_constant_p (x
);
6554 /* Determine if a given RTX is a valid constant address. */
6557 constant_address_p (rtx x
)
6559 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6562 /* Nonzero if the constant value X is a legitimate general operand
6563 when generating PIC code. It is given that flag_pic is on and
6564 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6567 legitimate_pic_operand_p (rtx x
)
6571 switch (GET_CODE (x
))
6574 inner
= XEXP (x
, 0);
6575 if (GET_CODE (inner
) == PLUS
6576 && CONST_INT_P (XEXP (inner
, 1)))
6577 inner
= XEXP (inner
, 0);
6579 /* Only some unspecs are valid as "constants". */
6580 if (GET_CODE (inner
) == UNSPEC
)
6581 switch (XINT (inner
, 1))
6584 return TARGET_64BIT
;
6586 x
= XVECEXP (inner
, 0, 0);
6587 return (GET_CODE (x
) == SYMBOL_REF
6588 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6596 return legitimate_pic_address_disp_p (x
);
6603 /* Determine if a given CONST RTX is a valid memory displacement
6607 legitimate_pic_address_disp_p (rtx disp
)
6611 /* In 64bit mode we can allow direct addresses of symbols and labels
6612 when they are not dynamic symbols. */
6615 rtx op0
= disp
, op1
;
6617 switch (GET_CODE (disp
))
6623 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6625 op0
= XEXP (XEXP (disp
, 0), 0);
6626 op1
= XEXP (XEXP (disp
, 0), 1);
6627 if (!CONST_INT_P (op1
)
6628 || INTVAL (op1
) >= 16*1024*1024
6629 || INTVAL (op1
) < -16*1024*1024)
6631 if (GET_CODE (op0
) == LABEL_REF
)
6633 if (GET_CODE (op0
) != SYMBOL_REF
)
6638 /* TLS references should always be enclosed in UNSPEC. */
6639 if (SYMBOL_REF_TLS_MODEL (op0
))
6641 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6649 if (GET_CODE (disp
) != CONST
)
6651 disp
= XEXP (disp
, 0);
6655 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6656 of GOT tables. We should not need these anyway. */
6657 if (GET_CODE (disp
) != UNSPEC
6658 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6659 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6662 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6663 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6669 if (GET_CODE (disp
) == PLUS
)
6671 if (!CONST_INT_P (XEXP (disp
, 1)))
6673 disp
= XEXP (disp
, 0);
6677 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6680 if (GET_CODE (disp
) != UNSPEC
)
6683 switch (XINT (disp
, 1))
6688 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6690 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6691 While ABI specify also 32bit relocation but we don't produce it in
6692 small PIC model at all. */
6693 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6694 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6696 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6698 case UNSPEC_GOTTPOFF
:
6699 case UNSPEC_GOTNTPOFF
:
6700 case UNSPEC_INDNTPOFF
:
6703 disp
= XVECEXP (disp
, 0, 0);
6704 return (GET_CODE (disp
) == SYMBOL_REF
6705 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6707 disp
= XVECEXP (disp
, 0, 0);
6708 return (GET_CODE (disp
) == SYMBOL_REF
6709 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6711 disp
= XVECEXP (disp
, 0, 0);
6712 return (GET_CODE (disp
) == SYMBOL_REF
6713 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6719 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6720 memory address for an instruction. The MODE argument is the machine mode
6721 for the MEM expression that wants to use this address.
6723 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6724 convert common non-canonical forms to canonical form so that they will
6728 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6730 struct ix86_address parts
;
6731 rtx base
, index
, disp
;
6732 HOST_WIDE_INT scale
;
6733 const char *reason
= NULL
;
6734 rtx reason_rtx
= NULL_RTX
;
6736 if (TARGET_DEBUG_ADDR
)
6739 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6740 GET_MODE_NAME (mode
), strict
);
6744 if (ix86_decompose_address (addr
, &parts
) <= 0)
6746 reason
= "decomposition failed";
6751 index
= parts
.index
;
6753 scale
= parts
.scale
;
6755 /* Validate base register.
6757 Don't allow SUBREG's that span more than a word here. It can lead to spill
6758 failures when the base is one word out of a two word structure, which is
6759 represented internally as a DImode int. */
6768 else if (GET_CODE (base
) == SUBREG
6769 && REG_P (SUBREG_REG (base
))
6770 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6772 reg
= SUBREG_REG (base
);
6775 reason
= "base is not a register";
6779 if (GET_MODE (base
) != Pmode
)
6781 reason
= "base is not in Pmode";
6785 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6786 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6788 reason
= "base is not valid";
6793 /* Validate index register.
6795 Don't allow SUBREG's that span more than a word here -- same as above. */
6804 else if (GET_CODE (index
) == SUBREG
6805 && REG_P (SUBREG_REG (index
))
6806 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6808 reg
= SUBREG_REG (index
);
6811 reason
= "index is not a register";
6815 if (GET_MODE (index
) != Pmode
)
6817 reason
= "index is not in Pmode";
6821 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6822 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6824 reason
= "index is not valid";
6829 /* Validate scale factor. */
6832 reason_rtx
= GEN_INT (scale
);
6835 reason
= "scale without index";
6839 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6841 reason
= "scale is not a valid multiplier";
6846 /* Validate displacement. */
6851 if (GET_CODE (disp
) == CONST
6852 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6853 switch (XINT (XEXP (disp
, 0), 1))
6855 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6856 used. While ABI specify also 32bit relocations, we don't produce
6857 them at all and use IP relative instead. */
6860 gcc_assert (flag_pic
);
6862 goto is_legitimate_pic
;
6863 reason
= "64bit address unspec";
6866 case UNSPEC_GOTPCREL
:
6867 gcc_assert (flag_pic
);
6868 goto is_legitimate_pic
;
6870 case UNSPEC_GOTTPOFF
:
6871 case UNSPEC_GOTNTPOFF
:
6872 case UNSPEC_INDNTPOFF
:
6878 reason
= "invalid address unspec";
6882 else if (SYMBOLIC_CONST (disp
)
6886 && MACHOPIC_INDIRECT
6887 && !machopic_operand_p (disp
)
6893 if (TARGET_64BIT
&& (index
|| base
))
6895 /* foo@dtpoff(%rX) is ok. */
6896 if (GET_CODE (disp
) != CONST
6897 || GET_CODE (XEXP (disp
, 0)) != PLUS
6898 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6899 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6900 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6901 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6903 reason
= "non-constant pic memory reference";
6907 else if (! legitimate_pic_address_disp_p (disp
))
6909 reason
= "displacement is an invalid pic construct";
6913 /* This code used to verify that a symbolic pic displacement
6914 includes the pic_offset_table_rtx register.
6916 While this is good idea, unfortunately these constructs may
6917 be created by "adds using lea" optimization for incorrect
6926 This code is nonsensical, but results in addressing
6927 GOT table with pic_offset_table_rtx base. We can't
6928 just refuse it easily, since it gets matched by
6929 "addsi3" pattern, that later gets split to lea in the
6930 case output register differs from input. While this
6931 can be handled by separate addsi pattern for this case
6932 that never results in lea, this seems to be easier and
6933 correct fix for crash to disable this test. */
6935 else if (GET_CODE (disp
) != LABEL_REF
6936 && !CONST_INT_P (disp
)
6937 && (GET_CODE (disp
) != CONST
6938 || !legitimate_constant_p (disp
))
6939 && (GET_CODE (disp
) != SYMBOL_REF
6940 || !legitimate_constant_p (disp
)))
6942 reason
= "displacement is not constant";
6945 else if (TARGET_64BIT
6946 && !x86_64_immediate_operand (disp
, VOIDmode
))
6948 reason
= "displacement is out of range";
6953 /* Everything looks valid. */
6954 if (TARGET_DEBUG_ADDR
)
6955 fprintf (stderr
, "Success.\n");
6959 if (TARGET_DEBUG_ADDR
)
6961 fprintf (stderr
, "Error: %s\n", reason
);
6962 debug_rtx (reason_rtx
);
6967 /* Return a unique alias set for the GOT. */
6969 static HOST_WIDE_INT
6970 ix86_GOT_alias_set (void)
6972 static HOST_WIDE_INT set
= -1;
6974 set
= new_alias_set ();
6978 /* Return a legitimate reference for ORIG (an address) using the
6979 register REG. If REG is 0, a new pseudo is generated.
6981 There are two types of references that must be handled:
6983 1. Global data references must load the address from the GOT, via
6984 the PIC reg. An insn is emitted to do this load, and the reg is
6987 2. Static data references, constant pool addresses, and code labels
6988 compute the address as an offset from the GOT, whose base is in
6989 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6990 differentiate them from global data objects. The returned
6991 address is the PIC reg + an unspec constant.
6993 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6994 reg also appears in the address. */
6997 legitimize_pic_address (rtx orig
, rtx reg
)
7004 if (TARGET_MACHO
&& !TARGET_64BIT
)
7007 reg
= gen_reg_rtx (Pmode
);
7008 /* Use the generic Mach-O PIC machinery. */
7009 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7013 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7015 else if (TARGET_64BIT
7016 && ix86_cmodel
!= CM_SMALL_PIC
7017 && local_symbolic_operand (addr
, Pmode
))
7020 /* This symbol may be referenced via a displacement from the PIC
7021 base address (@GOTOFF). */
7023 if (reload_in_progress
)
7024 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7025 if (GET_CODE (addr
) == CONST
)
7026 addr
= XEXP (addr
, 0);
7027 if (GET_CODE (addr
) == PLUS
)
7029 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7030 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7033 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7034 new = gen_rtx_CONST (Pmode
, new);
7036 tmpreg
= gen_reg_rtx (Pmode
);
7039 emit_move_insn (tmpreg
, new);
7043 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7044 tmpreg
, 1, OPTAB_DIRECT
);
7047 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7049 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
7051 /* This symbol may be referenced via a displacement from the PIC
7052 base address (@GOTOFF). */
7054 if (reload_in_progress
)
7055 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7056 if (GET_CODE (addr
) == CONST
)
7057 addr
= XEXP (addr
, 0);
7058 if (GET_CODE (addr
) == PLUS
)
7060 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7061 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7064 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7065 new = gen_rtx_CONST (Pmode
, new);
7066 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7070 emit_move_insn (reg
, new);
7074 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7078 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7079 new = gen_rtx_CONST (Pmode
, new);
7080 new = gen_const_mem (Pmode
, new);
7081 set_mem_alias_set (new, ix86_GOT_alias_set ());
7084 reg
= gen_reg_rtx (Pmode
);
7085 /* Use directly gen_movsi, otherwise the address is loaded
7086 into register for CSE. We don't want to CSE this addresses,
7087 instead we CSE addresses from the GOT table, so skip this. */
7088 emit_insn (gen_movsi (reg
, new));
7093 /* This symbol must be referenced via a load from the
7094 Global Offset Table (@GOT). */
7096 if (reload_in_progress
)
7097 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7098 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7099 new = gen_rtx_CONST (Pmode
, new);
7100 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7101 new = gen_const_mem (Pmode
, new);
7102 set_mem_alias_set (new, ix86_GOT_alias_set ());
7105 reg
= gen_reg_rtx (Pmode
);
7106 emit_move_insn (reg
, new);
7112 if (CONST_INT_P (addr
)
7113 && !x86_64_immediate_operand (addr
, VOIDmode
))
7117 emit_move_insn (reg
, addr
);
7121 new = force_reg (Pmode
, addr
);
7123 else if (GET_CODE (addr
) == CONST
)
7125 addr
= XEXP (addr
, 0);
7127 /* We must match stuff we generate before. Assume the only
7128 unspecs that can get here are ours. Not that we could do
7129 anything with them anyway.... */
7130 if (GET_CODE (addr
) == UNSPEC
7131 || (GET_CODE (addr
) == PLUS
7132 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7134 gcc_assert (GET_CODE (addr
) == PLUS
);
7136 if (GET_CODE (addr
) == PLUS
)
7138 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7140 /* Check first to see if this is a constant offset from a @GOTOFF
7141 symbol reference. */
7142 if (local_symbolic_operand (op0
, Pmode
)
7143 && CONST_INT_P (op1
))
7147 if (reload_in_progress
)
7148 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7149 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7151 new = gen_rtx_PLUS (Pmode
, new, op1
);
7152 new = gen_rtx_CONST (Pmode
, new);
7153 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7157 emit_move_insn (reg
, new);
7163 if (INTVAL (op1
) < -16*1024*1024
7164 || INTVAL (op1
) >= 16*1024*1024)
7166 if (!x86_64_immediate_operand (op1
, Pmode
))
7167 op1
= force_reg (Pmode
, op1
);
7168 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7174 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7175 new = legitimize_pic_address (XEXP (addr
, 1),
7176 base
== reg
? NULL_RTX
: reg
);
7178 if (CONST_INT_P (new))
7179 new = plus_constant (base
, INTVAL (new));
7182 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7184 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7185 new = XEXP (new, 1);
7187 new = gen_rtx_PLUS (Pmode
, base
, new);
7195 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7198 get_thread_pointer (int to_reg
)
7202 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7206 reg
= gen_reg_rtx (Pmode
);
7207 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7208 insn
= emit_insn (insn
);
7213 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7214 false if we expect this to be used for a memory address and true if
7215 we expect to load the address into a register. */
7218 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7220 rtx dest
, base
, off
, pic
, tp
;
7225 case TLS_MODEL_GLOBAL_DYNAMIC
:
7226 dest
= gen_reg_rtx (Pmode
);
7227 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7229 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7231 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7234 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7235 insns
= get_insns ();
7238 emit_libcall_block (insns
, dest
, rax
, x
);
7240 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7241 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7243 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7245 if (TARGET_GNU2_TLS
)
7247 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7249 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7253 case TLS_MODEL_LOCAL_DYNAMIC
:
7254 base
= gen_reg_rtx (Pmode
);
7255 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7257 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7259 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7262 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7263 insns
= get_insns ();
7266 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7267 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7268 emit_libcall_block (insns
, base
, rax
, note
);
7270 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7271 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7273 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7275 if (TARGET_GNU2_TLS
)
7277 rtx x
= ix86_tls_module_base ();
7279 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7280 gen_rtx_MINUS (Pmode
, x
, tp
));
7283 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7284 off
= gen_rtx_CONST (Pmode
, off
);
7286 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7288 if (TARGET_GNU2_TLS
)
7290 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7292 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7297 case TLS_MODEL_INITIAL_EXEC
:
7301 type
= UNSPEC_GOTNTPOFF
;
7305 if (reload_in_progress
)
7306 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7307 pic
= pic_offset_table_rtx
;
7308 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7310 else if (!TARGET_ANY_GNU_TLS
)
7312 pic
= gen_reg_rtx (Pmode
);
7313 emit_insn (gen_set_got (pic
));
7314 type
= UNSPEC_GOTTPOFF
;
7319 type
= UNSPEC_INDNTPOFF
;
7322 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7323 off
= gen_rtx_CONST (Pmode
, off
);
7325 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7326 off
= gen_const_mem (Pmode
, off
);
7327 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7329 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7331 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7332 off
= force_reg (Pmode
, off
);
7333 return gen_rtx_PLUS (Pmode
, base
, off
);
7337 base
= get_thread_pointer (true);
7338 dest
= gen_reg_rtx (Pmode
);
7339 emit_insn (gen_subsi3 (dest
, base
, off
));
7343 case TLS_MODEL_LOCAL_EXEC
:
7344 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7345 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7346 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7347 off
= gen_rtx_CONST (Pmode
, off
);
7349 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7351 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7352 return gen_rtx_PLUS (Pmode
, base
, off
);
7356 base
= get_thread_pointer (true);
7357 dest
= gen_reg_rtx (Pmode
);
7358 emit_insn (gen_subsi3 (dest
, base
, off
));
7369 /* Try machine-dependent ways of modifying an illegitimate address
7370 to be legitimate. If we find one, return the new, valid address.
7371 This macro is used in only one place: `memory_address' in explow.c.
7373 OLDX is the address as it was before break_out_memory_refs was called.
7374 In some cases it is useful to look at this to decide what needs to be done.
7376 MODE and WIN are passed so that this macro can use
7377 GO_IF_LEGITIMATE_ADDRESS.
7379 It is always safe for this macro to do nothing. It exists to recognize
7380 opportunities to optimize the output.
7382 For the 80386, we handle X+REG by loading X into a register R and
7383 using R+REG. R will go in a general reg and indexing will be used.
7384 However, if REG is a broken-out memory address or multiplication,
7385 nothing needs to be done because REG can certainly go in a general reg.
7387 When -fpic is used, special handling is needed for symbolic references.
7388 See comments by legitimize_pic_address in i386.c for details. */
7391 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7396 if (TARGET_DEBUG_ADDR
)
7398 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7399 GET_MODE_NAME (mode
));
7403 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7405 return legitimize_tls_address (x
, log
, false);
7406 if (GET_CODE (x
) == CONST
7407 && GET_CODE (XEXP (x
, 0)) == PLUS
7408 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7409 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7411 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7412 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7415 if (flag_pic
&& SYMBOLIC_CONST (x
))
7416 return legitimize_pic_address (x
, 0);
7418 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7419 if (GET_CODE (x
) == ASHIFT
7420 && CONST_INT_P (XEXP (x
, 1))
7421 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7424 log
= INTVAL (XEXP (x
, 1));
7425 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7426 GEN_INT (1 << log
));
7429 if (GET_CODE (x
) == PLUS
)
7431 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7433 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7434 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7435 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7438 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7439 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7440 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7441 GEN_INT (1 << log
));
7444 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7445 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7446 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7449 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7450 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7451 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7452 GEN_INT (1 << log
));
7455 /* Put multiply first if it isn't already. */
7456 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7458 rtx tmp
= XEXP (x
, 0);
7459 XEXP (x
, 0) = XEXP (x
, 1);
7464 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7465 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7466 created by virtual register instantiation, register elimination, and
7467 similar optimizations. */
7468 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7471 x
= gen_rtx_PLUS (Pmode
,
7472 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7473 XEXP (XEXP (x
, 1), 0)),
7474 XEXP (XEXP (x
, 1), 1));
7478 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7479 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7480 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7481 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7482 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7483 && CONSTANT_P (XEXP (x
, 1)))
7486 rtx other
= NULL_RTX
;
7488 if (CONST_INT_P (XEXP (x
, 1)))
7490 constant
= XEXP (x
, 1);
7491 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7493 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7495 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7496 other
= XEXP (x
, 1);
7504 x
= gen_rtx_PLUS (Pmode
,
7505 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7506 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7507 plus_constant (other
, INTVAL (constant
)));
7511 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7514 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7517 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7520 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7523 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7527 && REG_P (XEXP (x
, 1))
7528 && REG_P (XEXP (x
, 0)))
7531 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7534 x
= legitimize_pic_address (x
, 0);
7537 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7540 if (REG_P (XEXP (x
, 0)))
7542 rtx temp
= gen_reg_rtx (Pmode
);
7543 rtx val
= force_operand (XEXP (x
, 1), temp
);
7545 emit_move_insn (temp
, val
);
7551 else if (REG_P (XEXP (x
, 1)))
7553 rtx temp
= gen_reg_rtx (Pmode
);
7554 rtx val
= force_operand (XEXP (x
, 0), temp
);
7556 emit_move_insn (temp
, val
);
7566 /* Print an integer constant expression in assembler syntax. Addition
7567 and subtraction are the only arithmetic that may appear in these
7568 expressions. FILE is the stdio stream to write to, X is the rtx, and
7569 CODE is the operand print code from the output string. */
7572 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7576 switch (GET_CODE (x
))
7579 gcc_assert (flag_pic
);
7584 output_addr_const (file
, x
);
7585 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7586 fputs ("@PLT", file
);
7593 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7594 assemble_name (asm_out_file
, buf
);
7598 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7602 /* This used to output parentheses around the expression,
7603 but that does not work on the 386 (either ATT or BSD assembler). */
7604 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7608 if (GET_MODE (x
) == VOIDmode
)
7610 /* We can use %d if the number is <32 bits and positive. */
7611 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7612 fprintf (file
, "0x%lx%08lx",
7613 (unsigned long) CONST_DOUBLE_HIGH (x
),
7614 (unsigned long) CONST_DOUBLE_LOW (x
));
7616 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7619 /* We can't handle floating point constants;
7620 PRINT_OPERAND must handle them. */
7621 output_operand_lossage ("floating constant misused");
7625 /* Some assemblers need integer constants to appear first. */
7626 if (CONST_INT_P (XEXP (x
, 0)))
7628 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7630 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7634 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7635 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7637 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7643 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7644 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7646 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7648 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7652 gcc_assert (XVECLEN (x
, 0) == 1);
7653 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7654 switch (XINT (x
, 1))
7657 fputs ("@GOT", file
);
7660 fputs ("@GOTOFF", file
);
7662 case UNSPEC_GOTPCREL
:
7663 fputs ("@GOTPCREL(%rip)", file
);
7665 case UNSPEC_GOTTPOFF
:
7666 /* FIXME: This might be @TPOFF in Sun ld too. */
7667 fputs ("@GOTTPOFF", file
);
7670 fputs ("@TPOFF", file
);
7674 fputs ("@TPOFF", file
);
7676 fputs ("@NTPOFF", file
);
7679 fputs ("@DTPOFF", file
);
7681 case UNSPEC_GOTNTPOFF
:
7683 fputs ("@GOTTPOFF(%rip)", file
);
7685 fputs ("@GOTNTPOFF", file
);
7687 case UNSPEC_INDNTPOFF
:
7688 fputs ("@INDNTPOFF", file
);
7691 output_operand_lossage ("invalid UNSPEC as operand");
7697 output_operand_lossage ("invalid expression as operand");
7701 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7702 We need to emit DTP-relative relocations. */
7705 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7707 fputs (ASM_LONG
, file
);
7708 output_addr_const (file
, x
);
7709 fputs ("@DTPOFF", file
);
7715 fputs (", 0", file
);
7722 /* In the name of slightly smaller debug output, and to cater to
7723 general assembler lossage, recognize PIC+GOTOFF and turn it back
7724 into a direct symbol reference.
7726 On Darwin, this is necessary to avoid a crash, because Darwin
7727 has a different PIC label for each routine but the DWARF debugging
7728 information is not associated with any particular routine, so it's
7729 necessary to remove references to the PIC label from RTL stored by
7730 the DWARF output code. */
7733 ix86_delegitimize_address (rtx orig_x
)
7736 /* reg_addend is NULL or a multiple of some register. */
7737 rtx reg_addend
= NULL_RTX
;
7738 /* const_addend is NULL or a const_int. */
7739 rtx const_addend
= NULL_RTX
;
7740 /* This is the result, or NULL. */
7741 rtx result
= NULL_RTX
;
7748 if (GET_CODE (x
) != CONST
7749 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7750 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7753 return XVECEXP (XEXP (x
, 0), 0, 0);
7756 if (GET_CODE (x
) != PLUS
7757 || GET_CODE (XEXP (x
, 1)) != CONST
)
7760 if (REG_P (XEXP (x
, 0))
7761 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7762 /* %ebx + GOT/GOTOFF */
7764 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7766 /* %ebx + %reg * scale + GOT/GOTOFF */
7767 reg_addend
= XEXP (x
, 0);
7768 if (REG_P (XEXP (reg_addend
, 0))
7769 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7770 reg_addend
= XEXP (reg_addend
, 1);
7771 else if (REG_P (XEXP (reg_addend
, 1))
7772 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7773 reg_addend
= XEXP (reg_addend
, 0);
7776 if (!REG_P (reg_addend
)
7777 && GET_CODE (reg_addend
) != MULT
7778 && GET_CODE (reg_addend
) != ASHIFT
)
7784 x
= XEXP (XEXP (x
, 1), 0);
7785 if (GET_CODE (x
) == PLUS
7786 && CONST_INT_P (XEXP (x
, 1)))
7788 const_addend
= XEXP (x
, 1);
7792 if (GET_CODE (x
) == UNSPEC
7793 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7794 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7795 result
= XVECEXP (x
, 0, 0);
7797 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7799 result
= XEXP (x
, 0);
7805 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7807 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7812 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7817 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7819 enum rtx_code second_code
, bypass_code
;
7820 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7821 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7822 code
= ix86_fp_compare_code_to_integer (code
);
7826 code
= reverse_condition (code
);
7837 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7841 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7842 Those same assemblers have the same but opposite lossage on cmov. */
7843 gcc_assert (mode
== CCmode
);
7844 suffix
= fp
? "nbe" : "a";
7864 gcc_assert (mode
== CCmode
);
7886 gcc_assert (mode
== CCmode
);
7887 suffix
= fp
? "nb" : "ae";
7890 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7894 gcc_assert (mode
== CCmode
);
7898 suffix
= fp
? "u" : "p";
7901 suffix
= fp
? "nu" : "np";
7906 fputs (suffix
, file
);
7909 /* Print the name of register X to FILE based on its machine mode and number.
7910 If CODE is 'w', pretend the mode is HImode.
7911 If CODE is 'b', pretend the mode is QImode.
7912 If CODE is 'k', pretend the mode is SImode.
7913 If CODE is 'q', pretend the mode is DImode.
7914 If CODE is 'h', pretend the reg is the 'high' byte register.
7915 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7918 print_reg (rtx x
, int code
, FILE *file
)
7920 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7921 && REGNO (x
) != FRAME_POINTER_REGNUM
7922 && REGNO (x
) != FLAGS_REG
7923 && REGNO (x
) != FPSR_REG
7924 && REGNO (x
) != FPCR_REG
);
7926 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7929 if (code
== 'w' || MMX_REG_P (x
))
7931 else if (code
== 'b')
7933 else if (code
== 'k')
7935 else if (code
== 'q')
7937 else if (code
== 'y')
7939 else if (code
== 'h')
7942 code
= GET_MODE_SIZE (GET_MODE (x
));
7944 /* Irritatingly, AMD extended registers use different naming convention
7945 from the normal registers. */
7946 if (REX_INT_REG_P (x
))
7948 gcc_assert (TARGET_64BIT
);
7952 error ("extended registers have no high halves");
7955 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7958 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7961 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7964 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7967 error ("unsupported operand size for extended register");
7975 if (STACK_TOP_P (x
))
7977 fputs ("st(0)", file
);
7984 if (! ANY_FP_REG_P (x
))
7985 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7990 fputs (hi_reg_name
[REGNO (x
)], file
);
7993 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7995 fputs (qi_reg_name
[REGNO (x
)], file
);
7998 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8000 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8007 /* Locate some local-dynamic symbol still in use by this function
8008 so that we can print its name in some tls_local_dynamic_base
8012 get_some_local_dynamic_name (void)
8016 if (cfun
->machine
->some_ld_name
)
8017 return cfun
->machine
->some_ld_name
;
8019 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8021 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8022 return cfun
->machine
->some_ld_name
;
8028 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8032 if (GET_CODE (x
) == SYMBOL_REF
8033 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8035 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8043 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8044 C -- print opcode suffix for set/cmov insn.
8045 c -- like C, but print reversed condition
8046 F,f -- likewise, but for floating-point.
8047 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8049 R -- print the prefix for register names.
8050 z -- print the opcode suffix for the size of the current operand.
8051 * -- print a star (in certain assembler syntax)
8052 A -- print an absolute memory reference.
8053 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8054 s -- print a shift double count, followed by the assemblers argument
8056 b -- print the QImode name of the register for the indicated operand.
8057 %b0 would print %al if operands[0] is reg 0.
8058 w -- likewise, print the HImode name of the register.
8059 k -- likewise, print the SImode name of the register.
8060 q -- likewise, print the DImode name of the register.
8061 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8062 y -- print "st(0)" instead of "st" as a register.
8063 D -- print condition for SSE cmp instruction.
8064 P -- if PIC, print an @PLT suffix.
8065 X -- don't print any sort of PIC '@' suffix for a symbol.
8066 & -- print some in-use local-dynamic symbol name.
8067 H -- print a memory address offset by 8; used for sse high-parts
8071 print_operand (FILE *file
, rtx x
, int code
)
8078 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8083 assemble_name (file
, get_some_local_dynamic_name ());
8087 switch (ASSEMBLER_DIALECT
)
8094 /* Intel syntax. For absolute addresses, registers should not
8095 be surrounded by braces. */
8099 PRINT_OPERAND (file
, x
, 0);
8109 PRINT_OPERAND (file
, x
, 0);
8114 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8119 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8124 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8129 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8134 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8139 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8144 /* 387 opcodes don't get size suffixes if the operands are
8146 if (STACK_REG_P (x
))
8149 /* Likewise if using Intel opcodes. */
8150 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8153 /* This is the size of op from size of operand. */
8154 switch (GET_MODE_SIZE (GET_MODE (x
)))
8161 #ifdef HAVE_GAS_FILDS_FISTS
8167 if (GET_MODE (x
) == SFmode
)
8182 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8184 #ifdef GAS_MNEMONICS
8210 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8212 PRINT_OPERAND (file
, x
, 0);
8218 /* Little bit of braindamage here. The SSE compare instructions
8219 does use completely different names for the comparisons that the
8220 fp conditional moves. */
8221 switch (GET_CODE (x
))
8236 fputs ("unord", file
);
8240 fputs ("neq", file
);
8244 fputs ("nlt", file
);
8248 fputs ("nle", file
);
8251 fputs ("ord", file
);
8258 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8259 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8261 switch (GET_MODE (x
))
8263 case HImode
: putc ('w', file
); break;
8265 case SFmode
: putc ('l', file
); break;
8267 case DFmode
: putc ('q', file
); break;
8268 default: gcc_unreachable ();
8275 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8278 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8279 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8282 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8285 /* Like above, but reverse condition */
8287 /* Check to see if argument to %c is really a constant
8288 and not a condition code which needs to be reversed. */
8289 if (!COMPARISON_P (x
))
8291 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8294 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8297 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8298 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8301 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8305 /* It doesn't actually matter what mode we use here, as we're
8306 only going to use this for printing. */
8307 x
= adjust_address_nv (x
, DImode
, 8);
8314 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8317 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8320 int pred_val
= INTVAL (XEXP (x
, 0));
8322 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8323 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8325 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8326 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8328 /* Emit hints only in the case default branch prediction
8329 heuristics would fail. */
8330 if (taken
!= cputaken
)
8332 /* We use 3e (DS) prefix for taken branches and
8333 2e (CS) prefix for not taken branches. */
8335 fputs ("ds ; ", file
);
8337 fputs ("cs ; ", file
);
8344 output_operand_lossage ("invalid operand code '%c'", code
);
8349 print_reg (x
, code
, file
);
8353 /* No `byte ptr' prefix for call instructions. */
8354 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8357 switch (GET_MODE_SIZE (GET_MODE (x
)))
8359 case 1: size
= "BYTE"; break;
8360 case 2: size
= "WORD"; break;
8361 case 4: size
= "DWORD"; break;
8362 case 8: size
= "QWORD"; break;
8363 case 12: size
= "XWORD"; break;
8364 case 16: size
= "XMMWORD"; break;
8369 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8372 else if (code
== 'w')
8374 else if (code
== 'k')
8378 fputs (" PTR ", file
);
8382 /* Avoid (%rip) for call operands. */
8383 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8384 && !CONST_INT_P (x
))
8385 output_addr_const (file
, x
);
8386 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8387 output_operand_lossage ("invalid constraints for operand");
8392 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8397 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8398 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8400 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8402 fprintf (file
, "0x%08lx", l
);
8405 /* These float cases don't actually occur as immediate operands. */
8406 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8410 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8411 fprintf (file
, "%s", dstr
);
8414 else if (GET_CODE (x
) == CONST_DOUBLE
8415 && GET_MODE (x
) == XFmode
)
8419 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8420 fprintf (file
, "%s", dstr
);
8425 /* We have patterns that allow zero sets of memory, for instance.
8426 In 64-bit mode, we should probably support all 8-byte vectors,
8427 since we can in fact encode that into an immediate. */
8428 if (GET_CODE (x
) == CONST_VECTOR
)
8430 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8436 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8438 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8441 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8442 || GET_CODE (x
) == LABEL_REF
)
8444 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8447 fputs ("OFFSET FLAT:", file
);
8450 if (CONST_INT_P (x
))
8451 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8453 output_pic_addr_const (file
, x
, code
);
8455 output_addr_const (file
, x
);
8459 /* Print a memory operand whose address is ADDR. */
8462 print_operand_address (FILE *file
, rtx addr
)
8464 struct ix86_address parts
;
8465 rtx base
, index
, disp
;
8467 int ok
= ix86_decompose_address (addr
, &parts
);
8472 index
= parts
.index
;
8474 scale
= parts
.scale
;
8482 if (USER_LABEL_PREFIX
[0] == 0)
8484 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8490 if (!base
&& !index
)
8492 /* Displacement only requires special attention. */
8494 if (CONST_INT_P (disp
))
8496 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8498 if (USER_LABEL_PREFIX
[0] == 0)
8500 fputs ("ds:", file
);
8502 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8505 output_pic_addr_const (file
, disp
, 0);
8507 output_addr_const (file
, disp
);
8509 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8512 if (GET_CODE (disp
) == CONST
8513 && GET_CODE (XEXP (disp
, 0)) == PLUS
8514 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8515 disp
= XEXP (XEXP (disp
, 0), 0);
8516 if (GET_CODE (disp
) == LABEL_REF
8517 || (GET_CODE (disp
) == SYMBOL_REF
8518 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8519 fputs ("(%rip)", file
);
8524 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8529 output_pic_addr_const (file
, disp
, 0);
8530 else if (GET_CODE (disp
) == LABEL_REF
)
8531 output_asm_label (disp
);
8533 output_addr_const (file
, disp
);
8538 print_reg (base
, 0, file
);
8542 print_reg (index
, 0, file
);
8544 fprintf (file
, ",%d", scale
);
8550 rtx offset
= NULL_RTX
;
8554 /* Pull out the offset of a symbol; print any symbol itself. */
8555 if (GET_CODE (disp
) == CONST
8556 && GET_CODE (XEXP (disp
, 0)) == PLUS
8557 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8559 offset
= XEXP (XEXP (disp
, 0), 1);
8560 disp
= gen_rtx_CONST (VOIDmode
,
8561 XEXP (XEXP (disp
, 0), 0));
8565 output_pic_addr_const (file
, disp
, 0);
8566 else if (GET_CODE (disp
) == LABEL_REF
)
8567 output_asm_label (disp
);
8568 else if (CONST_INT_P (disp
))
8571 output_addr_const (file
, disp
);
8577 print_reg (base
, 0, file
);
8580 if (INTVAL (offset
) >= 0)
8582 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8586 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8593 print_reg (index
, 0, file
);
8595 fprintf (file
, "*%d", scale
);
8603 output_addr_const_extra (FILE *file
, rtx x
)
8607 if (GET_CODE (x
) != UNSPEC
)
8610 op
= XVECEXP (x
, 0, 0);
8611 switch (XINT (x
, 1))
8613 case UNSPEC_GOTTPOFF
:
8614 output_addr_const (file
, op
);
8615 /* FIXME: This might be @TPOFF in Sun ld. */
8616 fputs ("@GOTTPOFF", file
);
8619 output_addr_const (file
, op
);
8620 fputs ("@TPOFF", file
);
8623 output_addr_const (file
, op
);
8625 fputs ("@TPOFF", file
);
8627 fputs ("@NTPOFF", file
);
8630 output_addr_const (file
, op
);
8631 fputs ("@DTPOFF", file
);
8633 case UNSPEC_GOTNTPOFF
:
8634 output_addr_const (file
, op
);
8636 fputs ("@GOTTPOFF(%rip)", file
);
8638 fputs ("@GOTNTPOFF", file
);
8640 case UNSPEC_INDNTPOFF
:
8641 output_addr_const (file
, op
);
8642 fputs ("@INDNTPOFF", file
);
8652 /* Split one or more DImode RTL references into pairs of SImode
8653 references. The RTL can be REG, offsettable MEM, integer constant, or
8654 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8655 split and "num" is its length. lo_half and hi_half are output arrays
8656 that parallel "operands". */
8659 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8663 rtx op
= operands
[num
];
8665 /* simplify_subreg refuse to split volatile memory addresses,
8666 but we still have to handle it. */
8669 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8670 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8674 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8675 GET_MODE (op
) == VOIDmode
8676 ? DImode
: GET_MODE (op
), 0);
8677 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8678 GET_MODE (op
) == VOIDmode
8679 ? DImode
: GET_MODE (op
), 4);
8683 /* Split one or more TImode RTL references into pairs of DImode
8684 references. The RTL can be REG, offsettable MEM, integer constant, or
8685 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8686 split and "num" is its length. lo_half and hi_half are output arrays
8687 that parallel "operands". */
8690 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8694 rtx op
= operands
[num
];
8696 /* simplify_subreg refuse to split volatile memory addresses, but we
8697 still have to handle it. */
8700 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8701 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8705 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8706 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8711 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8712 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8713 is the expression of the binary operation. The output may either be
8714 emitted here, or returned to the caller, like all output_* functions.
8716 There is no guarantee that the operands are the same mode, as they
8717 might be within FLOAT or FLOAT_EXTEND expressions. */
8719 #ifndef SYSV386_COMPAT
8720 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8721 wants to fix the assemblers because that causes incompatibility
8722 with gcc. No-one wants to fix gcc because that causes
8723 incompatibility with assemblers... You can use the option of
8724 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8725 #define SYSV386_COMPAT 1
8729 output_387_binary_op (rtx insn
, rtx
*operands
)
8731 static char buf
[30];
8734 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8736 #ifdef ENABLE_CHECKING
8737 /* Even if we do not want to check the inputs, this documents input
8738 constraints. Which helps in understanding the following code. */
8739 if (STACK_REG_P (operands
[0])
8740 && ((REG_P (operands
[1])
8741 && REGNO (operands
[0]) == REGNO (operands
[1])
8742 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8743 || (REG_P (operands
[2])
8744 && REGNO (operands
[0]) == REGNO (operands
[2])
8745 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8746 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8749 gcc_assert (is_sse
);
8752 switch (GET_CODE (operands
[3]))
8755 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8756 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8764 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8765 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8773 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8774 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8782 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8783 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8797 if (GET_MODE (operands
[0]) == SFmode
)
8798 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8800 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8805 switch (GET_CODE (operands
[3]))
8809 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8811 rtx temp
= operands
[2];
8812 operands
[2] = operands
[1];
8816 /* know operands[0] == operands[1]. */
8818 if (MEM_P (operands
[2]))
8824 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8826 if (STACK_TOP_P (operands
[0]))
8827 /* How is it that we are storing to a dead operand[2]?
8828 Well, presumably operands[1] is dead too. We can't
8829 store the result to st(0) as st(0) gets popped on this
8830 instruction. Instead store to operands[2] (which I
8831 think has to be st(1)). st(1) will be popped later.
8832 gcc <= 2.8.1 didn't have this check and generated
8833 assembly code that the Unixware assembler rejected. */
8834 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8836 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8840 if (STACK_TOP_P (operands
[0]))
8841 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8843 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8848 if (MEM_P (operands
[1]))
8854 if (MEM_P (operands
[2]))
8860 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8863 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8864 derived assemblers, confusingly reverse the direction of
8865 the operation for fsub{r} and fdiv{r} when the
8866 destination register is not st(0). The Intel assembler
8867 doesn't have this brain damage. Read !SYSV386_COMPAT to
8868 figure out what the hardware really does. */
8869 if (STACK_TOP_P (operands
[0]))
8870 p
= "{p\t%0, %2|rp\t%2, %0}";
8872 p
= "{rp\t%2, %0|p\t%0, %2}";
8874 if (STACK_TOP_P (operands
[0]))
8875 /* As above for fmul/fadd, we can't store to st(0). */
8876 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8878 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8883 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8886 if (STACK_TOP_P (operands
[0]))
8887 p
= "{rp\t%0, %1|p\t%1, %0}";
8889 p
= "{p\t%1, %0|rp\t%0, %1}";
8891 if (STACK_TOP_P (operands
[0]))
8892 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8894 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8899 if (STACK_TOP_P (operands
[0]))
8901 if (STACK_TOP_P (operands
[1]))
8902 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8904 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8907 else if (STACK_TOP_P (operands
[1]))
8910 p
= "{\t%1, %0|r\t%0, %1}";
8912 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8918 p
= "{r\t%2, %0|\t%0, %2}";
8920 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8933 /* Return needed mode for entity in optimize_mode_switching pass. */
8936 ix86_mode_needed (int entity
, rtx insn
)
8938 enum attr_i387_cw mode
;
8940 /* The mode UNINITIALIZED is used to store control word after a
8941 function call or ASM pattern. The mode ANY specify that function
8942 has no requirements on the control word and make no changes in the
8943 bits we are interested in. */
8946 || (NONJUMP_INSN_P (insn
)
8947 && (asm_noperands (PATTERN (insn
)) >= 0
8948 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8949 return I387_CW_UNINITIALIZED
;
8951 if (recog_memoized (insn
) < 0)
8954 mode
= get_attr_i387_cw (insn
);
8959 if (mode
== I387_CW_TRUNC
)
8964 if (mode
== I387_CW_FLOOR
)
8969 if (mode
== I387_CW_CEIL
)
8974 if (mode
== I387_CW_MASK_PM
)
8985 /* Output code to initialize control word copies used by trunc?f?i and
8986 rounding patterns. CURRENT_MODE is set to current control word,
8987 while NEW_MODE is set to new control word. */
8990 emit_i387_cw_initialization (int mode
)
8992 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8997 rtx reg
= gen_reg_rtx (HImode
);
8999 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9000 emit_move_insn (reg
, copy_rtx (stored_mode
));
9002 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9007 /* round toward zero (truncate) */
9008 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9009 slot
= SLOT_CW_TRUNC
;
9013 /* round down toward -oo */
9014 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9015 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9016 slot
= SLOT_CW_FLOOR
;
9020 /* round up toward +oo */
9021 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9022 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9023 slot
= SLOT_CW_CEIL
;
9026 case I387_CW_MASK_PM
:
9027 /* mask precision exception for nearbyint() */
9028 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9029 slot
= SLOT_CW_MASK_PM
;
9041 /* round toward zero (truncate) */
9042 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9043 slot
= SLOT_CW_TRUNC
;
9047 /* round down toward -oo */
9048 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9049 slot
= SLOT_CW_FLOOR
;
9053 /* round up toward +oo */
9054 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9055 slot
= SLOT_CW_CEIL
;
9058 case I387_CW_MASK_PM
:
9059 /* mask precision exception for nearbyint() */
9060 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9061 slot
= SLOT_CW_MASK_PM
;
9069 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9071 new_mode
= assign_386_stack_local (HImode
, slot
);
9072 emit_move_insn (new_mode
, reg
);
9075 /* Output code for INSN to convert a float to a signed int. OPERANDS
9076 are the insn operands. The output may be [HSD]Imode and the input
9077 operand may be [SDX]Fmode. */
9080 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9082 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9083 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9084 int round_mode
= get_attr_i387_cw (insn
);
9086 /* Jump through a hoop or two for DImode, since the hardware has no
9087 non-popping instruction. We used to do this a different way, but
9088 that was somewhat fragile and broke with post-reload splitters. */
9089 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9090 output_asm_insn ("fld\t%y1", operands
);
9092 gcc_assert (STACK_TOP_P (operands
[1]));
9093 gcc_assert (MEM_P (operands
[0]));
9096 output_asm_insn ("fisttp%z0\t%0", operands
);
9099 if (round_mode
!= I387_CW_ANY
)
9100 output_asm_insn ("fldcw\t%3", operands
);
9101 if (stack_top_dies
|| dimode_p
)
9102 output_asm_insn ("fistp%z0\t%0", operands
);
9104 output_asm_insn ("fist%z0\t%0", operands
);
9105 if (round_mode
!= I387_CW_ANY
)
9106 output_asm_insn ("fldcw\t%2", operands
);
9112 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9113 have the values zero or one, indicates the ffreep insn's operand
9114 from the OPERANDS array. */
9117 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9119 if (TARGET_USE_FFREEP
)
9120 #if HAVE_AS_IX86_FFREEP
9121 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9124 static char retval
[] = ".word\t0xc_df";
9125 int regno
= REGNO (operands
[opno
]);
9127 gcc_assert (FP_REGNO_P (regno
));
9129 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9134 return opno
? "fstp\t%y1" : "fstp\t%y0";
9138 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9139 should be used. UNORDERED_P is true when fucom should be used. */
9142 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9145 rtx cmp_op0
, cmp_op1
;
9146 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9150 cmp_op0
= operands
[0];
9151 cmp_op1
= operands
[1];
9155 cmp_op0
= operands
[1];
9156 cmp_op1
= operands
[2];
9161 if (GET_MODE (operands
[0]) == SFmode
)
9163 return "ucomiss\t{%1, %0|%0, %1}";
9165 return "comiss\t{%1, %0|%0, %1}";
9168 return "ucomisd\t{%1, %0|%0, %1}";
9170 return "comisd\t{%1, %0|%0, %1}";
9173 gcc_assert (STACK_TOP_P (cmp_op0
));
9175 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9177 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9181 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9182 return output_387_ffreep (operands
, 1);
9185 return "ftst\n\tfnstsw\t%0";
9188 if (STACK_REG_P (cmp_op1
)
9190 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9191 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9193 /* If both the top of the 387 stack dies, and the other operand
9194 is also a stack register that dies, then this must be a
9195 `fcompp' float compare */
9199 /* There is no double popping fcomi variant. Fortunately,
9200 eflags is immune from the fstp's cc clobbering. */
9202 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9204 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9205 return output_387_ffreep (operands
, 0);
9210 return "fucompp\n\tfnstsw\t%0";
9212 return "fcompp\n\tfnstsw\t%0";
9217 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9219 static const char * const alt
[16] =
9221 "fcom%z2\t%y2\n\tfnstsw\t%0",
9222 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9223 "fucom%z2\t%y2\n\tfnstsw\t%0",
9224 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9226 "ficom%z2\t%y2\n\tfnstsw\t%0",
9227 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9231 "fcomi\t{%y1, %0|%0, %y1}",
9232 "fcomip\t{%y1, %0|%0, %y1}",
9233 "fucomi\t{%y1, %0|%0, %y1}",
9234 "fucomip\t{%y1, %0|%0, %y1}",
9245 mask
= eflags_p
<< 3;
9246 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9247 mask
|= unordered_p
<< 1;
9248 mask
|= stack_top_dies
;
9250 gcc_assert (mask
< 16);
9259 ix86_output_addr_vec_elt (FILE *file
, int value
)
9261 const char *directive
= ASM_LONG
;
9265 directive
= ASM_QUAD
;
9267 gcc_assert (!TARGET_64BIT
);
9270 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9274 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9277 fprintf (file
, "%s%s%d-%s%d\n",
9278 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9279 else if (HAVE_AS_GOTOFF_IN_DATA
)
9280 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9282 else if (TARGET_MACHO
)
9284 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9285 machopic_output_function_base_name (file
);
9286 fprintf(file
, "\n");
9290 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9291 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9294 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9298 ix86_expand_clear (rtx dest
)
9302 /* We play register width games, which are only valid after reload. */
9303 gcc_assert (reload_completed
);
9305 /* Avoid HImode and its attendant prefix byte. */
9306 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9307 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9309 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9311 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9312 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9314 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9315 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9321 /* X is an unchanging MEM. If it is a constant pool reference, return
9322 the constant pool rtx, else NULL. */
9325 maybe_get_pool_constant (rtx x
)
9327 x
= ix86_delegitimize_address (XEXP (x
, 0));
9329 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9330 return get_pool_constant (x
);
9336 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9338 int strict
= (reload_in_progress
|| reload_completed
);
9340 enum tls_model model
;
9345 if (GET_CODE (op1
) == SYMBOL_REF
)
9347 model
= SYMBOL_REF_TLS_MODEL (op1
);
9350 op1
= legitimize_tls_address (op1
, model
, true);
9351 op1
= force_operand (op1
, op0
);
9356 else if (GET_CODE (op1
) == CONST
9357 && GET_CODE (XEXP (op1
, 0)) == PLUS
9358 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9360 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9363 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9364 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9365 op1
= force_operand (op1
, NULL
);
9366 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9367 op0
, 1, OPTAB_DIRECT
);
9373 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9375 if (TARGET_MACHO
&& !TARGET_64BIT
)
9380 rtx temp
= ((reload_in_progress
9381 || ((op0
&& REG_P (op0
))
9383 ? op0
: gen_reg_rtx (Pmode
));
9384 op1
= machopic_indirect_data_reference (op1
, temp
);
9385 op1
= machopic_legitimize_pic_address (op1
, mode
,
9386 temp
== op1
? 0 : temp
);
9388 else if (MACHOPIC_INDIRECT
)
9389 op1
= machopic_indirect_data_reference (op1
, 0);
9397 op1
= force_reg (Pmode
, op1
);
9399 op1
= legitimize_address (op1
, op1
, Pmode
);
9405 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9406 || !push_operand (op0
, mode
))
9408 op1
= force_reg (mode
, op1
);
9410 if (push_operand (op0
, mode
)
9411 && ! general_no_elim_operand (op1
, mode
))
9412 op1
= copy_to_mode_reg (mode
, op1
);
9414 /* Force large constants in 64bit compilation into register
9415 to get them CSEed. */
9416 if (TARGET_64BIT
&& mode
== DImode
9417 && immediate_operand (op1
, mode
)
9418 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9419 && !register_operand (op0
, mode
)
9420 && optimize
&& !reload_completed
&& !reload_in_progress
)
9421 op1
= copy_to_mode_reg (mode
, op1
);
9423 if (FLOAT_MODE_P (mode
))
9425 /* If we are loading a floating point constant to a register,
9426 force the value to memory now, since we'll get better code
9427 out the back end. */
9431 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9433 op1
= validize_mem (force_const_mem (mode
, op1
));
9434 if (!register_operand (op0
, mode
))
9436 rtx temp
= gen_reg_rtx (mode
);
9437 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9438 emit_move_insn (op0
, temp
);
9445 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9449 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9451 rtx op0
= operands
[0], op1
= operands
[1];
9453 /* Force constants other than zero into memory. We do not know how
9454 the instructions used to build constants modify the upper 64 bits
9455 of the register, once we have that information we may be able
9456 to handle some of them more efficiently. */
9457 if ((reload_in_progress
| reload_completed
) == 0
9458 && register_operand (op0
, mode
)
9460 && standard_sse_constant_p (op1
) <= 0)
9461 op1
= validize_mem (force_const_mem (mode
, op1
));
9463 /* Make operand1 a register if it isn't already. */
9465 && !register_operand (op0
, mode
)
9466 && !register_operand (op1
, mode
))
9468 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9472 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9475 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9476 straight to ix86_expand_vector_move. */
9479 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9488 /* If we're optimizing for size, movups is the smallest. */
9491 op0
= gen_lowpart (V4SFmode
, op0
);
9492 op1
= gen_lowpart (V4SFmode
, op1
);
9493 emit_insn (gen_sse_movups (op0
, op1
));
9497 /* ??? If we have typed data, then it would appear that using
9498 movdqu is the only way to get unaligned data loaded with
9500 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9502 op0
= gen_lowpart (V16QImode
, op0
);
9503 op1
= gen_lowpart (V16QImode
, op1
);
9504 emit_insn (gen_sse2_movdqu (op0
, op1
));
9508 if (TARGET_SSE2
&& mode
== V2DFmode
)
9512 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9514 op0
= gen_lowpart (V2DFmode
, op0
);
9515 op1
= gen_lowpart (V2DFmode
, op1
);
9516 emit_insn (gen_sse2_movupd (op0
, op1
));
9520 /* When SSE registers are split into halves, we can avoid
9521 writing to the top half twice. */
9522 if (TARGET_SSE_SPLIT_REGS
)
9524 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9529 /* ??? Not sure about the best option for the Intel chips.
9530 The following would seem to satisfy; the register is
9531 entirely cleared, breaking the dependency chain. We
9532 then store to the upper half, with a dependency depth
9533 of one. A rumor has it that Intel recommends two movsd
9534 followed by an unpacklpd, but this is unconfirmed. And
9535 given that the dependency depth of the unpacklpd would
9536 still be one, I'm not sure why this would be better. */
9537 zero
= CONST0_RTX (V2DFmode
);
9540 m
= adjust_address (op1
, DFmode
, 0);
9541 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9542 m
= adjust_address (op1
, DFmode
, 8);
9543 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9547 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9549 op0
= gen_lowpart (V4SFmode
, op0
);
9550 op1
= gen_lowpart (V4SFmode
, op1
);
9551 emit_insn (gen_sse_movups (op0
, op1
));
9555 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9556 emit_move_insn (op0
, CONST0_RTX (mode
));
9558 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9560 if (mode
!= V4SFmode
)
9561 op0
= gen_lowpart (V4SFmode
, op0
);
9562 m
= adjust_address (op1
, V2SFmode
, 0);
9563 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9564 m
= adjust_address (op1
, V2SFmode
, 8);
9565 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9568 else if (MEM_P (op0
))
9570 /* If we're optimizing for size, movups is the smallest. */
9573 op0
= gen_lowpart (V4SFmode
, op0
);
9574 op1
= gen_lowpart (V4SFmode
, op1
);
9575 emit_insn (gen_sse_movups (op0
, op1
));
9579 /* ??? Similar to above, only less clear because of quote
9580 typeless stores unquote. */
9581 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9582 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9584 op0
= gen_lowpart (V16QImode
, op0
);
9585 op1
= gen_lowpart (V16QImode
, op1
);
9586 emit_insn (gen_sse2_movdqu (op0
, op1
));
9590 if (TARGET_SSE2
&& mode
== V2DFmode
)
9592 m
= adjust_address (op0
, DFmode
, 0);
9593 emit_insn (gen_sse2_storelpd (m
, op1
));
9594 m
= adjust_address (op0
, DFmode
, 8);
9595 emit_insn (gen_sse2_storehpd (m
, op1
));
9599 if (mode
!= V4SFmode
)
9600 op1
= gen_lowpart (V4SFmode
, op1
);
9601 m
= adjust_address (op0
, V2SFmode
, 0);
9602 emit_insn (gen_sse_storelps (m
, op1
));
9603 m
= adjust_address (op0
, V2SFmode
, 8);
9604 emit_insn (gen_sse_storehps (m
, op1
));
9611 /* Expand a push in MODE. This is some mode for which we do not support
9612 proper push instructions, at least from the registers that we expect
9613 the value to live in. */
9616 ix86_expand_push (enum machine_mode mode
, rtx x
)
9620 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9621 GEN_INT (-GET_MODE_SIZE (mode
)),
9622 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9623 if (tmp
!= stack_pointer_rtx
)
9624 emit_move_insn (stack_pointer_rtx
, tmp
);
9626 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9627 emit_move_insn (tmp
, x
);
9630 /* Helper function of ix86_fixup_binary_operands to canonicalize
9631 operand order. Returns true if the operands should be swapped. */
9634 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9637 rtx dst
= operands
[0];
9638 rtx src1
= operands
[1];
9639 rtx src2
= operands
[2];
9641 /* If the operation is not commutative, we can't do anything. */
9642 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9645 /* Highest priority is that src1 should match dst. */
9646 if (rtx_equal_p (dst
, src1
))
9648 if (rtx_equal_p (dst
, src2
))
9651 /* Next highest priority is that immediate constants come second. */
9652 if (immediate_operand (src2
, mode
))
9654 if (immediate_operand (src1
, mode
))
9657 /* Lowest priority is that memory references should come second. */
9667 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9668 destination to use for the operation. If different from the true
9669 destination in operands[0], a copy operation will be required. */
9672 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9675 rtx dst
= operands
[0];
9676 rtx src1
= operands
[1];
9677 rtx src2
= operands
[2];
9679 /* Canonicalize operand order. */
9680 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9687 /* Both source operands cannot be in memory. */
9688 if (MEM_P (src1
) && MEM_P (src2
))
9690 /* Optimization: Only read from memory once. */
9691 if (rtx_equal_p (src1
, src2
))
9693 src2
= force_reg (mode
, src2
);
9697 src2
= force_reg (mode
, src2
);
9700 /* If the destination is memory, and we do not have matching source
9701 operands, do things in registers. */
9702 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9703 dst
= gen_reg_rtx (mode
);
9705 /* Source 1 cannot be a constant. */
9706 if (CONSTANT_P (src1
))
9707 src1
= force_reg (mode
, src1
);
9709 /* Source 1 cannot be a non-matching memory. */
9710 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9711 src1
= force_reg (mode
, src1
);
9718 /* Similarly, but assume that the destination has already been
9722 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9723 enum machine_mode mode
, rtx operands
[])
9725 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9726 gcc_assert (dst
== operands
[0]);
9729 /* Attempt to expand a binary operator. Make the expansion closer to the
9730 actual machine, then just general_operand, which will allow 3 separate
9731 memory references (one output, two input) in a single insn. */
9734 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9737 rtx src1
, src2
, dst
, op
, clob
;
9739 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9743 /* Emit the instruction. */
9745 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9746 if (reload_in_progress
)
9748 /* Reload doesn't know about the flags register, and doesn't know that
9749 it doesn't want to clobber it. We can only do this with PLUS. */
9750 gcc_assert (code
== PLUS
);
9755 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9756 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9759 /* Fix up the destination if needed. */
9760 if (dst
!= operands
[0])
9761 emit_move_insn (operands
[0], dst
);
9764 /* Return TRUE or FALSE depending on whether the binary operator meets the
9765 appropriate constraints. */
9768 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9771 rtx dst
= operands
[0];
9772 rtx src1
= operands
[1];
9773 rtx src2
= operands
[2];
9775 /* Both source operands cannot be in memory. */
9776 if (MEM_P (src1
) && MEM_P (src2
))
9779 /* Canonicalize operand order for commutative operators. */
9780 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9787 /* If the destination is memory, we must have a matching source operand. */
9788 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9791 /* Source 1 cannot be a constant. */
9792 if (CONSTANT_P (src1
))
9795 /* Source 1 cannot be a non-matching memory. */
9796 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9802 /* Attempt to expand a unary operator. Make the expansion closer to the
9803 actual machine, then just general_operand, which will allow 2 separate
9804 memory references (one output, one input) in a single insn. */
9807 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9810 int matching_memory
;
9811 rtx src
, dst
, op
, clob
;
9816 /* If the destination is memory, and we do not have matching source
9817 operands, do things in registers. */
9818 matching_memory
= 0;
9821 if (rtx_equal_p (dst
, src
))
9822 matching_memory
= 1;
9824 dst
= gen_reg_rtx (mode
);
9827 /* When source operand is memory, destination must match. */
9828 if (MEM_P (src
) && !matching_memory
)
9829 src
= force_reg (mode
, src
);
9831 /* Emit the instruction. */
9833 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9834 if (reload_in_progress
|| code
== NOT
)
9836 /* Reload doesn't know about the flags register, and doesn't know that
9837 it doesn't want to clobber it. */
9838 gcc_assert (code
== NOT
);
9843 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9844 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9847 /* Fix up the destination if needed. */
9848 if (dst
!= operands
[0])
9849 emit_move_insn (operands
[0], dst
);
9852 /* Return TRUE or FALSE depending on whether the unary operator meets the
9853 appropriate constraints. */
9856 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9857 enum machine_mode mode ATTRIBUTE_UNUSED
,
9858 rtx operands
[2] ATTRIBUTE_UNUSED
)
9860 /* If one of operands is memory, source and destination must match. */
9861 if ((MEM_P (operands
[0])
9862 || MEM_P (operands
[1]))
9863 && ! rtx_equal_p (operands
[0], operands
[1]))
9868 /* Convert an SF or DFmode value in an SSE register into an unsigned SImode.
9869 When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64
9870 conversion, and ignoring the upper 32 bits of the result. On x86_64,
9871 there is an equivalent SSE %xmm->signed-int-64 conversion.
9873 On x86_32, we don't have the instruction, nor the 64-bit destination
9874 register it requires. Do the conversion inline in the SSE registers.
9875 Requires SSE2. For x86_32, -mfpmath=sse, !optimize_size only. */
9878 ix86_expand_convert_uns_si_sse (rtx target
, rtx input
)
9880 REAL_VALUE_TYPE TWO31r
;
9881 enum machine_mode mode
, vecmode
;
9882 rtx two31
, value
, large
, sign
, result_vec
, zero_or_two31
, x
;
9884 mode
= GET_MODE (input
);
9885 vecmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9887 real_ldexp (&TWO31r
, &dconst1
, 31);
9888 two31
= const_double_from_real_value (TWO31r
, mode
);
9889 two31
= ix86_build_const_vector (mode
, true, two31
);
9890 two31
= force_reg (vecmode
, two31
);
9892 value
= gen_reg_rtx (vecmode
);
9893 ix86_expand_vector_init_one_nonzero (false, vecmode
, value
, input
, 0);
9895 large
= gen_reg_rtx (vecmode
);
9896 x
= gen_rtx_fmt_ee (LE
, vecmode
, two31
, value
);
9897 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
9899 zero_or_two31
= gen_reg_rtx (vecmode
);
9900 x
= gen_rtx_AND (vecmode
, large
, two31
);
9901 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
9903 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
9904 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
9906 result_vec
= gen_reg_rtx (V4SImode
);
9908 x
= gen_sse2_cvttps2dq (result_vec
, value
);
9910 x
= gen_sse2_cvttpd2dq (result_vec
, value
);
9913 sign
= gen_reg_rtx (V4SImode
);
9914 emit_insn (gen_ashlv4si3 (sign
, gen_lowpart (V4SImode
, large
),
9917 emit_insn (gen_xorv4si3 (result_vec
, result_vec
, sign
));
9919 ix86_expand_vector_extract (false, target
, result_vec
, 0);
9922 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9923 Expects the 64-bit DImode to be supplied in a pair of integral
9924 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9925 -mfpmath=sse, !optimize_size only. */
9928 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
9930 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
9931 rtx int_xmm
, fp_xmm
;
9932 rtx biases
, exponents
;
9935 int_xmm
= gen_reg_rtx (V4SImode
);
9936 if (TARGET_INTER_UNIT_MOVES
)
9937 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
9938 else if (TARGET_SSE_SPLIT_REGS
)
9940 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
9941 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
9945 x
= gen_reg_rtx (V2DImode
);
9946 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
9947 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
9950 x
= gen_rtx_CONST_VECTOR (V4SImode
,
9951 gen_rtvec (4, GEN_INT (0x43300000UL
),
9952 GEN_INT (0x45300000UL
),
9953 const0_rtx
, const0_rtx
));
9954 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
9956 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9957 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
9959 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9960 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9961 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9962 (0x1.0p84 + double(fp_value_hi_xmm)).
9963 Note these exponents differ by 32. */
9965 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
9967 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9968 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9969 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
9970 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
9971 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
9972 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
9973 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
9974 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
9975 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
9977 /* Add the upper and lower DFmode values together. */
9979 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
9982 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
9983 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
9984 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
9987 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
9990 /* Convert an unsigned SImode value into a DFmode. Only currently used
9991 for SSE, but applicable anywhere. */
9994 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
9996 REAL_VALUE_TYPE TWO31r
;
9999 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10000 NULL
, 1, OPTAB_DIRECT
);
10002 fp
= gen_reg_rtx (DFmode
);
10003 emit_insn (gen_floatsidf2 (fp
, x
));
10005 real_ldexp (&TWO31r
, &dconst1
, 31);
10006 x
= const_double_from_real_value (TWO31r
, DFmode
);
10008 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10010 emit_move_insn (target
, x
);
10013 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10014 32-bit mode; otherwise we have a direct convert instruction. */
10017 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10019 REAL_VALUE_TYPE TWO32r
;
10020 rtx fp_lo
, fp_hi
, x
;
10022 fp_lo
= gen_reg_rtx (DFmode
);
10023 fp_hi
= gen_reg_rtx (DFmode
);
10025 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10027 real_ldexp (&TWO32r
, &dconst1
, 32);
10028 x
= const_double_from_real_value (TWO32r
, DFmode
);
10029 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10031 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10033 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10036 emit_move_insn (target
, x
);
10039 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10040 For x86_32, -mfpmath=sse, !optimize_size only. */
10042 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10044 REAL_VALUE_TYPE ONE16r
;
10045 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10047 real_ldexp (&ONE16r
, &dconst1
, 16);
10048 x
= const_double_from_real_value (ONE16r
, SFmode
);
10049 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10050 NULL
, 0, OPTAB_DIRECT
);
10051 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10052 NULL
, 0, OPTAB_DIRECT
);
10053 fp_hi
= gen_reg_rtx (SFmode
);
10054 fp_lo
= gen_reg_rtx (SFmode
);
10055 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10056 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10057 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10059 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10061 if (!rtx_equal_p (target
, fp_hi
))
10062 emit_move_insn (target
, fp_hi
);
10065 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10066 then replicate the value for all elements of the vector
10070 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10077 v
= gen_rtvec (4, value
, value
, value
, value
);
10079 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10080 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10081 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10085 v
= gen_rtvec (2, value
, value
);
10087 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10088 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10091 gcc_unreachable ();
10095 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10096 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10097 true, then replicate the mask for all elements of the vector register.
10098 If INVERT is true, then create a mask excluding the sign bit. */
10101 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10103 enum machine_mode vec_mode
;
10104 HOST_WIDE_INT hi
, lo
;
10109 /* Find the sign bit, sign extended to 2*HWI. */
10110 if (mode
== SFmode
)
10111 lo
= 0x80000000, hi
= lo
< 0;
10112 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10113 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10115 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10118 lo
= ~lo
, hi
= ~hi
;
10120 /* Force this value into the low part of a fp vector constant. */
10121 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10122 mask
= gen_lowpart (mode
, mask
);
10124 v
= ix86_build_const_vector (mode
, vect
, mask
);
10125 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10126 return force_reg (vec_mode
, v
);
10129 /* Generate code for floating point ABS or NEG. */
10132 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10135 rtx mask
, set
, use
, clob
, dst
, src
;
10136 bool matching_memory
;
10137 bool use_sse
= false;
10138 bool vector_mode
= VECTOR_MODE_P (mode
);
10139 enum machine_mode elt_mode
= mode
;
10143 elt_mode
= GET_MODE_INNER (mode
);
10146 else if (TARGET_SSE_MATH
)
10147 use_sse
= SSE_FLOAT_MODE_P (mode
);
10149 /* NEG and ABS performed with SSE use bitwise mask operations.
10150 Create the appropriate mask now. */
10152 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10159 /* If the destination is memory, and we don't have matching source
10160 operands or we're using the x87, do things in registers. */
10161 matching_memory
= false;
10164 if (use_sse
&& rtx_equal_p (dst
, src
))
10165 matching_memory
= true;
10167 dst
= gen_reg_rtx (mode
);
10169 if (MEM_P (src
) && !matching_memory
)
10170 src
= force_reg (mode
, src
);
10174 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10175 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10180 set
= gen_rtx_fmt_e (code
, mode
, src
);
10181 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10184 use
= gen_rtx_USE (VOIDmode
, mask
);
10185 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10186 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10187 gen_rtvec (3, set
, use
, clob
)));
10193 if (dst
!= operands
[0])
10194 emit_move_insn (operands
[0], dst
);
10197 /* Expand a copysign operation. Special case operand 0 being a constant. */
10200 ix86_expand_copysign (rtx operands
[])
10202 enum machine_mode mode
, vmode
;
10203 rtx dest
, op0
, op1
, mask
, nmask
;
10205 dest
= operands
[0];
10209 mode
= GET_MODE (dest
);
10210 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10212 if (GET_CODE (op0
) == CONST_DOUBLE
)
10216 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10217 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10219 if (op0
== CONST0_RTX (mode
))
10220 op0
= CONST0_RTX (vmode
);
10223 if (mode
== SFmode
)
10224 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10225 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10227 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10228 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10231 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10233 if (mode
== SFmode
)
10234 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10236 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10240 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10241 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10243 if (mode
== SFmode
)
10244 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10246 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10250 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10251 be a constant, and so has already been expanded into a vector constant. */
10254 ix86_split_copysign_const (rtx operands
[])
10256 enum machine_mode mode
, vmode
;
10257 rtx dest
, op0
, op1
, mask
, x
;
10259 dest
= operands
[0];
10262 mask
= operands
[3];
10264 mode
= GET_MODE (dest
);
10265 vmode
= GET_MODE (mask
);
10267 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10268 x
= gen_rtx_AND (vmode
, dest
, mask
);
10269 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10271 if (op0
!= CONST0_RTX (vmode
))
10273 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10274 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10278 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10279 so we have to do two masks. */
10282 ix86_split_copysign_var (rtx operands
[])
10284 enum machine_mode mode
, vmode
;
10285 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10287 dest
= operands
[0];
10288 scratch
= operands
[1];
10291 nmask
= operands
[4];
10292 mask
= operands
[5];
10294 mode
= GET_MODE (dest
);
10295 vmode
= GET_MODE (mask
);
10297 if (rtx_equal_p (op0
, op1
))
10299 /* Shouldn't happen often (it's useless, obviously), but when it does
10300 we'd generate incorrect code if we continue below. */
10301 emit_move_insn (dest
, op0
);
10305 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10307 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10309 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10310 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10313 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10314 x
= gen_rtx_NOT (vmode
, dest
);
10315 x
= gen_rtx_AND (vmode
, x
, op0
);
10316 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10320 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10322 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10324 else /* alternative 2,4 */
10326 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10327 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10328 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10330 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10332 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10334 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10335 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10337 else /* alternative 3,4 */
10339 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10341 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10342 x
= gen_rtx_AND (vmode
, dest
, op0
);
10344 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10347 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10348 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10351 /* Return TRUE or FALSE depending on whether the first SET in INSN
10352 has source and destination with matching CC modes, and that the
10353 CC mode is at least as constrained as REQ_MODE. */
10356 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10359 enum machine_mode set_mode
;
10361 set
= PATTERN (insn
);
10362 if (GET_CODE (set
) == PARALLEL
)
10363 set
= XVECEXP (set
, 0, 0);
10364 gcc_assert (GET_CODE (set
) == SET
);
10365 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10367 set_mode
= GET_MODE (SET_DEST (set
));
10371 if (req_mode
!= CCNOmode
10372 && (req_mode
!= CCmode
10373 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10377 if (req_mode
== CCGCmode
)
10381 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10385 if (req_mode
== CCZmode
)
10392 gcc_unreachable ();
10395 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10398 /* Generate insn patterns to do an integer compare of OPERANDS. */
10401 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10403 enum machine_mode cmpmode
;
10406 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10407 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10409 /* This is very simple, but making the interface the same as in the
10410 FP case makes the rest of the code easier. */
10411 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10412 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10414 /* Return the test that should be put into the flags user, i.e.
10415 the bcc, scc, or cmov instruction. */
10416 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10419 /* Figure out whether to use ordered or unordered fp comparisons.
10420 Return the appropriate mode to use. */
10423 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10425 /* ??? In order to make all comparisons reversible, we do all comparisons
10426 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10427 all forms trapping and nontrapping comparisons, we can make inequality
10428 comparisons trapping again, since it results in better code when using
10429 FCOM based compares. */
10430 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10434 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10436 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10437 return ix86_fp_compare_mode (code
);
10440 /* Only zero flag is needed. */
10441 case EQ
: /* ZF=0 */
10442 case NE
: /* ZF!=0 */
10444 /* Codes needing carry flag. */
10445 case GEU
: /* CF=0 */
10446 case GTU
: /* CF=0 & ZF=0 */
10447 case LTU
: /* CF=1 */
10448 case LEU
: /* CF=1 | ZF=1 */
10450 /* Codes possibly doable only with sign flag when
10451 comparing against zero. */
10452 case GE
: /* SF=OF or SF=0 */
10453 case LT
: /* SF<>OF or SF=1 */
10454 if (op1
== const0_rtx
)
10457 /* For other cases Carry flag is not required. */
10459 /* Codes doable only with sign flag when comparing
10460 against zero, but we miss jump instruction for it
10461 so we need to use relational tests against overflow
10462 that thus needs to be zero. */
10463 case GT
: /* ZF=0 & SF=OF */
10464 case LE
: /* ZF=1 | SF<>OF */
10465 if (op1
== const0_rtx
)
10469 /* strcmp pattern do (use flags) and combine may ask us for proper
10474 gcc_unreachable ();
10478 /* Return the fixed registers used for condition codes. */
10481 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10488 /* If two condition code modes are compatible, return a condition code
10489 mode which is compatible with both. Otherwise, return
10492 static enum machine_mode
10493 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10498 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10501 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10502 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10508 gcc_unreachable ();
10530 /* These are only compatible with themselves, which we already
10536 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10539 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10541 enum rtx_code swapped_code
= swap_condition (code
);
10542 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10543 || (ix86_fp_comparison_cost (swapped_code
)
10544 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10547 /* Swap, force into registers, or otherwise massage the two operands
10548 to a fp comparison. The operands are updated in place; the new
10549 comparison code is returned. */
10551 static enum rtx_code
10552 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10554 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10555 rtx op0
= *pop0
, op1
= *pop1
;
10556 enum machine_mode op_mode
= GET_MODE (op0
);
10557 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10559 /* All of the unordered compare instructions only work on registers.
10560 The same is true of the fcomi compare instructions. The XFmode
10561 compare instructions require registers except when comparing
10562 against zero or when converting operand 1 from fixed point to
10566 && (fpcmp_mode
== CCFPUmode
10567 || (op_mode
== XFmode
10568 && ! (standard_80387_constant_p (op0
) == 1
10569 || standard_80387_constant_p (op1
) == 1)
10570 && GET_CODE (op1
) != FLOAT
)
10571 || ix86_use_fcomi_compare (code
)))
10573 op0
= force_reg (op_mode
, op0
);
10574 op1
= force_reg (op_mode
, op1
);
10578 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10579 things around if they appear profitable, otherwise force op0
10580 into a register. */
10582 if (standard_80387_constant_p (op0
) == 0
10584 && ! (standard_80387_constant_p (op1
) == 0
10588 tmp
= op0
, op0
= op1
, op1
= tmp
;
10589 code
= swap_condition (code
);
10593 op0
= force_reg (op_mode
, op0
);
10595 if (CONSTANT_P (op1
))
10597 int tmp
= standard_80387_constant_p (op1
);
10599 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10603 op1
= force_reg (op_mode
, op1
);
10606 op1
= force_reg (op_mode
, op1
);
10610 /* Try to rearrange the comparison to make it cheaper. */
10611 if (ix86_fp_comparison_cost (code
)
10612 > ix86_fp_comparison_cost (swap_condition (code
))
10613 && (REG_P (op1
) || !no_new_pseudos
))
10616 tmp
= op0
, op0
= op1
, op1
= tmp
;
10617 code
= swap_condition (code
);
10619 op0
= force_reg (op_mode
, op0
);
10627 /* Convert comparison codes we use to represent FP comparison to integer
10628 code that will result in proper branch. Return UNKNOWN if no such code
10632 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10661 /* Split comparison code CODE into comparisons we can do using branch
10662 instructions. BYPASS_CODE is comparison code for branch that will
10663 branch around FIRST_CODE and SECOND_CODE. If some of branches
10664 is not required, set value to UNKNOWN.
10665 We never require more than two branches. */
10668 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10669 enum rtx_code
*first_code
,
10670 enum rtx_code
*second_code
)
10672 *first_code
= code
;
10673 *bypass_code
= UNKNOWN
;
10674 *second_code
= UNKNOWN
;
10676 /* The fcomi comparison sets flags as follows:
10686 case GT
: /* GTU - CF=0 & ZF=0 */
10687 case GE
: /* GEU - CF=0 */
10688 case ORDERED
: /* PF=0 */
10689 case UNORDERED
: /* PF=1 */
10690 case UNEQ
: /* EQ - ZF=1 */
10691 case UNLT
: /* LTU - CF=1 */
10692 case UNLE
: /* LEU - CF=1 | ZF=1 */
10693 case LTGT
: /* EQ - ZF=0 */
10695 case LT
: /* LTU - CF=1 - fails on unordered */
10696 *first_code
= UNLT
;
10697 *bypass_code
= UNORDERED
;
10699 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10700 *first_code
= UNLE
;
10701 *bypass_code
= UNORDERED
;
10703 case EQ
: /* EQ - ZF=1 - fails on unordered */
10704 *first_code
= UNEQ
;
10705 *bypass_code
= UNORDERED
;
10707 case NE
: /* NE - ZF=0 - fails on unordered */
10708 *first_code
= LTGT
;
10709 *second_code
= UNORDERED
;
10711 case UNGE
: /* GEU - CF=0 - fails on unordered */
10713 *second_code
= UNORDERED
;
10715 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10717 *second_code
= UNORDERED
;
10720 gcc_unreachable ();
10722 if (!TARGET_IEEE_FP
)
10724 *second_code
= UNKNOWN
;
10725 *bypass_code
= UNKNOWN
;
10729 /* Return cost of comparison done fcom + arithmetics operations on AX.
10730 All following functions do use number of instructions as a cost metrics.
10731 In future this should be tweaked to compute bytes for optimize_size and
10732 take into account performance of various instructions on various CPUs. */
10734 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10736 if (!TARGET_IEEE_FP
)
10738 /* The cost of code output by ix86_expand_fp_compare. */
10762 gcc_unreachable ();
10766 /* Return cost of comparison done using fcomi operation.
10767 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10769 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10771 enum rtx_code bypass_code
, first_code
, second_code
;
10772 /* Return arbitrarily high cost when instruction is not supported - this
10773 prevents gcc from using it. */
10776 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10777 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10780 /* Return cost of comparison done using sahf operation.
10781 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10783 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10785 enum rtx_code bypass_code
, first_code
, second_code
;
10786 /* Return arbitrarily high cost when instruction is not preferred - this
10787 avoids gcc from using it. */
10788 if (!TARGET_USE_SAHF
&& !optimize_size
)
10790 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10791 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10794 /* Compute cost of the comparison done using any method.
10795 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10797 ix86_fp_comparison_cost (enum rtx_code code
)
10799 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10802 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10803 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10805 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10806 if (min
> sahf_cost
)
10808 if (min
> fcomi_cost
)
10813 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10816 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10817 rtx
*second_test
, rtx
*bypass_test
)
10819 enum machine_mode fpcmp_mode
, intcmp_mode
;
10821 int cost
= ix86_fp_comparison_cost (code
);
10822 enum rtx_code bypass_code
, first_code
, second_code
;
10824 fpcmp_mode
= ix86_fp_compare_mode (code
);
10825 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10828 *second_test
= NULL_RTX
;
10830 *bypass_test
= NULL_RTX
;
10832 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10834 /* Do fcomi/sahf based test when profitable. */
10835 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10836 && (second_code
== UNKNOWN
|| second_test
)
10837 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10841 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10842 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10848 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10849 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10851 scratch
= gen_reg_rtx (HImode
);
10852 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10853 emit_insn (gen_x86_sahf_1 (scratch
));
10856 /* The FP codes work out to act like unsigned. */
10857 intcmp_mode
= fpcmp_mode
;
10859 if (bypass_code
!= UNKNOWN
)
10860 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10861 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10863 if (second_code
!= UNKNOWN
)
10864 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10865 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10870 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10871 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10872 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10874 scratch
= gen_reg_rtx (HImode
);
10875 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10877 /* In the unordered case, we have to check C2 for NaN's, which
10878 doesn't happen to work out to anything nice combination-wise.
10879 So do some bit twiddling on the value we've got in AH to come
10880 up with an appropriate set of condition codes. */
10882 intcmp_mode
= CCNOmode
;
10887 if (code
== GT
|| !TARGET_IEEE_FP
)
10889 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10894 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10895 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10896 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10897 intcmp_mode
= CCmode
;
10903 if (code
== LT
&& TARGET_IEEE_FP
)
10905 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10906 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10907 intcmp_mode
= CCmode
;
10912 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10918 if (code
== GE
|| !TARGET_IEEE_FP
)
10920 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10925 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10926 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10933 if (code
== LE
&& TARGET_IEEE_FP
)
10935 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10936 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10937 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10938 intcmp_mode
= CCmode
;
10943 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10949 if (code
== EQ
&& TARGET_IEEE_FP
)
10951 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10952 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10953 intcmp_mode
= CCmode
;
10958 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10965 if (code
== NE
&& TARGET_IEEE_FP
)
10967 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10968 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10974 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10980 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10984 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10989 gcc_unreachable ();
10993 /* Return the test that should be put into the flags user, i.e.
10994 the bcc, scc, or cmov instruction. */
10995 return gen_rtx_fmt_ee (code
, VOIDmode
,
10996 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11001 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11004 op0
= ix86_compare_op0
;
11005 op1
= ix86_compare_op1
;
11008 *second_test
= NULL_RTX
;
11010 *bypass_test
= NULL_RTX
;
11012 if (ix86_compare_emitted
)
11014 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11015 ix86_compare_emitted
= NULL_RTX
;
11017 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11018 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11019 second_test
, bypass_test
);
11021 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11026 /* Return true if the CODE will result in nontrivial jump sequence. */
11028 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11030 enum rtx_code bypass_code
, first_code
, second_code
;
11033 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11034 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11038 ix86_expand_branch (enum rtx_code code
, rtx label
)
11042 /* If we have emitted a compare insn, go straight to simple.
11043 ix86_expand_compare won't emit anything if ix86_compare_emitted
11045 if (ix86_compare_emitted
)
11048 switch (GET_MODE (ix86_compare_op0
))
11054 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11055 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11056 gen_rtx_LABEL_REF (VOIDmode
, label
),
11058 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11067 enum rtx_code bypass_code
, first_code
, second_code
;
11069 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11070 &ix86_compare_op1
);
11072 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11074 /* Check whether we will use the natural sequence with one jump. If
11075 so, we can expand jump early. Otherwise delay expansion by
11076 creating compound insn to not confuse optimizers. */
11077 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11080 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11081 gen_rtx_LABEL_REF (VOIDmode
, label
),
11082 pc_rtx
, NULL_RTX
, NULL_RTX
);
11086 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11087 ix86_compare_op0
, ix86_compare_op1
);
11088 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11089 gen_rtx_LABEL_REF (VOIDmode
, label
),
11091 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11093 use_fcomi
= ix86_use_fcomi_compare (code
);
11094 vec
= rtvec_alloc (3 + !use_fcomi
);
11095 RTVEC_ELT (vec
, 0) = tmp
;
11097 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11099 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11102 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11104 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11113 /* Expand DImode branch into multiple compare+branch. */
11115 rtx lo
[2], hi
[2], label2
;
11116 enum rtx_code code1
, code2
, code3
;
11117 enum machine_mode submode
;
11119 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11121 tmp
= ix86_compare_op0
;
11122 ix86_compare_op0
= ix86_compare_op1
;
11123 ix86_compare_op1
= tmp
;
11124 code
= swap_condition (code
);
11126 if (GET_MODE (ix86_compare_op0
) == DImode
)
11128 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11129 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11134 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11135 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11139 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11140 avoid two branches. This costs one extra insn, so disable when
11141 optimizing for size. */
11143 if ((code
== EQ
|| code
== NE
)
11145 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11150 if (hi
[1] != const0_rtx
)
11151 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11152 NULL_RTX
, 0, OPTAB_WIDEN
);
11155 if (lo
[1] != const0_rtx
)
11156 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11157 NULL_RTX
, 0, OPTAB_WIDEN
);
11159 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11160 NULL_RTX
, 0, OPTAB_WIDEN
);
11162 ix86_compare_op0
= tmp
;
11163 ix86_compare_op1
= const0_rtx
;
11164 ix86_expand_branch (code
, label
);
11168 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11169 op1 is a constant and the low word is zero, then we can just
11170 examine the high word. */
11172 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11175 case LT
: case LTU
: case GE
: case GEU
:
11176 ix86_compare_op0
= hi
[0];
11177 ix86_compare_op1
= hi
[1];
11178 ix86_expand_branch (code
, label
);
11184 /* Otherwise, we need two or three jumps. */
11186 label2
= gen_label_rtx ();
11189 code2
= swap_condition (code
);
11190 code3
= unsigned_condition (code
);
11194 case LT
: case GT
: case LTU
: case GTU
:
11197 case LE
: code1
= LT
; code2
= GT
; break;
11198 case GE
: code1
= GT
; code2
= LT
; break;
11199 case LEU
: code1
= LTU
; code2
= GTU
; break;
11200 case GEU
: code1
= GTU
; code2
= LTU
; break;
11202 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11203 case NE
: code2
= UNKNOWN
; break;
11206 gcc_unreachable ();
11211 * if (hi(a) < hi(b)) goto true;
11212 * if (hi(a) > hi(b)) goto false;
11213 * if (lo(a) < lo(b)) goto true;
11217 ix86_compare_op0
= hi
[0];
11218 ix86_compare_op1
= hi
[1];
11220 if (code1
!= UNKNOWN
)
11221 ix86_expand_branch (code1
, label
);
11222 if (code2
!= UNKNOWN
)
11223 ix86_expand_branch (code2
, label2
);
11225 ix86_compare_op0
= lo
[0];
11226 ix86_compare_op1
= lo
[1];
11227 ix86_expand_branch (code3
, label
);
11229 if (code2
!= UNKNOWN
)
11230 emit_label (label2
);
11235 gcc_unreachable ();
11239 /* Split branch based on floating point condition. */
11241 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11242 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11244 rtx second
, bypass
;
11245 rtx label
= NULL_RTX
;
11247 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11250 if (target2
!= pc_rtx
)
11253 code
= reverse_condition_maybe_unordered (code
);
11258 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11259 tmp
, &second
, &bypass
);
11261 /* Remove pushed operand from stack. */
11263 ix86_free_from_memory (GET_MODE (pushed
));
11265 if (split_branch_probability
>= 0)
11267 /* Distribute the probabilities across the jumps.
11268 Assume the BYPASS and SECOND to be always test
11270 probability
= split_branch_probability
;
11272 /* Value of 1 is low enough to make no need for probability
11273 to be updated. Later we may run some experiments and see
11274 if unordered values are more frequent in practice. */
11276 bypass_probability
= 1;
11278 second_probability
= 1;
11280 if (bypass
!= NULL_RTX
)
11282 label
= gen_label_rtx ();
11283 i
= emit_jump_insn (gen_rtx_SET
11285 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11287 gen_rtx_LABEL_REF (VOIDmode
,
11290 if (bypass_probability
>= 0)
11292 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11293 GEN_INT (bypass_probability
),
11296 i
= emit_jump_insn (gen_rtx_SET
11298 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11299 condition
, target1
, target2
)));
11300 if (probability
>= 0)
11302 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11303 GEN_INT (probability
),
11305 if (second
!= NULL_RTX
)
11307 i
= emit_jump_insn (gen_rtx_SET
11309 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11311 if (second_probability
>= 0)
11313 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11314 GEN_INT (second_probability
),
11317 if (label
!= NULL_RTX
)
11318 emit_label (label
);
11322 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11324 rtx ret
, tmp
, tmpreg
, equiv
;
11325 rtx second_test
, bypass_test
;
11327 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11328 return 0; /* FAIL */
11330 gcc_assert (GET_MODE (dest
) == QImode
);
11332 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11333 PUT_MODE (ret
, QImode
);
11338 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11339 if (bypass_test
|| second_test
)
11341 rtx test
= second_test
;
11343 rtx tmp2
= gen_reg_rtx (QImode
);
11346 gcc_assert (!second_test
);
11347 test
= bypass_test
;
11349 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11351 PUT_MODE (test
, QImode
);
11352 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11355 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11357 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11360 /* Attach a REG_EQUAL note describing the comparison result. */
11361 if (ix86_compare_op0
&& ix86_compare_op1
)
11363 equiv
= simplify_gen_relational (code
, QImode
,
11364 GET_MODE (ix86_compare_op0
),
11365 ix86_compare_op0
, ix86_compare_op1
);
11366 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11369 return 1; /* DONE */
11372 /* Expand comparison setting or clearing carry flag. Return true when
11373 successful and set pop for the operation. */
11375 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11377 enum machine_mode mode
=
11378 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11380 /* Do not handle DImode compares that go through special path. Also we can't
11381 deal with FP compares yet. This is possible to add. */
11382 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11384 if (FLOAT_MODE_P (mode
))
11386 rtx second_test
= NULL
, bypass_test
= NULL
;
11387 rtx compare_op
, compare_seq
;
11389 /* Shortcut: following common codes never translate into carry flag compares. */
11390 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11391 || code
== ORDERED
|| code
== UNORDERED
)
11394 /* These comparisons require zero flag; swap operands so they won't. */
11395 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11396 && !TARGET_IEEE_FP
)
11401 code
= swap_condition (code
);
11404 /* Try to expand the comparison and verify that we end up with carry flag
11405 based comparison. This is fails to be true only when we decide to expand
11406 comparison using arithmetic that is not too common scenario. */
11408 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11409 &second_test
, &bypass_test
);
11410 compare_seq
= get_insns ();
11413 if (second_test
|| bypass_test
)
11415 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11416 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11417 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11419 code
= GET_CODE (compare_op
);
11420 if (code
!= LTU
&& code
!= GEU
)
11422 emit_insn (compare_seq
);
11426 if (!INTEGRAL_MODE_P (mode
))
11434 /* Convert a==0 into (unsigned)a<1. */
11437 if (op1
!= const0_rtx
)
11440 code
= (code
== EQ
? LTU
: GEU
);
11443 /* Convert a>b into b<a or a>=b-1. */
11446 if (CONST_INT_P (op1
))
11448 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11449 /* Bail out on overflow. We still can swap operands but that
11450 would force loading of the constant into register. */
11451 if (op1
== const0_rtx
11452 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11454 code
= (code
== GTU
? GEU
: LTU
);
11461 code
= (code
== GTU
? LTU
: GEU
);
11465 /* Convert a>=0 into (unsigned)a<0x80000000. */
11468 if (mode
== DImode
|| op1
!= const0_rtx
)
11470 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11471 code
= (code
== LT
? GEU
: LTU
);
11475 if (mode
== DImode
|| op1
!= constm1_rtx
)
11477 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11478 code
= (code
== LE
? GEU
: LTU
);
11484 /* Swapping operands may cause constant to appear as first operand. */
11485 if (!nonimmediate_operand (op0
, VOIDmode
))
11487 if (no_new_pseudos
)
11489 op0
= force_reg (mode
, op0
);
11491 ix86_compare_op0
= op0
;
11492 ix86_compare_op1
= op1
;
11493 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11494 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11499 ix86_expand_int_movcc (rtx operands
[])
11501 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11502 rtx compare_seq
, compare_op
;
11503 rtx second_test
, bypass_test
;
11504 enum machine_mode mode
= GET_MODE (operands
[0]);
11505 bool sign_bit_compare_p
= false;;
11508 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11509 compare_seq
= get_insns ();
11512 compare_code
= GET_CODE (compare_op
);
11514 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11515 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11516 sign_bit_compare_p
= true;
11518 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11519 HImode insns, we'd be swallowed in word prefix ops. */
11521 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11522 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11523 && CONST_INT_P (operands
[2])
11524 && CONST_INT_P (operands
[3]))
11526 rtx out
= operands
[0];
11527 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11528 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11529 HOST_WIDE_INT diff
;
11532 /* Sign bit compares are better done using shifts than we do by using
11534 if (sign_bit_compare_p
11535 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11536 ix86_compare_op1
, &compare_op
))
11538 /* Detect overlap between destination and compare sources. */
11541 if (!sign_bit_compare_p
)
11543 bool fpcmp
= false;
11545 compare_code
= GET_CODE (compare_op
);
11547 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11548 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11551 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11554 /* To simplify rest of code, restrict to the GEU case. */
11555 if (compare_code
== LTU
)
11557 HOST_WIDE_INT tmp
= ct
;
11560 compare_code
= reverse_condition (compare_code
);
11561 code
= reverse_condition (code
);
11566 PUT_CODE (compare_op
,
11567 reverse_condition_maybe_unordered
11568 (GET_CODE (compare_op
)));
11570 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11574 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11575 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11576 tmp
= gen_reg_rtx (mode
);
11578 if (mode
== DImode
)
11579 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11581 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11585 if (code
== GT
|| code
== GE
)
11586 code
= reverse_condition (code
);
11589 HOST_WIDE_INT tmp
= ct
;
11594 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11595 ix86_compare_op1
, VOIDmode
, 0, -1);
11608 tmp
= expand_simple_binop (mode
, PLUS
,
11610 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11621 tmp
= expand_simple_binop (mode
, IOR
,
11623 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11625 else if (diff
== -1 && ct
)
11635 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11637 tmp
= expand_simple_binop (mode
, PLUS
,
11638 copy_rtx (tmp
), GEN_INT (cf
),
11639 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11647 * andl cf - ct, dest
11657 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11660 tmp
= expand_simple_binop (mode
, AND
,
11662 gen_int_mode (cf
- ct
, mode
),
11663 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11665 tmp
= expand_simple_binop (mode
, PLUS
,
11666 copy_rtx (tmp
), GEN_INT (ct
),
11667 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11670 if (!rtx_equal_p (tmp
, out
))
11671 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11673 return 1; /* DONE */
11679 tmp
= ct
, ct
= cf
, cf
= tmp
;
11681 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11683 /* We may be reversing unordered compare to normal compare, that
11684 is not valid in general (we may convert non-trapping condition
11685 to trapping one), however on i386 we currently emit all
11686 comparisons unordered. */
11687 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11688 code
= reverse_condition_maybe_unordered (code
);
11692 compare_code
= reverse_condition (compare_code
);
11693 code
= reverse_condition (code
);
11697 compare_code
= UNKNOWN
;
11698 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11699 && CONST_INT_P (ix86_compare_op1
))
11701 if (ix86_compare_op1
== const0_rtx
11702 && (code
== LT
|| code
== GE
))
11703 compare_code
= code
;
11704 else if (ix86_compare_op1
== constm1_rtx
)
11708 else if (code
== GT
)
11713 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11714 if (compare_code
!= UNKNOWN
11715 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11716 && (cf
== -1 || ct
== -1))
11718 /* If lea code below could be used, only optimize
11719 if it results in a 2 insn sequence. */
11721 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11722 || diff
== 3 || diff
== 5 || diff
== 9)
11723 || (compare_code
== LT
&& ct
== -1)
11724 || (compare_code
== GE
&& cf
== -1))
11727 * notl op1 (if necessary)
11735 code
= reverse_condition (code
);
11738 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11739 ix86_compare_op1
, VOIDmode
, 0, -1);
11741 out
= expand_simple_binop (mode
, IOR
,
11743 out
, 1, OPTAB_DIRECT
);
11744 if (out
!= operands
[0])
11745 emit_move_insn (operands
[0], out
);
11747 return 1; /* DONE */
11752 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11753 || diff
== 3 || diff
== 5 || diff
== 9)
11754 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11756 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11762 * lea cf(dest*(ct-cf)),dest
11766 * This also catches the degenerate setcc-only case.
11772 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11773 ix86_compare_op1
, VOIDmode
, 0, 1);
11776 /* On x86_64 the lea instruction operates on Pmode, so we need
11777 to get arithmetics done in proper mode to match. */
11779 tmp
= copy_rtx (out
);
11783 out1
= copy_rtx (out
);
11784 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11788 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11794 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11797 if (!rtx_equal_p (tmp
, out
))
11800 out
= force_operand (tmp
, copy_rtx (out
));
11802 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11804 if (!rtx_equal_p (out
, operands
[0]))
11805 emit_move_insn (operands
[0], copy_rtx (out
));
11807 return 1; /* DONE */
11811 * General case: Jumpful:
11812 * xorl dest,dest cmpl op1, op2
11813 * cmpl op1, op2 movl ct, dest
11814 * setcc dest jcc 1f
11815 * decl dest movl cf, dest
11816 * andl (cf-ct),dest 1:
11819 * Size 20. Size 14.
11821 * This is reasonably steep, but branch mispredict costs are
11822 * high on modern cpus, so consider failing only if optimizing
11826 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11827 && BRANCH_COST
>= 2)
11833 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11834 /* We may be reversing unordered compare to normal compare,
11835 that is not valid in general (we may convert non-trapping
11836 condition to trapping one), however on i386 we currently
11837 emit all comparisons unordered. */
11838 code
= reverse_condition_maybe_unordered (code
);
11841 code
= reverse_condition (code
);
11842 if (compare_code
!= UNKNOWN
)
11843 compare_code
= reverse_condition (compare_code
);
11847 if (compare_code
!= UNKNOWN
)
11849 /* notl op1 (if needed)
11854 For x < 0 (resp. x <= -1) there will be no notl,
11855 so if possible swap the constants to get rid of the
11857 True/false will be -1/0 while code below (store flag
11858 followed by decrement) is 0/-1, so the constants need
11859 to be exchanged once more. */
11861 if (compare_code
== GE
|| !cf
)
11863 code
= reverse_condition (code
);
11868 HOST_WIDE_INT tmp
= cf
;
11873 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11874 ix86_compare_op1
, VOIDmode
, 0, -1);
11878 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11879 ix86_compare_op1
, VOIDmode
, 0, 1);
11881 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11882 copy_rtx (out
), 1, OPTAB_DIRECT
);
11885 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11886 gen_int_mode (cf
- ct
, mode
),
11887 copy_rtx (out
), 1, OPTAB_DIRECT
);
11889 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11890 copy_rtx (out
), 1, OPTAB_DIRECT
);
11891 if (!rtx_equal_p (out
, operands
[0]))
11892 emit_move_insn (operands
[0], copy_rtx (out
));
11894 return 1; /* DONE */
11898 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11900 /* Try a few things more with specific constants and a variable. */
11903 rtx var
, orig_out
, out
, tmp
;
11905 if (BRANCH_COST
<= 2)
11906 return 0; /* FAIL */
11908 /* If one of the two operands is an interesting constant, load a
11909 constant with the above and mask it in with a logical operation. */
11911 if (CONST_INT_P (operands
[2]))
11914 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11915 operands
[3] = constm1_rtx
, op
= and_optab
;
11916 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11917 operands
[3] = const0_rtx
, op
= ior_optab
;
11919 return 0; /* FAIL */
11921 else if (CONST_INT_P (operands
[3]))
11924 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11925 operands
[2] = constm1_rtx
, op
= and_optab
;
11926 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11927 operands
[2] = const0_rtx
, op
= ior_optab
;
11929 return 0; /* FAIL */
11932 return 0; /* FAIL */
11934 orig_out
= operands
[0];
11935 tmp
= gen_reg_rtx (mode
);
11938 /* Recurse to get the constant loaded. */
11939 if (ix86_expand_int_movcc (operands
) == 0)
11940 return 0; /* FAIL */
11942 /* Mask in the interesting variable. */
11943 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11945 if (!rtx_equal_p (out
, orig_out
))
11946 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11948 return 1; /* DONE */
11952 * For comparison with above,
11962 if (! nonimmediate_operand (operands
[2], mode
))
11963 operands
[2] = force_reg (mode
, operands
[2]);
11964 if (! nonimmediate_operand (operands
[3], mode
))
11965 operands
[3] = force_reg (mode
, operands
[3]);
11967 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11969 rtx tmp
= gen_reg_rtx (mode
);
11970 emit_move_insn (tmp
, operands
[3]);
11973 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11975 rtx tmp
= gen_reg_rtx (mode
);
11976 emit_move_insn (tmp
, operands
[2]);
11980 if (! register_operand (operands
[2], VOIDmode
)
11982 || ! register_operand (operands
[3], VOIDmode
)))
11983 operands
[2] = force_reg (mode
, operands
[2]);
11986 && ! register_operand (operands
[3], VOIDmode
))
11987 operands
[3] = force_reg (mode
, operands
[3]);
11989 emit_insn (compare_seq
);
11990 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11991 gen_rtx_IF_THEN_ELSE (mode
,
11992 compare_op
, operands
[2],
11995 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11996 gen_rtx_IF_THEN_ELSE (mode
,
11998 copy_rtx (operands
[3]),
11999 copy_rtx (operands
[0]))));
12001 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12002 gen_rtx_IF_THEN_ELSE (mode
,
12004 copy_rtx (operands
[2]),
12005 copy_rtx (operands
[0]))));
12007 return 1; /* DONE */
12010 /* Swap, force into registers, or otherwise massage the two operands
12011 to an sse comparison with a mask result. Thus we differ a bit from
12012 ix86_prepare_fp_compare_args which expects to produce a flags result.
12014 The DEST operand exists to help determine whether to commute commutative
12015 operators. The POP0/POP1 operands are updated in place. The new
12016 comparison code is returned, or UNKNOWN if not implementable. */
12018 static enum rtx_code
12019 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12020 rtx
*pop0
, rtx
*pop1
)
12028 /* We have no LTGT as an operator. We could implement it with
12029 NE & ORDERED, but this requires an extra temporary. It's
12030 not clear that it's worth it. */
12037 /* These are supported directly. */
12044 /* For commutative operators, try to canonicalize the destination
12045 operand to be first in the comparison - this helps reload to
12046 avoid extra moves. */
12047 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12055 /* These are not supported directly. Swap the comparison operands
12056 to transform into something that is supported. */
12060 code
= swap_condition (code
);
12064 gcc_unreachable ();
12070 /* Detect conditional moves that exactly match min/max operational
12071 semantics. Note that this is IEEE safe, as long as we don't
12072 interchange the operands.
12074 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12075 and TRUE if the operation is successful and instructions are emitted. */
12078 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12079 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12081 enum machine_mode mode
;
12087 else if (code
== UNGE
)
12090 if_true
= if_false
;
12096 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12098 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12103 mode
= GET_MODE (dest
);
12105 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12106 but MODE may be a vector mode and thus not appropriate. */
12107 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12109 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12112 if_true
= force_reg (mode
, if_true
);
12113 v
= gen_rtvec (2, if_true
, if_false
);
12114 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12118 code
= is_min
? SMIN
: SMAX
;
12119 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12122 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12126 /* Expand an sse vector comparison. Return the register with the result. */
12129 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12130 rtx op_true
, rtx op_false
)
12132 enum machine_mode mode
= GET_MODE (dest
);
12135 cmp_op0
= force_reg (mode
, cmp_op0
);
12136 if (!nonimmediate_operand (cmp_op1
, mode
))
12137 cmp_op1
= force_reg (mode
, cmp_op1
);
12140 || reg_overlap_mentioned_p (dest
, op_true
)
12141 || reg_overlap_mentioned_p (dest
, op_false
))
12142 dest
= gen_reg_rtx (mode
);
12144 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12145 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12150 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12151 operations. This is used for both scalar and vector conditional moves. */
12154 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12156 enum machine_mode mode
= GET_MODE (dest
);
12159 if (op_false
== CONST0_RTX (mode
))
12161 op_true
= force_reg (mode
, op_true
);
12162 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12163 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12165 else if (op_true
== CONST0_RTX (mode
))
12167 op_false
= force_reg (mode
, op_false
);
12168 x
= gen_rtx_NOT (mode
, cmp
);
12169 x
= gen_rtx_AND (mode
, x
, op_false
);
12170 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12174 op_true
= force_reg (mode
, op_true
);
12175 op_false
= force_reg (mode
, op_false
);
12177 t2
= gen_reg_rtx (mode
);
12179 t3
= gen_reg_rtx (mode
);
12183 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12184 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12186 x
= gen_rtx_NOT (mode
, cmp
);
12187 x
= gen_rtx_AND (mode
, x
, op_false
);
12188 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12190 x
= gen_rtx_IOR (mode
, t3
, t2
);
12191 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12195 /* Expand a floating-point conditional move. Return true if successful. */
12198 ix86_expand_fp_movcc (rtx operands
[])
12200 enum machine_mode mode
= GET_MODE (operands
[0]);
12201 enum rtx_code code
= GET_CODE (operands
[1]);
12202 rtx tmp
, compare_op
, second_test
, bypass_test
;
12204 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12206 enum machine_mode cmode
;
12208 /* Since we've no cmove for sse registers, don't force bad register
12209 allocation just to gain access to it. Deny movcc when the
12210 comparison mode doesn't match the move mode. */
12211 cmode
= GET_MODE (ix86_compare_op0
);
12212 if (cmode
== VOIDmode
)
12213 cmode
= GET_MODE (ix86_compare_op1
);
12217 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12219 &ix86_compare_op1
);
12220 if (code
== UNKNOWN
)
12223 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12224 ix86_compare_op1
, operands
[2],
12228 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12229 ix86_compare_op1
, operands
[2], operands
[3]);
12230 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12234 /* The floating point conditional move instructions don't directly
12235 support conditions resulting from a signed integer comparison. */
12237 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12239 /* The floating point conditional move instructions don't directly
12240 support signed integer comparisons. */
12242 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12244 gcc_assert (!second_test
&& !bypass_test
);
12245 tmp
= gen_reg_rtx (QImode
);
12246 ix86_expand_setcc (code
, tmp
);
12248 ix86_compare_op0
= tmp
;
12249 ix86_compare_op1
= const0_rtx
;
12250 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12252 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12254 tmp
= gen_reg_rtx (mode
);
12255 emit_move_insn (tmp
, operands
[3]);
12258 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12260 tmp
= gen_reg_rtx (mode
);
12261 emit_move_insn (tmp
, operands
[2]);
12265 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12266 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12267 operands
[2], operands
[3])));
12269 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12270 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12271 operands
[3], operands
[0])));
12273 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12274 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12275 operands
[2], operands
[0])));
12280 /* Expand a floating-point vector conditional move; a vcond operation
12281 rather than a movcc operation. */
12284 ix86_expand_fp_vcond (rtx operands
[])
12286 enum rtx_code code
= GET_CODE (operands
[3]);
12289 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12290 &operands
[4], &operands
[5]);
12291 if (code
== UNKNOWN
)
12294 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12295 operands
[5], operands
[1], operands
[2]))
12298 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12299 operands
[1], operands
[2]);
12300 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12304 /* Expand a signed integral vector conditional move. */
12307 ix86_expand_int_vcond (rtx operands
[])
12309 enum machine_mode mode
= GET_MODE (operands
[0]);
12310 enum rtx_code code
= GET_CODE (operands
[3]);
12311 bool negate
= false;
12314 cop0
= operands
[4];
12315 cop1
= operands
[5];
12317 /* Canonicalize the comparison to EQ, GT, GTU. */
12328 code
= reverse_condition (code
);
12334 code
= reverse_condition (code
);
12340 code
= swap_condition (code
);
12341 x
= cop0
, cop0
= cop1
, cop1
= x
;
12345 gcc_unreachable ();
12348 /* Unsigned parallel compare is not supported by the hardware. Play some
12349 tricks to turn this into a signed comparison against 0. */
12352 cop0
= force_reg (mode
, cop0
);
12360 /* Perform a parallel modulo subtraction. */
12361 t1
= gen_reg_rtx (mode
);
12362 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12364 /* Extract the original sign bit of op0. */
12365 mask
= GEN_INT (-0x80000000);
12366 mask
= gen_rtx_CONST_VECTOR (mode
,
12367 gen_rtvec (4, mask
, mask
, mask
, mask
));
12368 mask
= force_reg (mode
, mask
);
12369 t2
= gen_reg_rtx (mode
);
12370 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12372 /* XOR it back into the result of the subtraction. This results
12373 in the sign bit set iff we saw unsigned underflow. */
12374 x
= gen_reg_rtx (mode
);
12375 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12383 /* Perform a parallel unsigned saturating subtraction. */
12384 x
= gen_reg_rtx (mode
);
12385 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12386 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12393 gcc_unreachable ();
12397 cop1
= CONST0_RTX (mode
);
12400 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12401 operands
[1+negate
], operands
[2-negate
]);
12403 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12404 operands
[2-negate
]);
12408 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12409 true if we should do zero extension, else sign extension. HIGH_P is
12410 true if we want the N/2 high elements, else the low elements. */
12413 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12415 enum machine_mode imode
= GET_MODE (operands
[1]);
12416 rtx (*unpack
)(rtx
, rtx
, rtx
);
12423 unpack
= gen_vec_interleave_highv16qi
;
12425 unpack
= gen_vec_interleave_lowv16qi
;
12429 unpack
= gen_vec_interleave_highv8hi
;
12431 unpack
= gen_vec_interleave_lowv8hi
;
12435 unpack
= gen_vec_interleave_highv4si
;
12437 unpack
= gen_vec_interleave_lowv4si
;
12440 gcc_unreachable ();
12443 dest
= gen_lowpart (imode
, operands
[0]);
12446 se
= force_reg (imode
, CONST0_RTX (imode
));
12448 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12449 operands
[1], pc_rtx
, pc_rtx
);
12451 emit_insn (unpack (dest
, operands
[1], se
));
12454 /* Expand conditional increment or decrement using adb/sbb instructions.
12455 The default case using setcc followed by the conditional move can be
12456 done by generic code. */
12458 ix86_expand_int_addcc (rtx operands
[])
12460 enum rtx_code code
= GET_CODE (operands
[1]);
12462 rtx val
= const0_rtx
;
12463 bool fpcmp
= false;
12464 enum machine_mode mode
= GET_MODE (operands
[0]);
12466 if (operands
[3] != const1_rtx
12467 && operands
[3] != constm1_rtx
)
12469 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12470 ix86_compare_op1
, &compare_op
))
12472 code
= GET_CODE (compare_op
);
12474 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12475 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12478 code
= ix86_fp_compare_code_to_integer (code
);
12485 PUT_CODE (compare_op
,
12486 reverse_condition_maybe_unordered
12487 (GET_CODE (compare_op
)));
12489 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12491 PUT_MODE (compare_op
, mode
);
12493 /* Construct either adc or sbb insn. */
12494 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12496 switch (GET_MODE (operands
[0]))
12499 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12502 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12505 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12508 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12511 gcc_unreachable ();
12516 switch (GET_MODE (operands
[0]))
12519 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12522 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12525 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12528 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12531 gcc_unreachable ();
12534 return 1; /* DONE */
12538 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12539 works for floating pointer parameters and nonoffsetable memories.
12540 For pushes, it returns just stack offsets; the values will be saved
12541 in the right order. Maximally three parts are generated. */
12544 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12549 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12551 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12553 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12554 gcc_assert (size
>= 2 && size
<= 3);
12556 /* Optimize constant pool reference to immediates. This is used by fp
12557 moves, that force all constants to memory to allow combining. */
12558 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12560 rtx tmp
= maybe_get_pool_constant (operand
);
12565 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12567 /* The only non-offsetable memories we handle are pushes. */
12568 int ok
= push_operand (operand
, VOIDmode
);
12572 operand
= copy_rtx (operand
);
12573 PUT_MODE (operand
, Pmode
);
12574 parts
[0] = parts
[1] = parts
[2] = operand
;
12578 if (GET_CODE (operand
) == CONST_VECTOR
)
12580 enum machine_mode imode
= int_mode_for_mode (mode
);
12581 /* Caution: if we looked through a constant pool memory above,
12582 the operand may actually have a different mode now. That's
12583 ok, since we want to pun this all the way back to an integer. */
12584 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12585 gcc_assert (operand
!= NULL
);
12591 if (mode
== DImode
)
12592 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12595 if (REG_P (operand
))
12597 gcc_assert (reload_completed
);
12598 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12599 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12601 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12603 else if (offsettable_memref_p (operand
))
12605 operand
= adjust_address (operand
, SImode
, 0);
12606 parts
[0] = operand
;
12607 parts
[1] = adjust_address (operand
, SImode
, 4);
12609 parts
[2] = adjust_address (operand
, SImode
, 8);
12611 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12616 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12620 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12621 parts
[2] = gen_int_mode (l
[2], SImode
);
12624 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12627 gcc_unreachable ();
12629 parts
[1] = gen_int_mode (l
[1], SImode
);
12630 parts
[0] = gen_int_mode (l
[0], SImode
);
12633 gcc_unreachable ();
12638 if (mode
== TImode
)
12639 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12640 if (mode
== XFmode
|| mode
== TFmode
)
12642 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12643 if (REG_P (operand
))
12645 gcc_assert (reload_completed
);
12646 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12647 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12649 else if (offsettable_memref_p (operand
))
12651 operand
= adjust_address (operand
, DImode
, 0);
12652 parts
[0] = operand
;
12653 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12655 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12660 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12661 real_to_target (l
, &r
, mode
);
12663 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12664 if (HOST_BITS_PER_WIDE_INT
>= 64)
12667 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12668 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12671 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12673 if (upper_mode
== SImode
)
12674 parts
[1] = gen_int_mode (l
[2], SImode
);
12675 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12678 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12679 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12682 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12685 gcc_unreachable ();
12692 /* Emit insns to perform a move or push of DI, DF, and XF values.
12693 Return false when normal moves are needed; true when all required
12694 insns have been emitted. Operands 2-4 contain the input values
12695 int the correct order; operands 5-7 contain the output values. */
12698 ix86_split_long_move (rtx operands
[])
12703 int collisions
= 0;
12704 enum machine_mode mode
= GET_MODE (operands
[0]);
12706 /* The DFmode expanders may ask us to move double.
12707 For 64bit target this is single move. By hiding the fact
12708 here we simplify i386.md splitters. */
12709 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12711 /* Optimize constant pool reference to immediates. This is used by
12712 fp moves, that force all constants to memory to allow combining. */
12714 if (MEM_P (operands
[1])
12715 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12716 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12717 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12718 if (push_operand (operands
[0], VOIDmode
))
12720 operands
[0] = copy_rtx (operands
[0]);
12721 PUT_MODE (operands
[0], Pmode
);
12724 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12725 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12726 emit_move_insn (operands
[0], operands
[1]);
12730 /* The only non-offsettable memory we handle is push. */
12731 if (push_operand (operands
[0], VOIDmode
))
12734 gcc_assert (!MEM_P (operands
[0])
12735 || offsettable_memref_p (operands
[0]));
12737 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12738 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12740 /* When emitting push, take care for source operands on the stack. */
12741 if (push
&& MEM_P (operands
[1])
12742 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12745 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12746 XEXP (part
[1][2], 0));
12747 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12748 XEXP (part
[1][1], 0));
12751 /* We need to do copy in the right order in case an address register
12752 of the source overlaps the destination. */
12753 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12755 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12757 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12760 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12763 /* Collision in the middle part can be handled by reordering. */
12764 if (collisions
== 1 && nparts
== 3
12765 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12768 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12769 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12772 /* If there are more collisions, we can't handle it by reordering.
12773 Do an lea to the last part and use only one colliding move. */
12774 else if (collisions
> 1)
12780 base
= part
[0][nparts
- 1];
12782 /* Handle the case when the last part isn't valid for lea.
12783 Happens in 64-bit mode storing the 12-byte XFmode. */
12784 if (GET_MODE (base
) != Pmode
)
12785 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12787 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12788 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12789 part
[1][1] = replace_equiv_address (part
[1][1],
12790 plus_constant (base
, UNITS_PER_WORD
));
12792 part
[1][2] = replace_equiv_address (part
[1][2],
12793 plus_constant (base
, 8));
12803 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12804 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12805 emit_move_insn (part
[0][2], part
[1][2]);
12810 /* In 64bit mode we don't have 32bit push available. In case this is
12811 register, it is OK - we will just use larger counterpart. We also
12812 retype memory - these comes from attempt to avoid REX prefix on
12813 moving of second half of TFmode value. */
12814 if (GET_MODE (part
[1][1]) == SImode
)
12816 switch (GET_CODE (part
[1][1]))
12819 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12823 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12827 gcc_unreachable ();
12830 if (GET_MODE (part
[1][0]) == SImode
)
12831 part
[1][0] = part
[1][1];
12834 emit_move_insn (part
[0][1], part
[1][1]);
12835 emit_move_insn (part
[0][0], part
[1][0]);
12839 /* Choose correct order to not overwrite the source before it is copied. */
12840 if ((REG_P (part
[0][0])
12841 && REG_P (part
[1][1])
12842 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12844 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12846 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12850 operands
[2] = part
[0][2];
12851 operands
[3] = part
[0][1];
12852 operands
[4] = part
[0][0];
12853 operands
[5] = part
[1][2];
12854 operands
[6] = part
[1][1];
12855 operands
[7] = part
[1][0];
12859 operands
[2] = part
[0][1];
12860 operands
[3] = part
[0][0];
12861 operands
[5] = part
[1][1];
12862 operands
[6] = part
[1][0];
12869 operands
[2] = part
[0][0];
12870 operands
[3] = part
[0][1];
12871 operands
[4] = part
[0][2];
12872 operands
[5] = part
[1][0];
12873 operands
[6] = part
[1][1];
12874 operands
[7] = part
[1][2];
12878 operands
[2] = part
[0][0];
12879 operands
[3] = part
[0][1];
12880 operands
[5] = part
[1][0];
12881 operands
[6] = part
[1][1];
12885 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12888 if (CONST_INT_P (operands
[5])
12889 && operands
[5] != const0_rtx
12890 && REG_P (operands
[2]))
12892 if (CONST_INT_P (operands
[6])
12893 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12894 operands
[6] = operands
[2];
12897 && CONST_INT_P (operands
[7])
12898 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12899 operands
[7] = operands
[2];
12903 && CONST_INT_P (operands
[6])
12904 && operands
[6] != const0_rtx
12905 && REG_P (operands
[3])
12906 && CONST_INT_P (operands
[7])
12907 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12908 operands
[7] = operands
[3];
12911 emit_move_insn (operands
[2], operands
[5]);
12912 emit_move_insn (operands
[3], operands
[6]);
12914 emit_move_insn (operands
[4], operands
[7]);
12919 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12920 left shift by a constant, either using a single shift or
12921 a sequence of add instructions. */
12924 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12928 emit_insn ((mode
== DImode
12930 : gen_adddi3
) (operand
, operand
, operand
));
12932 else if (!optimize_size
12933 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12936 for (i
=0; i
<count
; i
++)
12938 emit_insn ((mode
== DImode
12940 : gen_adddi3
) (operand
, operand
, operand
));
12944 emit_insn ((mode
== DImode
12946 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12950 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12952 rtx low
[2], high
[2];
12954 const int single_width
= mode
== DImode
? 32 : 64;
12956 if (CONST_INT_P (operands
[2]))
12958 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12959 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12961 if (count
>= single_width
)
12963 emit_move_insn (high
[0], low
[1]);
12964 emit_move_insn (low
[0], const0_rtx
);
12966 if (count
> single_width
)
12967 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12971 if (!rtx_equal_p (operands
[0], operands
[1]))
12972 emit_move_insn (operands
[0], operands
[1]);
12973 emit_insn ((mode
== DImode
12975 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12976 ix86_expand_ashl_const (low
[0], count
, mode
);
12981 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12983 if (operands
[1] == const1_rtx
)
12985 /* Assuming we've chosen a QImode capable registers, then 1 << N
12986 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12987 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12989 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12991 ix86_expand_clear (low
[0]);
12992 ix86_expand_clear (high
[0]);
12993 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12995 d
= gen_lowpart (QImode
, low
[0]);
12996 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12997 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12998 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13000 d
= gen_lowpart (QImode
, high
[0]);
13001 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13002 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13003 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13006 /* Otherwise, we can get the same results by manually performing
13007 a bit extract operation on bit 5/6, and then performing the two
13008 shifts. The two methods of getting 0/1 into low/high are exactly
13009 the same size. Avoiding the shift in the bit extract case helps
13010 pentium4 a bit; no one else seems to care much either way. */
13015 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13016 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13018 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13019 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13021 emit_insn ((mode
== DImode
13023 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13024 emit_insn ((mode
== DImode
13026 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13027 emit_move_insn (low
[0], high
[0]);
13028 emit_insn ((mode
== DImode
13030 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13033 emit_insn ((mode
== DImode
13035 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13036 emit_insn ((mode
== DImode
13038 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13042 if (operands
[1] == constm1_rtx
)
13044 /* For -1 << N, we can avoid the shld instruction, because we
13045 know that we're shifting 0...31/63 ones into a -1. */
13046 emit_move_insn (low
[0], constm1_rtx
);
13048 emit_move_insn (high
[0], low
[0]);
13050 emit_move_insn (high
[0], constm1_rtx
);
13054 if (!rtx_equal_p (operands
[0], operands
[1]))
13055 emit_move_insn (operands
[0], operands
[1]);
13057 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13058 emit_insn ((mode
== DImode
13060 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13063 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13065 if (TARGET_CMOVE
&& scratch
)
13067 ix86_expand_clear (scratch
);
13068 emit_insn ((mode
== DImode
13069 ? gen_x86_shift_adj_1
13070 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13073 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13077 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13079 rtx low
[2], high
[2];
13081 const int single_width
= mode
== DImode
? 32 : 64;
13083 if (CONST_INT_P (operands
[2]))
13085 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13086 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13088 if (count
== single_width
* 2 - 1)
13090 emit_move_insn (high
[0], high
[1]);
13091 emit_insn ((mode
== DImode
13093 : gen_ashrdi3
) (high
[0], high
[0],
13094 GEN_INT (single_width
- 1)));
13095 emit_move_insn (low
[0], high
[0]);
13098 else if (count
>= single_width
)
13100 emit_move_insn (low
[0], high
[1]);
13101 emit_move_insn (high
[0], low
[0]);
13102 emit_insn ((mode
== DImode
13104 : gen_ashrdi3
) (high
[0], high
[0],
13105 GEN_INT (single_width
- 1)));
13106 if (count
> single_width
)
13107 emit_insn ((mode
== DImode
13109 : gen_ashrdi3
) (low
[0], low
[0],
13110 GEN_INT (count
- single_width
)));
13114 if (!rtx_equal_p (operands
[0], operands
[1]))
13115 emit_move_insn (operands
[0], operands
[1]);
13116 emit_insn ((mode
== DImode
13118 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13119 emit_insn ((mode
== DImode
13121 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13126 if (!rtx_equal_p (operands
[0], operands
[1]))
13127 emit_move_insn (operands
[0], operands
[1]);
13129 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13131 emit_insn ((mode
== DImode
13133 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13134 emit_insn ((mode
== DImode
13136 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13138 if (TARGET_CMOVE
&& scratch
)
13140 emit_move_insn (scratch
, high
[0]);
13141 emit_insn ((mode
== DImode
13143 : gen_ashrdi3
) (scratch
, scratch
,
13144 GEN_INT (single_width
- 1)));
13145 emit_insn ((mode
== DImode
13146 ? gen_x86_shift_adj_1
13147 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13151 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13156 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13158 rtx low
[2], high
[2];
13160 const int single_width
= mode
== DImode
? 32 : 64;
13162 if (CONST_INT_P (operands
[2]))
13164 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13165 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13167 if (count
>= single_width
)
13169 emit_move_insn (low
[0], high
[1]);
13170 ix86_expand_clear (high
[0]);
13172 if (count
> single_width
)
13173 emit_insn ((mode
== DImode
13175 : gen_lshrdi3
) (low
[0], low
[0],
13176 GEN_INT (count
- single_width
)));
13180 if (!rtx_equal_p (operands
[0], operands
[1]))
13181 emit_move_insn (operands
[0], operands
[1]);
13182 emit_insn ((mode
== DImode
13184 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13185 emit_insn ((mode
== DImode
13187 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13192 if (!rtx_equal_p (operands
[0], operands
[1]))
13193 emit_move_insn (operands
[0], operands
[1]);
13195 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13197 emit_insn ((mode
== DImode
13199 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13200 emit_insn ((mode
== DImode
13202 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13204 /* Heh. By reversing the arguments, we can reuse this pattern. */
13205 if (TARGET_CMOVE
&& scratch
)
13207 ix86_expand_clear (scratch
);
13208 emit_insn ((mode
== DImode
13209 ? gen_x86_shift_adj_1
13210 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13214 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13218 /* Predict just emitted jump instruction to be taken with probability PROB. */
13220 predict_jump (int prob
)
13222 rtx insn
= get_last_insn ();
13223 gcc_assert (JUMP_P (insn
));
13225 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13230 /* Helper function for the string operations below. Dest VARIABLE whether
13231 it is aligned to VALUE bytes. If true, jump to the label. */
13233 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13235 rtx label
= gen_label_rtx ();
13236 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13237 if (GET_MODE (variable
) == DImode
)
13238 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13240 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13241 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13244 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13246 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13250 /* Adjust COUNTER by the VALUE. */
13252 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13254 if (GET_MODE (countreg
) == DImode
)
13255 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13257 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13260 /* Zero extend possibly SImode EXP to Pmode register. */
13262 ix86_zero_extend_to_Pmode (rtx exp
)
13265 if (GET_MODE (exp
) == VOIDmode
)
13266 return force_reg (Pmode
, exp
);
13267 if (GET_MODE (exp
) == Pmode
)
13268 return copy_to_mode_reg (Pmode
, exp
);
13269 r
= gen_reg_rtx (Pmode
);
13270 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13274 /* Divide COUNTREG by SCALE. */
13276 scale_counter (rtx countreg
, int scale
)
13279 rtx piece_size_mask
;
13283 if (CONST_INT_P (countreg
))
13284 return GEN_INT (INTVAL (countreg
) / scale
);
13285 gcc_assert (REG_P (countreg
));
13287 piece_size_mask
= GEN_INT (scale
- 1);
13288 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13289 GEN_INT (exact_log2 (scale
)),
13290 NULL
, 1, OPTAB_DIRECT
);
13294 /* When SRCPTR is non-NULL, output simple loop to move memory
13295 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13296 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13297 equivalent loop to set memory by VALUE (supposed to be in MODE).
13299 The size is rounded down to whole number of chunk size moved at once.
13300 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13304 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13305 rtx destptr
, rtx srcptr
, rtx value
,
13306 rtx count
, enum machine_mode mode
, int unroll
,
13309 rtx out_label
, top_label
, iter
, tmp
;
13310 enum machine_mode iter_mode
;
13311 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13312 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13318 iter_mode
= GET_MODE (count
);
13319 if (iter_mode
== VOIDmode
)
13320 iter_mode
= word_mode
;
13322 top_label
= gen_label_rtx ();
13323 out_label
= gen_label_rtx ();
13324 iter
= gen_reg_rtx (iter_mode
);
13326 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13327 NULL
, 1, OPTAB_DIRECT
);
13328 /* Those two should combine. */
13329 if (piece_size
== const1_rtx
)
13331 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13333 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13335 emit_move_insn (iter
, const0_rtx
);
13337 emit_label (top_label
);
13339 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13340 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13341 destmem
= change_address (destmem
, mode
, x_addr
);
13345 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13346 srcmem
= change_address (srcmem
, mode
, y_addr
);
13348 /* When unrolling for chips that reorder memory reads and writes,
13349 we can save registers by using single temporary.
13350 Also using 4 temporaries is overkill in 32bit mode. */
13351 if (!TARGET_64BIT
&& 0)
13353 for (i
= 0; i
< unroll
; i
++)
13358 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13360 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13362 emit_move_insn (destmem
, srcmem
);
13368 gcc_assert (unroll
<= 4);
13369 for (i
= 0; i
< unroll
; i
++)
13371 tmpreg
[i
] = gen_reg_rtx (mode
);
13375 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13377 emit_move_insn (tmpreg
[i
], srcmem
);
13379 for (i
= 0; i
< unroll
; i
++)
13384 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13386 emit_move_insn (destmem
, tmpreg
[i
]);
13391 for (i
= 0; i
< unroll
; i
++)
13395 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13396 emit_move_insn (destmem
, value
);
13399 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13400 true, OPTAB_LIB_WIDEN
);
13402 emit_move_insn (iter
, tmp
);
13404 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13406 if (expected_size
!= -1)
13408 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13409 if (expected_size
== 0)
13411 else if (expected_size
> REG_BR_PROB_BASE
)
13412 predict_jump (REG_BR_PROB_BASE
- 1);
13414 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13417 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13418 iter
= ix86_zero_extend_to_Pmode (iter
);
13419 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13420 true, OPTAB_LIB_WIDEN
);
13421 if (tmp
!= destptr
)
13422 emit_move_insn (destptr
, tmp
);
13425 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13426 true, OPTAB_LIB_WIDEN
);
13428 emit_move_insn (srcptr
, tmp
);
13430 emit_label (out_label
);
13433 /* Output "rep; mov" instruction.
13434 Arguments have same meaning as for previous function */
13436 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13437 rtx destptr
, rtx srcptr
,
13439 enum machine_mode mode
)
13445 /* If the size is known, it is shorter to use rep movs. */
13446 if (mode
== QImode
&& CONST_INT_P (count
)
13447 && !(INTVAL (count
) & 3))
13450 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13451 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13452 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13453 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13454 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13455 if (mode
!= QImode
)
13457 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13458 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13459 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13460 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13461 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13462 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13466 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13467 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13469 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13473 /* Output "rep; stos" instruction.
13474 Arguments have same meaning as for previous function */
13476 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13478 enum machine_mode mode
)
13483 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13484 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13485 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13486 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13487 if (mode
!= QImode
)
13489 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13490 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13491 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13494 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13495 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13499 emit_strmov (rtx destmem
, rtx srcmem
,
13500 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13502 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13503 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13504 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13507 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13509 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13510 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13513 if (CONST_INT_P (count
))
13515 HOST_WIDE_INT countval
= INTVAL (count
);
13518 if ((countval
& 0x16) && max_size
> 16)
13522 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13523 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13526 gcc_unreachable ();
13529 if ((countval
& 0x08) && max_size
> 8)
13532 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13535 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13536 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 4);
13540 if ((countval
& 0x04) && max_size
> 4)
13542 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13545 if ((countval
& 0x02) && max_size
> 2)
13547 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13550 if ((countval
& 0x01) && max_size
> 1)
13552 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13559 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13560 count
, 1, OPTAB_DIRECT
);
13561 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13562 count
, QImode
, 1, 4);
13566 /* When there are stringops, we can cheaply increase dest and src pointers.
13567 Otherwise we save code size by maintaining offset (zero is readily
13568 available from preceding rep operation) and using x86 addressing modes.
13570 if (TARGET_SINGLE_STRINGOP
)
13574 rtx label
= ix86_expand_aligntest (count
, 4, true);
13575 src
= change_address (srcmem
, SImode
, srcptr
);
13576 dest
= change_address (destmem
, SImode
, destptr
);
13577 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13578 emit_label (label
);
13579 LABEL_NUSES (label
) = 1;
13583 rtx label
= ix86_expand_aligntest (count
, 2, true);
13584 src
= change_address (srcmem
, HImode
, srcptr
);
13585 dest
= change_address (destmem
, HImode
, destptr
);
13586 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13587 emit_label (label
);
13588 LABEL_NUSES (label
) = 1;
13592 rtx label
= ix86_expand_aligntest (count
, 1, true);
13593 src
= change_address (srcmem
, QImode
, srcptr
);
13594 dest
= change_address (destmem
, QImode
, destptr
);
13595 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13596 emit_label (label
);
13597 LABEL_NUSES (label
) = 1;
13602 rtx offset
= force_reg (Pmode
, const0_rtx
);
13607 rtx label
= ix86_expand_aligntest (count
, 4, true);
13608 src
= change_address (srcmem
, SImode
, srcptr
);
13609 dest
= change_address (destmem
, SImode
, destptr
);
13610 emit_move_insn (dest
, src
);
13611 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13612 true, OPTAB_LIB_WIDEN
);
13614 emit_move_insn (offset
, tmp
);
13615 emit_label (label
);
13616 LABEL_NUSES (label
) = 1;
13620 rtx label
= ix86_expand_aligntest (count
, 2, true);
13621 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13622 src
= change_address (srcmem
, HImode
, tmp
);
13623 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13624 dest
= change_address (destmem
, HImode
, tmp
);
13625 emit_move_insn (dest
, src
);
13626 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13627 true, OPTAB_LIB_WIDEN
);
13629 emit_move_insn (offset
, tmp
);
13630 emit_label (label
);
13631 LABEL_NUSES (label
) = 1;
13635 rtx label
= ix86_expand_aligntest (count
, 1, true);
13636 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13637 src
= change_address (srcmem
, QImode
, tmp
);
13638 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13639 dest
= change_address (destmem
, QImode
, tmp
);
13640 emit_move_insn (dest
, src
);
13641 emit_label (label
);
13642 LABEL_NUSES (label
) = 1;
13647 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13649 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13650 rtx count
, int max_size
)
13653 expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13654 count
, 1, OPTAB_DIRECT
);
13655 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13656 gen_lowpart (QImode
, value
), count
, QImode
,
13660 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13662 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13666 if (CONST_INT_P (count
))
13668 HOST_WIDE_INT countval
= INTVAL (count
);
13671 if ((countval
& 0x16) && max_size
> 16)
13675 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13676 emit_insn (gen_strset (destptr
, dest
, value
));
13677 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13678 emit_insn (gen_strset (destptr
, dest
, value
));
13681 gcc_unreachable ();
13684 if ((countval
& 0x08) && max_size
> 8)
13688 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13689 emit_insn (gen_strset (destptr
, dest
, value
));
13693 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13694 emit_insn (gen_strset (destptr
, dest
, value
));
13695 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13696 emit_insn (gen_strset (destptr
, dest
, value
));
13700 if ((countval
& 0x04) && max_size
> 4)
13702 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13703 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13706 if ((countval
& 0x02) && max_size
> 2)
13708 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13709 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13712 if ((countval
& 0x01) && max_size
> 1)
13714 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13715 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13722 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13727 rtx label
= ix86_expand_aligntest (count
, 16, true);
13730 dest
= change_address (destmem
, DImode
, destptr
);
13731 emit_insn (gen_strset (destptr
, dest
, value
));
13732 emit_insn (gen_strset (destptr
, dest
, value
));
13736 dest
= change_address (destmem
, SImode
, destptr
);
13737 emit_insn (gen_strset (destptr
, dest
, value
));
13738 emit_insn (gen_strset (destptr
, dest
, value
));
13739 emit_insn (gen_strset (destptr
, dest
, value
));
13740 emit_insn (gen_strset (destptr
, dest
, value
));
13742 emit_label (label
);
13743 LABEL_NUSES (label
) = 1;
13747 rtx label
= ix86_expand_aligntest (count
, 8, true);
13750 dest
= change_address (destmem
, DImode
, destptr
);
13751 emit_insn (gen_strset (destptr
, dest
, value
));
13755 dest
= change_address (destmem
, SImode
, destptr
);
13756 emit_insn (gen_strset (destptr
, dest
, value
));
13757 emit_insn (gen_strset (destptr
, dest
, value
));
13759 emit_label (label
);
13760 LABEL_NUSES (label
) = 1;
13764 rtx label
= ix86_expand_aligntest (count
, 4, true);
13765 dest
= change_address (destmem
, SImode
, destptr
);
13766 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13767 emit_label (label
);
13768 LABEL_NUSES (label
) = 1;
13772 rtx label
= ix86_expand_aligntest (count
, 2, true);
13773 dest
= change_address (destmem
, HImode
, destptr
);
13774 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13775 emit_label (label
);
13776 LABEL_NUSES (label
) = 1;
13780 rtx label
= ix86_expand_aligntest (count
, 1, true);
13781 dest
= change_address (destmem
, QImode
, destptr
);
13782 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13783 emit_label (label
);
13784 LABEL_NUSES (label
) = 1;
13788 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13789 DESIRED_ALIGNMENT. */
13791 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13792 rtx destptr
, rtx srcptr
, rtx count
,
13793 int align
, int desired_alignment
)
13795 if (align
<= 1 && desired_alignment
> 1)
13797 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13798 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13799 destmem
= change_address (destmem
, QImode
, destptr
);
13800 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13801 ix86_adjust_counter (count
, 1);
13802 emit_label (label
);
13803 LABEL_NUSES (label
) = 1;
13805 if (align
<= 2 && desired_alignment
> 2)
13807 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13808 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13809 destmem
= change_address (destmem
, HImode
, destptr
);
13810 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13811 ix86_adjust_counter (count
, 2);
13812 emit_label (label
);
13813 LABEL_NUSES (label
) = 1;
13815 if (align
<= 4 && desired_alignment
> 4)
13817 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13818 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13819 destmem
= change_address (destmem
, SImode
, destptr
);
13820 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13821 ix86_adjust_counter (count
, 4);
13822 emit_label (label
);
13823 LABEL_NUSES (label
) = 1;
13825 gcc_assert (desired_alignment
<= 8);
13828 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13829 DESIRED_ALIGNMENT. */
13831 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13832 int align
, int desired_alignment
)
13834 if (align
<= 1 && desired_alignment
> 1)
13836 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13837 destmem
= change_address (destmem
, QImode
, destptr
);
13838 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13839 ix86_adjust_counter (count
, 1);
13840 emit_label (label
);
13841 LABEL_NUSES (label
) = 1;
13843 if (align
<= 2 && desired_alignment
> 2)
13845 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13846 destmem
= change_address (destmem
, HImode
, destptr
);
13847 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13848 ix86_adjust_counter (count
, 2);
13849 emit_label (label
);
13850 LABEL_NUSES (label
) = 1;
13852 if (align
<= 4 && desired_alignment
> 4)
13854 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13855 destmem
= change_address (destmem
, SImode
, destptr
);
13856 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13857 ix86_adjust_counter (count
, 4);
13858 emit_label (label
);
13859 LABEL_NUSES (label
) = 1;
13861 gcc_assert (desired_alignment
<= 8);
13864 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13865 static enum stringop_alg
13866 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
13867 int *dynamic_check
)
13869 const struct stringop_algs
* algs
;
13871 *dynamic_check
= -1;
13873 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
13875 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
13876 if (stringop_alg
!= no_stringop
)
13877 return stringop_alg
;
13878 /* rep; movq or rep; movl is the smallest variant. */
13879 else if (optimize_size
)
13881 if (!count
|| (count
& 3))
13882 return rep_prefix_1_byte
;
13884 return rep_prefix_4_byte
;
13886 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13888 else if (expected_size
!= -1 && expected_size
< 4)
13889 return loop_1_byte
;
13890 else if (expected_size
!= -1)
13893 enum stringop_alg alg
= libcall
;
13894 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13896 gcc_assert (algs
->size
[i
].max
);
13897 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
13899 if (algs
->size
[i
].alg
!= libcall
)
13900 alg
= algs
->size
[i
].alg
;
13901 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13902 last non-libcall inline algorithm. */
13903 if (TARGET_INLINE_ALL_STRINGOPS
)
13905 /* When the current size is best to be copied by a libcall,
13906 but we are still forced to inline, run the heuristic bellow
13907 that will pick code for medium sized blocks. */
13908 if (alg
!= libcall
)
13913 return algs
->size
[i
].alg
;
13916 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
13918 /* When asked to inline the call anyway, try to pick meaningful choice.
13919 We look for maximal size of block that is faster to copy by hand and
13920 take blocks of at most of that size guessing that average size will
13921 be roughly half of the block.
13923 If this turns out to be bad, we might simply specify the preferred
13924 choice in ix86_costs. */
13925 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13926 && algs
->unknown_size
== libcall
)
13929 enum stringop_alg alg
;
13932 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13933 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
13934 max
= algs
->size
[i
].max
;
13937 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
13938 gcc_assert (*dynamic_check
== -1);
13939 gcc_assert (alg
!= libcall
);
13940 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13941 *dynamic_check
= max
;
13944 return algs
->unknown_size
;
13947 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13948 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13950 decide_alignment (int align
,
13951 enum stringop_alg alg
,
13954 int desired_align
= 0;
13958 gcc_unreachable ();
13960 case unrolled_loop
:
13961 desired_align
= GET_MODE_SIZE (Pmode
);
13963 case rep_prefix_8_byte
:
13966 case rep_prefix_4_byte
:
13967 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13968 copying whole cacheline at once. */
13969 if (TARGET_PENTIUMPRO
)
13974 case rep_prefix_1_byte
:
13975 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13976 copying whole cacheline at once. */
13977 if (TARGET_PENTIUMPRO
)
13991 if (desired_align
< align
)
13992 desired_align
= align
;
13993 if (expected_size
!= -1 && expected_size
< 4)
13994 desired_align
= align
;
13995 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14008 /* Expand string move (memcpy) operation. Use i386 string operations when
14009 profitable. expand_clrmem contains similar code. The code depends upon
14010 architecture, block size and alignment, but always has the same
14013 1) Prologue guard: Conditional that jumps up to epilogues for small
14014 blocks that can be handled by epilogue alone. This is faster but
14015 also needed for correctness, since prologue assume the block is larger
14016 than the desired alignment.
14018 Optional dynamic check for size and libcall for large
14019 blocks is emitted here too, with -minline-stringops-dynamically.
14021 2) Prologue: copy first few bytes in order to get destination aligned
14022 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14023 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14024 We emit either a jump tree on power of two sized blocks, or a byte loop.
14026 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14027 with specified algorithm.
14029 4) Epilogue: code copying tail of the block that is too small to be
14030 handled by main body (or up to size guarded by prologue guard). */
14033 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14034 rtx expected_align_exp
, rtx expected_size_exp
)
14040 rtx jump_around_label
= NULL
;
14041 HOST_WIDE_INT align
= 1;
14042 unsigned HOST_WIDE_INT count
= 0;
14043 HOST_WIDE_INT expected_size
= -1;
14044 int size_needed
= 0, epilogue_size_needed
;
14045 int desired_align
= 0;
14046 enum stringop_alg alg
;
14049 if (CONST_INT_P (align_exp
))
14050 align
= INTVAL (align_exp
);
14051 /* i386 can do misaligned access on reasonably increased cost. */
14052 if (CONST_INT_P (expected_align_exp
)
14053 && INTVAL (expected_align_exp
) > align
)
14054 align
= INTVAL (expected_align_exp
);
14055 if (CONST_INT_P (count_exp
))
14056 count
= expected_size
= INTVAL (count_exp
);
14057 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14058 expected_size
= INTVAL (expected_size_exp
);
14060 /* Step 0: Decide on preferred algorithm, desired alignment and
14061 size of chunks to be copied by main loop. */
14063 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14064 desired_align
= decide_alignment (align
, alg
, expected_size
);
14066 if (!TARGET_ALIGN_STRINGOPS
)
14067 align
= desired_align
;
14069 if (alg
== libcall
)
14071 gcc_assert (alg
!= no_stringop
);
14073 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14074 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14075 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14080 gcc_unreachable ();
14082 size_needed
= GET_MODE_SIZE (Pmode
);
14084 case unrolled_loop
:
14085 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14087 case rep_prefix_8_byte
:
14090 case rep_prefix_4_byte
:
14093 case rep_prefix_1_byte
:
14099 epilogue_size_needed
= size_needed
;
14101 /* Step 1: Prologue guard. */
14103 /* Alignment code needs count to be in register. */
14104 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14106 enum machine_mode mode
= SImode
;
14107 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14109 count_exp
= force_reg (mode
, count_exp
);
14111 gcc_assert (desired_align
>= 1 && align
>= 1);
14113 /* Ensure that alignment prologue won't copy past end of block. */
14114 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14117 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14119 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14120 Make sure it is power of 2. */
14121 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14123 label
= gen_label_rtx ();
14124 emit_cmp_and_jump_insns (count_exp
,
14125 GEN_INT (epilogue_size_needed
),
14126 LTU
, 0, GET_MODE (count_exp
), 1, label
);
14127 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14128 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14130 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14132 /* Emit code to decide on runtime whether library call or inline should be
14134 if (dynamic_check
!= -1)
14136 rtx hot_label
= gen_label_rtx ();
14137 jump_around_label
= gen_label_rtx ();
14138 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14139 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14140 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14141 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14142 emit_jump (jump_around_label
);
14143 emit_label (hot_label
);
14146 /* Step 2: Alignment prologue. */
14148 if (desired_align
> align
)
14150 /* Except for the first move in epilogue, we no longer know
14151 constant offset in aliasing info. It don't seems to worth
14152 the pain to maintain it for the first move, so throw away
14154 src
= change_address (src
, BLKmode
, srcreg
);
14155 dst
= change_address (dst
, BLKmode
, destreg
);
14156 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14159 if (label
&& size_needed
== 1)
14161 emit_label (label
);
14162 LABEL_NUSES (label
) = 1;
14166 /* Step 3: Main loop. */
14172 gcc_unreachable ();
14174 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14175 count_exp
, QImode
, 1, expected_size
);
14178 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14179 count_exp
, Pmode
, 1, expected_size
);
14181 case unrolled_loop
:
14182 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14183 registers for 4 temporaries anyway. */
14184 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14185 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14188 case rep_prefix_8_byte
:
14189 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14192 case rep_prefix_4_byte
:
14193 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14196 case rep_prefix_1_byte
:
14197 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14201 /* Adjust properly the offset of src and dest memory for aliasing. */
14202 if (CONST_INT_P (count_exp
))
14204 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14205 (count
/ size_needed
) * size_needed
);
14206 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14207 (count
/ size_needed
) * size_needed
);
14211 src
= change_address (src
, BLKmode
, srcreg
);
14212 dst
= change_address (dst
, BLKmode
, destreg
);
14215 /* Step 4: Epilogue to copy the remaining bytes. */
14219 /* When the main loop is done, COUNT_EXP might hold original count,
14220 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14221 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14222 bytes. Compensate if needed. */
14224 if (size_needed
< epilogue_size_needed
)
14227 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14228 GEN_INT (size_needed
- 1), count_exp
, 1,
14230 if (tmp
!= count_exp
)
14231 emit_move_insn (count_exp
, tmp
);
14233 emit_label (label
);
14234 LABEL_NUSES (label
) = 1;
14237 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14238 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14239 epilogue_size_needed
);
14240 if (jump_around_label
)
14241 emit_label (jump_around_label
);
14245 /* Helper function for memcpy. For QImode value 0xXY produce
14246 0xXYXYXYXY of wide specified by MODE. This is essentially
14247 a * 0x10101010, but we can do slightly better than
14248 synth_mult by unwinding the sequence by hand on CPUs with
14251 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14253 enum machine_mode valmode
= GET_MODE (val
);
14255 int nops
= mode
== DImode
? 3 : 2;
14257 gcc_assert (mode
== SImode
|| mode
== DImode
);
14258 if (val
== const0_rtx
)
14259 return copy_to_mode_reg (mode
, const0_rtx
);
14260 if (CONST_INT_P (val
))
14262 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14266 if (mode
== DImode
)
14267 v
|= (v
<< 16) << 16;
14268 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14271 if (valmode
== VOIDmode
)
14273 if (valmode
!= QImode
)
14274 val
= gen_lowpart (QImode
, val
);
14275 if (mode
== QImode
)
14277 if (!TARGET_PARTIAL_REG_STALL
)
14279 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14280 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14281 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14282 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14284 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14285 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14286 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14291 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14293 if (!TARGET_PARTIAL_REG_STALL
)
14294 if (mode
== SImode
)
14295 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14297 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14300 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14301 NULL
, 1, OPTAB_DIRECT
);
14303 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14305 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14306 NULL
, 1, OPTAB_DIRECT
);
14307 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14308 if (mode
== SImode
)
14310 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14311 NULL
, 1, OPTAB_DIRECT
);
14312 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14317 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14318 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14319 alignment from ALIGN to DESIRED_ALIGN. */
14321 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14326 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14327 promoted_val
= promote_duplicated_reg (DImode
, val
);
14328 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14329 promoted_val
= promote_duplicated_reg (SImode
, val
);
14330 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14331 promoted_val
= promote_duplicated_reg (HImode
, val
);
14333 promoted_val
= val
;
14335 return promoted_val
;
14338 /* Expand string clear operation (bzero). Use i386 string operations when
14339 profitable. See expand_movmem comment for explanation of individual
14340 steps performed. */
14342 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14343 rtx expected_align_exp
, rtx expected_size_exp
)
14348 rtx jump_around_label
= NULL
;
14349 HOST_WIDE_INT align
= 1;
14350 unsigned HOST_WIDE_INT count
= 0;
14351 HOST_WIDE_INT expected_size
= -1;
14352 int size_needed
= 0, epilogue_size_needed
;
14353 int desired_align
= 0;
14354 enum stringop_alg alg
;
14355 rtx promoted_val
= NULL
;
14356 bool force_loopy_epilogue
= false;
14359 if (CONST_INT_P (align_exp
))
14360 align
= INTVAL (align_exp
);
14361 /* i386 can do misaligned access on reasonably increased cost. */
14362 if (CONST_INT_P (expected_align_exp
)
14363 && INTVAL (expected_align_exp
) > align
)
14364 align
= INTVAL (expected_align_exp
);
14365 if (CONST_INT_P (count_exp
))
14366 count
= expected_size
= INTVAL (count_exp
);
14367 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14368 expected_size
= INTVAL (expected_size_exp
);
14370 /* Step 0: Decide on preferred algorithm, desired alignment and
14371 size of chunks to be copied by main loop. */
14373 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14374 desired_align
= decide_alignment (align
, alg
, expected_size
);
14376 if (!TARGET_ALIGN_STRINGOPS
)
14377 align
= desired_align
;
14379 if (alg
== libcall
)
14381 gcc_assert (alg
!= no_stringop
);
14383 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14384 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14389 gcc_unreachable ();
14391 size_needed
= GET_MODE_SIZE (Pmode
);
14393 case unrolled_loop
:
14394 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14396 case rep_prefix_8_byte
:
14399 case rep_prefix_4_byte
:
14402 case rep_prefix_1_byte
:
14407 epilogue_size_needed
= size_needed
;
14409 /* Step 1: Prologue guard. */
14411 /* Alignment code needs count to be in register. */
14412 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14414 enum machine_mode mode
= SImode
;
14415 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14417 count_exp
= force_reg (mode
, count_exp
);
14419 /* Do the cheap promotion to allow better CSE across the
14420 main loop and epilogue (ie one load of the big constant in the
14421 front of all code. */
14422 if (CONST_INT_P (val_exp
))
14423 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14424 desired_align
, align
);
14425 /* Ensure that alignment prologue won't copy past end of block. */
14426 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14429 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14431 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14432 Make sure it is power of 2. */
14433 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14435 /* To improve performance of small blocks, we jump around the VAL
14436 promoting mode. This mean that if the promoted VAL is not constant,
14437 we might not use it in the epilogue and have to use byte
14439 if (epilogue_size_needed
> 2 && !promoted_val
)
14440 force_loopy_epilogue
= true;
14441 label
= gen_label_rtx ();
14442 emit_cmp_and_jump_insns (count_exp
,
14443 GEN_INT (epilogue_size_needed
),
14444 LTU
, 0, GET_MODE (count_exp
), 1, label
);
14445 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14446 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14448 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14450 if (dynamic_check
!= -1)
14452 rtx hot_label
= gen_label_rtx ();
14453 jump_around_label
= gen_label_rtx ();
14454 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14455 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14456 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14457 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14458 emit_jump (jump_around_label
);
14459 emit_label (hot_label
);
14462 /* Step 2: Alignment prologue. */
14464 /* Do the expensive promotion once we branched off the small blocks. */
14466 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14467 desired_align
, align
);
14468 gcc_assert (desired_align
>= 1 && align
>= 1);
14470 if (desired_align
> align
)
14472 /* Except for the first move in epilogue, we no longer know
14473 constant offset in aliasing info. It don't seems to worth
14474 the pain to maintain it for the first move, so throw away
14476 dst
= change_address (dst
, BLKmode
, destreg
);
14477 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14480 if (label
&& size_needed
== 1)
14482 emit_label (label
);
14483 LABEL_NUSES (label
) = 1;
14487 /* Step 3: Main loop. */
14493 gcc_unreachable ();
14495 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14496 count_exp
, QImode
, 1, expected_size
);
14499 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14500 count_exp
, Pmode
, 1, expected_size
);
14502 case unrolled_loop
:
14503 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14504 count_exp
, Pmode
, 4, expected_size
);
14506 case rep_prefix_8_byte
:
14507 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14510 case rep_prefix_4_byte
:
14511 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14514 case rep_prefix_1_byte
:
14515 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14519 /* Adjust properly the offset of src and dest memory for aliasing. */
14520 if (CONST_INT_P (count_exp
))
14521 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14522 (count
/ size_needed
) * size_needed
);
14524 dst
= change_address (dst
, BLKmode
, destreg
);
14526 /* Step 4: Epilogue to copy the remaining bytes. */
14530 /* When the main loop is done, COUNT_EXP might hold original count,
14531 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14532 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14533 bytes. Compensate if needed. */
14535 if (size_needed
< desired_align
- align
)
14538 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14539 GEN_INT (size_needed
- 1), count_exp
, 1,
14541 size_needed
= desired_align
- align
+ 1;
14542 if (tmp
!= count_exp
)
14543 emit_move_insn (count_exp
, tmp
);
14545 emit_label (label
);
14546 LABEL_NUSES (label
) = 1;
14548 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14550 if (force_loopy_epilogue
)
14551 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14554 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14557 if (jump_around_label
)
14558 emit_label (jump_around_label
);
14562 /* Expand strlen. */
14564 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14566 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14568 /* The generic case of strlen expander is long. Avoid it's
14569 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14571 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14572 && !TARGET_INLINE_ALL_STRINGOPS
14574 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14577 addr
= force_reg (Pmode
, XEXP (src
, 0));
14578 scratch1
= gen_reg_rtx (Pmode
);
14580 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14583 /* Well it seems that some optimizer does not combine a call like
14584 foo(strlen(bar), strlen(bar));
14585 when the move and the subtraction is done here. It does calculate
14586 the length just once when these instructions are done inside of
14587 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14588 often used and I use one fewer register for the lifetime of
14589 output_strlen_unroll() this is better. */
14591 emit_move_insn (out
, addr
);
14593 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14595 /* strlensi_unroll_1 returns the address of the zero at the end of
14596 the string, like memchr(), so compute the length by subtracting
14597 the start address. */
14599 emit_insn (gen_subdi3 (out
, out
, addr
));
14601 emit_insn (gen_subsi3 (out
, out
, addr
));
14606 scratch2
= gen_reg_rtx (Pmode
);
14607 scratch3
= gen_reg_rtx (Pmode
);
14608 scratch4
= force_reg (Pmode
, constm1_rtx
);
14610 emit_move_insn (scratch3
, addr
);
14611 eoschar
= force_reg (QImode
, eoschar
);
14613 src
= replace_equiv_address_nv (src
, scratch3
);
14615 /* If .md starts supporting :P, this can be done in .md. */
14616 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14617 scratch4
), UNSPEC_SCAS
);
14618 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14621 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14622 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14626 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14627 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14633 /* Expand the appropriate insns for doing strlen if not just doing
14636 out = result, initialized with the start address
14637 align_rtx = alignment of the address.
14638 scratch = scratch register, initialized with the startaddress when
14639 not aligned, otherwise undefined
14641 This is just the body. It needs the initializations mentioned above and
14642 some address computing at the end. These things are done in i386.md. */
14645 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14649 rtx align_2_label
= NULL_RTX
;
14650 rtx align_3_label
= NULL_RTX
;
14651 rtx align_4_label
= gen_label_rtx ();
14652 rtx end_0_label
= gen_label_rtx ();
14654 rtx tmpreg
= gen_reg_rtx (SImode
);
14655 rtx scratch
= gen_reg_rtx (SImode
);
14659 if (CONST_INT_P (align_rtx
))
14660 align
= INTVAL (align_rtx
);
14662 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14664 /* Is there a known alignment and is it less than 4? */
14667 rtx scratch1
= gen_reg_rtx (Pmode
);
14668 emit_move_insn (scratch1
, out
);
14669 /* Is there a known alignment and is it not 2? */
14672 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14673 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14675 /* Leave just the 3 lower bits. */
14676 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14677 NULL_RTX
, 0, OPTAB_WIDEN
);
14679 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14680 Pmode
, 1, align_4_label
);
14681 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14682 Pmode
, 1, align_2_label
);
14683 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14684 Pmode
, 1, align_3_label
);
14688 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14689 check if is aligned to 4 - byte. */
14691 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14692 NULL_RTX
, 0, OPTAB_WIDEN
);
14694 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14695 Pmode
, 1, align_4_label
);
14698 mem
= change_address (src
, QImode
, out
);
14700 /* Now compare the bytes. */
14702 /* Compare the first n unaligned byte on a byte per byte basis. */
14703 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14704 QImode
, 1, end_0_label
);
14706 /* Increment the address. */
14708 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14710 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14712 /* Not needed with an alignment of 2 */
14715 emit_label (align_2_label
);
14717 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14721 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14723 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14725 emit_label (align_3_label
);
14728 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14732 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14734 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14737 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14738 align this loop. It gives only huge programs, but does not help to
14740 emit_label (align_4_label
);
14742 mem
= change_address (src
, SImode
, out
);
14743 emit_move_insn (scratch
, mem
);
14745 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14747 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14749 /* This formula yields a nonzero result iff one of the bytes is zero.
14750 This saves three branches inside loop and many cycles. */
14752 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14753 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14754 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14755 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14756 gen_int_mode (0x80808080, SImode
)));
14757 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14762 rtx reg
= gen_reg_rtx (SImode
);
14763 rtx reg2
= gen_reg_rtx (Pmode
);
14764 emit_move_insn (reg
, tmpreg
);
14765 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14767 /* If zero is not in the first two bytes, move two bytes forward. */
14768 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14769 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14770 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14771 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14772 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14775 /* Emit lea manually to avoid clobbering of flags. */
14776 emit_insn (gen_rtx_SET (SImode
, reg2
,
14777 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14779 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14780 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14781 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14782 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14789 rtx end_2_label
= gen_label_rtx ();
14790 /* Is zero in the first two bytes? */
14792 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14793 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14794 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14795 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14796 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14798 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14799 JUMP_LABEL (tmp
) = end_2_label
;
14801 /* Not in the first two. Move two bytes forward. */
14802 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14804 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14806 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14808 emit_label (end_2_label
);
14812 /* Avoid branch in fixing the byte. */
14813 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14814 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
14815 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14817 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14819 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14821 emit_label (end_0_label
);
14825 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14826 rtx callarg2 ATTRIBUTE_UNUSED
,
14827 rtx pop
, int sibcall
)
14829 rtx use
= NULL
, call
;
14831 if (pop
== const0_rtx
)
14833 gcc_assert (!TARGET_64BIT
|| !pop
);
14835 if (TARGET_MACHO
&& !TARGET_64BIT
)
14838 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14839 fnaddr
= machopic_indirect_call_target (fnaddr
);
14844 /* Static functions and indirect calls don't need the pic register. */
14845 if (! TARGET_64BIT
&& flag_pic
14846 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14847 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14848 use_reg (&use
, pic_offset_table_rtx
);
14851 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14853 rtx al
= gen_rtx_REG (QImode
, 0);
14854 emit_move_insn (al
, callarg2
);
14855 use_reg (&use
, al
);
14858 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14860 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14861 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14863 if (sibcall
&& TARGET_64BIT
14864 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
14867 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14868 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
14869 emit_move_insn (fnaddr
, addr
);
14870 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14873 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
14875 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
14878 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
14879 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
14880 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
14883 call
= emit_call_insn (call
);
14885 CALL_INSN_FUNCTION_USAGE (call
) = use
;
14889 /* Clear stack slot assignments remembered from previous functions.
14890 This is called from INIT_EXPANDERS once before RTL is emitted for each
14893 static struct machine_function
*
14894 ix86_init_machine_status (void)
14896 struct machine_function
*f
;
14898 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
14899 f
->use_fast_prologue_epilogue_nregs
= -1;
14900 f
->tls_descriptor_call_expanded_p
= 0;
14905 /* Return a MEM corresponding to a stack slot with mode MODE.
14906 Allocate a new slot if necessary.
14908 The RTL for a function can have several slots available: N is
14909 which slot to use. */
14912 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
14914 struct stack_local_entry
*s
;
14916 gcc_assert (n
< MAX_386_STACK_LOCALS
);
14918 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
14919 if (s
->mode
== mode
&& s
->n
== n
)
14920 return copy_rtx (s
->rtl
);
14922 s
= (struct stack_local_entry
*)
14923 ggc_alloc (sizeof (struct stack_local_entry
));
14926 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
14928 s
->next
= ix86_stack_locals
;
14929 ix86_stack_locals
= s
;
14933 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14935 static GTY(()) rtx ix86_tls_symbol
;
14937 ix86_tls_get_addr (void)
14940 if (!ix86_tls_symbol
)
14942 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14943 (TARGET_ANY_GNU_TLS
14945 ? "___tls_get_addr"
14946 : "__tls_get_addr");
14949 return ix86_tls_symbol
;
14952 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14954 static GTY(()) rtx ix86_tls_module_base_symbol
;
14956 ix86_tls_module_base (void)
14959 if (!ix86_tls_module_base_symbol
)
14961 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14962 "_TLS_MODULE_BASE_");
14963 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
14964 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
14967 return ix86_tls_module_base_symbol
;
14970 /* Calculate the length of the memory address in the instruction
14971 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14974 memory_address_length (rtx addr
)
14976 struct ix86_address parts
;
14977 rtx base
, index
, disp
;
14981 if (GET_CODE (addr
) == PRE_DEC
14982 || GET_CODE (addr
) == POST_INC
14983 || GET_CODE (addr
) == PRE_MODIFY
14984 || GET_CODE (addr
) == POST_MODIFY
)
14987 ok
= ix86_decompose_address (addr
, &parts
);
14990 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14991 parts
.base
= SUBREG_REG (parts
.base
);
14992 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14993 parts
.index
= SUBREG_REG (parts
.index
);
14996 index
= parts
.index
;
15001 - esp as the base always wants an index,
15002 - ebp as the base always wants a displacement. */
15004 /* Register Indirect. */
15005 if (base
&& !index
&& !disp
)
15007 /* esp (for its index) and ebp (for its displacement) need
15008 the two-byte modrm form. */
15009 if (addr
== stack_pointer_rtx
15010 || addr
== arg_pointer_rtx
15011 || addr
== frame_pointer_rtx
15012 || addr
== hard_frame_pointer_rtx
)
15016 /* Direct Addressing. */
15017 else if (disp
&& !base
&& !index
)
15022 /* Find the length of the displacement constant. */
15025 if (base
&& satisfies_constraint_K (disp
))
15030 /* ebp always wants a displacement. */
15031 else if (base
== hard_frame_pointer_rtx
)
15034 /* An index requires the two-byte modrm form.... */
15036 /* ...like esp, which always wants an index. */
15037 || base
== stack_pointer_rtx
15038 || base
== arg_pointer_rtx
15039 || base
== frame_pointer_rtx
)
15046 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15047 is set, expect that insn have 8bit immediate alternative. */
15049 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15053 extract_insn_cached (insn
);
15054 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15055 if (CONSTANT_P (recog_data
.operand
[i
]))
15058 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15062 switch (get_attr_mode (insn
))
15073 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15078 fatal_insn ("unknown insn mode", insn
);
15084 /* Compute default value for "length_address" attribute. */
15086 ix86_attr_length_address_default (rtx insn
)
15090 if (get_attr_type (insn
) == TYPE_LEA
)
15092 rtx set
= PATTERN (insn
);
15094 if (GET_CODE (set
) == PARALLEL
)
15095 set
= XVECEXP (set
, 0, 0);
15097 gcc_assert (GET_CODE (set
) == SET
);
15099 return memory_address_length (SET_SRC (set
));
15102 extract_insn_cached (insn
);
15103 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15104 if (MEM_P (recog_data
.operand
[i
]))
15106 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15112 /* Return the maximum number of instructions a cpu can issue. */
15115 ix86_issue_rate (void)
15119 case PROCESSOR_PENTIUM
:
15123 case PROCESSOR_PENTIUMPRO
:
15124 case PROCESSOR_PENTIUM4
:
15125 case PROCESSOR_ATHLON
:
15127 case PROCESSOR_AMDFAM10
:
15128 case PROCESSOR_NOCONA
:
15129 case PROCESSOR_GENERIC32
:
15130 case PROCESSOR_GENERIC64
:
15133 case PROCESSOR_CORE2
:
15141 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15142 by DEP_INSN and nothing set by DEP_INSN. */
15145 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15149 /* Simplify the test for uninteresting insns. */
15150 if (insn_type
!= TYPE_SETCC
15151 && insn_type
!= TYPE_ICMOV
15152 && insn_type
!= TYPE_FCMOV
15153 && insn_type
!= TYPE_IBR
)
15156 if ((set
= single_set (dep_insn
)) != 0)
15158 set
= SET_DEST (set
);
15161 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15162 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15163 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15164 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15166 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15167 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15172 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15175 /* This test is true if the dependent insn reads the flags but
15176 not any other potentially set register. */
15177 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15180 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15186 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15187 address with operands set by DEP_INSN. */
15190 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15194 if (insn_type
== TYPE_LEA
15197 addr
= PATTERN (insn
);
15199 if (GET_CODE (addr
) == PARALLEL
)
15200 addr
= XVECEXP (addr
, 0, 0);
15202 gcc_assert (GET_CODE (addr
) == SET
);
15204 addr
= SET_SRC (addr
);
15209 extract_insn_cached (insn
);
15210 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15211 if (MEM_P (recog_data
.operand
[i
]))
15213 addr
= XEXP (recog_data
.operand
[i
], 0);
15220 return modified_in_p (addr
, dep_insn
);
15224 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15226 enum attr_type insn_type
, dep_insn_type
;
15227 enum attr_memory memory
;
15229 int dep_insn_code_number
;
15231 /* Anti and output dependencies have zero cost on all CPUs. */
15232 if (REG_NOTE_KIND (link
) != 0)
15235 dep_insn_code_number
= recog_memoized (dep_insn
);
15237 /* If we can't recognize the insns, we can't really do anything. */
15238 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15241 insn_type
= get_attr_type (insn
);
15242 dep_insn_type
= get_attr_type (dep_insn
);
15246 case PROCESSOR_PENTIUM
:
15247 /* Address Generation Interlock adds a cycle of latency. */
15248 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15251 /* ??? Compares pair with jump/setcc. */
15252 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15255 /* Floating point stores require value to be ready one cycle earlier. */
15256 if (insn_type
== TYPE_FMOV
15257 && get_attr_memory (insn
) == MEMORY_STORE
15258 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15262 case PROCESSOR_PENTIUMPRO
:
15263 memory
= get_attr_memory (insn
);
15265 /* INT->FP conversion is expensive. */
15266 if (get_attr_fp_int_src (dep_insn
))
15269 /* There is one cycle extra latency between an FP op and a store. */
15270 if (insn_type
== TYPE_FMOV
15271 && (set
= single_set (dep_insn
)) != NULL_RTX
15272 && (set2
= single_set (insn
)) != NULL_RTX
15273 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15274 && MEM_P (SET_DEST (set2
)))
15277 /* Show ability of reorder buffer to hide latency of load by executing
15278 in parallel with previous instruction in case
15279 previous instruction is not needed to compute the address. */
15280 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15281 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15283 /* Claim moves to take one cycle, as core can issue one load
15284 at time and the next load can start cycle later. */
15285 if (dep_insn_type
== TYPE_IMOV
15286 || dep_insn_type
== TYPE_FMOV
)
15294 memory
= get_attr_memory (insn
);
15296 /* The esp dependency is resolved before the instruction is really
15298 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15299 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15302 /* INT->FP conversion is expensive. */
15303 if (get_attr_fp_int_src (dep_insn
))
15306 /* Show ability of reorder buffer to hide latency of load by executing
15307 in parallel with previous instruction in case
15308 previous instruction is not needed to compute the address. */
15309 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15310 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15312 /* Claim moves to take one cycle, as core can issue one load
15313 at time and the next load can start cycle later. */
15314 if (dep_insn_type
== TYPE_IMOV
15315 || dep_insn_type
== TYPE_FMOV
)
15324 case PROCESSOR_ATHLON
:
15326 case PROCESSOR_AMDFAM10
:
15327 case PROCESSOR_GENERIC32
:
15328 case PROCESSOR_GENERIC64
:
15329 memory
= get_attr_memory (insn
);
15331 /* Show ability of reorder buffer to hide latency of load by executing
15332 in parallel with previous instruction in case
15333 previous instruction is not needed to compute the address. */
15334 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15335 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15337 enum attr_unit unit
= get_attr_unit (insn
);
15340 /* Because of the difference between the length of integer and
15341 floating unit pipeline preparation stages, the memory operands
15342 for floating point are cheaper.
15344 ??? For Athlon it the difference is most probably 2. */
15345 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15348 loadcost
= TARGET_ATHLON
? 2 : 0;
15350 if (cost
>= loadcost
)
15363 /* How many alternative schedules to try. This should be as wide as the
15364 scheduling freedom in the DFA, but no wider. Making this value too
15365 large results extra work for the scheduler. */
15368 ia32_multipass_dfa_lookahead (void)
15370 if (ix86_tune
== PROCESSOR_PENTIUM
)
15373 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15374 || ix86_tune
== PROCESSOR_K6
)
15382 /* Compute the alignment given to a constant that is being placed in memory.
15383 EXP is the constant and ALIGN is the alignment that the object would
15385 The value of this function is used instead of that alignment to align
15389 ix86_constant_alignment (tree exp
, int align
)
15391 if (TREE_CODE (exp
) == REAL_CST
)
15393 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15395 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15398 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15399 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15400 return BITS_PER_WORD
;
15405 /* Compute the alignment for a static variable.
15406 TYPE is the data type, and ALIGN is the alignment that
15407 the object would ordinarily have. The value of this function is used
15408 instead of that alignment to align the object. */
15411 ix86_data_alignment (tree type
, int align
)
15413 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
15415 if (AGGREGATE_TYPE_P (type
)
15416 && TYPE_SIZE (type
)
15417 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15418 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15419 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15420 && align
< max_align
)
15423 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15424 to 16byte boundary. */
15427 if (AGGREGATE_TYPE_P (type
)
15428 && TYPE_SIZE (type
)
15429 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15430 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15431 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15435 if (TREE_CODE (type
) == ARRAY_TYPE
)
15437 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15439 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15442 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15445 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15447 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15450 else if ((TREE_CODE (type
) == RECORD_TYPE
15451 || TREE_CODE (type
) == UNION_TYPE
15452 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15453 && TYPE_FIELDS (type
))
15455 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15457 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15460 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15461 || TREE_CODE (type
) == INTEGER_TYPE
)
15463 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15465 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15472 /* Compute the alignment for a local variable.
15473 TYPE is the data type, and ALIGN is the alignment that
15474 the object would ordinarily have. The value of this macro is used
15475 instead of that alignment to align the object. */
15478 ix86_local_alignment (tree type
, int align
)
15480 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15481 to 16byte boundary. */
15484 if (AGGREGATE_TYPE_P (type
)
15485 && TYPE_SIZE (type
)
15486 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15487 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15488 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15491 if (TREE_CODE (type
) == ARRAY_TYPE
)
15493 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15495 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15498 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15500 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15502 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15505 else if ((TREE_CODE (type
) == RECORD_TYPE
15506 || TREE_CODE (type
) == UNION_TYPE
15507 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15508 && TYPE_FIELDS (type
))
15510 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15512 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15515 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15516 || TREE_CODE (type
) == INTEGER_TYPE
)
15519 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15521 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15527 /* Emit RTL insns to initialize the variable parts of a trampoline.
15528 FNADDR is an RTX for the address of the function's pure code.
15529 CXT is an RTX for the static chain value for the function. */
15531 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15535 /* Compute offset from the end of the jmp to the target function. */
15536 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15537 plus_constant (tramp
, 10),
15538 NULL_RTX
, 1, OPTAB_DIRECT
);
15539 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15540 gen_int_mode (0xb9, QImode
));
15541 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15542 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15543 gen_int_mode (0xe9, QImode
));
15544 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15549 /* Try to load address using shorter movl instead of movabs.
15550 We may want to support movq for kernel mode, but kernel does not use
15551 trampolines at the moment. */
15552 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15554 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15555 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15556 gen_int_mode (0xbb41, HImode
));
15557 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15558 gen_lowpart (SImode
, fnaddr
));
15563 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15564 gen_int_mode (0xbb49, HImode
));
15565 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15569 /* Load static chain using movabs to r10. */
15570 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15571 gen_int_mode (0xba49, HImode
));
15572 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15575 /* Jump to the r11 */
15576 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15577 gen_int_mode (0xff49, HImode
));
15578 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15579 gen_int_mode (0xe3, QImode
));
15581 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15584 #ifdef ENABLE_EXECUTE_STACK
15585 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15586 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15590 /* Codes for all the SSE/MMX builtins. */
15593 IX86_BUILTIN_ADDPS
,
15594 IX86_BUILTIN_ADDSS
,
15595 IX86_BUILTIN_DIVPS
,
15596 IX86_BUILTIN_DIVSS
,
15597 IX86_BUILTIN_MULPS
,
15598 IX86_BUILTIN_MULSS
,
15599 IX86_BUILTIN_SUBPS
,
15600 IX86_BUILTIN_SUBSS
,
15602 IX86_BUILTIN_CMPEQPS
,
15603 IX86_BUILTIN_CMPLTPS
,
15604 IX86_BUILTIN_CMPLEPS
,
15605 IX86_BUILTIN_CMPGTPS
,
15606 IX86_BUILTIN_CMPGEPS
,
15607 IX86_BUILTIN_CMPNEQPS
,
15608 IX86_BUILTIN_CMPNLTPS
,
15609 IX86_BUILTIN_CMPNLEPS
,
15610 IX86_BUILTIN_CMPNGTPS
,
15611 IX86_BUILTIN_CMPNGEPS
,
15612 IX86_BUILTIN_CMPORDPS
,
15613 IX86_BUILTIN_CMPUNORDPS
,
15614 IX86_BUILTIN_CMPEQSS
,
15615 IX86_BUILTIN_CMPLTSS
,
15616 IX86_BUILTIN_CMPLESS
,
15617 IX86_BUILTIN_CMPNEQSS
,
15618 IX86_BUILTIN_CMPNLTSS
,
15619 IX86_BUILTIN_CMPNLESS
,
15620 IX86_BUILTIN_CMPNGTSS
,
15621 IX86_BUILTIN_CMPNGESS
,
15622 IX86_BUILTIN_CMPORDSS
,
15623 IX86_BUILTIN_CMPUNORDSS
,
15625 IX86_BUILTIN_COMIEQSS
,
15626 IX86_BUILTIN_COMILTSS
,
15627 IX86_BUILTIN_COMILESS
,
15628 IX86_BUILTIN_COMIGTSS
,
15629 IX86_BUILTIN_COMIGESS
,
15630 IX86_BUILTIN_COMINEQSS
,
15631 IX86_BUILTIN_UCOMIEQSS
,
15632 IX86_BUILTIN_UCOMILTSS
,
15633 IX86_BUILTIN_UCOMILESS
,
15634 IX86_BUILTIN_UCOMIGTSS
,
15635 IX86_BUILTIN_UCOMIGESS
,
15636 IX86_BUILTIN_UCOMINEQSS
,
15638 IX86_BUILTIN_CVTPI2PS
,
15639 IX86_BUILTIN_CVTPS2PI
,
15640 IX86_BUILTIN_CVTSI2SS
,
15641 IX86_BUILTIN_CVTSI642SS
,
15642 IX86_BUILTIN_CVTSS2SI
,
15643 IX86_BUILTIN_CVTSS2SI64
,
15644 IX86_BUILTIN_CVTTPS2PI
,
15645 IX86_BUILTIN_CVTTSS2SI
,
15646 IX86_BUILTIN_CVTTSS2SI64
,
15648 IX86_BUILTIN_MAXPS
,
15649 IX86_BUILTIN_MAXSS
,
15650 IX86_BUILTIN_MINPS
,
15651 IX86_BUILTIN_MINSS
,
15653 IX86_BUILTIN_LOADUPS
,
15654 IX86_BUILTIN_STOREUPS
,
15655 IX86_BUILTIN_MOVSS
,
15657 IX86_BUILTIN_MOVHLPS
,
15658 IX86_BUILTIN_MOVLHPS
,
15659 IX86_BUILTIN_LOADHPS
,
15660 IX86_BUILTIN_LOADLPS
,
15661 IX86_BUILTIN_STOREHPS
,
15662 IX86_BUILTIN_STORELPS
,
15664 IX86_BUILTIN_MASKMOVQ
,
15665 IX86_BUILTIN_MOVMSKPS
,
15666 IX86_BUILTIN_PMOVMSKB
,
15668 IX86_BUILTIN_MOVNTPS
,
15669 IX86_BUILTIN_MOVNTQ
,
15671 IX86_BUILTIN_LOADDQU
,
15672 IX86_BUILTIN_STOREDQU
,
15674 IX86_BUILTIN_PACKSSWB
,
15675 IX86_BUILTIN_PACKSSDW
,
15676 IX86_BUILTIN_PACKUSWB
,
15678 IX86_BUILTIN_PADDB
,
15679 IX86_BUILTIN_PADDW
,
15680 IX86_BUILTIN_PADDD
,
15681 IX86_BUILTIN_PADDQ
,
15682 IX86_BUILTIN_PADDSB
,
15683 IX86_BUILTIN_PADDSW
,
15684 IX86_BUILTIN_PADDUSB
,
15685 IX86_BUILTIN_PADDUSW
,
15686 IX86_BUILTIN_PSUBB
,
15687 IX86_BUILTIN_PSUBW
,
15688 IX86_BUILTIN_PSUBD
,
15689 IX86_BUILTIN_PSUBQ
,
15690 IX86_BUILTIN_PSUBSB
,
15691 IX86_BUILTIN_PSUBSW
,
15692 IX86_BUILTIN_PSUBUSB
,
15693 IX86_BUILTIN_PSUBUSW
,
15696 IX86_BUILTIN_PANDN
,
15700 IX86_BUILTIN_PAVGB
,
15701 IX86_BUILTIN_PAVGW
,
15703 IX86_BUILTIN_PCMPEQB
,
15704 IX86_BUILTIN_PCMPEQW
,
15705 IX86_BUILTIN_PCMPEQD
,
15706 IX86_BUILTIN_PCMPGTB
,
15707 IX86_BUILTIN_PCMPGTW
,
15708 IX86_BUILTIN_PCMPGTD
,
15710 IX86_BUILTIN_PMADDWD
,
15712 IX86_BUILTIN_PMAXSW
,
15713 IX86_BUILTIN_PMAXUB
,
15714 IX86_BUILTIN_PMINSW
,
15715 IX86_BUILTIN_PMINUB
,
15717 IX86_BUILTIN_PMULHUW
,
15718 IX86_BUILTIN_PMULHW
,
15719 IX86_BUILTIN_PMULLW
,
15721 IX86_BUILTIN_PSADBW
,
15722 IX86_BUILTIN_PSHUFW
,
15724 IX86_BUILTIN_PSLLW
,
15725 IX86_BUILTIN_PSLLD
,
15726 IX86_BUILTIN_PSLLQ
,
15727 IX86_BUILTIN_PSRAW
,
15728 IX86_BUILTIN_PSRAD
,
15729 IX86_BUILTIN_PSRLW
,
15730 IX86_BUILTIN_PSRLD
,
15731 IX86_BUILTIN_PSRLQ
,
15732 IX86_BUILTIN_PSLLWI
,
15733 IX86_BUILTIN_PSLLDI
,
15734 IX86_BUILTIN_PSLLQI
,
15735 IX86_BUILTIN_PSRAWI
,
15736 IX86_BUILTIN_PSRADI
,
15737 IX86_BUILTIN_PSRLWI
,
15738 IX86_BUILTIN_PSRLDI
,
15739 IX86_BUILTIN_PSRLQI
,
15741 IX86_BUILTIN_PUNPCKHBW
,
15742 IX86_BUILTIN_PUNPCKHWD
,
15743 IX86_BUILTIN_PUNPCKHDQ
,
15744 IX86_BUILTIN_PUNPCKLBW
,
15745 IX86_BUILTIN_PUNPCKLWD
,
15746 IX86_BUILTIN_PUNPCKLDQ
,
15748 IX86_BUILTIN_SHUFPS
,
15750 IX86_BUILTIN_RCPPS
,
15751 IX86_BUILTIN_RCPSS
,
15752 IX86_BUILTIN_RSQRTPS
,
15753 IX86_BUILTIN_RSQRTSS
,
15754 IX86_BUILTIN_SQRTPS
,
15755 IX86_BUILTIN_SQRTSS
,
15757 IX86_BUILTIN_UNPCKHPS
,
15758 IX86_BUILTIN_UNPCKLPS
,
15760 IX86_BUILTIN_ANDPS
,
15761 IX86_BUILTIN_ANDNPS
,
15763 IX86_BUILTIN_XORPS
,
15766 IX86_BUILTIN_LDMXCSR
,
15767 IX86_BUILTIN_STMXCSR
,
15768 IX86_BUILTIN_SFENCE
,
15770 /* 3DNow! Original */
15771 IX86_BUILTIN_FEMMS
,
15772 IX86_BUILTIN_PAVGUSB
,
15773 IX86_BUILTIN_PF2ID
,
15774 IX86_BUILTIN_PFACC
,
15775 IX86_BUILTIN_PFADD
,
15776 IX86_BUILTIN_PFCMPEQ
,
15777 IX86_BUILTIN_PFCMPGE
,
15778 IX86_BUILTIN_PFCMPGT
,
15779 IX86_BUILTIN_PFMAX
,
15780 IX86_BUILTIN_PFMIN
,
15781 IX86_BUILTIN_PFMUL
,
15782 IX86_BUILTIN_PFRCP
,
15783 IX86_BUILTIN_PFRCPIT1
,
15784 IX86_BUILTIN_PFRCPIT2
,
15785 IX86_BUILTIN_PFRSQIT1
,
15786 IX86_BUILTIN_PFRSQRT
,
15787 IX86_BUILTIN_PFSUB
,
15788 IX86_BUILTIN_PFSUBR
,
15789 IX86_BUILTIN_PI2FD
,
15790 IX86_BUILTIN_PMULHRW
,
15792 /* 3DNow! Athlon Extensions */
15793 IX86_BUILTIN_PF2IW
,
15794 IX86_BUILTIN_PFNACC
,
15795 IX86_BUILTIN_PFPNACC
,
15796 IX86_BUILTIN_PI2FW
,
15797 IX86_BUILTIN_PSWAPDSI
,
15798 IX86_BUILTIN_PSWAPDSF
,
15801 IX86_BUILTIN_ADDPD
,
15802 IX86_BUILTIN_ADDSD
,
15803 IX86_BUILTIN_DIVPD
,
15804 IX86_BUILTIN_DIVSD
,
15805 IX86_BUILTIN_MULPD
,
15806 IX86_BUILTIN_MULSD
,
15807 IX86_BUILTIN_SUBPD
,
15808 IX86_BUILTIN_SUBSD
,
15810 IX86_BUILTIN_CMPEQPD
,
15811 IX86_BUILTIN_CMPLTPD
,
15812 IX86_BUILTIN_CMPLEPD
,
15813 IX86_BUILTIN_CMPGTPD
,
15814 IX86_BUILTIN_CMPGEPD
,
15815 IX86_BUILTIN_CMPNEQPD
,
15816 IX86_BUILTIN_CMPNLTPD
,
15817 IX86_BUILTIN_CMPNLEPD
,
15818 IX86_BUILTIN_CMPNGTPD
,
15819 IX86_BUILTIN_CMPNGEPD
,
15820 IX86_BUILTIN_CMPORDPD
,
15821 IX86_BUILTIN_CMPUNORDPD
,
15822 IX86_BUILTIN_CMPNEPD
,
15823 IX86_BUILTIN_CMPEQSD
,
15824 IX86_BUILTIN_CMPLTSD
,
15825 IX86_BUILTIN_CMPLESD
,
15826 IX86_BUILTIN_CMPNEQSD
,
15827 IX86_BUILTIN_CMPNLTSD
,
15828 IX86_BUILTIN_CMPNLESD
,
15829 IX86_BUILTIN_CMPORDSD
,
15830 IX86_BUILTIN_CMPUNORDSD
,
15831 IX86_BUILTIN_CMPNESD
,
15833 IX86_BUILTIN_COMIEQSD
,
15834 IX86_BUILTIN_COMILTSD
,
15835 IX86_BUILTIN_COMILESD
,
15836 IX86_BUILTIN_COMIGTSD
,
15837 IX86_BUILTIN_COMIGESD
,
15838 IX86_BUILTIN_COMINEQSD
,
15839 IX86_BUILTIN_UCOMIEQSD
,
15840 IX86_BUILTIN_UCOMILTSD
,
15841 IX86_BUILTIN_UCOMILESD
,
15842 IX86_BUILTIN_UCOMIGTSD
,
15843 IX86_BUILTIN_UCOMIGESD
,
15844 IX86_BUILTIN_UCOMINEQSD
,
15846 IX86_BUILTIN_MAXPD
,
15847 IX86_BUILTIN_MAXSD
,
15848 IX86_BUILTIN_MINPD
,
15849 IX86_BUILTIN_MINSD
,
15851 IX86_BUILTIN_ANDPD
,
15852 IX86_BUILTIN_ANDNPD
,
15854 IX86_BUILTIN_XORPD
,
15856 IX86_BUILTIN_SQRTPD
,
15857 IX86_BUILTIN_SQRTSD
,
15859 IX86_BUILTIN_UNPCKHPD
,
15860 IX86_BUILTIN_UNPCKLPD
,
15862 IX86_BUILTIN_SHUFPD
,
15864 IX86_BUILTIN_LOADUPD
,
15865 IX86_BUILTIN_STOREUPD
,
15866 IX86_BUILTIN_MOVSD
,
15868 IX86_BUILTIN_LOADHPD
,
15869 IX86_BUILTIN_LOADLPD
,
15871 IX86_BUILTIN_CVTDQ2PD
,
15872 IX86_BUILTIN_CVTDQ2PS
,
15874 IX86_BUILTIN_CVTPD2DQ
,
15875 IX86_BUILTIN_CVTPD2PI
,
15876 IX86_BUILTIN_CVTPD2PS
,
15877 IX86_BUILTIN_CVTTPD2DQ
,
15878 IX86_BUILTIN_CVTTPD2PI
,
15880 IX86_BUILTIN_CVTPI2PD
,
15881 IX86_BUILTIN_CVTSI2SD
,
15882 IX86_BUILTIN_CVTSI642SD
,
15884 IX86_BUILTIN_CVTSD2SI
,
15885 IX86_BUILTIN_CVTSD2SI64
,
15886 IX86_BUILTIN_CVTSD2SS
,
15887 IX86_BUILTIN_CVTSS2SD
,
15888 IX86_BUILTIN_CVTTSD2SI
,
15889 IX86_BUILTIN_CVTTSD2SI64
,
15891 IX86_BUILTIN_CVTPS2DQ
,
15892 IX86_BUILTIN_CVTPS2PD
,
15893 IX86_BUILTIN_CVTTPS2DQ
,
15895 IX86_BUILTIN_MOVNTI
,
15896 IX86_BUILTIN_MOVNTPD
,
15897 IX86_BUILTIN_MOVNTDQ
,
15900 IX86_BUILTIN_MASKMOVDQU
,
15901 IX86_BUILTIN_MOVMSKPD
,
15902 IX86_BUILTIN_PMOVMSKB128
,
15904 IX86_BUILTIN_PACKSSWB128
,
15905 IX86_BUILTIN_PACKSSDW128
,
15906 IX86_BUILTIN_PACKUSWB128
,
15908 IX86_BUILTIN_PADDB128
,
15909 IX86_BUILTIN_PADDW128
,
15910 IX86_BUILTIN_PADDD128
,
15911 IX86_BUILTIN_PADDQ128
,
15912 IX86_BUILTIN_PADDSB128
,
15913 IX86_BUILTIN_PADDSW128
,
15914 IX86_BUILTIN_PADDUSB128
,
15915 IX86_BUILTIN_PADDUSW128
,
15916 IX86_BUILTIN_PSUBB128
,
15917 IX86_BUILTIN_PSUBW128
,
15918 IX86_BUILTIN_PSUBD128
,
15919 IX86_BUILTIN_PSUBQ128
,
15920 IX86_BUILTIN_PSUBSB128
,
15921 IX86_BUILTIN_PSUBSW128
,
15922 IX86_BUILTIN_PSUBUSB128
,
15923 IX86_BUILTIN_PSUBUSW128
,
15925 IX86_BUILTIN_PAND128
,
15926 IX86_BUILTIN_PANDN128
,
15927 IX86_BUILTIN_POR128
,
15928 IX86_BUILTIN_PXOR128
,
15930 IX86_BUILTIN_PAVGB128
,
15931 IX86_BUILTIN_PAVGW128
,
15933 IX86_BUILTIN_PCMPEQB128
,
15934 IX86_BUILTIN_PCMPEQW128
,
15935 IX86_BUILTIN_PCMPEQD128
,
15936 IX86_BUILTIN_PCMPGTB128
,
15937 IX86_BUILTIN_PCMPGTW128
,
15938 IX86_BUILTIN_PCMPGTD128
,
15940 IX86_BUILTIN_PMADDWD128
,
15942 IX86_BUILTIN_PMAXSW128
,
15943 IX86_BUILTIN_PMAXUB128
,
15944 IX86_BUILTIN_PMINSW128
,
15945 IX86_BUILTIN_PMINUB128
,
15947 IX86_BUILTIN_PMULUDQ
,
15948 IX86_BUILTIN_PMULUDQ128
,
15949 IX86_BUILTIN_PMULHUW128
,
15950 IX86_BUILTIN_PMULHW128
,
15951 IX86_BUILTIN_PMULLW128
,
15953 IX86_BUILTIN_PSADBW128
,
15954 IX86_BUILTIN_PSHUFHW
,
15955 IX86_BUILTIN_PSHUFLW
,
15956 IX86_BUILTIN_PSHUFD
,
15958 IX86_BUILTIN_PSLLW128
,
15959 IX86_BUILTIN_PSLLD128
,
15960 IX86_BUILTIN_PSLLQ128
,
15961 IX86_BUILTIN_PSRAW128
,
15962 IX86_BUILTIN_PSRAD128
,
15963 IX86_BUILTIN_PSRLW128
,
15964 IX86_BUILTIN_PSRLD128
,
15965 IX86_BUILTIN_PSRLQ128
,
15966 IX86_BUILTIN_PSLLDQI128
,
15967 IX86_BUILTIN_PSLLWI128
,
15968 IX86_BUILTIN_PSLLDI128
,
15969 IX86_BUILTIN_PSLLQI128
,
15970 IX86_BUILTIN_PSRAWI128
,
15971 IX86_BUILTIN_PSRADI128
,
15972 IX86_BUILTIN_PSRLDQI128
,
15973 IX86_BUILTIN_PSRLWI128
,
15974 IX86_BUILTIN_PSRLDI128
,
15975 IX86_BUILTIN_PSRLQI128
,
15977 IX86_BUILTIN_PUNPCKHBW128
,
15978 IX86_BUILTIN_PUNPCKHWD128
,
15979 IX86_BUILTIN_PUNPCKHDQ128
,
15980 IX86_BUILTIN_PUNPCKHQDQ128
,
15981 IX86_BUILTIN_PUNPCKLBW128
,
15982 IX86_BUILTIN_PUNPCKLWD128
,
15983 IX86_BUILTIN_PUNPCKLDQ128
,
15984 IX86_BUILTIN_PUNPCKLQDQ128
,
15986 IX86_BUILTIN_CLFLUSH
,
15987 IX86_BUILTIN_MFENCE
,
15988 IX86_BUILTIN_LFENCE
,
15990 /* Prescott New Instructions. */
15991 IX86_BUILTIN_ADDSUBPS
,
15992 IX86_BUILTIN_HADDPS
,
15993 IX86_BUILTIN_HSUBPS
,
15994 IX86_BUILTIN_MOVSHDUP
,
15995 IX86_BUILTIN_MOVSLDUP
,
15996 IX86_BUILTIN_ADDSUBPD
,
15997 IX86_BUILTIN_HADDPD
,
15998 IX86_BUILTIN_HSUBPD
,
15999 IX86_BUILTIN_LDDQU
,
16001 IX86_BUILTIN_MONITOR
,
16002 IX86_BUILTIN_MWAIT
,
16005 IX86_BUILTIN_PHADDW
,
16006 IX86_BUILTIN_PHADDD
,
16007 IX86_BUILTIN_PHADDSW
,
16008 IX86_BUILTIN_PHSUBW
,
16009 IX86_BUILTIN_PHSUBD
,
16010 IX86_BUILTIN_PHSUBSW
,
16011 IX86_BUILTIN_PMADDUBSW
,
16012 IX86_BUILTIN_PMULHRSW
,
16013 IX86_BUILTIN_PSHUFB
,
16014 IX86_BUILTIN_PSIGNB
,
16015 IX86_BUILTIN_PSIGNW
,
16016 IX86_BUILTIN_PSIGND
,
16017 IX86_BUILTIN_PALIGNR
,
16018 IX86_BUILTIN_PABSB
,
16019 IX86_BUILTIN_PABSW
,
16020 IX86_BUILTIN_PABSD
,
16022 IX86_BUILTIN_PHADDW128
,
16023 IX86_BUILTIN_PHADDD128
,
16024 IX86_BUILTIN_PHADDSW128
,
16025 IX86_BUILTIN_PHSUBW128
,
16026 IX86_BUILTIN_PHSUBD128
,
16027 IX86_BUILTIN_PHSUBSW128
,
16028 IX86_BUILTIN_PMADDUBSW128
,
16029 IX86_BUILTIN_PMULHRSW128
,
16030 IX86_BUILTIN_PSHUFB128
,
16031 IX86_BUILTIN_PSIGNB128
,
16032 IX86_BUILTIN_PSIGNW128
,
16033 IX86_BUILTIN_PSIGND128
,
16034 IX86_BUILTIN_PALIGNR128
,
16035 IX86_BUILTIN_PABSB128
,
16036 IX86_BUILTIN_PABSW128
,
16037 IX86_BUILTIN_PABSD128
,
16039 /* AMDFAM10 - SSE4A New Instructions. */
16040 IX86_BUILTIN_MOVNTSD
,
16041 IX86_BUILTIN_MOVNTSS
,
16042 IX86_BUILTIN_EXTRQI
,
16043 IX86_BUILTIN_EXTRQ
,
16044 IX86_BUILTIN_INSERTQI
,
16045 IX86_BUILTIN_INSERTQ
,
16047 IX86_BUILTIN_VEC_INIT_V2SI
,
16048 IX86_BUILTIN_VEC_INIT_V4HI
,
16049 IX86_BUILTIN_VEC_INIT_V8QI
,
16050 IX86_BUILTIN_VEC_EXT_V2DF
,
16051 IX86_BUILTIN_VEC_EXT_V2DI
,
16052 IX86_BUILTIN_VEC_EXT_V4SF
,
16053 IX86_BUILTIN_VEC_EXT_V4SI
,
16054 IX86_BUILTIN_VEC_EXT_V8HI
,
16055 IX86_BUILTIN_VEC_EXT_V2SI
,
16056 IX86_BUILTIN_VEC_EXT_V4HI
,
16057 IX86_BUILTIN_VEC_SET_V8HI
,
16058 IX86_BUILTIN_VEC_SET_V4HI
,
/* Table for the ix86 builtin decls.  Indexed by builtin code (enum
   ix86_builtins).  Entries start out zeroed (GC-rooted static storage)
   and are filled in by def_builtin as each builtin is registered.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16066 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16067 * if the target_flags include one of MASK. Stores the function decl
16068 * in the ix86_builtins array.
16069 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16072 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16074 tree decl
= NULL_TREE
;
16076 if (mask
& target_flags
16077 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16079 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16081 ix86_builtins
[(int) code
] = decl
;
16087 /* Like def_builtin, but also marks the function decl "const". */
16090 def_builtin_const (int mask
, const char *name
, tree type
,
16091 enum ix86_builtins code
)
16093 tree decl
= def_builtin (mask
, name
, type
, code
);
16095 TREE_READONLY (decl
) = 1;
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* One entry in the builtin tables below: maps a user-visible
   __builtin_ia32_* name to the insn pattern that implements it.  */
struct builtin_description
{
  const unsigned int mask;		/* target_flags bits required
					   (MASK_SSE, MASK_MMX, ...).  */
  const enum insn_code icode;		/* insn pattern to expand to.  */
  const char *const name;		/* builtin name, or 0 when the
					   builtin is registered by hand.  */
  const enum ix86_builtins code;	/* index into ix86_builtins.  */
  const enum rtx_code comparison;	/* comparison code for compare
					   builtins; 0 otherwise.  */
  const unsigned int flag;		/* BUILTIN_DESC_* bits.  */
};
/* Scalar FP comparison builtins (comiss/ucomiss and comisd/ucomisd).
   Each entry gives the insn pattern and the rtx comparison code the
   builtin tests; expansion handles mapping the rtx code onto the
   instruction's condition flags.  */
static const struct builtin_description bdesc_comi[] =
{
  /* SSE: ordered (comi) single-precision compares.  */
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  /* SSE: unordered (ucomi) single-precision compares.  */
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  /* SSE2: ordered (comi) double-precision compares.  */
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  /* SSE2: unordered (ucomi) double-precision compares.  */
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
16143 static const struct builtin_description bdesc_2arg
[] =
16146 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16147 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16148 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16149 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16150 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16151 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16152 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16153 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16155 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16156 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16157 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16158 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16159 BUILTIN_DESC_SWAP_OPERANDS
},
16160 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16161 BUILTIN_DESC_SWAP_OPERANDS
},
16162 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16163 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16164 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16165 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16166 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16167 BUILTIN_DESC_SWAP_OPERANDS
},
16168 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16169 BUILTIN_DESC_SWAP_OPERANDS
},
16170 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16171 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16172 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16173 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16174 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16175 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16176 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16177 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16178 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16179 BUILTIN_DESC_SWAP_OPERANDS
},
16180 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16181 BUILTIN_DESC_SWAP_OPERANDS
},
16182 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16184 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16185 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16186 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16187 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16189 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16190 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16191 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16192 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16194 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16195 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16196 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16197 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16198 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16201 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16202 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16203 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16204 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16205 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16206 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16207 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16208 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16210 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16211 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16212 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16213 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16214 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16215 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16216 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16217 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16219 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16220 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16221 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16223 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16224 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16225 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16226 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16228 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16229 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16231 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16232 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16233 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16234 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16235 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16236 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16238 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16239 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16240 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16241 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16243 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16244 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16245 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16246 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16247 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16248 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16251 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16252 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16253 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16255 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16256 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16257 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16259 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16260 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16261 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16262 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16263 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16264 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16266 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16267 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16268 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16269 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16270 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16271 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16273 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16274 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16275 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16276 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16278 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16279 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16282 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16283 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16284 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16285 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16286 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16287 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16288 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16289 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16291 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16292 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16293 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16294 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16295 BUILTIN_DESC_SWAP_OPERANDS
},
16296 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16297 BUILTIN_DESC_SWAP_OPERANDS
},
16298 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16299 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16300 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16301 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16302 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16303 BUILTIN_DESC_SWAP_OPERANDS
},
16304 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16305 BUILTIN_DESC_SWAP_OPERANDS
},
16306 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16307 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16308 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16309 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16310 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16311 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16312 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16313 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16314 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16316 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16317 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16318 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16319 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16321 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16322 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16323 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16324 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16326 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16327 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16328 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16331 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16332 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16333 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16334 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16335 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16336 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16337 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16338 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16340 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16341 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16342 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16343 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16344 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16345 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16346 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16347 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16349 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16350 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16352 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16353 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16354 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16355 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16357 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16358 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16360 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16361 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16362 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16363 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16364 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16365 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16367 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16368 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16369 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16370 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16372 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16373 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16374 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16375 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16376 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16377 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16378 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16379 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16381 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16382 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16383 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16385 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16386 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16388 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16389 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16391 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16392 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16393 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16395 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16396 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16397 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16399 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16400 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16402 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16404 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16405 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16406 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16407 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16410 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16411 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16412 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16413 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16414 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16415 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16418 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16419 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16420 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16421 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16422 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16423 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16424 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16425 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16426 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16427 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16428 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16429 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16430 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16431 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16432 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16433 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16434 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16435 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16436 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16437 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16438 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16439 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16440 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16441 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16444 static const struct builtin_description bdesc_1arg
[] =
16446 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16447 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16449 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16450 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16451 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16453 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16454 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16455 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16456 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16457 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16458 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16460 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16461 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16463 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16465 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16466 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16468 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16469 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16470 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16471 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16472 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16474 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16476 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16477 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16478 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16479 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16481 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16482 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16483 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16486 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16487 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16490 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16491 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16492 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16493 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16494 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16495 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16499 ix86_init_builtins (void)
16502 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the builtins.  */
16509 ix86_init_mmx_sse_builtins (void)
16511 const struct builtin_description
* d
;
16514 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16515 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16516 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16517 tree V2DI_type_node
16518 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16519 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16520 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16521 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16522 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16523 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16524 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16526 tree pchar_type_node
= build_pointer_type (char_type_node
);
16527 tree pcchar_type_node
= build_pointer_type (
16528 build_type_variant (char_type_node
, 1, 0));
16529 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16530 tree pcfloat_type_node
= build_pointer_type (
16531 build_type_variant (float_type_node
, 1, 0));
16532 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16533 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16534 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16537 tree int_ftype_v4sf_v4sf
16538 = build_function_type_list (integer_type_node
,
16539 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16540 tree v4si_ftype_v4sf_v4sf
16541 = build_function_type_list (V4SI_type_node
,
16542 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16543 /* MMX/SSE/integer conversions. */
16544 tree int_ftype_v4sf
16545 = build_function_type_list (integer_type_node
,
16546 V4SF_type_node
, NULL_TREE
);
16547 tree int64_ftype_v4sf
16548 = build_function_type_list (long_long_integer_type_node
,
16549 V4SF_type_node
, NULL_TREE
);
16550 tree int_ftype_v8qi
16551 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16552 tree v4sf_ftype_v4sf_int
16553 = build_function_type_list (V4SF_type_node
,
16554 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16555 tree v4sf_ftype_v4sf_int64
16556 = build_function_type_list (V4SF_type_node
,
16557 V4SF_type_node
, long_long_integer_type_node
,
16559 tree v4sf_ftype_v4sf_v2si
16560 = build_function_type_list (V4SF_type_node
,
16561 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16563 /* Miscellaneous. */
16564 tree v8qi_ftype_v4hi_v4hi
16565 = build_function_type_list (V8QI_type_node
,
16566 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16567 tree v4hi_ftype_v2si_v2si
16568 = build_function_type_list (V4HI_type_node
,
16569 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16570 tree v4sf_ftype_v4sf_v4sf_int
16571 = build_function_type_list (V4SF_type_node
,
16572 V4SF_type_node
, V4SF_type_node
,
16573 integer_type_node
, NULL_TREE
);
16574 tree v2si_ftype_v4hi_v4hi
16575 = build_function_type_list (V2SI_type_node
,
16576 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16577 tree v4hi_ftype_v4hi_int
16578 = build_function_type_list (V4HI_type_node
,
16579 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16580 tree v4hi_ftype_v4hi_di
16581 = build_function_type_list (V4HI_type_node
,
16582 V4HI_type_node
, long_long_unsigned_type_node
,
16584 tree v2si_ftype_v2si_di
16585 = build_function_type_list (V2SI_type_node
,
16586 V2SI_type_node
, long_long_unsigned_type_node
,
16588 tree void_ftype_void
16589 = build_function_type (void_type_node
, void_list_node
);
16590 tree void_ftype_unsigned
16591 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16592 tree void_ftype_unsigned_unsigned
16593 = build_function_type_list (void_type_node
, unsigned_type_node
,
16594 unsigned_type_node
, NULL_TREE
);
16595 tree void_ftype_pcvoid_unsigned_unsigned
16596 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16597 unsigned_type_node
, unsigned_type_node
,
16599 tree unsigned_ftype_void
16600 = build_function_type (unsigned_type_node
, void_list_node
);
16601 tree v2si_ftype_v4sf
16602 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16603 /* Loads/stores. */
16604 tree void_ftype_v8qi_v8qi_pchar
16605 = build_function_type_list (void_type_node
,
16606 V8QI_type_node
, V8QI_type_node
,
16607 pchar_type_node
, NULL_TREE
);
16608 tree v4sf_ftype_pcfloat
16609 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16610 /* @@@ the type is bogus */
16611 tree v4sf_ftype_v4sf_pv2si
16612 = build_function_type_list (V4SF_type_node
,
16613 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16614 tree void_ftype_pv2si_v4sf
16615 = build_function_type_list (void_type_node
,
16616 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16617 tree void_ftype_pfloat_v4sf
16618 = build_function_type_list (void_type_node
,
16619 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16620 tree void_ftype_pdi_di
16621 = build_function_type_list (void_type_node
,
16622 pdi_type_node
, long_long_unsigned_type_node
,
16624 tree void_ftype_pv2di_v2di
16625 = build_function_type_list (void_type_node
,
16626 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16627 /* Normal vector unops. */
16628 tree v4sf_ftype_v4sf
16629 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16630 tree v16qi_ftype_v16qi
16631 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16632 tree v8hi_ftype_v8hi
16633 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16634 tree v4si_ftype_v4si
16635 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16636 tree v8qi_ftype_v8qi
16637 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16638 tree v4hi_ftype_v4hi
16639 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16641 /* Normal vector binops. */
16642 tree v4sf_ftype_v4sf_v4sf
16643 = build_function_type_list (V4SF_type_node
,
16644 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16645 tree v8qi_ftype_v8qi_v8qi
16646 = build_function_type_list (V8QI_type_node
,
16647 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16648 tree v4hi_ftype_v4hi_v4hi
16649 = build_function_type_list (V4HI_type_node
,
16650 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16651 tree v2si_ftype_v2si_v2si
16652 = build_function_type_list (V2SI_type_node
,
16653 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16654 tree di_ftype_di_di
16655 = build_function_type_list (long_long_unsigned_type_node
,
16656 long_long_unsigned_type_node
,
16657 long_long_unsigned_type_node
, NULL_TREE
);
16659 tree di_ftype_di_di_int
16660 = build_function_type_list (long_long_unsigned_type_node
,
16661 long_long_unsigned_type_node
,
16662 long_long_unsigned_type_node
,
16663 integer_type_node
, NULL_TREE
);
16665 tree v2si_ftype_v2sf
16666 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16667 tree v2sf_ftype_v2si
16668 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16669 tree v2si_ftype_v2si
16670 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16671 tree v2sf_ftype_v2sf
16672 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16673 tree v2sf_ftype_v2sf_v2sf
16674 = build_function_type_list (V2SF_type_node
,
16675 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16676 tree v2si_ftype_v2sf_v2sf
16677 = build_function_type_list (V2SI_type_node
,
16678 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16679 tree pint_type_node
= build_pointer_type (integer_type_node
);
16680 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16681 tree pcdouble_type_node
= build_pointer_type (
16682 build_type_variant (double_type_node
, 1, 0));
16683 tree int_ftype_v2df_v2df
16684 = build_function_type_list (integer_type_node
,
16685 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16687 tree void_ftype_pcvoid
16688 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16689 tree v4sf_ftype_v4si
16690 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16691 tree v4si_ftype_v4sf
16692 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16693 tree v2df_ftype_v4si
16694 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16695 tree v4si_ftype_v2df
16696 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16697 tree v2si_ftype_v2df
16698 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16699 tree v4sf_ftype_v2df
16700 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16701 tree v2df_ftype_v2si
16702 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16703 tree v2df_ftype_v4sf
16704 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16705 tree int_ftype_v2df
16706 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16707 tree int64_ftype_v2df
16708 = build_function_type_list (long_long_integer_type_node
,
16709 V2DF_type_node
, NULL_TREE
);
16710 tree v2df_ftype_v2df_int
16711 = build_function_type_list (V2DF_type_node
,
16712 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16713 tree v2df_ftype_v2df_int64
16714 = build_function_type_list (V2DF_type_node
,
16715 V2DF_type_node
, long_long_integer_type_node
,
16717 tree v4sf_ftype_v4sf_v2df
16718 = build_function_type_list (V4SF_type_node
,
16719 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16720 tree v2df_ftype_v2df_v4sf
16721 = build_function_type_list (V2DF_type_node
,
16722 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16723 tree v2df_ftype_v2df_v2df_int
16724 = build_function_type_list (V2DF_type_node
,
16725 V2DF_type_node
, V2DF_type_node
,
16728 tree v2df_ftype_v2df_pcdouble
16729 = build_function_type_list (V2DF_type_node
,
16730 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16731 tree void_ftype_pdouble_v2df
16732 = build_function_type_list (void_type_node
,
16733 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16734 tree void_ftype_pint_int
16735 = build_function_type_list (void_type_node
,
16736 pint_type_node
, integer_type_node
, NULL_TREE
);
16737 tree void_ftype_v16qi_v16qi_pchar
16738 = build_function_type_list (void_type_node
,
16739 V16QI_type_node
, V16QI_type_node
,
16740 pchar_type_node
, NULL_TREE
);
16741 tree v2df_ftype_pcdouble
16742 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16743 tree v2df_ftype_v2df_v2df
16744 = build_function_type_list (V2DF_type_node
,
16745 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16746 tree v16qi_ftype_v16qi_v16qi
16747 = build_function_type_list (V16QI_type_node
,
16748 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16749 tree v8hi_ftype_v8hi_v8hi
16750 = build_function_type_list (V8HI_type_node
,
16751 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16752 tree v4si_ftype_v4si_v4si
16753 = build_function_type_list (V4SI_type_node
,
16754 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16755 tree v2di_ftype_v2di_v2di
16756 = build_function_type_list (V2DI_type_node
,
16757 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16758 tree v2di_ftype_v2df_v2df
16759 = build_function_type_list (V2DI_type_node
,
16760 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16761 tree v2df_ftype_v2df
16762 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16763 tree v2di_ftype_v2di_int
16764 = build_function_type_list (V2DI_type_node
,
16765 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16766 tree v2di_ftype_v2di_v2di_int
16767 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16768 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16769 tree v4si_ftype_v4si_int
16770 = build_function_type_list (V4SI_type_node
,
16771 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16772 tree v8hi_ftype_v8hi_int
16773 = build_function_type_list (V8HI_type_node
,
16774 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16775 tree v8hi_ftype_v8hi_v2di
16776 = build_function_type_list (V8HI_type_node
,
16777 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
16778 tree v4si_ftype_v4si_v2di
16779 = build_function_type_list (V4SI_type_node
,
16780 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
16781 tree v4si_ftype_v8hi_v8hi
16782 = build_function_type_list (V4SI_type_node
,
16783 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16784 tree di_ftype_v8qi_v8qi
16785 = build_function_type_list (long_long_unsigned_type_node
,
16786 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16787 tree di_ftype_v2si_v2si
16788 = build_function_type_list (long_long_unsigned_type_node
,
16789 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16790 tree v2di_ftype_v16qi_v16qi
16791 = build_function_type_list (V2DI_type_node
,
16792 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16793 tree v2di_ftype_v4si_v4si
16794 = build_function_type_list (V2DI_type_node
,
16795 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16796 tree int_ftype_v16qi
16797 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16798 tree v16qi_ftype_pcchar
16799 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16800 tree void_ftype_pchar_v16qi
16801 = build_function_type_list (void_type_node
,
16802 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16804 tree v2di_ftype_v2di_unsigned_unsigned
16805 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16806 unsigned_type_node
, unsigned_type_node
,
16808 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16809 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
16810 unsigned_type_node
, unsigned_type_node
,
16812 tree v2di_ftype_v2di_v16qi
16813 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
16817 tree float128_type
;
16820 /* The __float80 type. */
16821 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16822 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16826 /* The __float80 type. */
16827 float80_type
= make_node (REAL_TYPE
);
16828 TYPE_PRECISION (float80_type
) = 80;
16829 layout_type (float80_type
);
16830 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16835 float128_type
= make_node (REAL_TYPE
);
16836 TYPE_PRECISION (float128_type
) = 128;
16837 layout_type (float128_type
);
16838 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16841 /* Add all builtins that are more or less simple operations on two
16843 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16845 /* Use one of the operands; the target can have a different mode for
16846 mask-generating compares. */
16847 enum machine_mode mode
;
16852 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16857 type
= v16qi_ftype_v16qi_v16qi
;
16860 type
= v8hi_ftype_v8hi_v8hi
;
16863 type
= v4si_ftype_v4si_v4si
;
16866 type
= v2di_ftype_v2di_v2di
;
16869 type
= v2df_ftype_v2df_v2df
;
16872 type
= v4sf_ftype_v4sf_v4sf
;
16875 type
= v8qi_ftype_v8qi_v8qi
;
16878 type
= v4hi_ftype_v4hi_v4hi
;
16881 type
= v2si_ftype_v2si_v2si
;
16884 type
= di_ftype_di_di
;
16888 gcc_unreachable ();
16891 /* Override for comparisons. */
16892 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16893 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
16894 type
= v4si_ftype_v4sf_v4sf
;
16896 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16897 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16898 type
= v2di_ftype_v2df_v2df
;
16900 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16903 /* Add all builtins that are more or less simple operations on 1 operand. */
16904 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16906 enum machine_mode mode
;
16911 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16916 type
= v16qi_ftype_v16qi
;
16919 type
= v8hi_ftype_v8hi
;
16922 type
= v4si_ftype_v4si
;
16925 type
= v2df_ftype_v2df
;
16928 type
= v4sf_ftype_v4sf
;
16931 type
= v8qi_ftype_v8qi
;
16934 type
= v4hi_ftype_v4hi
;
16937 type
= v2si_ftype_v2si
;
16944 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16947 /* Add the remaining MMX insns with somewhat more complicated types. */
16948 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
16949 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
16950 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
16951 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
16953 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
16954 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
16955 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
16957 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
16958 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
16960 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
16961 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
16963 /* comi/ucomi insns. */
16964 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16965 if (d
->mask
== MASK_SSE2
)
16966 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
16968 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
16970 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
16971 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
16972 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
16974 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
16975 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
16976 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
16977 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
16978 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
16979 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
16980 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
16981 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
16982 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
16983 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
16984 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
16986 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
16988 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
16989 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
16991 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
16992 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
16993 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
16994 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
16996 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
16997 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
16998 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
16999 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17001 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17003 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17005 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17006 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17007 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17008 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17009 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17010 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17012 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17014 /* Original 3DNow! */
17015 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17016 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17017 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17018 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17019 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17020 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17021 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17022 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17023 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17024 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17025 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17026 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17027 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17028 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17029 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17030 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17031 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17032 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17033 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17034 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17036 /* 3DNow! extension as used in the Athlon CPU. */
17037 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17038 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17039 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17040 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17041 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17042 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17045 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17047 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17048 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17050 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17051 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17053 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17054 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17055 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17056 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17057 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17059 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17060 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17061 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17062 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17064 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17065 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17067 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17069 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17070 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17072 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17073 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17074 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17075 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17076 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17078 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17080 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17081 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17082 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17083 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17085 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17086 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17087 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17089 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17090 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17091 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17092 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17094 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17095 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17096 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17098 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17099 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17101 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17102 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17104 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17105 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17106 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17108 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17109 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17110 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17112 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17113 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17115 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17116 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17117 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17118 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17120 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17121 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17122 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17123 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17125 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17126 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17128 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17130 /* Prescott New Instructions. */
17131 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17132 void_ftype_pcvoid_unsigned_unsigned
,
17133 IX86_BUILTIN_MONITOR
);
17134 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17135 void_ftype_unsigned_unsigned
,
17136 IX86_BUILTIN_MWAIT
);
17137 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
17139 IX86_BUILTIN_MOVSHDUP
);
17140 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
17142 IX86_BUILTIN_MOVSLDUP
);
17143 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17144 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17147 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17148 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17149 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17150 IX86_BUILTIN_PALIGNR
);
17152 /* AMDFAM10 SSE4A New built-ins */
17153 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17154 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17155 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17156 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17157 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17158 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17159 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17160 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17161 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17162 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17163 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17164 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17166 /* Access to the vec_init patterns. */
17167 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17168 integer_type_node
, NULL_TREE
);
17169 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17170 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17172 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17173 short_integer_type_node
,
17174 short_integer_type_node
,
17175 short_integer_type_node
, NULL_TREE
);
17176 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17177 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17179 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17180 char_type_node
, char_type_node
,
17181 char_type_node
, char_type_node
,
17182 char_type_node
, char_type_node
,
17183 char_type_node
, NULL_TREE
);
17184 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17185 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17187 /* Access to the vec_extract patterns. */
17188 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17189 integer_type_node
, NULL_TREE
);
17190 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17191 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17193 ftype
= build_function_type_list (long_long_integer_type_node
,
17194 V2DI_type_node
, integer_type_node
,
17196 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17197 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17199 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17200 integer_type_node
, NULL_TREE
);
17201 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17202 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17204 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17205 integer_type_node
, NULL_TREE
);
17206 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17207 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17209 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17210 integer_type_node
, NULL_TREE
);
17211 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17212 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17214 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17215 integer_type_node
, NULL_TREE
);
17216 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17217 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17219 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17220 integer_type_node
, NULL_TREE
);
17221 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17222 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17224 /* Access to the vec_set patterns. */
17225 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17227 integer_type_node
, NULL_TREE
);
17228 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17229 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17231 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17233 integer_type_node
, NULL_TREE
);
17234 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17235 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17238 /* Errors in the source file can cause expand_expr to return const0_rtx
17239 where we expect a vector. To avoid crashing, use one of the vector
17240 clear instructions. */
17242 safe_vector_operand (rtx x
, enum machine_mode mode
)
17244 if (x
== const0_rtx
)
17245 x
= CONST0_RTX (mode
);
17249 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17252 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
17255 tree arg0
= TREE_VALUE (arglist
);
17256 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17257 rtx op0
= expand_normal (arg0
);
17258 rtx op1
= expand_normal (arg1
);
17259 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17260 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17261 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17263 if (VECTOR_MODE_P (mode0
))
17264 op0
= safe_vector_operand (op0
, mode0
);
17265 if (VECTOR_MODE_P (mode1
))
17266 op1
= safe_vector_operand (op1
, mode1
);
17268 if (optimize
|| !target
17269 || GET_MODE (target
) != tmode
17270 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17271 target
= gen_reg_rtx (tmode
);
17273 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17275 rtx x
= gen_reg_rtx (V4SImode
);
17276 emit_insn (gen_sse2_loadd (x
, op1
));
17277 op1
= gen_lowpart (TImode
, x
);
17280 /* The insn must want input operands in the same modes as the
17282 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17283 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17285 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17286 op0
= copy_to_mode_reg (mode0
, op0
);
17287 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17288 op1
= copy_to_mode_reg (mode1
, op1
);
17290 /* ??? Using ix86_fixup_binary_operands is problematic when
17291 we've got mismatched modes. Fake it. */
17297 if (tmode
== mode0
&& tmode
== mode1
)
17299 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17303 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17305 op0
= force_reg (mode0
, op0
);
17306 op1
= force_reg (mode1
, op1
);
17307 target
= gen_reg_rtx (tmode
);
17310 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17317 /* Subroutine of ix86_expand_builtin to take care of stores. */
17320 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
17323 tree arg0
= TREE_VALUE (arglist
);
17324 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17325 rtx op0
= expand_normal (arg0
);
17326 rtx op1
= expand_normal (arg1
);
17327 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17328 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17330 if (VECTOR_MODE_P (mode1
))
17331 op1
= safe_vector_operand (op1
, mode1
);
17333 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17334 op1
= copy_to_mode_reg (mode1
, op1
);
17336 pat
= GEN_FCN (icode
) (op0
, op1
);
17342 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17345 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
17346 rtx target
, int do_load
)
17349 tree arg0
= TREE_VALUE (arglist
);
17350 rtx op0
= expand_normal (arg0
);
17351 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17352 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17354 if (optimize
|| !target
17355 || GET_MODE (target
) != tmode
17356 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17357 target
= gen_reg_rtx (tmode
);
17359 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17362 if (VECTOR_MODE_P (mode0
))
17363 op0
= safe_vector_operand (op0
, mode0
);
17365 if ((optimize
&& !register_operand (op0
, mode0
))
17366 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17367 op0
= copy_to_mode_reg (mode0
, op0
);
17370 pat
= GEN_FCN (icode
) (target
, op0
);
17377 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17378 sqrtss, rsqrtss, rcpss. */
17381 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
17384 tree arg0
= TREE_VALUE (arglist
);
17385 rtx op1
, op0
= expand_normal (arg0
);
17386 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17387 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17389 if (optimize
|| !target
17390 || GET_MODE (target
) != tmode
17391 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17392 target
= gen_reg_rtx (tmode
);
17394 if (VECTOR_MODE_P (mode0
))
17395 op0
= safe_vector_operand (op0
, mode0
);
17397 if ((optimize
&& !register_operand (op0
, mode0
))
17398 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17399 op0
= copy_to_mode_reg (mode0
, op0
);
17402 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17403 op1
= copy_to_mode_reg (mode0
, op1
);
17405 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17412 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17415 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
17419 tree arg0
= TREE_VALUE (arglist
);
17420 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17421 rtx op0
= expand_normal (arg0
);
17422 rtx op1
= expand_normal (arg1
);
17424 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17425 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17426 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17427 enum rtx_code comparison
= d
->comparison
;
17429 if (VECTOR_MODE_P (mode0
))
17430 op0
= safe_vector_operand (op0
, mode0
);
17431 if (VECTOR_MODE_P (mode1
))
17432 op1
= safe_vector_operand (op1
, mode1
);
17434 /* Swap operands if we have a comparison that isn't available in
17436 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17438 rtx tmp
= gen_reg_rtx (mode1
);
17439 emit_move_insn (tmp
, op1
);
17444 if (optimize
|| !target
17445 || GET_MODE (target
) != tmode
17446 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17447 target
= gen_reg_rtx (tmode
);
17449 if ((optimize
&& !register_operand (op0
, mode0
))
17450 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17451 op0
= copy_to_mode_reg (mode0
, op0
);
17452 if ((optimize
&& !register_operand (op1
, mode1
))
17453 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17454 op1
= copy_to_mode_reg (mode1
, op1
);
17456 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17457 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17464 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17467 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
17471 tree arg0
= TREE_VALUE (arglist
);
17472 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17473 rtx op0
= expand_normal (arg0
);
17474 rtx op1
= expand_normal (arg1
);
17476 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17477 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17478 enum rtx_code comparison
= d
->comparison
;
17480 if (VECTOR_MODE_P (mode0
))
17481 op0
= safe_vector_operand (op0
, mode0
);
17482 if (VECTOR_MODE_P (mode1
))
17483 op1
= safe_vector_operand (op1
, mode1
);
17485 /* Swap operands if we have a comparison that isn't available in
17487 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17494 target
= gen_reg_rtx (SImode
);
17495 emit_move_insn (target
, const0_rtx
);
17496 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17498 if ((optimize
&& !register_operand (op0
, mode0
))
17499 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17500 op0
= copy_to_mode_reg (mode0
, op0
);
17501 if ((optimize
&& !register_operand (op1
, mode1
))
17502 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17503 op1
= copy_to_mode_reg (mode1
, op1
);
17505 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17506 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17510 emit_insn (gen_rtx_SET (VOIDmode
,
17511 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17512 gen_rtx_fmt_ee (comparison
, QImode
,
17516 return SUBREG_REG (target
);
17519 /* Return the integer constant in ARG. Constrain it to be in the range
17520 of the subparts of VEC_TYPE; issue an error if not. */
17523 get_element_number (tree vec_type
, tree arg
)
17525 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17527 if (!host_integerp (arg
, 1)
17528 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17530 error ("selector must be an integer constant in the range 0..%wi", max
);
17537 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17538 ix86_expand_vector_init. We DO have language-level syntax for this, in
17539 the form of (type){ init-list }. Except that since we can't place emms
17540 instructions from inside the compiler, we can't allow the use of MMX
17541 registers unless the user explicitly asks for it. So we do *not* define
17542 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17543 we have builtins invoked by mmintrin.h that gives us license to emit
17544 these sorts of instructions. */
17547 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
17549 enum machine_mode tmode
= TYPE_MODE (type
);
17550 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17551 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17552 rtvec v
= rtvec_alloc (n_elt
);
17554 gcc_assert (VECTOR_MODE_P (tmode
));
17556 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
17558 rtx x
= expand_normal (TREE_VALUE (arglist
));
17559 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17562 gcc_assert (arglist
== NULL
);
17564 if (!target
|| !register_operand (target
, tmode
))
17565 target
= gen_reg_rtx (tmode
);
17567 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17571 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17572 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17573 had a language-level syntax for referencing vector elements. */
17576 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
17578 enum machine_mode tmode
, mode0
;
17583 arg0
= TREE_VALUE (arglist
);
17584 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17586 op0
= expand_normal (arg0
);
17587 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17589 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17590 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17591 gcc_assert (VECTOR_MODE_P (mode0
));
17593 op0
= force_reg (mode0
, op0
);
17595 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17596 target
= gen_reg_rtx (tmode
);
17598 ix86_expand_vector_extract (true, target
, op0
, elt
);
17603 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17604 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17605 a language-level syntax for referencing vector elements. */
17608 ix86_expand_vec_set_builtin (tree arglist
)
17610 enum machine_mode tmode
, mode1
;
17611 tree arg0
, arg1
, arg2
;
17615 arg0
= TREE_VALUE (arglist
);
17616 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17617 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17619 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17620 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17621 gcc_assert (VECTOR_MODE_P (tmode
));
17623 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17624 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17625 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17627 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17628 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17630 op0
= force_reg (tmode
, op0
);
17631 op1
= force_reg (mode1
, op1
);
17633 ix86_expand_vector_set (true, op0
, op1
, elt
);
17638 /* Expand an expression EXP that calls a built-in function,
17639 with result going to TARGET if that's convenient
17640 (and in mode MODE if that's convenient).
17641 SUBTARGET may be used as the target for computing one of EXP's operands.
17642 IGNORE is nonzero if the value is to be ignored. */
17645 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17646 enum machine_mode mode ATTRIBUTE_UNUSED
,
17647 int ignore ATTRIBUTE_UNUSED
)
17649 const struct builtin_description
*d
;
17651 enum insn_code icode
;
17652 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
17653 tree arglist
= TREE_OPERAND (exp
, 1);
17654 tree arg0
, arg1
, arg2
, arg3
;
17655 rtx op0
, op1
, op2
, op3
, pat
;
17656 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17657 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17661 case IX86_BUILTIN_EMMS
:
17662 emit_insn (gen_mmx_emms ());
17665 case IX86_BUILTIN_SFENCE
:
17666 emit_insn (gen_sse_sfence ());
17669 case IX86_BUILTIN_MASKMOVQ
:
17670 case IX86_BUILTIN_MASKMOVDQU
:
17671 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17672 ? CODE_FOR_mmx_maskmovq
17673 : CODE_FOR_sse2_maskmovdqu
);
17674 /* Note the arg order is different from the operand order. */
17675 arg1
= TREE_VALUE (arglist
);
17676 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
17677 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17678 op0
= expand_normal (arg0
);
17679 op1
= expand_normal (arg1
);
17680 op2
= expand_normal (arg2
);
17681 mode0
= insn_data
[icode
].operand
[0].mode
;
17682 mode1
= insn_data
[icode
].operand
[1].mode
;
17683 mode2
= insn_data
[icode
].operand
[2].mode
;
17685 op0
= force_reg (Pmode
, op0
);
17686 op0
= gen_rtx_MEM (mode1
, op0
);
17688 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17689 op0
= copy_to_mode_reg (mode0
, op0
);
17690 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17691 op1
= copy_to_mode_reg (mode1
, op1
);
17692 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17693 op2
= copy_to_mode_reg (mode2
, op2
);
17694 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17700 case IX86_BUILTIN_SQRTSS
:
17701 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
17702 case IX86_BUILTIN_RSQRTSS
:
17703 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
17704 case IX86_BUILTIN_RCPSS
:
17705 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
17707 case IX86_BUILTIN_LOADUPS
:
17708 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
17710 case IX86_BUILTIN_STOREUPS
:
17711 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
17713 case IX86_BUILTIN_LOADHPS
:
17714 case IX86_BUILTIN_LOADLPS
:
17715 case IX86_BUILTIN_LOADHPD
:
17716 case IX86_BUILTIN_LOADLPD
:
17717 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17718 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17719 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17720 : CODE_FOR_sse2_loadlpd
);
17721 arg0
= TREE_VALUE (arglist
);
17722 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17723 op0
= expand_normal (arg0
);
17724 op1
= expand_normal (arg1
);
17725 tmode
= insn_data
[icode
].operand
[0].mode
;
17726 mode0
= insn_data
[icode
].operand
[1].mode
;
17727 mode1
= insn_data
[icode
].operand
[2].mode
;
17729 op0
= force_reg (mode0
, op0
);
17730 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17731 if (optimize
|| target
== 0
17732 || GET_MODE (target
) != tmode
17733 || !register_operand (target
, tmode
))
17734 target
= gen_reg_rtx (tmode
);
17735 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17741 case IX86_BUILTIN_STOREHPS
:
17742 case IX86_BUILTIN_STORELPS
:
17743 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17744 : CODE_FOR_sse_storelps
);
17745 arg0
= TREE_VALUE (arglist
);
17746 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17747 op0
= expand_normal (arg0
);
17748 op1
= expand_normal (arg1
);
17749 mode0
= insn_data
[icode
].operand
[0].mode
;
17750 mode1
= insn_data
[icode
].operand
[1].mode
;
17752 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17753 op1
= force_reg (mode1
, op1
);
17755 pat
= GEN_FCN (icode
) (op0
, op1
);
17761 case IX86_BUILTIN_MOVNTPS
:
17762 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
17763 case IX86_BUILTIN_MOVNTQ
:
17764 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
17766 case IX86_BUILTIN_LDMXCSR
:
17767 op0
= expand_normal (TREE_VALUE (arglist
));
17768 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17769 emit_move_insn (target
, op0
);
17770 emit_insn (gen_sse_ldmxcsr (target
));
17773 case IX86_BUILTIN_STMXCSR
:
17774 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17775 emit_insn (gen_sse_stmxcsr (target
));
17776 return copy_to_mode_reg (SImode
, target
);
17778 case IX86_BUILTIN_SHUFPS
:
17779 case IX86_BUILTIN_SHUFPD
:
17780 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17781 ? CODE_FOR_sse_shufps
17782 : CODE_FOR_sse2_shufpd
);
17783 arg0
= TREE_VALUE (arglist
);
17784 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17785 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17786 op0
= expand_normal (arg0
);
17787 op1
= expand_normal (arg1
);
17788 op2
= expand_normal (arg2
);
17789 tmode
= insn_data
[icode
].operand
[0].mode
;
17790 mode0
= insn_data
[icode
].operand
[1].mode
;
17791 mode1
= insn_data
[icode
].operand
[2].mode
;
17792 mode2
= insn_data
[icode
].operand
[3].mode
;
17794 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17795 op0
= copy_to_mode_reg (mode0
, op0
);
17796 if ((optimize
&& !register_operand (op1
, mode1
))
17797 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17798 op1
= copy_to_mode_reg (mode1
, op1
);
17799 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17801 /* @@@ better error message */
17802 error ("mask must be an immediate");
17803 return gen_reg_rtx (tmode
);
17805 if (optimize
|| target
== 0
17806 || GET_MODE (target
) != tmode
17807 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17808 target
= gen_reg_rtx (tmode
);
17809 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17815 case IX86_BUILTIN_PSHUFW
:
17816 case IX86_BUILTIN_PSHUFD
:
17817 case IX86_BUILTIN_PSHUFHW
:
17818 case IX86_BUILTIN_PSHUFLW
:
17819 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17820 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17821 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17822 : CODE_FOR_mmx_pshufw
);
17823 arg0
= TREE_VALUE (arglist
);
17824 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17825 op0
= expand_normal (arg0
);
17826 op1
= expand_normal (arg1
);
17827 tmode
= insn_data
[icode
].operand
[0].mode
;
17828 mode1
= insn_data
[icode
].operand
[1].mode
;
17829 mode2
= insn_data
[icode
].operand
[2].mode
;
17831 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17832 op0
= copy_to_mode_reg (mode1
, op0
);
17833 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17835 /* @@@ better error message */
17836 error ("mask must be an immediate");
17840 || GET_MODE (target
) != tmode
17841 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17842 target
= gen_reg_rtx (tmode
);
17843 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17849 case IX86_BUILTIN_PSLLDQI128
:
17850 case IX86_BUILTIN_PSRLDQI128
:
17851 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17852 : CODE_FOR_sse2_lshrti3
);
17853 arg0
= TREE_VALUE (arglist
);
17854 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17855 op0
= expand_normal (arg0
);
17856 op1
= expand_normal (arg1
);
17857 tmode
= insn_data
[icode
].operand
[0].mode
;
17858 mode1
= insn_data
[icode
].operand
[1].mode
;
17859 mode2
= insn_data
[icode
].operand
[2].mode
;
17861 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17863 op0
= copy_to_reg (op0
);
17864 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17866 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17868 error ("shift must be an immediate");
17871 target
= gen_reg_rtx (V2DImode
);
17872 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
17878 case IX86_BUILTIN_FEMMS
:
17879 emit_insn (gen_mmx_femms ());
17882 case IX86_BUILTIN_PAVGUSB
:
17883 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
17885 case IX86_BUILTIN_PF2ID
:
17886 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
17888 case IX86_BUILTIN_PFACC
:
17889 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
17891 case IX86_BUILTIN_PFADD
:
17892 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
17894 case IX86_BUILTIN_PFCMPEQ
:
17895 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
17897 case IX86_BUILTIN_PFCMPGE
:
17898 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
17900 case IX86_BUILTIN_PFCMPGT
:
17901 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
17903 case IX86_BUILTIN_PFMAX
:
17904 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
17906 case IX86_BUILTIN_PFMIN
:
17907 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
17909 case IX86_BUILTIN_PFMUL
:
17910 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
17912 case IX86_BUILTIN_PFRCP
:
17913 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
17915 case IX86_BUILTIN_PFRCPIT1
:
17916 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
17918 case IX86_BUILTIN_PFRCPIT2
:
17919 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
17921 case IX86_BUILTIN_PFRSQIT1
:
17922 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
17924 case IX86_BUILTIN_PFRSQRT
:
17925 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
17927 case IX86_BUILTIN_PFSUB
:
17928 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
17930 case IX86_BUILTIN_PFSUBR
:
17931 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
17933 case IX86_BUILTIN_PI2FD
:
17934 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
17936 case IX86_BUILTIN_PMULHRW
:
17937 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
17939 case IX86_BUILTIN_PF2IW
:
17940 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
17942 case IX86_BUILTIN_PFNACC
:
17943 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
17945 case IX86_BUILTIN_PFPNACC
:
17946 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
17948 case IX86_BUILTIN_PI2FW
:
17949 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
17951 case IX86_BUILTIN_PSWAPDSI
:
17952 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
17954 case IX86_BUILTIN_PSWAPDSF
:
17955 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
17957 case IX86_BUILTIN_SQRTSD
:
17958 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
17959 case IX86_BUILTIN_LOADUPD
:
17960 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
17961 case IX86_BUILTIN_STOREUPD
:
17962 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
17964 case IX86_BUILTIN_MFENCE
:
17965 emit_insn (gen_sse2_mfence ());
17967 case IX86_BUILTIN_LFENCE
:
17968 emit_insn (gen_sse2_lfence ());
17971 case IX86_BUILTIN_CLFLUSH
:
17972 arg0
= TREE_VALUE (arglist
);
17973 op0
= expand_normal (arg0
);
17974 icode
= CODE_FOR_sse2_clflush
;
17975 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
17976 op0
= copy_to_mode_reg (Pmode
, op0
);
17978 emit_insn (gen_sse2_clflush (op0
));
17981 case IX86_BUILTIN_MOVNTPD
:
17982 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
17983 case IX86_BUILTIN_MOVNTDQ
:
17984 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
17985 case IX86_BUILTIN_MOVNTI
:
17986 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
17988 case IX86_BUILTIN_LOADDQU
:
17989 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
17990 case IX86_BUILTIN_STOREDQU
:
17991 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
17993 case IX86_BUILTIN_MONITOR
:
17994 arg0
= TREE_VALUE (arglist
);
17995 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17996 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17997 op0
= expand_normal (arg0
);
17998 op1
= expand_normal (arg1
);
17999 op2
= expand_normal (arg2
);
18001 op0
= copy_to_mode_reg (Pmode
, op0
);
18003 op1
= copy_to_mode_reg (SImode
, op1
);
18005 op2
= copy_to_mode_reg (SImode
, op2
);
18007 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18009 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18012 case IX86_BUILTIN_MWAIT
:
18013 arg0
= TREE_VALUE (arglist
);
18014 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
18015 op0
= expand_normal (arg0
);
18016 op1
= expand_normal (arg1
);
18018 op0
= copy_to_mode_reg (SImode
, op0
);
18020 op1
= copy_to_mode_reg (SImode
, op1
);
18021 emit_insn (gen_sse3_mwait (op0
, op1
));
18024 case IX86_BUILTIN_LDDQU
:
18025 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
18028 case IX86_BUILTIN_PALIGNR
:
18029 case IX86_BUILTIN_PALIGNR128
:
18030 if (fcode
== IX86_BUILTIN_PALIGNR
)
18032 icode
= CODE_FOR_ssse3_palignrdi
;
18037 icode
= CODE_FOR_ssse3_palignrti
;
18040 arg0
= TREE_VALUE (arglist
);
18041 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
18042 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
18043 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18044 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18045 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18046 tmode
= insn_data
[icode
].operand
[0].mode
;
18047 mode1
= insn_data
[icode
].operand
[1].mode
;
18048 mode2
= insn_data
[icode
].operand
[2].mode
;
18049 mode3
= insn_data
[icode
].operand
[3].mode
;
18051 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18053 op0
= copy_to_reg (op0
);
18054 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18056 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18058 op1
= copy_to_reg (op1
);
18059 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18061 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18063 error ("shift must be an immediate");
18066 target
= gen_reg_rtx (mode
);
18067 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18074 case IX86_BUILTIN_MOVNTSD
:
18075 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, arglist
);
18077 case IX86_BUILTIN_MOVNTSS
:
18078 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, arglist
);
18080 case IX86_BUILTIN_INSERTQ
:
18081 case IX86_BUILTIN_EXTRQ
:
18082 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18083 ? CODE_FOR_sse4a_extrq
18084 : CODE_FOR_sse4a_insertq
);
18085 arg0
= TREE_VALUE (arglist
);
18086 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
18087 op0
= expand_normal (arg0
);
18088 op1
= expand_normal (arg1
);
18089 tmode
= insn_data
[icode
].operand
[0].mode
;
18090 mode1
= insn_data
[icode
].operand
[1].mode
;
18091 mode2
= insn_data
[icode
].operand
[2].mode
;
18092 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18093 op0
= copy_to_mode_reg (mode1
, op0
);
18094 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18095 op1
= copy_to_mode_reg (mode2
, op1
);
18096 if (optimize
|| target
== 0
18097 || GET_MODE (target
) != tmode
18098 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18099 target
= gen_reg_rtx (tmode
);
18100 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18106 case IX86_BUILTIN_EXTRQI
:
18107 icode
= CODE_FOR_sse4a_extrqi
;
18108 arg0
= TREE_VALUE (arglist
);
18109 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
18110 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
18111 op0
= expand_normal (arg0
);
18112 op1
= expand_normal (arg1
);
18113 op2
= expand_normal (arg2
);
18114 tmode
= insn_data
[icode
].operand
[0].mode
;
18115 mode1
= insn_data
[icode
].operand
[1].mode
;
18116 mode2
= insn_data
[icode
].operand
[2].mode
;
18117 mode3
= insn_data
[icode
].operand
[3].mode
;
18118 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18119 op0
= copy_to_mode_reg (mode1
, op0
);
18120 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18122 error ("index mask must be an immediate");
18123 return gen_reg_rtx (tmode
);
18125 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18127 error ("length mask must be an immediate");
18128 return gen_reg_rtx (tmode
);
18130 if (optimize
|| target
== 0
18131 || GET_MODE (target
) != tmode
18132 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18133 target
= gen_reg_rtx (tmode
);
18134 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18140 case IX86_BUILTIN_INSERTQI
:
18141 icode
= CODE_FOR_sse4a_insertqi
;
18142 arg0
= TREE_VALUE (arglist
);
18143 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
18144 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
18145 arg3
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
18146 op0
= expand_normal (arg0
);
18147 op1
= expand_normal (arg1
);
18148 op2
= expand_normal (arg2
);
18149 op3
= expand_normal (arg3
);
18150 tmode
= insn_data
[icode
].operand
[0].mode
;
18151 mode1
= insn_data
[icode
].operand
[1].mode
;
18152 mode2
= insn_data
[icode
].operand
[2].mode
;
18153 mode3
= insn_data
[icode
].operand
[3].mode
;
18154 mode4
= insn_data
[icode
].operand
[4].mode
;
18156 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18157 op0
= copy_to_mode_reg (mode1
, op0
);
18159 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18160 op1
= copy_to_mode_reg (mode2
, op1
);
18162 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18164 error ("index mask must be an immediate");
18165 return gen_reg_rtx (tmode
);
18167 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18169 error ("length mask must be an immediate");
18170 return gen_reg_rtx (tmode
);
18172 if (optimize
|| target
== 0
18173 || GET_MODE (target
) != tmode
18174 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18175 target
= gen_reg_rtx (tmode
);
18176 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18182 case IX86_BUILTIN_VEC_INIT_V2SI
:
18183 case IX86_BUILTIN_VEC_INIT_V4HI
:
18184 case IX86_BUILTIN_VEC_INIT_V8QI
:
18185 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
18187 case IX86_BUILTIN_VEC_EXT_V2DF
:
18188 case IX86_BUILTIN_VEC_EXT_V2DI
:
18189 case IX86_BUILTIN_VEC_EXT_V4SF
:
18190 case IX86_BUILTIN_VEC_EXT_V4SI
:
18191 case IX86_BUILTIN_VEC_EXT_V8HI
:
18192 case IX86_BUILTIN_VEC_EXT_V2SI
:
18193 case IX86_BUILTIN_VEC_EXT_V4HI
:
18194 return ix86_expand_vec_ext_builtin (arglist
, target
);
18196 case IX86_BUILTIN_VEC_SET_V8HI
:
18197 case IX86_BUILTIN_VEC_SET_V4HI
:
18198 return ix86_expand_vec_set_builtin (arglist
);
18204 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18205 if (d
->code
== fcode
)
18207 /* Compares are treated specially. */
18208 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18209 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18210 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18211 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18212 return ix86_expand_sse_compare (d
, arglist
, target
);
18214 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
18217 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18218 if (d
->code
== fcode
)
18219 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
18221 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18222 if (d
->code
== fcode
)
18223 return ix86_expand_sse_comi (d
, arglist
, target
);
18225 gcc_unreachable ();
18228 /* Returns a function decl for a vectorized version of the builtin function
18229 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18230 if it is not available. */
/* NOTE(review): this region is extraction-garbled -- each statement is split
   across several physical lines and the original file line numbers (e.g.
   "18233") are fused into the text.  Interior lines (braces, `switch (fn)',
   `break;', the trailing `return NULL_TREE;') appear to have been dropped by
   the extraction, per the gaps in the embedded numbering.  Only comments are
   added here; the surviving code bytes are untouched.  */
18233 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18236 enum machine_mode in_mode
, out_mode
;
/* Both the result and input types must be vectors; otherwise there is no
   vectorized counterpart to return.  */
18239 if (TREE_CODE (type_out
) != VECTOR_TYPE
18240 || TREE_CODE (type_in
) != VECTOR_TYPE
)
/* Extract element mode and lane count of each vector type; the per-builtin
   cases below match on these exactly.  */
18243 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18244 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18245 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18246 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
/* sqrt on two doubles -> SSE2 sqrtpd.  */
18250 case BUILT_IN_SQRT
:
18251 if (out_mode
== DFmode
&& out_n
== 2
18252 && in_mode
== DFmode
&& in_n
== 2)
18253 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
/* sqrtf on four floats -> SSE sqrtps.  */
18256 case BUILT_IN_SQRTF
:
18257 if (out_mode
== SFmode
&& out_n
== 4
18258 && in_mode
== SFmode
&& in_n
== 4)
18259 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
/* lrintf on four floats producing four SImode lanes -> cvtps2dq.  */
18262 case BUILT_IN_LRINTF
:
18263 if (out_mode
== SImode
&& out_n
== 4
18264 && in_mode
== SFmode
&& in_n
== 4)
18265 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18275 /* Returns a decl of a function that implements conversion of the
18276 input vector of type TYPE, or NULL_TREE if it is not available. */
/* NOTE(review): extraction-garbled region -- statements split across lines,
   original line numbers fused in, and several lines (the outer `switch
   (code)', the FLOAT_EXPR label, `default:' arms, closing braces) dropped.
   Code bytes untouched; comments only.  */
18279 ix86_builtin_conversion (enum tree_code code
, tree type
)
/* Only vector types have builtin conversions.  */
18281 if (TREE_CODE (type
) != VECTOR_TYPE
)
/* (Visible arm, presumably under FLOAT_EXPR -- TODO confirm against the
   dropped lines): integer vector -> float vector uses cvtdq2ps.  */
18287 switch (TYPE_MODE (type
))
18290 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
/* Truncating float vector -> integer vector uses cvttps2dq.  */
18295 case FIX_TRUNC_EXPR
:
18296 switch (TYPE_MODE (type
))
18299 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18309 /* Store OPERAND to the memory after reload is completed. This means
18310 that we can't easily use assign_stack_local. */
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  Several interior lines were dropped (the `result' declaration,
   `switch (mode)' skeleton, push bodies, `return result;').  */
18312 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
/* This helper is only legal after reload -- it manipulates the hard stack
   pointer directly.  */
18316 gcc_assert (reload_completed
);
/* Red-zone targets (x86-64 SysV): spill below the stack pointer without
   adjusting it.  */
18317 if (TARGET_RED_ZONE
)
18319 result
= gen_rtx_MEM (mode
,
18320 gen_rtx_PLUS (Pmode
,
18322 GEN_INT (-RED_ZONE_SIZE
)));
18323 emit_move_insn (result
, operand
);
/* 64-bit without a red zone: push the value (widened to DImode) with a
   pre-decrement store.  */
18325 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18331 operand
= gen_lowpart (DImode
, operand
);
18335 gen_rtx_SET (VOIDmode
,
18336 gen_rtx_MEM (DImode
,
18337 gen_rtx_PRE_DEC (DImode
,
18338 stack_pointer_rtx
)),
18342 gcc_unreachable ();
18344 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* 32-bit path: DImode values are split into two SImode halves and pushed
   with two pre-decrement stores.  */
18353 split_di (&operand
, 1, operands
, operands
+ 1);
18355 gen_rtx_SET (VOIDmode
,
18356 gen_rtx_MEM (SImode
,
18357 gen_rtx_PRE_DEC (Pmode
,
18358 stack_pointer_rtx
)),
18361 gen_rtx_SET (VOIDmode
,
18362 gen_rtx_MEM (SImode
,
18363 gen_rtx_PRE_DEC (Pmode
,
18364 stack_pointer_rtx
)),
18369 /* Store HImodes as SImodes. */
18370 operand
= gen_lowpart (SImode
, operand
);
18374 gen_rtx_SET (VOIDmode
,
18375 gen_rtx_MEM (GET_MODE (operand
),
18376 gen_rtx_PRE_DEC (SImode
,
18377 stack_pointer_rtx
)),
18381 gcc_unreachable ();
/* The result is the freshly pushed stack slot, addressed via the (now
   decremented) stack pointer.  */
18383 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18388 /* Free operand from the memory. */
18390 ix86_free_from_memory (enum machine_mode mode
)
18392 if (!TARGET_RED_ZONE
)
18396 if (mode
== DImode
|| TARGET_64BIT
)
18400 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18401 to pop or add instruction if registers are available. */
18402 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18403 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18408 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18409 QImode must go into class Q_REGS.
18410 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18411 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  Several return statements and braces were dropped by the
   extraction (gaps in the embedded line numbering).  */
18413 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18415 enum machine_mode mode
= GET_MODE (x
);
18417 /* We're only allowed to return a subclass of CLASS. Many of the
18418 following checks fail for NO_REGS, so eliminate that early. */
18419 if (class == NO_REGS
)
18422 /* All classes can load zeros. */
18423 if (x
== CONST0_RTX (mode
))
18426 /* Force constants into memory if we are loading a (nonzero) constant into
18427 an MMX or SSE register. This is because there are no MMX/SSE instructions
18428 to load from a constant. */
18430 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18433 /* Prefer SSE regs only, if we can use them for math. */
18434 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18435 return SSE_CLASS_P (class) ? class : NO_REGS
;
18437 /* Floating-point constants need more complex checks. */
18438 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18440 /* General regs can load everything. */
18441 if (reg_class_subset_p (class, GENERAL_REGS
))
18444 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18445 zero above. We only want to wind up preferring 80387 registers if
18446 we plan on doing computation with them. */
18448 && standard_80387_constant_p (x
))
18450 /* Limit class to non-sse. */
18451 if (class == FLOAT_SSE_REGS
)
18453 if (class == FP_TOP_SSE_REGS
)
18455 if (class == FP_SECOND_SSE_REGS
)
18456 return FP_SECOND_REG
;
18457 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18464 /* Generally when we see PLUS here, it's the function invariant
18465 (plus soft-fp const_int). Which can only be computed into general
18467 if (GET_CODE (x
) == PLUS
)
18468 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18470 /* QImode constants are easy to load, but non-constant QImode data
18471 must go into Q_REGS. */
18472 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18474 if (reg_class_subset_p (class, Q_REGS
))
18476 if (reg_class_subset_p (Q_REGS
, class))
18484 /* Discourage putting floating-point values in SSE registers unless
18485 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  Dropped lines include the x87 branch's return statements.  */
18487 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18489 enum machine_mode mode
= GET_MODE (x
);
18491 /* Restrict the output reload class to the register bank that we are doing
18492 math on. If we would like not to return a subset of CLASS, reject this
18493 alternative: if reload cannot do this, it will still use its choice. */
/* (Redundant re-read of the mode; MODE was already initialized above.)  */
18494 mode
= GET_MODE (x
);
18495 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18496 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
/* x87 math: narrow mixed x87/SSE classes down to their x87 part.  */
18498 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18500 if (class == FP_TOP_SSE_REGS
)
18502 else if (class == FP_SECOND_SSE_REGS
)
18503 return FP_SECOND_REG
;
18505 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18511 /* If we are copying between general and FP registers, we need a memory
18512 location. The same is true for SSE and MMX registers.
18514 The macro can't work reliably when one of the CLASSES is class containing
18515 registers from multiple units (SSE, MMX, integer). We avoid this by never
18516 combining those units in single alternative in the machine description.
18517 Ensure that this constraint holds to avoid unexpected surprises.
18519 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18520 enforce these sanity checks. */
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  Return statements and braces were dropped by the extraction.  */
18523 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18524 enum machine_mode mode
, int strict
)
/* Sanity check: no class may straddle units (mixed classes fail the
   MAYBE_*_CLASS_P vs. *_CLASS_P equality tests below).  */
18526 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18527 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18528 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18529 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18530 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18531 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18533 gcc_assert (!strict
);
/* Crossing into or out of the x87 unit always goes through memory.  */
18537 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18540 /* ??? This is a lie. We do have moves between mmx/general, and for
18541 mmx/sse2. But by saying we need secondary memory we discourage the
18542 register allocator from using the mmx registers unless needed. */
18543 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18546 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18548 /* SSE1 doesn't have any direct moves from other classes. */
18552 /* If the target says that inter-unit moves are more expensive
18553 than moving through memory, then don't generate them. */
18554 if (!TARGET_INTER_UNIT_MOVES
)
18557 /* Between SSE and general, we have moves no larger than word size. */
18558 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18565 /* Return true if the registers in CLASS cannot represent the change from
18566 modes FROM to TO. */
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  The opening brace / early-exit lines (18571-18574) and the return
   statements were dropped by the extraction.  */
18569 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18570 enum reg_class
class)
18575 /* x87 registers can't do subreg at all, as all values are reformatted
18576 to extended precision. */
18577 if (MAYBE_FLOAT_CLASS_P (class))
18580 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18582 /* Vector registers do not support QI or HImode loads. If we don't
18583 disallow a change to these modes, reload will assume it's ok to
18584 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18585 the vec_dupv4hi pattern. */
18586 if (GET_MODE_SIZE (from
) < 4)
18589 /* Vector registers do not support subreg with nonzero offsets, which
18590 are otherwise valid for integer registers. Since we can't see
18591 whether we have a nonzero offset from here, prohibit all
18592 nonparadoxical subregs changing size. */
18593 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18600 /* Return the cost of moving data from a register in class CLASS1 to
18601 one in class CLASS2.
18603 It is not required that the cost always equal 2 when FROM is the same as TO;
18604 on some machines it is expensive to move between registers if they are not
18605 general registers. */
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  The `cost' declaration/initialization, several returns and the
   final general-register return were dropped by the extraction.  */
18608 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18609 enum reg_class class2
)
18611 /* In case we require secondary memory, compute cost of the store followed
18612 by load. In order to avoid bad register allocation choices, we need
18613 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18615 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* Charge the worse of in/out memory cost for each side of the bounce
   through memory.  */
18619 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18620 MEMORY_MOVE_COST (mode
, class1
, 1));
18621 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18622 MEMORY_MOVE_COST (mode
, class2
, 1));
18624 /* In case of copying from general_purpose_register we may emit multiple
18625 stores followed by single load causing memory size mismatch stall.
18626 Count this as arbitrarily high cost of 20. */
18627 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18630 /* In the case of FP/MMX moves, the registers actually overlap, and we
18631 have to switch modes in order to treat them differently. */
18632 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18633 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18639 /* Moves between SSE/MMX and integer unit are expensive. */
18640 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18641 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18642 return ix86_cost
->mmxsse_to_integer
;
18643 if (MAYBE_FLOAT_CLASS_P (class1
))
18644 return ix86_cost
->fp_move
;
18645 if (MAYBE_SSE_CLASS_P (class1
))
18646 return ix86_cost
->sse_move
;
18647 if (MAYBE_MMX_CLASS_P (class1
))
18648 return ix86_cost
->mmx_move
;
18652 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  Several return statements/braces were dropped by the extraction.  */
18655 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18657 /* Flags and only flags can only hold CCmode values. */
18658 if (CC_REGNO_P (regno
))
18659 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* No other register can hold a condition-code-like mode.  */
18660 if (GET_MODE_CLASS (mode
) == MODE_CC
18661 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18662 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18664 if (FP_REGNO_P (regno
))
18665 return VALID_FP_MODE_P (mode
);
18666 if (SSE_REGNO_P (regno
))
18668 /* We implement the move patterns for all vector modes into and
18669 out of SSE registers, even when no operation instructions
18671 return (VALID_SSE_REG_MODE (mode
)
18672 || VALID_SSE2_REG_MODE (mode
)
18673 || VALID_MMX_REG_MODE (mode
)
18674 || VALID_MMX_REG_MODE_3DNOW (mode
));
18676 if (MMX_REGNO_P (regno
))
18678 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18679 so if the register is available at all, then we can move data of
18680 the given mode into or out of it. */
18681 return (VALID_MMX_REG_MODE (mode
)
18682 || VALID_MMX_REG_MODE_3DNOW (mode
));
/* Remaining registers are the general-purpose set.  */
18685 if (mode
== QImode
)
18687 /* Take care for QImode values - they can be in non-QI regs,
18688 but then they do cause partial register stalls. */
18689 if (regno
< 4 || TARGET_64BIT
)
18691 if (!TARGET_PARTIAL_REG_STALL
)
/* With partial-register stalls, only allow QI in high regs once register
   allocation is underway/finished.  */
18693 return reload_in_progress
|| reload_completed
;
18695 /* We handle both integer and floats in the general purpose registers. */
18696 else if (VALID_INT_MODE_P (mode
))
18698 else if (VALID_FP_MODE_P (mode
))
18700 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18701 on to use that value in smaller contexts, this can easily force a
18702 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18703 supporting DImode, allow it. */
18704 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18710 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18711 tieable integer mode. */
18714 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18723 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18726 return TARGET_64BIT
;
18733 /* Return true if MODE1 is accessible in a register that can hold MODE2
18734 without copying. That is, all register classes that can hold MODE2
18735 can also hold MODE1. */
18738 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18740 if (mode1
== mode2
)
18743 if (ix86_tieable_integer_mode_p (mode1
)
18744 && ix86_tieable_integer_mode_p (mode2
))
18747 /* MODE2 being XFmode implies fp stack or general regs, which means we
18748 can tie any smaller floating point modes to it. Note that we do not
18749 tie this with TFmode. */
18750 if (mode2
== XFmode
)
18751 return mode1
== SFmode
|| mode1
== DFmode
;
18753 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18754 that we can tie it with SFmode. */
18755 if (mode2
== DFmode
)
18756 return mode1
== SFmode
;
18758 /* If MODE2 is only appropriate for an SSE register, then tie with
18759 any other mode acceptable to SSE registers. */
18760 if (GET_MODE_SIZE (mode2
) >= 8
18761 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18762 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
18764 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18765 with any other mode acceptable to MMX registers. */
18766 if (GET_MODE_SIZE (mode2
) == 8
18767 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18768 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
18773 /* Return the cost of moving data of mode M between a
18774 register and memory. A value of 2 is the default; this cost is
18775 relative to those in `REGISTER_MOVE_COST'.
18777 If moving between registers and memory is more expensive than
18778 between two registers, you should define this macro to express the
18781 Model also increased moving costs of QImode registers in non
/* NOTE(review): extraction-garbled region; code bytes untouched, comments
   only.  The `index' computations for each class and several case labels
   were dropped by the extraction (gaps in the embedded numbering).  */
18785 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
/* x87 classes: cost indexed by (missing) size-derived INDEX into the
   per-CPU fp load/store tables.  */
18787 if (FLOAT_CLASS_P (class))
18804 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE classes: likewise via the sse load/store tables.  */
18806 if (SSE_CLASS_P (class))
18809 switch (GET_MODE_SIZE (mode
))
18823 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX classes: likewise via the mmx load/store tables.  */
18825 if (MMX_CLASS_P (class))
18828 switch (GET_MODE_SIZE (mode
))
18839 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* General-purpose registers, dispatched on operand size.  */
18841 switch (GET_MODE_SIZE (mode
))
/* Byte loads into non-Q classes are modeled as movzbl.  */
18845 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18846 : ix86_cost
->movzbl_load
);
/* Byte stores outside Q_REGS carry an extra penalty.  */
18848 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18849 : ix86_cost
->int_store
[0] + 4);
18852 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18854 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18855 if (mode
== TFmode
)
18857 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18858 * (((int) GET_MODE_SIZE (mode
)
18859 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18863 /* Compute a (partial) cost for rtx X. Return true if the complete
18864 cost has been computed, and false if subexpressions should be
18865 scanned. In either case, *TOTAL contains the cost result. */
18868 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
18870 enum machine_mode mode
= GET_MODE (x
);
18878 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
18880 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
18882 else if (flag_pic
&& SYMBOLIC_CONST (x
)
18884 || (!GET_CODE (x
) != LABEL_REF
18885 && (GET_CODE (x
) != SYMBOL_REF
18886 || !SYMBOL_REF_LOCAL_P (x
)))))
18893 if (mode
== VOIDmode
)
18896 switch (standard_80387_constant_p (x
))
18901 default: /* Other constants */
18906 /* Start with (MEM (SYMBOL_REF)), since that's where
18907 it'll probably end up. Add a penalty for size. */
18908 *total
= (COSTS_N_INSNS (1)
18909 + (flag_pic
!= 0 && !TARGET_64BIT
)
18910 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
18916 /* The zero extensions is often completely free on x86_64, so make
18917 it as cheap as possible. */
18918 if (TARGET_64BIT
&& mode
== DImode
18919 && GET_MODE (XEXP (x
, 0)) == SImode
)
18921 else if (TARGET_ZERO_EXTEND_WITH_AND
)
18922 *total
= ix86_cost
->add
;
18924 *total
= ix86_cost
->movzx
;
18928 *total
= ix86_cost
->movsx
;
18932 if (CONST_INT_P (XEXP (x
, 1))
18933 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
18935 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18938 *total
= ix86_cost
->add
;
18941 if ((value
== 2 || value
== 3)
18942 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
18944 *total
= ix86_cost
->lea
;
18954 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
18956 if (CONST_INT_P (XEXP (x
, 1)))
18958 if (INTVAL (XEXP (x
, 1)) > 32)
18959 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
18961 *total
= ix86_cost
->shift_const
* 2;
18965 if (GET_CODE (XEXP (x
, 1)) == AND
)
18966 *total
= ix86_cost
->shift_var
* 2;
18968 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
18973 if (CONST_INT_P (XEXP (x
, 1)))
18974 *total
= ix86_cost
->shift_const
;
18976 *total
= ix86_cost
->shift_var
;
18981 if (FLOAT_MODE_P (mode
))
18983 *total
= ix86_cost
->fmul
;
18988 rtx op0
= XEXP (x
, 0);
18989 rtx op1
= XEXP (x
, 1);
18991 if (CONST_INT_P (XEXP (x
, 1)))
18993 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18994 for (nbits
= 0; value
!= 0; value
&= value
- 1)
18998 /* This is arbitrary. */
19001 /* Compute costs correctly for widening multiplication. */
19002 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19003 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19004 == GET_MODE_SIZE (mode
))
19006 int is_mulwiden
= 0;
19007 enum machine_mode inner_mode
= GET_MODE (op0
);
19009 if (GET_CODE (op0
) == GET_CODE (op1
))
19010 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19011 else if (CONST_INT_P (op1
))
19013 if (GET_CODE (op0
) == SIGN_EXTEND
)
19014 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19017 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19021 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19024 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19025 + nbits
* ix86_cost
->mult_bit
19026 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19035 if (FLOAT_MODE_P (mode
))
19036 *total
= ix86_cost
->fdiv
;
19038 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19042 if (FLOAT_MODE_P (mode
))
19043 *total
= ix86_cost
->fadd
;
19044 else if (GET_MODE_CLASS (mode
) == MODE_INT
19045 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19047 if (GET_CODE (XEXP (x
, 0)) == PLUS
19048 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19049 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19050 && CONSTANT_P (XEXP (x
, 1)))
19052 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19053 if (val
== 2 || val
== 4 || val
== 8)
19055 *total
= ix86_cost
->lea
;
19056 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19057 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19059 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19063 else if (GET_CODE (XEXP (x
, 0)) == MULT
19064 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19066 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19067 if (val
== 2 || val
== 4 || val
== 8)
19069 *total
= ix86_cost
->lea
;
19070 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19071 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19075 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19077 *total
= ix86_cost
->lea
;
19078 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19079 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19080 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19087 if (FLOAT_MODE_P (mode
))
19089 *total
= ix86_cost
->fadd
;
19097 if (!TARGET_64BIT
&& mode
== DImode
)
19099 *total
= (ix86_cost
->add
* 2
19100 + (rtx_cost (XEXP (x
, 0), outer_code
)
19101 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19102 + (rtx_cost (XEXP (x
, 1), outer_code
)
19103 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19109 if (FLOAT_MODE_P (mode
))
19111 *total
= ix86_cost
->fchs
;
19117 if (!TARGET_64BIT
&& mode
== DImode
)
19118 *total
= ix86_cost
->add
* 2;
19120 *total
= ix86_cost
->add
;
19124 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19125 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19126 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19127 && XEXP (x
, 1) == const0_rtx
)
19129 /* This kind of construct is implemented using test[bwl].
19130 Treat it as if we had an AND. */
19131 *total
= (ix86_cost
->add
19132 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19133 + rtx_cost (const1_rtx
, outer_code
));
19139 if (!TARGET_SSE_MATH
19141 || (mode
== DFmode
&& !TARGET_SSE2
))
19146 if (FLOAT_MODE_P (mode
))
19147 *total
= ix86_cost
->fabs
;
19151 if (FLOAT_MODE_P (mode
))
19152 *total
= ix86_cost
->fsqrt
;
19156 if (XINT (x
, 1) == UNSPEC_TP
)
19167 static int current_machopic_label_num
;
19169 /* Given a symbol name and its associated stub, write out the
19170 definition of the stub. */
19173 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19175 unsigned int length
;
19176 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19177 int label
= ++current_machopic_label_num
;
19179 /* For 64-bit we shouldn't get here. */
19180 gcc_assert (!TARGET_64BIT
);
19182 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19183 symb
= (*targetm
.strip_name_encoding
) (symb
);
19185 length
= strlen (stub
);
19186 binder_name
= alloca (length
+ 32);
19187 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19189 length
= strlen (symb
);
19190 symbol_name
= alloca (length
+ 32);
19191 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19193 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19196 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19198 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19200 fprintf (file
, "%s:\n", stub
);
19201 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19205 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19206 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19207 fprintf (file
, "\tjmp\t*%%edx\n");
19210 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19212 fprintf (file
, "%s:\n", binder_name
);
19216 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19217 fprintf (file
, "\tpushl\t%%eax\n");
19220 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19222 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19224 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19225 fprintf (file
, "%s:\n", lazy_ptr_name
);
19226 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19227 fprintf (file
, "\t.long %s\n", binder_name
);
19231 darwin_x86_file_end (void)
19233 darwin_file_end ();
19236 #endif /* TARGET_MACHO */
19238 /* Order the registers for register allocator. */
19241 x86_order_regs_for_local_alloc (void)
19246 /* First allocate the local general purpose registers. */
19247 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19248 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19249 reg_alloc_order
[pos
++] = i
;
19251 /* Global general purpose registers. */
19252 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19253 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19254 reg_alloc_order
[pos
++] = i
;
19256 /* x87 registers come first in case we are doing FP math
19258 if (!TARGET_SSE_MATH
)
19259 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19260 reg_alloc_order
[pos
++] = i
;
19262 /* SSE registers. */
19263 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19264 reg_alloc_order
[pos
++] = i
;
19265 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19266 reg_alloc_order
[pos
++] = i
;
19268 /* x87 registers. */
19269 if (TARGET_SSE_MATH
)
19270 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19271 reg_alloc_order
[pos
++] = i
;
19273 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19274 reg_alloc_order
[pos
++] = i
;
19276 /* Initialize the rest of array as we do not allocate some registers
19278 while (pos
< FIRST_PSEUDO_REGISTER
)
19279 reg_alloc_order
[pos
++] = 0;
19282 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19283 struct attribute_spec.handler. */
19285 ix86_handle_struct_attribute (tree
*node
, tree name
,
19286 tree args ATTRIBUTE_UNUSED
,
19287 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19290 if (DECL_P (*node
))
19292 if (TREE_CODE (*node
) == TYPE_DECL
)
19293 type
= &TREE_TYPE (*node
);
19298 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19299 || TREE_CODE (*type
) == UNION_TYPE
)))
19301 warning (OPT_Wattributes
, "%qs attribute ignored",
19302 IDENTIFIER_POINTER (name
));
19303 *no_add_attrs
= true;
19306 else if ((is_attribute_p ("ms_struct", name
)
19307 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19308 || ((is_attribute_p ("gcc_struct", name
)
19309 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19311 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19312 IDENTIFIER_POINTER (name
));
19313 *no_add_attrs
= true;
19320 ix86_ms_bitfield_layout_p (tree record_type
)
19322 return (TARGET_MS_BITFIELD_LAYOUT
&&
19323 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19324 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19327 /* Returns an expression indicating where the this parameter is
19328 located on entry to the FUNCTION. */
19331 x86_this_parameter (tree function
)
19333 tree type
= TREE_TYPE (function
);
19337 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19338 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19341 if (ix86_function_regparm (type
, function
) > 0)
19345 parm
= TYPE_ARG_TYPES (type
);
19346 /* Figure out whether or not the function has a variable number of
19348 for (; parm
; parm
= TREE_CHAIN (parm
))
19349 if (TREE_VALUE (parm
) == void_type_node
)
19351 /* If not, the this parameter is in the first argument. */
19355 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19357 return gen_rtx_REG (SImode
, regno
);
19361 if (aggregate_value_p (TREE_TYPE (type
), type
))
19362 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19364 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19367 /* Determine whether x86_output_mi_thunk can succeed. */
19370 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19371 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19372 HOST_WIDE_INT vcall_offset
, tree function
)
19374 /* 64-bit can handle anything. */
19378 /* For 32-bit, everything's fine if we have one free register. */
19379 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19382 /* Need a free register for vcall_offset. */
19386 /* Need a free register for GOT references. */
19387 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19390 /* Otherwise ok. */
19394 /* Output the assembler code for a thunk function. THUNK_DECL is the
19395 declaration for the thunk function itself, FUNCTION is the decl for
19396 the target function. DELTA is an immediate constant offset to be
19397 added to THIS. If VCALL_OFFSET is nonzero, the word at
19398 *(*this + vcall_offset) should be added to THIS. */
19401 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19402 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19403 HOST_WIDE_INT vcall_offset
, tree function
)
19406 rtx
this = x86_this_parameter (function
);
19409 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19410 pull it in now and let DELTA benefit. */
19413 else if (vcall_offset
)
19415 /* Put the this parameter into %eax. */
19417 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19418 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19421 this_reg
= NULL_RTX
;
19423 /* Adjust the this parameter by a fixed constant. */
19426 xops
[0] = GEN_INT (delta
);
19427 xops
[1] = this_reg
? this_reg
: this;
19430 if (!x86_64_general_operand (xops
[0], DImode
))
19432 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19434 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19438 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19441 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19444 /* Adjust the this parameter by a value stored in the vtable. */
19448 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19451 int tmp_regno
= 2 /* ECX */;
19452 if (lookup_attribute ("fastcall",
19453 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19454 tmp_regno
= 0 /* EAX */;
19455 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19458 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19461 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19463 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19465 /* Adjust the this parameter. */
19466 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19467 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19469 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19470 xops
[0] = GEN_INT (vcall_offset
);
19472 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19473 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19475 xops
[1] = this_reg
;
19477 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19479 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19482 /* If necessary, drop THIS back to its stack slot. */
19483 if (this_reg
&& this_reg
!= this)
19485 xops
[0] = this_reg
;
19487 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19490 xops
[0] = XEXP (DECL_RTL (function
), 0);
19493 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19494 output_asm_insn ("jmp\t%P0", xops
);
19497 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19498 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19499 tmp
= gen_rtx_MEM (QImode
, tmp
);
19501 output_asm_insn ("jmp\t%A0", xops
);
19506 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19507 output_asm_insn ("jmp\t%P0", xops
);
19512 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19513 tmp
= (gen_rtx_SYMBOL_REF
19515 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19516 tmp
= gen_rtx_MEM (QImode
, tmp
);
19518 output_asm_insn ("jmp\t%0", xops
);
19521 #endif /* TARGET_MACHO */
19523 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19524 output_set_got (tmp
, NULL_RTX
);
19527 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19528 output_asm_insn ("jmp\t{*}%1", xops
);
19534 x86_file_start (void)
19536 default_file_start ();
19538 darwin_file_start ();
19540 if (X86_FILE_START_VERSION_DIRECTIVE
)
19541 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19542 if (X86_FILE_START_FLTUSED
)
19543 fputs ("\t.global\t__fltused\n", asm_out_file
);
19544 if (ix86_asm_dialect
== ASM_INTEL
)
19545 fputs ("\t.intel_syntax\n", asm_out_file
);
19549 x86_field_alignment (tree field
, int computed
)
19551 enum machine_mode mode
;
19552 tree type
= TREE_TYPE (field
);
19554 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19556 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19557 ? get_inner_array_type (type
) : type
);
19558 if (mode
== DFmode
|| mode
== DCmode
19559 || GET_MODE_CLASS (mode
) == MODE_INT
19560 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19561 return MIN (32, computed
);
19565 /* Output assembler code to FILE to increment profiler label # LABELNO
19566 for profiling a function entry. */
19568 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19573 #ifndef NO_PROFILE_COUNTERS
19574 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19576 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19580 #ifndef NO_PROFILE_COUNTERS
19581 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19583 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19587 #ifndef NO_PROFILE_COUNTERS
19588 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19589 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19591 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19595 #ifndef NO_PROFILE_COUNTERS
19596 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19597 PROFILE_COUNT_REGISTER
);
19599 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19603 /* We don't have exact information about the insn sizes, but we may assume
19604 quite safely that we are informed about all 1 byte insns and memory
19605 address sizes. This is enough to eliminate unnecessary padding in
19609 min_insn_size (rtx insn
)
19613 if (!INSN_P (insn
) || !active_insn_p (insn
))
19616 /* Discard alignments we've emit and jump instructions. */
19617 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19618 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19621 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19622 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19625 /* Important case - calls are always 5 bytes.
19626 It is common to have many calls in the row. */
19628 && symbolic_reference_mentioned_p (PATTERN (insn
))
19629 && !SIBLING_CALL_P (insn
))
19631 if (get_attr_length (insn
) <= 1)
19634 /* For normal instructions we may rely on the sizes of addresses
19635 and the presence of symbol to require 4 bytes of encoding.
19636 This is not the case for jumps where references are PC relative. */
19637 if (!JUMP_P (insn
))
19639 l
= get_attr_length_address (insn
);
19640 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19649 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19653 ix86_avoid_jump_misspredicts (void)
19655 rtx insn
, start
= get_insns ();
19656 int nbytes
= 0, njumps
= 0;
19659 /* Look for all minimal intervals of instructions containing 4 jumps.
19660 The intervals are bounded by START and INSN. NBYTES is the total
19661 size of instructions in the interval including INSN and not including
19662 START. When the NBYTES is smaller than 16 bytes, it is possible
19663 that the end of START and INSN ends up in the same 16byte page.
19665 The smallest offset in the page INSN can start is the case where START
19666 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19667 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19669 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19672 nbytes
+= min_insn_size (insn
);
19674 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19675 INSN_UID (insn
), min_insn_size (insn
));
19677 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19678 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19686 start
= NEXT_INSN (start
);
19687 if ((JUMP_P (start
)
19688 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19689 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19691 njumps
--, isjump
= 1;
19694 nbytes
-= min_insn_size (start
);
19696 gcc_assert (njumps
>= 0);
19698 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19699 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19701 if (njumps
== 3 && isjump
&& nbytes
< 16)
19703 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19706 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19707 INSN_UID (insn
), padsize
);
19708 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19713 /* AMD Athlon works faster
19714 when RET is not destination of conditional jump or directly preceded
19715 by other jump instruction. We avoid the penalty by inserting NOP just
19716 before the RET instructions in such cases. */
19718 ix86_pad_returns (void)
19723 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19725 basic_block bb
= e
->src
;
19726 rtx ret
= BB_END (bb
);
19728 bool replace
= false;
19730 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19731 || !maybe_hot_bb_p (bb
))
19733 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19734 if (active_insn_p (prev
) || LABEL_P (prev
))
19736 if (prev
&& LABEL_P (prev
))
19741 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19742 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19743 && !(e
->flags
& EDGE_FALLTHRU
))
19748 prev
= prev_active_insn (ret
);
19750 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19753 /* Empty functions get branch mispredict even when the jump destination
19754 is not visible to us. */
19755 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19760 emit_insn_before (gen_return_internal_long (), ret
);
19766 /* Implement machine specific optimizations. We implement padding of returns
19767 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19771 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19772 ix86_pad_returns ();
19773 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19774 ix86_avoid_jump_misspredicts ();
19777 /* Return nonzero when QImode register that must be represented via REX prefix
19780 x86_extended_QIreg_mentioned_p (rtx insn
)
19783 extract_insn_cached (insn
);
19784 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19785 if (REG_P (recog_data
.operand
[i
])
19786 && REGNO (recog_data
.operand
[i
]) >= 4)
19791 /* Return nonzero when P points to register encoded via REX prefix.
19792 Called via for_each_rtx. */
19794 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19796 unsigned int regno
;
19799 regno
= REGNO (*p
);
19800 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19803 /* Return true when INSN mentions register that must be encoded using REX
19806 x86_extended_reg_mentioned_p (rtx insn
)
19808 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19811 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19812 optabs would emit if we didn't have TFmode patterns. */
19815 x86_emit_floatuns (rtx operands
[2])
19817 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19818 enum machine_mode mode
, inmode
;
19820 inmode
= GET_MODE (operands
[1]);
19821 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19824 in
= force_reg (inmode
, operands
[1]);
19825 mode
= GET_MODE (out
);
19826 neglab
= gen_label_rtx ();
19827 donelab
= gen_label_rtx ();
19828 f0
= gen_reg_rtx (mode
);
19830 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
19832 expand_float (out
, in
, 0);
19834 emit_jump_insn (gen_jump (donelab
));
19837 emit_label (neglab
);
19839 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
19841 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
19843 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19845 expand_float (f0
, i0
, 0);
19847 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19849 emit_label (donelab
);
19852 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19853 with all elements equal to VAR. Return true if successful. */
19856 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19857 rtx target
, rtx val
)
19859 enum machine_mode smode
, wsmode
, wvmode
;
19874 val
= force_reg (GET_MODE_INNER (mode
), val
);
19875 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19876 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19882 if (TARGET_SSE
|| TARGET_3DNOW_A
)
19884 val
= gen_lowpart (SImode
, val
);
19885 x
= gen_rtx_TRUNCATE (HImode
, val
);
19886 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
19887 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19909 /* Extend HImode to SImode using a paradoxical SUBREG. */
19910 tmp1
= gen_reg_rtx (SImode
);
19911 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19912 /* Insert the SImode value as low element of V4SImode vector. */
19913 tmp2
= gen_reg_rtx (V4SImode
);
19914 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19915 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19916 CONST0_RTX (V4SImode
),
19918 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19919 /* Cast the V4SImode vector back to a V8HImode vector. */
19920 tmp1
= gen_reg_rtx (V8HImode
);
19921 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
19922 /* Duplicate the low short through the whole low SImode word. */
19923 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
19924 /* Cast the V8HImode vector back to a V4SImode vector. */
19925 tmp2
= gen_reg_rtx (V4SImode
);
19926 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19927 /* Replicate the low element of the V4SImode vector. */
19928 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19929 /* Cast the V2SImode back to V8HImode, and store in target. */
19930 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
19941 /* Extend QImode to SImode using a paradoxical SUBREG. */
19942 tmp1
= gen_reg_rtx (SImode
);
19943 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19944 /* Insert the SImode value as low element of V4SImode vector. */
19945 tmp2
= gen_reg_rtx (V4SImode
);
19946 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19947 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19948 CONST0_RTX (V4SImode
),
19950 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19951 /* Cast the V4SImode vector back to a V16QImode vector. */
19952 tmp1
= gen_reg_rtx (V16QImode
);
19953 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
19954 /* Duplicate the low byte through the whole low SImode word. */
19955 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19956 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19957 /* Cast the V16QImode vector back to a V4SImode vector. */
19958 tmp2
= gen_reg_rtx (V4SImode
);
19959 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19960 /* Replicate the low element of the V4SImode vector. */
19961 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19962 /* Cast the V2SImode back to V16QImode, and store in target. */
19963 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
19971 /* Replicate the value once into the next wider mode and recurse. */
19972 val
= convert_modes (wsmode
, smode
, val
, true);
19973 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
19974 GEN_INT (GET_MODE_BITSIZE (smode
)),
19975 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19976 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
19978 x
= gen_reg_rtx (wvmode
);
19979 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
19980 gcc_unreachable ();
19981 emit_move_insn (target
, gen_lowpart (mode
, x
));
19989 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19990 whose ONE_VAR element is VAR, and other elements are zero. Return true
19994 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
19995 rtx target
, rtx var
, int one_var
)
19997 enum machine_mode vsimode
;
20013 var
= force_reg (GET_MODE_INNER (mode
), var
);
20014 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20015 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20020 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20021 new_target
= gen_reg_rtx (mode
);
20023 new_target
= target
;
20024 var
= force_reg (GET_MODE_INNER (mode
), var
);
20025 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20026 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20027 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20030 /* We need to shuffle the value to the correct position, so
20031 create a new pseudo to store the intermediate result. */
20033 /* With SSE2, we can use the integer shuffle insns. */
20034 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20036 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20038 GEN_INT (one_var
== 1 ? 0 : 1),
20039 GEN_INT (one_var
== 2 ? 0 : 1),
20040 GEN_INT (one_var
== 3 ? 0 : 1)));
20041 if (target
!= new_target
)
20042 emit_move_insn (target
, new_target
);
20046 /* Otherwise convert the intermediate result to V4SFmode and
20047 use the SSE1 shuffle instructions. */
20048 if (mode
!= V4SFmode
)
20050 tmp
= gen_reg_rtx (V4SFmode
);
20051 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20056 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20058 GEN_INT (one_var
== 1 ? 0 : 1),
20059 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20060 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20062 if (mode
!= V4SFmode
)
20063 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20064 else if (tmp
!= target
)
20065 emit_move_insn (target
, tmp
);
20067 else if (target
!= new_target
)
20068 emit_move_insn (target
, new_target
);
20073 vsimode
= V4SImode
;
20079 vsimode
= V2SImode
;
20085 /* Zero extend the variable element to SImode and recurse. */
20086 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20088 x
= gen_reg_rtx (vsimode
);
20089 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20091 gcc_unreachable ();
20093 emit_move_insn (target
, gen_lowpart (mode
, x
));
20101 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20102 consisting of the values in VALS. It is known that all elements
20103 except ONE_VAR are constants. Return true if successful. */
20106 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20107 rtx target
, rtx vals
, int one_var
)
20109 rtx var
= XVECEXP (vals
, 0, one_var
);
20110 enum machine_mode wmode
;
20113 const_vec
= copy_rtx (vals
);
20114 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20115 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20123 /* For the two element vectors, it's just as easy to use
20124 the general case. */
20140 /* There's no way to set one QImode entry easily. Combine
20141 the variable value with its adjacent constant value, and
20142 promote to an HImode set. */
20143 x
= XVECEXP (vals
, 0, one_var
^ 1);
20146 var
= convert_modes (HImode
, QImode
, var
, true);
20147 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20148 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20149 x
= GEN_INT (INTVAL (x
) & 0xff);
20153 var
= convert_modes (HImode
, QImode
, var
, true);
20154 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20156 if (x
!= const0_rtx
)
20157 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20158 1, OPTAB_LIB_WIDEN
);
20160 x
= gen_reg_rtx (wmode
);
20161 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20162 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20164 emit_move_insn (target
, gen_lowpart (mode
, x
));
20171 emit_move_insn (target
, const_vec
);
20172 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20176 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20177 all values variable, and none identical. */
20180 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20181 rtx target
, rtx vals
)
20183 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20184 rtx op0
= NULL
, op1
= NULL
;
20185 bool use_vec_concat
= false;
20191 if (!mmx_ok
&& !TARGET_SSE
)
20197 /* For the two element vectors, we always implement VEC_CONCAT. */
20198 op0
= XVECEXP (vals
, 0, 0);
20199 op1
= XVECEXP (vals
, 0, 1);
20200 use_vec_concat
= true;
20204 half_mode
= V2SFmode
;
20207 half_mode
= V2SImode
;
20213 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20214 Recurse to load the two halves. */
20216 op0
= gen_reg_rtx (half_mode
);
20217 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20218 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20220 op1
= gen_reg_rtx (half_mode
);
20221 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20222 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20224 use_vec_concat
= true;
20235 gcc_unreachable ();
20238 if (use_vec_concat
)
20240 if (!register_operand (op0
, half_mode
))
20241 op0
= force_reg (half_mode
, op0
);
20242 if (!register_operand (op1
, half_mode
))
20243 op1
= force_reg (half_mode
, op1
);
20245 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20246 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20250 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20251 enum machine_mode inner_mode
;
20252 rtx words
[4], shift
;
20254 inner_mode
= GET_MODE_INNER (mode
);
20255 n_elts
= GET_MODE_NUNITS (mode
);
20256 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20257 n_elt_per_word
= n_elts
/ n_words
;
20258 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20260 for (i
= 0; i
< n_words
; ++i
)
20262 rtx word
= NULL_RTX
;
20264 for (j
= 0; j
< n_elt_per_word
; ++j
)
20266 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20267 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20273 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20274 word
, 1, OPTAB_LIB_WIDEN
);
20275 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20276 word
, 1, OPTAB_LIB_WIDEN
);
20284 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20285 else if (n_words
== 2)
20287 rtx tmp
= gen_reg_rtx (mode
);
20288 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20289 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20290 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20291 emit_move_insn (target
, tmp
);
20293 else if (n_words
== 4)
20295 rtx tmp
= gen_reg_rtx (V4SImode
);
20296 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20297 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20298 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20301 gcc_unreachable ();
20305 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20306 instructions unless MMX_OK is true. */
20309 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20311 enum machine_mode mode
= GET_MODE (target
);
20312 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20313 int n_elts
= GET_MODE_NUNITS (mode
);
20314 int n_var
= 0, one_var
= -1;
20315 bool all_same
= true, all_const_zero
= true;
20319 for (i
= 0; i
< n_elts
; ++i
)
20321 x
= XVECEXP (vals
, 0, i
);
20322 if (!CONSTANT_P (x
))
20323 n_var
++, one_var
= i
;
20324 else if (x
!= CONST0_RTX (inner_mode
))
20325 all_const_zero
= false;
20326 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20330 /* Constants are best loaded from the constant pool. */
20333 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20337 /* If all values are identical, broadcast the value. */
20339 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20340 XVECEXP (vals
, 0, 0)))
20343 /* Values where only one field is non-constant are best loaded from
20344 the pool and overwritten via move later. */
20348 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20349 XVECEXP (vals
, 0, one_var
),
20353 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20357 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20361 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20363 enum machine_mode mode
= GET_MODE (target
);
20364 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20365 bool use_vec_merge
= false;
20374 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20375 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20377 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20379 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20380 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20390 /* For the two element vectors, we implement a VEC_CONCAT with
20391 the extraction of the other element. */
20393 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20394 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20397 op0
= val
, op1
= tmp
;
20399 op0
= tmp
, op1
= val
;
20401 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20402 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20410 use_vec_merge
= true;
20414 /* tmp = target = A B C D */
20415 tmp
= copy_to_reg (target
);
20416 /* target = A A B B */
20417 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20418 /* target = X A B B */
20419 ix86_expand_vector_set (false, target
, val
, 0);
20420 /* target = A X C D */
20421 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20422 GEN_INT (1), GEN_INT (0),
20423 GEN_INT (2+4), GEN_INT (3+4)));
20427 /* tmp = target = A B C D */
20428 tmp
= copy_to_reg (target
);
20429 /* tmp = X B C D */
20430 ix86_expand_vector_set (false, tmp
, val
, 0);
20431 /* target = A B X D */
20432 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20433 GEN_INT (0), GEN_INT (1),
20434 GEN_INT (0+4), GEN_INT (3+4)));
20438 /* tmp = target = A B C D */
20439 tmp
= copy_to_reg (target
);
20440 /* tmp = X B C D */
20441 ix86_expand_vector_set (false, tmp
, val
, 0);
20442 /* target = A B X D */
20443 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20444 GEN_INT (0), GEN_INT (1),
20445 GEN_INT (2+4), GEN_INT (0+4)));
20449 gcc_unreachable ();
20454 /* Element 0 handled by vec_merge below. */
20457 use_vec_merge
= true;
20463 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20464 store into element 0, then shuffle them back. */
20468 order
[0] = GEN_INT (elt
);
20469 order
[1] = const1_rtx
;
20470 order
[2] = const2_rtx
;
20471 order
[3] = GEN_INT (3);
20472 order
[elt
] = const0_rtx
;
20474 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20475 order
[1], order
[2], order
[3]));
20477 ix86_expand_vector_set (false, target
, val
, 0);
20479 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20480 order
[1], order
[2], order
[3]));
20484 /* For SSE1, we have to reuse the V4SF code. */
20485 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20486 gen_lowpart (SFmode
, val
), elt
);
20491 use_vec_merge
= TARGET_SSE2
;
20494 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20505 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20506 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20507 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20511 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20513 emit_move_insn (mem
, target
);
20515 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20516 emit_move_insn (tmp
, val
);
20518 emit_move_insn (target
, mem
);
20523 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20525 enum machine_mode mode
= GET_MODE (vec
);
20526 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20527 bool use_vec_extr
= false;
20540 use_vec_extr
= true;
20552 tmp
= gen_reg_rtx (mode
);
20553 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20554 GEN_INT (elt
), GEN_INT (elt
),
20555 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20559 tmp
= gen_reg_rtx (mode
);
20560 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20564 gcc_unreachable ();
20567 use_vec_extr
= true;
20582 tmp
= gen_reg_rtx (mode
);
20583 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20584 GEN_INT (elt
), GEN_INT (elt
),
20585 GEN_INT (elt
), GEN_INT (elt
)));
20589 tmp
= gen_reg_rtx (mode
);
20590 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20594 gcc_unreachable ();
20597 use_vec_extr
= true;
20602 /* For SSE1, we have to reuse the V4SF code. */
20603 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20604 gen_lowpart (V4SFmode
, vec
), elt
);
20610 use_vec_extr
= TARGET_SSE2
;
20613 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20618 /* ??? Could extract the appropriate HImode element and shift. */
20625 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20626 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20628 /* Let the rtl optimizers know about the zero extension performed. */
20629 if (inner_mode
== HImode
)
20631 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20632 target
= gen_lowpart (SImode
, target
);
20635 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20639 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20641 emit_move_insn (mem
, vec
);
20643 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20644 emit_move_insn (target
, tmp
);
20648 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20649 pattern to reduce; DEST is the destination; IN is the input vector. */
20652 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20654 rtx tmp1
, tmp2
, tmp3
;
20656 tmp1
= gen_reg_rtx (V4SFmode
);
20657 tmp2
= gen_reg_rtx (V4SFmode
);
20658 tmp3
= gen_reg_rtx (V4SFmode
);
20660 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20661 emit_insn (fn (tmp2
, tmp1
, in
));
20663 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20664 GEN_INT (1), GEN_INT (1),
20665 GEN_INT (1+4), GEN_INT (1+4)));
20666 emit_insn (fn (dest
, tmp2
, tmp3
));
20669 /* Target hook for scalar_mode_supported_p. */
20671 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20673 if (DECIMAL_FLOAT_MODE_P (mode
))
20676 return default_scalar_mode_supported_p (mode
);
20679 /* Implements target hook vector_mode_supported_p. */
20681 ix86_vector_mode_supported_p (enum machine_mode mode
)
20683 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20685 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20687 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20689 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20694 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20696 We do this in the new i386 backend to maintain source compatibility
20697 with the old cc0-based compiler. */
20700 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20701 tree inputs ATTRIBUTE_UNUSED
,
20704 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20706 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20711 /* Return true if this goes in small data/bss. */
20714 ix86_in_large_data_p (tree exp
)
20716 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20719 /* Functions are never large data. */
20720 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20723 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20725 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20726 if (strcmp (section
, ".ldata") == 0
20727 || strcmp (section
, ".lbss") == 0)
20733 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20735 /* If this is an incomplete type with size 0, then we can't put it
20736 in data because it might be too big when completed. */
20737 if (!size
|| size
> ix86_section_threshold
)
20744 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20746 default_encode_section_info (decl
, rtl
, first
);
20748 if (TREE_CODE (decl
) == VAR_DECL
20749 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20750 && ix86_in_large_data_p (decl
))
20751 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20754 /* Worker function for REVERSE_CONDITION. */
20757 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20759 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20760 ? reverse_condition (code
)
20761 : reverse_condition_maybe_unordered (code
));
20764 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20768 output_387_reg_move (rtx insn
, rtx
*operands
)
20770 if (REG_P (operands
[1])
20771 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20773 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20774 return output_387_ffreep (operands
, 0);
20775 return "fstp\t%y0";
20777 if (STACK_TOP_P (operands
[0]))
20778 return "fld%z1\t%y1";
20782 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20783 FP status register is set. */
20786 ix86_emit_fp_unordered_jump (rtx label
)
20788 rtx reg
= gen_reg_rtx (HImode
);
20791 emit_insn (gen_x86_fnstsw_1 (reg
));
20793 if (TARGET_USE_SAHF
)
20795 emit_insn (gen_x86_sahf_1 (reg
));
20797 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20798 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
20802 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20804 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20805 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20808 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20809 gen_rtx_LABEL_REF (VOIDmode
, label
),
20811 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20812 emit_jump_insn (temp
);
20815 /* Output code to perform a log1p XFmode calculation. */
20817 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20819 rtx label1
= gen_label_rtx ();
20820 rtx label2
= gen_label_rtx ();
20822 rtx tmp
= gen_reg_rtx (XFmode
);
20823 rtx tmp2
= gen_reg_rtx (XFmode
);
20825 emit_insn (gen_absxf2 (tmp
, op1
));
20826 emit_insn (gen_cmpxf (tmp
,
20827 CONST_DOUBLE_FROM_REAL_VALUE (
20828 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20830 emit_jump_insn (gen_bge (label1
));
20832 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20833 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20834 emit_jump (label2
);
20836 emit_label (label1
);
20837 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20838 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20839 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20840 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20842 emit_label (label2
);
20845 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20848 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20851 /* With Binutils 2.15, the "@unwind" marker must be specified on
20852 every occurrence of the ".eh_frame" section, not just the first
20855 && strcmp (name
, ".eh_frame") == 0)
20857 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20858 flags
& SECTION_WRITE
? "aw" : "a");
20861 default_elf_asm_named_section (name
, flags
, decl
);
20864 /* Return the mangling of TYPE if it is an extended fundamental type. */
20866 static const char *
20867 ix86_mangle_fundamental_type (tree type
)
20869 switch (TYPE_MODE (type
))
20872 /* __float128 is "g". */
20875 /* "long double" or __float80 is "e". */
20882 /* For 32-bit code we can save PIC register setup by using
20883 __stack_chk_fail_local hidden function instead of calling
20884 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20885 register, so it is better to call __stack_chk_fail directly. */
20888 ix86_stack_protect_fail (void)
20890 return TARGET_64BIT
20891 ? default_external_stack_protect_fail ()
20892 : default_hidden_stack_protect_fail ();
20895 /* Select a format to encode pointers in exception handling data. CODE
20896 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20897 true if the symbol may be affected by dynamic relocations.
20899 ??? All x86 object file formats are capable of representing this.
20900 After all, the relocation needed is the same as for the call insn.
20901 Whether or not a particular assembler allows us to enter such, I
20902 guess we'll have to see. */
20904 asm_preferred_eh_data_format (int code
, int global
)
20908 int type
= DW_EH_PE_sdata8
;
20910 || ix86_cmodel
== CM_SMALL_PIC
20911 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
20912 type
= DW_EH_PE_sdata4
;
20913 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
20915 if (ix86_cmodel
== CM_SMALL
20916 || (ix86_cmodel
== CM_MEDIUM
&& code
))
20917 return DW_EH_PE_udata4
;
20918 return DW_EH_PE_absptr
;
20921 /* Expand copysign from SIGN to the positive value ABS_VALUE
20922 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20925 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
20927 enum machine_mode mode
= GET_MODE (sign
);
20928 rtx sgn
= gen_reg_rtx (mode
);
20929 if (mask
== NULL_RTX
)
20931 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
20932 if (!VECTOR_MODE_P (mode
))
20934 /* We need to generate a scalar mode mask in this case. */
20935 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20936 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20937 mask
= gen_reg_rtx (mode
);
20938 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20942 mask
= gen_rtx_NOT (mode
, mask
);
20943 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
20944 gen_rtx_AND (mode
, mask
, sign
)));
20945 emit_insn (gen_rtx_SET (VOIDmode
, result
,
20946 gen_rtx_IOR (mode
, abs_value
, sgn
)));
20949 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20950 mask for masking out the sign-bit is stored in *SMASK, if that is
20953 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
20955 enum machine_mode mode
= GET_MODE (op0
);
20958 xa
= gen_reg_rtx (mode
);
20959 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
20960 if (!VECTOR_MODE_P (mode
))
20962 /* We need to generate a scalar mode mask in this case. */
20963 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20964 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20965 mask
= gen_reg_rtx (mode
);
20966 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20968 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
20969 gen_rtx_AND (mode
, op0
, mask
)));
20977 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20978 swapping the operands if SWAP_OPERANDS is true. The expanded
20979 code is a forward jump to a newly created label in case the
20980 comparison is true. The generated label rtx is returned. */
20982 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
20983 bool swap_operands
)
20994 label
= gen_label_rtx ();
20995 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
20996 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20997 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
20998 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
20999 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21000 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21001 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21002 JUMP_LABEL (tmp
) = label
;
21007 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21008 using comparison code CODE. Operands are swapped for the comparison if
21009 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21011 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21012 bool swap_operands
)
21014 enum machine_mode mode
= GET_MODE (op0
);
21015 rtx mask
= gen_reg_rtx (mode
);
21024 if (mode
== DFmode
)
21025 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21026 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21028 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21029 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21034 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21035 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21037 ix86_gen_TWO52 (enum machine_mode mode
)
21039 REAL_VALUE_TYPE TWO52r
;
21042 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21043 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21044 TWO52
= force_reg (mode
, TWO52
);
21049 /* Expand SSE sequence for computing lround from OP1 storing
21052 ix86_expand_lround (rtx op0
, rtx op1
)
21054 /* C code for the stuff we're doing below:
21055 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21058 enum machine_mode mode
= GET_MODE (op1
);
21059 const struct real_format
*fmt
;
21060 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21063 /* load nextafter (0.5, 0.0) */
21064 fmt
= REAL_MODE_FORMAT (mode
);
21065 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21066 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21068 /* adj = copysign (0.5, op1) */
21069 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21070 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21072 /* adj = op1 + adj */
21073 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21075 /* op0 = (imode)adj */
21076 expand_fix (op0
, adj
, 0);
21079 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21082 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21084 /* C code for the stuff we're doing below (for do_floor):
21086 xi -= (double)xi > op1 ? 1 : 0;
21089 enum machine_mode fmode
= GET_MODE (op1
);
21090 enum machine_mode imode
= GET_MODE (op0
);
21091 rtx ireg
, freg
, label
, tmp
;
21093 /* reg = (long)op1 */
21094 ireg
= gen_reg_rtx (imode
);
21095 expand_fix (ireg
, op1
, 0);
21097 /* freg = (double)reg */
21098 freg
= gen_reg_rtx (fmode
);
21099 expand_float (freg
, ireg
, 0);
21101 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21102 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21103 freg
, op1
, !do_floor
);
21104 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21105 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21106 emit_move_insn (ireg
, tmp
);
21108 emit_label (label
);
21109 LABEL_NUSES (label
) = 1;
21111 emit_move_insn (op0
, ireg
);
21114 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21115 result in OPERAND0. */
21117 ix86_expand_rint (rtx operand0
, rtx operand1
)
21119 /* C code for the stuff we're doing below:
21120 xa = fabs (operand1);
21121 if (!isless (xa, 2**52))
21123 xa = xa + 2**52 - 2**52;
21124 return copysign (xa, operand1);
21126 enum machine_mode mode
= GET_MODE (operand0
);
21127 rtx res
, xa
, label
, TWO52
, mask
;
21129 res
= gen_reg_rtx (mode
);
21130 emit_move_insn (res
, operand1
);
21132 /* xa = abs (operand1) */
21133 xa
= ix86_expand_sse_fabs (res
, &mask
);
21135 /* if (!isless (xa, TWO52)) goto label; */
21136 TWO52
= ix86_gen_TWO52 (mode
);
21137 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21139 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21140 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21142 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21144 emit_label (label
);
21145 LABEL_NUSES (label
) = 1;
21147 emit_move_insn (operand0
, res
);
21150 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21153 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21155 /* C code for the stuff we expand below.
21156 double xa = fabs (x), x2;
21157 if (!isless (xa, TWO52))
21159 xa = xa + TWO52 - TWO52;
21160 x2 = copysign (xa, x);
21169 enum machine_mode mode
= GET_MODE (operand0
);
21170 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21172 TWO52
= ix86_gen_TWO52 (mode
);
21174 /* Temporary for holding the result, initialized to the input
21175 operand to ease control flow. */
21176 res
= gen_reg_rtx (mode
);
21177 emit_move_insn (res
, operand1
);
21179 /* xa = abs (operand1) */
21180 xa
= ix86_expand_sse_fabs (res
, &mask
);
21182 /* if (!isless (xa, TWO52)) goto label; */
21183 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21185 /* xa = xa + TWO52 - TWO52; */
21186 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21187 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21189 /* xa = copysign (xa, operand1) */
21190 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21192 /* generate 1.0 or -1.0 */
21193 one
= force_reg (mode
,
21194 const_double_from_real_value (do_floor
21195 ? dconst1
: dconstm1
, mode
));
21197 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21198 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21199 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21200 gen_rtx_AND (mode
, one
, tmp
)));
21201 /* We always need to subtract here to preserve signed zero. */
21202 tmp
= expand_simple_binop (mode
, MINUS
,
21203 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21204 emit_move_insn (res
, tmp
);
21206 emit_label (label
);
21207 LABEL_NUSES (label
) = 1;
21209 emit_move_insn (operand0
, res
);
21212 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21215 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21217 /* C code for the stuff we expand below.
21218 double xa = fabs (x), x2;
21219 if (!isless (xa, TWO52))
21221 x2 = (double)(long)x;
21228 if (HONOR_SIGNED_ZEROS (mode))
21229 return copysign (x2, x);
21232 enum machine_mode mode
= GET_MODE (operand0
);
21233 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21235 TWO52
= ix86_gen_TWO52 (mode
);
21237 /* Temporary for holding the result, initialized to the input
21238 operand to ease control flow. */
21239 res
= gen_reg_rtx (mode
);
21240 emit_move_insn (res
, operand1
);
21242 /* xa = abs (operand1) */
21243 xa
= ix86_expand_sse_fabs (res
, &mask
);
21245 /* if (!isless (xa, TWO52)) goto label; */
21246 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21248 /* xa = (double)(long)x */
21249 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21250 expand_fix (xi
, res
, 0);
21251 expand_float (xa
, xi
, 0);
21254 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21256 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21257 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21258 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21259 gen_rtx_AND (mode
, one
, tmp
)));
21260 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21261 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21262 emit_move_insn (res
, tmp
);
21264 if (HONOR_SIGNED_ZEROS (mode
))
21265 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21267 emit_label (label
);
21268 LABEL_NUSES (label
) = 1;
21270 emit_move_insn (operand0
, res
);
21273 /* Expand SSE sequence for computing round from OPERAND1 storing
21274 into OPERAND0. Sequence that works without relying on DImode truncation
21275 via cvttsd2siq that is only available on 64bit targets. */
21277 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21279 /* C code for the stuff we expand below.
21280 double xa = fabs (x), xa2, x2;
21281 if (!isless (xa, TWO52))
21283 Using the absolute value and copying back sign makes
21284 -0.0 -> -0.0 correct.
21285 xa2 = xa + TWO52 - TWO52;
21290 else if (dxa > 0.5)
21292 x2 = copysign (xa2, x);
21295 enum machine_mode mode
= GET_MODE (operand0
);
21296 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21298 TWO52
= ix86_gen_TWO52 (mode
);
21300 /* Temporary for holding the result, initialized to the input
21301 operand to ease control flow. */
21302 res
= gen_reg_rtx (mode
);
21303 emit_move_insn (res
, operand1
);
21305 /* xa = abs (operand1) */
21306 xa
= ix86_expand_sse_fabs (res
, &mask
);
21308 /* if (!isless (xa, TWO52)) goto label; */
21309 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21311 /* xa2 = xa + TWO52 - TWO52; */
21312 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21313 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21315 /* dxa = xa2 - xa; */
21316 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21318 /* generate 0.5, 1.0 and -0.5 */
21319 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21320 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21321 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21325 tmp
= gen_reg_rtx (mode
);
21326 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21327 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21328 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21329 gen_rtx_AND (mode
, one
, tmp
)));
21330 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21331 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21332 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21333 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21334 gen_rtx_AND (mode
, one
, tmp
)));
21335 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21337 /* res = copysign (xa2, operand1) */
21338 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21340 emit_label (label
);
21341 LABEL_NUSES (label
) = 1;
21343 emit_move_insn (operand0
, res
);
21346 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21349 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21351 /* C code for SSE variant we expand below.
21352 double xa = fabs (x), x2;
21353 if (!isless (xa, TWO52))
21355 x2 = (double)(long)x;
21356 if (HONOR_SIGNED_ZEROS (mode))
21357 return copysign (x2, x);
21360 enum machine_mode mode
= GET_MODE (operand0
);
21361 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21363 TWO52
= ix86_gen_TWO52 (mode
);
21365 /* Temporary for holding the result, initialized to the input
21366 operand to ease control flow. */
21367 res
= gen_reg_rtx (mode
);
21368 emit_move_insn (res
, operand1
);
21370 /* xa = abs (operand1) */
21371 xa
= ix86_expand_sse_fabs (res
, &mask
);
21373 /* if (!isless (xa, TWO52)) goto label; */
21374 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21376 /* x = (double)(long)x */
21377 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21378 expand_fix (xi
, res
, 0);
21379 expand_float (res
, xi
, 0);
21381 if (HONOR_SIGNED_ZEROS (mode
))
21382 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21384 emit_label (label
);
21385 LABEL_NUSES (label
) = 1;
21387 emit_move_insn (operand0
, res
);
21390 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21393 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21395 enum machine_mode mode
= GET_MODE (operand0
);
21396 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21398 /* C code for SSE variant we expand below.
21399 double xa = fabs (x), x2;
21400 if (!isless (xa, TWO52))
21402 xa2 = xa + TWO52 - TWO52;
21406 x2 = copysign (xa2, x);
21410 TWO52
= ix86_gen_TWO52 (mode
);
21412 /* Temporary for holding the result, initialized to the input
21413 operand to ease control flow. */
21414 res
= gen_reg_rtx (mode
);
21415 emit_move_insn (res
, operand1
);
21417 /* xa = abs (operand1) */
21418 xa
= ix86_expand_sse_fabs (res
, &smask
);
21420 /* if (!isless (xa, TWO52)) goto label; */
21421 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21423 /* res = xa + TWO52 - TWO52; */
21424 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21425 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21426 emit_move_insn (res
, tmp
);
21429 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21431 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21432 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21433 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21434 gen_rtx_AND (mode
, mask
, one
)));
21435 tmp
= expand_simple_binop (mode
, MINUS
,
21436 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21437 emit_move_insn (res
, tmp
);
21439 /* res = copysign (res, operand1) */
21440 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21442 emit_label (label
);
21443 LABEL_NUSES (label
) = 1;
21445 emit_move_insn (operand0
, res
);
21448 /* Expand SSE sequence for computing round from OPERAND1 storing
21451 ix86_expand_round (rtx operand0
, rtx operand1
)
21453 /* C code for the stuff we're doing below:
21454 double xa = fabs (x);
21455 if (!isless (xa, TWO52))
21457 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21458 return copysign (xa, x);
21460 enum machine_mode mode
= GET_MODE (operand0
);
21461 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21462 const struct real_format
*fmt
;
21463 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21465 /* Temporary for holding the result, initialized to the input
21466 operand to ease control flow. */
21467 res
= gen_reg_rtx (mode
);
21468 emit_move_insn (res
, operand1
);
21470 TWO52
= ix86_gen_TWO52 (mode
);
21471 xa
= ix86_expand_sse_fabs (res
, &mask
);
21472 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21474 /* load nextafter (0.5, 0.0) */
21475 fmt
= REAL_MODE_FORMAT (mode
);
21476 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21477 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21479 /* xa = xa + 0.5 */
21480 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21481 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21483 /* xa = (double)(int64_t)xa */
21484 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21485 expand_fix (xi
, xa
, 0);
21486 expand_float (xa
, xi
, 0);
21488 /* res = copysign (xa, operand1) */
21489 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21491 emit_label (label
);
21492 LABEL_NUSES (label
) = 1;
21494 emit_move_insn (operand0
, res
);
21497 #include "gt-i386.h"