1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe threshold used when the target headers do not
   provide one; (-1) selects the "no explicit limit" behavior.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode..DImode map to 0..3; any other mode falls through to the
   catch-all "other" slot, index 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a cost in code bytes on the same
   scale, for use when tuning for size.  */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop_algs entry (always fall back to a libcall) used to
   fill the 64-bit slot of cost tables for 32-bit-only processors.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for size (-Os): entries are code-size costs
   on the COSTS_N_BYTES scale, not cycle counts.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code, some fields (e.g. "static const", "large" insn,
   MOVE_RATIO, branch cost) and the closing "};" appear missing relative to
   upstream GCC; restore from upstream i386.c before compiling.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy and memset strategies: always use "rep; movsb/stosb" when
   optimizing for size, for both 32- and 64-bit code.  */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* Cycle-cost table for the Intel 386.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code and some fields (e.g. "static const", MOVE_RATIO, branch
   cost) plus the closing "};" appear missing relative to upstream GCC;
   restore from upstream i386.c before compiling.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy/memset: rep; movsb for any size; 64-bit slot is a dummy since
   the 386 is 32-bit only.  */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Cycle-cost table for the Intel 486.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), the memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy/memset: rep; movsl (4-byte rep prefix) for any size.  */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-cost table for the Intel Pentium (P5).
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), the memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: libcall, except rep; movsl for blocks up to 256 bytes.  */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-cost table for the Intel PentiumPro/P6 family.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), the memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.  */
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for the AMD Geode.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), the memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy: libcall, except rep; movsl for blocks up to 256 bytes.  */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for the AMD K6.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), the memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy: libcall, except rep; movsl for blocks up to 256 bytes.  */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for the AMD Athlon.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), the memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for the AMD K8 (Opteron/Athlon64).
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code and some fields (e.g. "static const", MOVE_RATIO, branch
   cost) plus the closing "};" appear missing relative to upstream GCC;
   restore from upstream i386.c before compiling.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
time).  */
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-cost table for the AMD Family 10h (Barcelona).
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code and some fields (e.g. "static const", MOVE_RATIO, branch
   cost) plus the closing "};" appear missing relative to upstream GCC;
   restore from upstream i386.c before compiling.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
/* NOTE(review): the next four lines are the remnant of an upstream
   latency-table comment whose /* ... * / markers were lost in extraction;
   restore the full comment from upstream i386.c:
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
   */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
time).  */
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-cost table for the Intel Pentium 4 (NetBurst).
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), a memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy: byte loop up to 12 bytes, then rep; movsl.  */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
/* memset: byte loop, word loop, then rep; stosl by size class.
   NOTE(review): the {-1, ...} terminator of this entry appears lost.  */
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Cycle-cost table for the Intel Nocona (64-bit NetBurst Xeon).
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code; some fields (MOVE_RATIO, branch cost), a memset
   strategy tail, and the closing "};" appear missing relative to upstream
   GCC; restore from upstream i386.c before compiling.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy strategies, 32-bit then 64-bit code.  */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
/* memset strategies, 32-bit then 64-bit code.
   NOTE(review): the 32-bit entry's {-1, ...} terminator appears lost.  */
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-cost table for the Intel Core 2.
   NOTE(review): extraction damage -- stray original line numbers are fused
   into the code and some fields (e.g. "static const", MOVE_RATIO, branch
   cost) plus the closing "};" appear missing relative to upstream GCC;
   restore from upstream i386.c before compiling.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy strategies, 32-bit then 64-bit code.  */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
/* memset strategies, 32-bit then 64-bit code.  */
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table currently in effect.  Initialized to pentium_cost;
   presumably repointed at one of the per-CPU tables above when the
   tuning target is selected -- TODO confirm against override_options.  */
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
982 /* Processor feature/optimization bitmasks. */
/* Each m_* mask has one bit per PROCESSOR_* enumeration value; the
   compound masks (m_K6_GEODE, m_ATHLON_K8, ...) OR together related
   CPUs so the tuning tables can name a whole family at once.  */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling for Generic64 seems like good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro base chips. */
1013 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1017 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1022 /* X86_TUNE_USE_BIT_TEST */
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1030 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation result. But after P4 was made, no performance benefit
1034 was observed with branch hints. It also increases the code size.
1035 As a result, icc never generates branch hints. */
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1043 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1048 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on Generic32 compilation setting as well. However
1052 in current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and is in conflict with partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2
| m_GENERIC
,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386
| m_486
| m_K6_GEODE
,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1069 /* X86_TUNE_USE_MOV0 */
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1084 /* X86_TUNE_READ_MODIFY */
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1089 | m_GENERIC
/* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT
| m_486
| m_386
),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386
| m_PENT4
| m_NOCONA
,
1097 /* X86_TUNE_QIMODE_MATH */
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1117 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1124 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1129 | m_GENERIC
| m_GEODE
),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here in between PPro/Pentium4 based chips that treat 128bit
1136 SSE registers as single units versus K8 based chips that divide SSE
1137 registers to two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 shows that disabling this option on P4 brings over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10
,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO
| m_PENT4
| m_NOCONA
,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1169 /* X86_TUNE_SHIFT1 */
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10
,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10
,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1203 /* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is vector
1204 path on AMD machines) */
1205 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1207 /* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
1209 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1211 /* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR than
1215 /* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is, but
1216 one byte longer). */
1219 /* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
1220 operand that cannot be represented using a modRM byte. The XOR
1221 replacement is long decoded, so this split helps here as well). */
1225 /* Feature tests against the various architecture variations. */
1226 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1227 /* X86_ARCH_CMOVE */
1228 m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
,
1230 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1233 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1236 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1239 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which accumulate-outgoing-args style frame layout is
   preferred -- NOTE(review): inferred from the name; confirm at uses.  */
1243 static const unsigned int x86_accumulate_outgoing_args
1244 = m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
/* CPUs whose 80387 is assumed capable of the "fancy" math insns
   -- NOTE(review): inferred from the name; confirm at uses.  */
1246 static const unsigned int x86_arch_always_fancy_math_387
1247 = m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1248 | m_NOCONA
| m_CORE2
| m_GENERIC
;
/* String-operation strategy override; no_stringop means no override in
   effect -- NOTE(review): presumably set from a -mstringop-strategy
   style option; confirm at uses.  */
1250 static enum stringop_alg stringop_alg
= no_stringop
;
1252 /* In case the average insn count for single function invocation is
1253 lower than this constant, emit fast (but longer) prologue and
epilogue code.  */
1255 #define FAST_PROLOGUE_INSN_COUNT 20
1257 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1258 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1259 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1260 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1262 /* Array of the smallest class containing reg number REGNO, indexed by
1263 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1265 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1267 /* ax, dx, cx, bx */
1268 AREG
, DREG
, CREG
, BREG
,
1269 /* si, di, bp, sp */
1270 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1272 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1273 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1276 /* flags, fpsr, fpcr, frame */
1277 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1278 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1280 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1282 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1283 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1284 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1288 /* The "default" register map used in 32bit mode. */
/* Indexed by gcc hard register number, gives the debugger (dbx/stabs)
   register number; -1 marks registers with no debugger number.  */
1290 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1292 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1293 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1294 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1295 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1296 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1297 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1298 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Hard register numbers of the six integer argument registers of the
   x86-64 calling convention, in argument order: RDI, RSI, RDX, RCX,
   R8, R9 (the last two expressed relative to FIRST_REX_INT_REG).  */
1301 static int const x86_64_int_parameter_registers
[6] =
1303 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1304 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Hard register numbers used for integer return values in the x86-64
   calling convention.  Note: register 1 is RDX in this numbering (ax=0,
   dx=1, cx=2, bx=3, si=4, di=5; cf. the parameter table above), so the
   second entry's old "RDI" comment was wrong.  */
1307 static int const x86_64_int_return_registers
[4] =
1309 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1312 /* The "default" register map used in 64bit mode. */
/* Same shape as dbx_register_map above, but with the 64-bit debugger
   numbering; -1 marks registers with no debugger number.  */
1313 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1315 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1316 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1317 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1319 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1320 8,9,10,11,12,13,14,15, /* extended integer registers */
1321 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1324 /* Define the register numbers to be used in Dwarf debugging information.
1325 The SVR4 reference port C compiler uses the following register numbers
1326 in its Dwarf output code:
1327 0 for %eax (gcc regno = 0)
1328 1 for %ecx (gcc regno = 2)
1329 2 for %edx (gcc regno = 1)
1330 3 for %ebx (gcc regno = 3)
1331 4 for %esp (gcc regno = 7)
1332 5 for %ebp (gcc regno = 6)
1333 6 for %esi (gcc regno = 4)
1334 7 for %edi (gcc regno = 5)
1335 The following three DWARF register numbers are never generated by
1336 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1337 believes these numbers have these meanings.
1338 8 for %eip (no gcc equivalent)
1339 9 for %eflags (gcc regno = 17)
1340 10 for %trapno (no gcc equivalent)
1341 It is not at all clear how we should number the FP stack registers
1342 for the x86 architecture. If the version of SDB on x86/svr4 were
1343 a bit less brain dead with respect to floating-point then we would
1344 have a precedent to follow with respect to DWARF register numbers
1345 for x86 FP registers, but the SDB on x86/svr4 is so completely
1346 broken with respect to FP registers that it is hardly worth thinking
1347 of it as something to strive for compatibility with.
1348 The version of x86/svr4 SDB I have at the moment does (partially)
1349 seem to believe that DWARF register number 11 is associated with
1350 the x86 register %st(0), but that's about all. Higher DWARF
1351 register numbers don't seem to be associated with anything in
1352 particular, and even for DWARF regno 11, SDB only seems to under-
1353 stand that it should say that a variable lives in %st(0) (when
1354 asked via an `=' command) if we said it was in DWARF regno 11,
1355 but SDB still prints garbage when asked for the value of the
1356 variable in question (via a `/' command).
1357 (Also note that the labels SDB prints for various FP stack regs
1358 when doing an `x' command are all wrong.)
1359 Note that these problems generally don't affect the native SVR4
1360 C compiler because it doesn't allow the use of -O with -g and
1361 because when it is *not* optimizing, it allocates a memory
1362 location for each floating-point variable, and the memory
1363 location is what gets described in the DWARF AT_location
1364 attribute for the variable in question.
1365 Regardless of the severe mental illness of the x86/svr4 SDB, we
1366 do something sensible here and we use the following DWARF
1367 register numbers. Note that these are all stack-top-relative
1369 11 for %st(0) (gcc regno = 8)
1370 12 for %st(1) (gcc regno = 9)
1371 13 for %st(2) (gcc regno = 10)
1372 14 for %st(3) (gcc regno = 11)
1373 15 for %st(4) (gcc regno = 12)
1374 16 for %st(5) (gcc regno = 13)
1375 17 for %st(6) (gcc regno = 14)
1376 18 for %st(7) (gcc regno = 15)
/* DWARF register numbers as used by the SVR4 reference C compiler; see
   the long discussion in the comment above.  Indexed by gcc hard
   register number; -1 marks registers with no DWARF number.  */
1378 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1380 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1381 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1382 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1383 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1384 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1385 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1386 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1389 /* Test and compare insns in i386.md store the information needed to
1390 generate branch and scc insns here. */
/* First operand of the pending comparison.  */
1392 rtx ix86_compare_op0
= NULL_RTX
;
/* Second operand of the pending comparison.  */
1393 rtx ix86_compare_op1
= NULL_RTX
;
/* NOTE(review): presumably a flags rtx for a comparison that has
   already been emitted, letting the branch/scc expanders skip
   re-emitting it -- confirm at the uses.  */
1394 rtx ix86_compare_emitted
= NULL_RTX
;
1396 /* Size of the register save area. */
1397 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1399 /* Define the structure for the machine field in struct function. */
1401 struct stack_local_entry
GTY(())
1403 unsigned short mode
;
1406 struct stack_local_entry
*next
;
1409 /* Structure describing stack frame layout.
1410 Stack grows downward:
1416 saved frame pointer if frame_pointer_needed
1417 <- HARD_FRAME_POINTER
1422 [va_arg registers] (
1423 > to_allocate <- FRAME_POINTER
1433 HOST_WIDE_INT frame
;
1435 int outgoing_arguments_size
;
1438 HOST_WIDE_INT to_allocate
;
1439 /* The offsets relative to ARG_POINTER. */
1440 HOST_WIDE_INT frame_pointer_offset
;
1441 HOST_WIDE_INT hard_frame_pointer_offset
;
1442 HOST_WIDE_INT stack_pointer_offset
;
1444 /* When save_regs_using_mov is set, emit prologue using
1445 move instead of push instructions. */
1446 bool save_regs_using_mov
;
1449 /* Code model option. */
1450 enum cmodel ix86_cmodel
;
/* Assembler syntax to emit; defaults to AT&T syntax.  */
1452 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* TLS access sequences to generate; defaults to the GNU dialect.  */
1454 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1456 /* Which unit we are generating floating point math for. */
1457 enum fpmath_unit ix86_fpmath
;
1459 /* Which cpu are we scheduling for. */
1460 enum processor_type ix86_tune
;
1462 /* Which instruction set architecture to use. */
1463 enum processor_type ix86_arch
;
1465 /* true if sse prefetch instruction is not NOOP. */
1466 int x86_prefetch_sse
;
1468 /* ix86_regparm_string as a number */
1469 static int ix86_regparm
;
1471 /* -mstackrealign option */
1472 extern int ix86_force_align_arg_pointer
;
/* Attribute name used to mark functions whose argument pointer must be
   realigned.  */
1473 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1475 /* Preferred alignment for stack boundary in bits. */
1476 unsigned int ix86_preferred_stack_boundary
;
1478 /* Values 1-5: see jump.c */
1479 int ix86_branch_cost
;
1481 /* Variables which are this size or smaller are put in the data/bss
1482 or ldata/lbss sections. */
1484 int ix86_section_threshold
= 65536;
1486 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1487 char internal_label_prefix
[16];
/* Length of internal_label_prefix -- NOTE(review): presumably cached to
   avoid recomputing strlen; confirm at uses.  */
1488 int internal_label_prefix_len
;
1490 static bool ix86_handle_option (size_t, const char *, int);
1491 static void output_pic_addr_const (FILE *, rtx
, int);
1492 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1494 static const char *get_some_local_dynamic_name (void);
1495 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1496 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1497 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1499 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1500 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1502 static rtx
get_thread_pointer (int);
1503 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1504 static void get_pc_thunk_name (char [32], unsigned int);
1505 static rtx
gen_push (rtx
);
1506 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1507 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1508 static struct machine_function
* ix86_init_machine_status (void);
1509 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1510 static int ix86_nsaved_regs (void);
1511 static void ix86_emit_save_regs (void);
1512 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1513 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1514 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1515 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1516 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1517 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1518 static int ix86_issue_rate (void);
1519 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1520 static int ia32_multipass_dfa_lookahead (void);
1521 static void ix86_init_mmx_sse_builtins (void);
1522 static rtx
x86_this_parameter (tree
);
1523 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1524 HOST_WIDE_INT
, tree
);
1525 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1526 static void x86_file_start (void);
1527 static void ix86_reorg (void);
1528 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1529 static tree
ix86_build_builtin_va_list (void);
1530 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1532 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1533 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1534 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1536 static int ix86_address_cost (rtx
);
1537 static bool ix86_cannot_force_const_mem (rtx
);
1538 static rtx
ix86_delegitimize_address (rtx
);
1540 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1542 struct builtin_description
;
1543 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1545 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1547 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1548 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1549 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1550 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1551 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1552 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1553 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1554 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1555 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1556 static int ix86_fp_comparison_cost (enum rtx_code code
);
1557 static unsigned int ix86_select_alt_pic_regnum (void);
1558 static int ix86_save_reg (unsigned int, int);
1559 static void ix86_compute_frame_layout (struct ix86_frame
*);
1560 static int ix86_comp_type_attributes (tree
, tree
);
1561 static int ix86_function_regparm (tree
, tree
);
1562 const struct attribute_spec ix86_attribute_table
[];
1563 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1564 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1565 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1566 static bool contains_128bit_aligned_vector_p (tree
);
1567 static rtx
ix86_struct_value_rtx (tree
, int);
1568 static bool ix86_ms_bitfield_layout_p (tree
);
1569 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1570 static int extended_reg_mentioned_1 (rtx
*, void *);
1571 static bool ix86_rtx_costs (rtx
, int, int, int *);
1572 static int min_insn_size (rtx
);
1573 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1574 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1575 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1577 static void ix86_init_builtins (void);
1578 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1579 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1580 static tree
ix86_builtin_conversion (enum tree_code
, tree
);
1581 static const char *ix86_mangle_fundamental_type (tree
);
1582 static tree
ix86_stack_protect_fail (void);
1583 static rtx
ix86_internal_arg_pointer (void);
1584 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1585 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1588 /* This function is only used on Solaris. */
1589 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1592 /* Register class used for passing given 64bit part of the argument.
1593 These represent classes as documented by the PS ABI, with the exception
1594 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1595 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1597 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1598 whenever possible (upper half does contain padding).
1600 enum x86_64_reg_class
1603 X86_64_INTEGER_CLASS
,
1604 X86_64_INTEGERSI_CLASS
,
1611 X86_64_COMPLEX_X87_CLASS
,
1614 static const char * const x86_64_reg_class_name
[] = {
1615 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1616 "sseup", "x87", "x87up", "cplx87", "no"
1619 #define MAX_CLASSES 4
1621 /* Table of constants used by fldpi, fldln2, etc.... */
1622 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1623 static bool ext_80387_constants_init
= 0;
1624 static void init_ext_80387_constants (void);
1625 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1626 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1627 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1628 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1629 unsigned HOST_WIDE_INT align
)
1632 /* Initialize the GCC target structure. */
1633 #undef TARGET_ATTRIBUTE_TABLE
1634 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1635 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1636 # undef TARGET_MERGE_DECL_ATTRIBUTES
1637 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1640 #undef TARGET_COMP_TYPE_ATTRIBUTES
1641 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1643 #undef TARGET_INIT_BUILTINS
1644 #define TARGET_INIT_BUILTINS ix86_init_builtins
1645 #undef TARGET_EXPAND_BUILTIN
1646 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1648 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1649 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1650 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1651 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1653 #undef TARGET_ASM_FUNCTION_EPILOGUE
1654 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1656 #undef TARGET_ENCODE_SECTION_INFO
1657 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1658 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1660 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1663 #undef TARGET_ASM_OPEN_PAREN
1664 #define TARGET_ASM_OPEN_PAREN ""
1665 #undef TARGET_ASM_CLOSE_PAREN
1666 #define TARGET_ASM_CLOSE_PAREN ""
1668 #undef TARGET_ASM_ALIGNED_HI_OP
1669 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1670 #undef TARGET_ASM_ALIGNED_SI_OP
1671 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1673 #undef TARGET_ASM_ALIGNED_DI_OP
1674 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1677 #undef TARGET_ASM_UNALIGNED_HI_OP
1678 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1679 #undef TARGET_ASM_UNALIGNED_SI_OP
1680 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1681 #undef TARGET_ASM_UNALIGNED_DI_OP
1682 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1684 #undef TARGET_SCHED_ADJUST_COST
1685 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1686 #undef TARGET_SCHED_ISSUE_RATE
1687 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1688 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1689 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1690 ia32_multipass_dfa_lookahead
1692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1696 #undef TARGET_HAVE_TLS
1697 #define TARGET_HAVE_TLS true
1699 #undef TARGET_CANNOT_FORCE_CONST_MEM
1700 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1701 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1702 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1704 #undef TARGET_DELEGITIMIZE_ADDRESS
1705 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1707 #undef TARGET_MS_BITFIELD_LAYOUT_P
1708 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1711 #undef TARGET_BINDS_LOCAL_P
1712 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1715 #undef TARGET_ASM_OUTPUT_MI_THUNK
1716 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1717 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1718 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1720 #undef TARGET_ASM_FILE_START
1721 #define TARGET_ASM_FILE_START x86_file_start
1723 #undef TARGET_DEFAULT_TARGET_FLAGS
1724 #define TARGET_DEFAULT_TARGET_FLAGS \
1726 | TARGET_64BIT_DEFAULT \
1727 | TARGET_SUBTARGET_DEFAULT \
1728 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1730 #undef TARGET_HANDLE_OPTION
1731 #define TARGET_HANDLE_OPTION ix86_handle_option
1733 #undef TARGET_RTX_COSTS
1734 #define TARGET_RTX_COSTS ix86_rtx_costs
1735 #undef TARGET_ADDRESS_COST
1736 #define TARGET_ADDRESS_COST ix86_address_cost
1738 #undef TARGET_FIXED_CONDITION_CODE_REGS
1739 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1740 #undef TARGET_CC_MODES_COMPATIBLE
1741 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1743 #undef TARGET_MACHINE_DEPENDENT_REORG
1744 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1746 #undef TARGET_BUILD_BUILTIN_VA_LIST
1747 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1749 #undef TARGET_MD_ASM_CLOBBERS
1750 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1752 #undef TARGET_PROMOTE_PROTOTYPES
1753 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1754 #undef TARGET_STRUCT_VALUE_RTX
1755 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1756 #undef TARGET_SETUP_INCOMING_VARARGS
1757 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1758 #undef TARGET_MUST_PASS_IN_STACK
1759 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1760 #undef TARGET_PASS_BY_REFERENCE
1761 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1762 #undef TARGET_INTERNAL_ARG_POINTER
1763 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1764 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1765 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1767 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1768 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1770 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1771 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1773 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1774 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1781 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1782 #undef TARGET_INSERT_ATTRIBUTES
1783 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1786 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1787 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1789 #undef TARGET_STACK_PROTECT_FAIL
1790 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1792 #undef TARGET_FUNCTION_VALUE
1793 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The x86 target hook vector, built from the TARGET_* macros
   #defined above via the generic TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
1798 /* The svr4 ABI for the i386 says that records and unions are returned
1800 #ifndef DEFAULT_PCC_STRUCT_RETURN
1801 #define DEFAULT_PCC_STRUCT_RETURN 1
1804 /* Implement TARGET_HANDLE_OPTION. */
1807 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1814 target_flags
&= ~MASK_3DNOW_A
;
1815 target_flags_explicit
|= MASK_3DNOW_A
;
1822 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1823 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1830 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1831 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1838 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1839 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1846 target_flags
&= ~MASK_SSE4A
;
1847 target_flags_explicit
|= MASK_SSE4A
;
1856 /* Sometimes certain combinations of command options do not make
1857 sense on a particular target machine. You can define a macro
1858 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1859 defined, is executed once just after all the command options have
1862 Don't use this macro to turn on various extra optimizations for
1863 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1866 override_options (void)
1869 int ix86_tune_defaulted
= 0;
1870 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1872 /* Comes from final.c -- no real reason to change it. */
1873 #define MAX_CODE_ALIGN 16
1877 const struct processor_costs
*cost
; /* Processor costs */
1878 const int target_enable
; /* Target flags to enable. */
1879 const int target_disable
; /* Target flags to disable. */
1880 const int align_loop
; /* Default alignments. */
1881 const int align_loop_max_skip
;
1882 const int align_jump
;
1883 const int align_jump_max_skip
;
1884 const int align_func
;
1886 const processor_target_table
[PROCESSOR_max
] =
1888 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1889 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1890 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1891 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1892 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1893 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1894 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1895 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1896 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1897 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1898 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1899 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1900 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1901 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1904 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1907 const char *const name
; /* processor name or nickname. */
1908 const enum processor_type processor
;
1909 const enum pta_flags
1915 PTA_PREFETCH_SSE
= 1 << 4,
1917 PTA_3DNOW_A
= 1 << 6,
1921 PTA_POPCNT
= 1 << 10,
1923 PTA_SSE4A
= 1 << 12,
1924 PTA_NO_SAHF
= 1 << 13
1927 const processor_alias_table
[] =
1929 {"i386", PROCESSOR_I386
, 0},
1930 {"i486", PROCESSOR_I486
, 0},
1931 {"i586", PROCESSOR_PENTIUM
, 0},
1932 {"pentium", PROCESSOR_PENTIUM
, 0},
1933 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1934 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1935 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1936 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1937 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1938 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1939 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1940 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1941 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1942 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1943 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1944 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1945 | PTA_MMX
| PTA_PREFETCH_SSE
},
1946 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1947 | PTA_MMX
| PTA_PREFETCH_SSE
},
1948 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1949 | PTA_MMX
| PTA_PREFETCH_SSE
},
1950 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1951 | PTA_MMX
| PTA_PREFETCH_SSE
1952 | PTA_CX16
| PTA_NO_SAHF
},
1953 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1954 | PTA_64BIT
| PTA_MMX
1955 | PTA_PREFETCH_SSE
| PTA_CX16
},
1956 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1958 {"k6", PROCESSOR_K6
, PTA_MMX
},
1959 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1960 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1961 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1963 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1964 | PTA_3DNOW
| PTA_3DNOW_A
},
1965 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1966 | PTA_3DNOW_A
| PTA_SSE
},
1967 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1968 | PTA_3DNOW_A
| PTA_SSE
},
1969 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1970 | PTA_3DNOW_A
| PTA_SSE
},
1971 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1972 | PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
1973 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1974 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
1976 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1977 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1978 | PTA_SSE2
| PTA_NO_SAHF
},
1979 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1980 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1981 | PTA_SSE2
| PTA_NO_SAHF
},
1982 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1983 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1984 | PTA_SSE2
| PTA_NO_SAHF
},
1985 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1986 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1987 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1988 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1989 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1990 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1993 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1995 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1996 SUBTARGET_OVERRIDE_OPTIONS
;
1999 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2000 SUBSUBTARGET_OVERRIDE_OPTIONS
;
2003 /* -fPIC is the default for x86_64. */
2004 if (TARGET_MACHO
&& TARGET_64BIT
)
2007 /* Set the default values for switches whose default depends on TARGET_64BIT
2008 in case they weren't overwritten by command line options. */
2011 /* Mach-O doesn't support omitting the frame pointer for now. */
2012 if (flag_omit_frame_pointer
== 2)
2013 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
2014 if (flag_asynchronous_unwind_tables
== 2)
2015 flag_asynchronous_unwind_tables
= 1;
2016 if (flag_pcc_struct_return
== 2)
2017 flag_pcc_struct_return
= 0;
2021 if (flag_omit_frame_pointer
== 2)
2022 flag_omit_frame_pointer
= 0;
2023 if (flag_asynchronous_unwind_tables
== 2)
2024 flag_asynchronous_unwind_tables
= 0;
2025 if (flag_pcc_struct_return
== 2)
2026 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2029 /* Need to check -mtune=generic first. */
2030 if (ix86_tune_string
)
2032 if (!strcmp (ix86_tune_string
, "generic")
2033 || !strcmp (ix86_tune_string
, "i686")
2034 /* As special support for cross compilers we read -mtune=native
2035 as -mtune=generic. With native compilers we won't see the
2036 -mtune=native, as it was changed by the driver. */
2037 || !strcmp (ix86_tune_string
, "native"))
2040 ix86_tune_string
= "generic64";
2042 ix86_tune_string
= "generic32";
2044 else if (!strncmp (ix86_tune_string
, "generic", 7))
2045 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2049 if (ix86_arch_string
)
2050 ix86_tune_string
= ix86_arch_string
;
2051 if (!ix86_tune_string
)
2053 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2054 ix86_tune_defaulted
= 1;
2057 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2058 need to use a sensible tune option. */
2059 if (!strcmp (ix86_tune_string
, "generic")
2060 || !strcmp (ix86_tune_string
, "x86-64")
2061 || !strcmp (ix86_tune_string
, "i686"))
2064 ix86_tune_string
= "generic64";
2066 ix86_tune_string
= "generic32";
2069 if (ix86_stringop_string
)
2071 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2072 stringop_alg
= rep_prefix_1_byte
;
2073 else if (!strcmp (ix86_stringop_string
, "libcall"))
2074 stringop_alg
= libcall
;
2075 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2076 stringop_alg
= rep_prefix_4_byte
;
2077 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2078 stringop_alg
= rep_prefix_8_byte
;
2079 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2080 stringop_alg
= loop_1_byte
;
2081 else if (!strcmp (ix86_stringop_string
, "loop"))
2082 stringop_alg
= loop
;
2083 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2084 stringop_alg
= unrolled_loop
;
2086 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2088 if (!strcmp (ix86_tune_string
, "x86-64"))
2089 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2090 "-mtune=generic instead as appropriate.");
2092 if (!ix86_arch_string
)
2093 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2094 if (!strcmp (ix86_arch_string
, "generic"))
2095 error ("generic CPU can be used only for -mtune= switch");
2096 if (!strncmp (ix86_arch_string
, "generic", 7))
2097 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2099 if (ix86_cmodel_string
!= 0)
2101 if (!strcmp (ix86_cmodel_string
, "small"))
2102 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2103 else if (!strcmp (ix86_cmodel_string
, "medium"))
2104 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2105 else if (!strcmp (ix86_cmodel_string
, "large"))
2106 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2108 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2109 else if (!strcmp (ix86_cmodel_string
, "32"))
2110 ix86_cmodel
= CM_32
;
2111 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2112 ix86_cmodel
= CM_KERNEL
;
2114 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2118 ix86_cmodel
= CM_32
;
2120 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2122 if (ix86_asm_string
!= 0)
2125 && !strcmp (ix86_asm_string
, "intel"))
2126 ix86_asm_dialect
= ASM_INTEL
;
2127 else if (!strcmp (ix86_asm_string
, "att"))
2128 ix86_asm_dialect
= ASM_ATT
;
2130 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2132 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2133 error ("code model %qs not supported in the %s bit mode",
2134 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2135 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2136 sorry ("%i-bit mode not compiled in",
2137 (target_flags
& MASK_64BIT
) ? 64 : 32);
2139 for (i
= 0; i
< pta_size
; i
++)
2140 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2142 ix86_arch
= processor_alias_table
[i
].processor
;
2143 /* Default cpu tuning to the architecture. */
2144 ix86_tune
= ix86_arch
;
2145 if (processor_alias_table
[i
].flags
& PTA_MMX
2146 && !(target_flags_explicit
& MASK_MMX
))
2147 target_flags
|= MASK_MMX
;
2148 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2149 && !(target_flags_explicit
& MASK_3DNOW
))
2150 target_flags
|= MASK_3DNOW
;
2151 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2152 && !(target_flags_explicit
& MASK_3DNOW_A
))
2153 target_flags
|= MASK_3DNOW_A
;
2154 if (processor_alias_table
[i
].flags
& PTA_SSE
2155 && !(target_flags_explicit
& MASK_SSE
))
2156 target_flags
|= MASK_SSE
;
2157 if (processor_alias_table
[i
].flags
& PTA_SSE2
2158 && !(target_flags_explicit
& MASK_SSE2
))
2159 target_flags
|= MASK_SSE2
;
2160 if (processor_alias_table
[i
].flags
& PTA_SSE3
2161 && !(target_flags_explicit
& MASK_SSE3
))
2162 target_flags
|= MASK_SSE3
;
2163 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2164 && !(target_flags_explicit
& MASK_SSSE3
))
2165 target_flags
|= MASK_SSSE3
;
2166 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2167 x86_prefetch_sse
= true;
2168 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2169 x86_cmpxchg16b
= true;
2170 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2171 && !(target_flags_explicit
& MASK_POPCNT
))
2172 target_flags
|= MASK_POPCNT
;
2173 if (processor_alias_table
[i
].flags
& PTA_ABM
2174 && !(target_flags_explicit
& MASK_ABM
))
2175 target_flags
|= MASK_ABM
;
2176 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2177 && !(target_flags_explicit
& MASK_SSE4A
))
2178 target_flags
|= MASK_SSE4A
;
2179 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2181 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2182 error ("CPU you selected does not support x86-64 "
2188 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2190 ix86_arch_mask
= 1u << ix86_arch
;
2191 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2192 ix86_arch_features
[i
] &= ix86_arch_mask
;
2194 for (i
= 0; i
< pta_size
; i
++)
2195 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2197 ix86_tune
= processor_alias_table
[i
].processor
;
2198 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2200 if (ix86_tune_defaulted
)
2202 ix86_tune_string
= "x86-64";
2203 for (i
= 0; i
< pta_size
; i
++)
2204 if (! strcmp (ix86_tune_string
,
2205 processor_alias_table
[i
].name
))
2207 ix86_tune
= processor_alias_table
[i
].processor
;
2210 error ("CPU you selected does not support x86-64 "
2213 /* Intel CPUs have always interpreted SSE prefetch instructions as
2214 NOPs; so, we can enable SSE prefetch instructions even when
2215 -mtune (rather than -march) points us to a processor that has them.
2216 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2217 higher processors. */
2218 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2219 x86_prefetch_sse
= true;
2223 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2225 ix86_tune_mask
= 1u << ix86_tune
;
2226 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2227 ix86_tune_features
[i
] &= ix86_tune_mask
;
2230 ix86_cost
= &size_cost
;
2232 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2233 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2234 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2236 /* Arrange to set up i386_stack_locals for all functions. */
2237 init_machine_status
= ix86_init_machine_status
;
2239 /* Validate -mregparm= value. */
2240 if (ix86_regparm_string
)
2242 i
= atoi (ix86_regparm_string
);
2243 if (i
< 0 || i
> REGPARM_MAX
)
2244 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2250 ix86_regparm
= REGPARM_MAX
;
2252 /* If the user has provided any of the -malign-* options,
2253 warn and use that value only if -falign-* is not set.
2254 Remove this code in GCC 3.2 or later. */
2255 if (ix86_align_loops_string
)
2257 warning (0, "-malign-loops is obsolete, use -falign-loops");
2258 if (align_loops
== 0)
2260 i
= atoi (ix86_align_loops_string
);
2261 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2262 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2264 align_loops
= 1 << i
;
2268 if (ix86_align_jumps_string
)
2270 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2271 if (align_jumps
== 0)
2273 i
= atoi (ix86_align_jumps_string
);
2274 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2275 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2277 align_jumps
= 1 << i
;
2281 if (ix86_align_funcs_string
)
2283 warning (0, "-malign-functions is obsolete, use -falign-functions");
2284 if (align_functions
== 0)
2286 i
= atoi (ix86_align_funcs_string
);
2287 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2288 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2290 align_functions
= 1 << i
;
2294 /* Default align_* from the processor table. */
2295 if (align_loops
== 0)
2297 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2298 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2300 if (align_jumps
== 0)
2302 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2303 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2305 if (align_functions
== 0)
2307 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2310 /* Validate -mbranch-cost= value, or provide default. */
2311 ix86_branch_cost
= ix86_cost
->branch_cost
;
2312 if (ix86_branch_cost_string
)
2314 i
= atoi (ix86_branch_cost_string
);
2316 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2318 ix86_branch_cost
= i
;
2320 if (ix86_section_threshold_string
)
2322 i
= atoi (ix86_section_threshold_string
);
2324 error ("-mlarge-data-threshold=%d is negative", i
);
2326 ix86_section_threshold
= i
;
2329 if (ix86_tls_dialect_string
)
2331 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2332 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2333 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2334 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2335 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2336 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2338 error ("bad value (%s) for -mtls-dialect= switch",
2339 ix86_tls_dialect_string
);
2342 /* Keep nonleaf frame pointers. */
2343 if (flag_omit_frame_pointer
)
2344 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2345 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2346 flag_omit_frame_pointer
= 1;
2348 /* If we're doing fast math, we don't care about comparison order
2349 wrt NaNs. This lets us use a shorter comparison sequence. */
2350 if (flag_finite_math_only
)
2351 target_flags
&= ~MASK_IEEE_FP
;
2353 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2354 since the insns won't need emulation. */
2355 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2356 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2358 /* Likewise, if the target doesn't have a 387, or we've specified
2359 software floating point, don't use 387 inline intrinsics. */
2361 target_flags
|= MASK_NO_FANCY_MATH_387
;
2363 /* Turn on SSE3 builtins for -mssse3. */
2365 target_flags
|= MASK_SSE3
;
2367 /* Turn on SSE3 builtins for -msse4a. */
2369 target_flags
|= MASK_SSE3
;
2371 /* Turn on SSE2 builtins for -msse3. */
2373 target_flags
|= MASK_SSE2
;
2375 /* Turn on SSE builtins for -msse2. */
2377 target_flags
|= MASK_SSE
;
2379 /* Turn on MMX builtins for -msse. */
2382 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2383 x86_prefetch_sse
= true;
2386 /* Turn on MMX builtins for 3Dnow. */
2388 target_flags
|= MASK_MMX
;
2390 /* Turn on POPCNT builtins for -mabm. */
2392 target_flags
|= MASK_POPCNT
;
2396 if (TARGET_ALIGN_DOUBLE
)
2397 error ("-malign-double makes no sense in the 64bit mode");
2399 error ("-mrtd calling convention not supported in the 64bit mode");
2401 /* Enable by default the SSE and MMX builtins. Do allow the user to
2402 explicitly disable any of these. In particular, disabling SSE and
2403 MMX for kernel code is extremely useful. */
2405 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2406 & ~target_flags_explicit
);
2410 /* i386 ABI does not specify red zone. It still makes sense to use it
2411 when programmer takes care to stack from being destroyed. */
2412 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2413 target_flags
|= MASK_NO_RED_ZONE
;
2416 /* Validate -mpreferred-stack-boundary= value, or provide default.
2417 The default of 128 bits is for Pentium III's SSE __m128. We can't
2418 change it because of optimize_size. Otherwise, we can't mix object
2419 files compiled with -Os and -On. */
2420 ix86_preferred_stack_boundary
= 128;
2421 if (ix86_preferred_stack_boundary_string
)
2423 i
= atoi (ix86_preferred_stack_boundary_string
);
2424 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2425 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2426 TARGET_64BIT
? 4 : 2);
2428 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2431 /* Accept -msseregparm only if at least SSE support is enabled. */
2432 if (TARGET_SSEREGPARM
2434 error ("-msseregparm used without SSE enabled");
2436 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2437 if (ix86_fpmath_string
!= 0)
2439 if (! strcmp (ix86_fpmath_string
, "387"))
2440 ix86_fpmath
= FPMATH_387
;
2441 else if (! strcmp (ix86_fpmath_string
, "sse"))
2445 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2446 ix86_fpmath
= FPMATH_387
;
2449 ix86_fpmath
= FPMATH_SSE
;
2451 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2452 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2456 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2457 ix86_fpmath
= FPMATH_387
;
2459 else if (!TARGET_80387
)
2461 warning (0, "387 instruction set disabled, using SSE arithmetics");
2462 ix86_fpmath
= FPMATH_SSE
;
2465 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2468 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2471 /* If the i387 is disabled, then do not return values in it. */
2473 target_flags
&= ~MASK_FLOAT_RETURNS
;
2475 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2476 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2478 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2480 /* ??? Unwind info is not correct around the CFG unless either a frame
2481 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2482 unwind info generation to be aware of the CFG and propagating states
2484 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2485 || flag_exceptions
|| flag_non_call_exceptions
)
2486 && flag_omit_frame_pointer
2487 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2489 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2490 warning (0, "unwind tables currently require either a frame pointer "
2491 "or -maccumulate-outgoing-args for correctness");
2492 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2495 /* For sane SSE instruction set generation we need fcomi instruction.
2496 It is safe to enable all CMOVE instructions. */
2500 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2503 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2504 p
= strchr (internal_label_prefix
, 'X');
2505 internal_label_prefix_len
= p
- internal_label_prefix
;
2509 /* When scheduling description is not available, disable scheduler pass
2510 so it won't slow down the compilation and make x87 code slower. */
2511 if (!TARGET_SCHEDULE
)
2512 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2514 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2515 set_param_value ("simultaneous-prefetches",
2516 ix86_cost
->simultaneous_prefetches
);
2517 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2518 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2521 /* switch to the appropriate section for output of DECL.
2522 DECL is either a `VAR_DECL' node or a constant of some sort.
2523 RELOC indicates whether forming the initial value of DECL requires
2524 link-time relocations. */
2527 x86_64_elf_select_section (tree decl
, int reloc
,
2528 unsigned HOST_WIDE_INT align
)
2530 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2531 && ix86_in_large_data_p (decl
))
2533 const char *sname
= NULL
;
2534 unsigned int flags
= SECTION_WRITE
;
2535 switch (categorize_decl_for_section (decl
, reloc
))
2540 case SECCAT_DATA_REL
:
2541 sname
= ".ldata.rel";
2543 case SECCAT_DATA_REL_LOCAL
:
2544 sname
= ".ldata.rel.local";
2546 case SECCAT_DATA_REL_RO
:
2547 sname
= ".ldata.rel.ro";
2549 case SECCAT_DATA_REL_RO_LOCAL
:
2550 sname
= ".ldata.rel.ro.local";
2554 flags
|= SECTION_BSS
;
2557 case SECCAT_RODATA_MERGE_STR
:
2558 case SECCAT_RODATA_MERGE_STR_INIT
:
2559 case SECCAT_RODATA_MERGE_CONST
:
2563 case SECCAT_SRODATA
:
2570 /* We don't split these for medium model. Place them into
2571 default sections and hope for best. */
2576 /* We might get called with string constants, but get_named_section
2577 doesn't like them as they are not DECLs. Also, we need to set
2578 flags in that case. */
2580 return get_section (sname
, flags
, NULL
);
2581 return get_named_section (decl
, sname
, reloc
);
2584 return default_elf_select_section (decl
, reloc
, align
);
2587 /* Build up a unique section name, expressed as a
2588 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2589 RELOC indicates whether the initial value of EXP requires
2590 link-time relocations. */
2593 x86_64_elf_unique_section (tree decl
, int reloc
)
2595 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2596 && ix86_in_large_data_p (decl
))
2598 const char *prefix
= NULL
;
2599 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2600 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2602 switch (categorize_decl_for_section (decl
, reloc
))
2605 case SECCAT_DATA_REL
:
2606 case SECCAT_DATA_REL_LOCAL
:
2607 case SECCAT_DATA_REL_RO
:
2608 case SECCAT_DATA_REL_RO_LOCAL
:
2609 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2612 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2615 case SECCAT_RODATA_MERGE_STR
:
2616 case SECCAT_RODATA_MERGE_STR_INIT
:
2617 case SECCAT_RODATA_MERGE_CONST
:
2618 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2620 case SECCAT_SRODATA
:
2627 /* We don't split these for medium model. Place them into
2628 default sections and hope for best. */
2636 plen
= strlen (prefix
);
2638 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2639 name
= targetm
.strip_name_encoding (name
);
2640 nlen
= strlen (name
);
2642 string
= alloca (nlen
+ plen
+ 1);
2643 memcpy (string
, prefix
, plen
);
2644 memcpy (string
+ plen
, name
, nlen
+ 1);
2646 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2650 default_unique_section (decl
, reloc
);
2653 #ifdef COMMON_ASM_OP
2654 /* This says how to output assembler code to declare an
2655 uninitialized external linkage data object.
2657 For medium model x86-64 we need to use .largecomm opcode for
2660 x86_elf_aligned_common (FILE *file
,
2661 const char *name
, unsigned HOST_WIDE_INT size
,
2664 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2665 && size
> (unsigned int)ix86_section_threshold
)
2666 fprintf (file
, ".largecomm\t");
2668 fprintf (file
, "%s", COMMON_ASM_OP
);
2669 assemble_name (file
, name
);
2670 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2671 size
, align
/ BITS_PER_UNIT
);
2674 /* Utility function for targets to use in implementing
2675 ASM_OUTPUT_ALIGNED_BSS. */
2678 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2679 const char *name
, unsigned HOST_WIDE_INT size
,
2682 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2683 && size
> (unsigned int)ix86_section_threshold
)
2684 switch_to_section (get_named_section (decl
, ".lbss", 0));
2686 switch_to_section (bss_section
);
2687 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2688 #ifdef ASM_DECLARE_OBJECT_NAME
2689 last_assemble_variable_decl
= decl
;
2690 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2692 /* Standard thing is just output label for the object. */
2693 ASM_OUTPUT_LABEL (file
, name
);
2694 #endif /* ASM_DECLARE_OBJECT_NAME */
2695 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2699 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2701 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2702 make the problem with not enough registers even worse. */
2703 #ifdef INSN_SCHEDULING
2705 flag_schedule_insns
= 0;
2709 /* The Darwin libraries never set errno, so we might as well
2710 avoid calling them when that's the only reason we would. */
2711 flag_errno_math
= 0;
2713 /* The default values of these switches depend on the TARGET_64BIT
2714 that is not known at this moment. Mark these values with 2 and
2715 let user the to override these. In case there is no command line option
2716 specifying them, we will set the defaults in override_options. */
2718 flag_omit_frame_pointer
= 2;
2719 flag_pcc_struct_return
= 2;
2720 flag_asynchronous_unwind_tables
= 2;
2721 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2722 SUBTARGET_OPTIMIZATION_OPTIONS
;
2726 /* Table of valid machine attributes. */
2727 const struct attribute_spec ix86_attribute_table
[] =
2729 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2730 /* Stdcall attribute says callee is responsible for popping arguments
2731 if they are not variable. */
2732 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2733 /* Fastcall attribute says callee is responsible for popping arguments
2734 if they are not variable. */
2735 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2736 /* Cdecl attribute says the callee is a normal C declaration */
2737 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2738 /* Regparm attribute specifies how many integer arguments are to be
2739 passed in registers. */
2740 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2741 /* Sseregparm attribute says we are using x86_64 calling conventions
2742 for FP arguments. */
2743 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2744 /* force_align_arg_pointer says this function realigns the stack at entry. */
2745 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2746 false, true, true, ix86_handle_cconv_attribute
},
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2749 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2750 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2752 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2753 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2754 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2755 SUBTARGET_ATTRIBUTE_TABLE
,
2757 { NULL
, 0, 0, false, false, false, NULL
}
2760 /* Decide whether we can make a sibling call to a function. DECL is the
2761 declaration of the function being targeted by the call and EXP is the
2762 CALL_EXPR representing the call. */
2765 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2770 /* If we are generating position-independent code, we cannot sibcall
2771 optimize any indirect call, or a direct call to a global function,
2772 as the PLT requires %ebx be live. */
2773 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2780 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2781 if (POINTER_TYPE_P (func
))
2782 func
= TREE_TYPE (func
);
2785 /* Check that the return value locations are the same. Like
2786 if we are returning floats on the 80387 register stack, we cannot
2787 make a sibcall from a function that doesn't return a float to a
2788 function that does or, conversely, from a function that does return
2789 a float to a function that doesn't; the necessary stack adjustment
2790 would not be executed. This is also the place we notice
2791 differences in the return value ABI. Note that it is ok for one
2792 of the functions to have void return type as long as the return
2793 value of the other is passed in a register. */
2794 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2795 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2797 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2799 if (!rtx_equal_p (a
, b
))
2802 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2804 else if (!rtx_equal_p (a
, b
))
2807 /* If this call is indirect, we'll need to be able to use a call-clobbered
2808 register for the address of the target function. Make sure that all
2809 such registers are not used for passing parameters. */
2810 if (!decl
&& !TARGET_64BIT
)
2814 /* We're looking at the CALL_EXPR, we need the type of the function. */
2815 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2816 type
= TREE_TYPE (type
); /* pointer type */
2817 type
= TREE_TYPE (type
); /* function type */
2819 if (ix86_function_regparm (type
, NULL
) >= 3)
2821 /* ??? Need to count the actual number of registers to be used,
2822 not the possible number of registers. Fix later. */
2827 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2828 /* Dllimport'd functions are also called indirectly. */
2829 if (decl
&& DECL_DLLIMPORT_P (decl
)
2830 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2834 /* If we forced aligned the stack, then sibcalling would unalign the
2835 stack, which may break the called function. */
2836 if (cfun
->machine
->force_align_arg_pointer
)
2839 /* Otherwise okay. That also includes certain types of indirect calls. */
2843 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2844 calling convention attributes;
2845 arguments as in struct attribute_spec.handler. */
2848 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2850 int flags ATTRIBUTE_UNUSED
,
2853 if (TREE_CODE (*node
) != FUNCTION_TYPE
2854 && TREE_CODE (*node
) != METHOD_TYPE
2855 && TREE_CODE (*node
) != FIELD_DECL
2856 && TREE_CODE (*node
) != TYPE_DECL
)
2858 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2859 IDENTIFIER_POINTER (name
));
2860 *no_add_attrs
= true;
2864 /* Can combine regparm with all attributes but fastcall. */
2865 if (is_attribute_p ("regparm", name
))
2869 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2871 error ("fastcall and regparm attributes are not compatible");
2874 cst
= TREE_VALUE (args
);
2875 if (TREE_CODE (cst
) != INTEGER_CST
)
2877 warning (OPT_Wattributes
,
2878 "%qs attribute requires an integer constant argument",
2879 IDENTIFIER_POINTER (name
));
2880 *no_add_attrs
= true;
2882 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2884 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2885 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2886 *no_add_attrs
= true;
2890 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2891 TYPE_ATTRIBUTES (*node
))
2892 && compare_tree_int (cst
, REGPARM_MAX
-1))
2894 error ("%s functions limited to %d register parameters",
2895 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2903 warning (OPT_Wattributes
, "%qs attribute ignored",
2904 IDENTIFIER_POINTER (name
));
2905 *no_add_attrs
= true;
2909 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2910 if (is_attribute_p ("fastcall", name
))
2912 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2914 error ("fastcall and cdecl attributes are not compatible");
2916 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2918 error ("fastcall and stdcall attributes are not compatible");
2920 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2922 error ("fastcall and regparm attributes are not compatible");
2926 /* Can combine stdcall with fastcall (redundant), regparm and
2928 else if (is_attribute_p ("stdcall", name
))
2930 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2932 error ("stdcall and cdecl attributes are not compatible");
2934 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2936 error ("stdcall and fastcall attributes are not compatible");
2940 /* Can combine cdecl with regparm and sseregparm. */
2941 else if (is_attribute_p ("cdecl", name
))
2943 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2945 error ("stdcall and cdecl attributes are not compatible");
2947 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2949 error ("fastcall and cdecl attributes are not compatible");
2953 /* Can combine sseregparm with all attributes. */
2958 /* Return 0 if the attributes for two types are incompatible, 1 if they
2959 are compatible, and 2 if they are nearly compatible (which causes a
2960 warning to be generated). */
2963 ix86_comp_type_attributes (tree type1
, tree type2
)
2965 /* Check for mismatch of non-default calling convention. */
2966 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2968 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2971 /* Check for mismatched fastcall/regparm types. */
2972 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2973 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2974 || (ix86_function_regparm (type1
, NULL
)
2975 != ix86_function_regparm (type2
, NULL
)))
2978 /* Check for mismatched sseregparm types. */
2979 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2980 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2983 /* Check for mismatched return types (cdecl vs stdcall). */
2984 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2985 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2991 /* Return the regparm value for a function with the indicated TYPE and DECL.
2992 DECL may be NULL when calling function indirectly
2993 or considering a libcall. */
2996 ix86_function_regparm (tree type
, tree decl
)
2999 int regparm
= ix86_regparm
;
3000 bool user_convention
= false;
3004 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
3007 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
3008 user_convention
= true;
3011 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
3014 user_convention
= true;
3017 /* Use register calling convention for local functions when possible. */
3018 if (!TARGET_64BIT
&& !user_convention
&& decl
3019 && flag_unit_at_a_time
&& !profile_flag
)
3021 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3024 int local_regparm
, globals
= 0, regno
;
3026 /* Make sure no regparm register is taken by a global register
3028 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3029 if (global_regs
[local_regparm
])
3031 /* We can't use regparm(3) for nested functions as these use
3032 static chain pointer in third argument. */
3033 if (local_regparm
== 3
3034 && decl_function_context (decl
)
3035 && !DECL_NO_STATIC_CHAIN (decl
))
3037 /* If the function realigns its stackpointer, the
3038 prologue will clobber %ecx. If we've already
3039 generated code for the callee, the callee
3040 DECL_STRUCT_FUNCTION is gone, so we fall back to
3041 scanning the attributes for the self-realigning
3043 if ((DECL_STRUCT_FUNCTION (decl
)
3044 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
3045 || (!DECL_STRUCT_FUNCTION (decl
)
3046 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3047 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3049 /* Each global register variable increases register preassure,
3050 so the more global reg vars there are, the smaller regparm
3051 optimization use, unless requested by the user explicitly. */
3052 for (regno
= 0; regno
< 6; regno
++)
3053 if (global_regs
[regno
])
3056 = globals
< local_regparm
? local_regparm
- globals
: 0;
3058 if (local_regparm
> regparm
)
3059 regparm
= local_regparm
;
3066 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3067 DFmode (2) arguments in SSE registers for a function with the
3068 indicated TYPE and DECL. DECL may be NULL when calling function
3069 indirectly or considering a libcall. Otherwise return 0. */
3072 ix86_function_sseregparm (tree type
, tree decl
)
3074 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3075 by the sseregparm attribute. */
3076 if (TARGET_SSEREGPARM
3078 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3083 error ("Calling %qD with attribute sseregparm without "
3084 "SSE/SSE2 enabled", decl
);
3086 error ("Calling %qT with attribute sseregparm without "
3087 "SSE/SSE2 enabled", type
);
3094 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3095 (and DFmode for SSE2) arguments in SSE registers,
3096 even for 32-bit targets. */
3097 if (!TARGET_64BIT
&& decl
3098 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3100 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3102 return TARGET_SSE2
? 2 : 1;
3108 /* Return true if EAX is live at the start of the function. Used by
3109 ix86_expand_prologue to determine if we need special help before
3110 calling allocate_stack_worker. */
3113 ix86_eax_live_at_start_p (void)
3115 /* Cheat. Don't bother working forward from ix86_function_regparm
3116 to the function type to whether an actual argument is located in
3117 eax. Instead just look at cfg info, which is still close enough
3118 to correct at this point. This gives false positives for broken
3119 functions that might use uninitialized data that happens to be
3120 allocated in eax, but who cares? */
3121 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3124 /* Value is the number of bytes of arguments automatically
3125 popped when returning from a subroutine call.
3126 FUNDECL is the declaration node of the function (as a tree),
3127 FUNTYPE is the data type of the function (as a tree),
3128 or for a library call it is an identifier node for the subroutine name.
3129 SIZE is the number of bytes of arguments passed on the stack.
3131 On the 80386, the RTD insn may be used to pop them if the number
3132 of args is fixed, but if the number is variable then the caller
3133 must pop them all. RTD can't be used for library calls now
3134 because the library is compiled with the Unix compiler.
3135 Use of RTD is a selectable option, since it is incompatible with
3136 standard Unix calling sequences. If the option is not selected,
3137 the caller must always pop the args.
3139 The attribute stdcall is equivalent to RTD on a per module basis. */
3142 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3144 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3146 /* Cdecl functions override -mrtd, and never pop the stack. */
3147 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3149 /* Stdcall and fastcall functions will pop the stack if not
3151 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3152 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3156 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3157 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3158 == void_type_node
)))
3162 /* Lose any fake structure return argument if it is passed on the stack. */
3163 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3165 && !KEEP_AGGREGATE_RETURN_POINTER
)
3167 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3170 return GET_MODE_SIZE (Pmode
);
3176 /* Argument support functions. */
3178 /* Return true when register may be used to pass function parameters. */
3180 ix86_function_arg_regno_p (int regno
)
3186 return (regno
< REGPARM_MAX
3187 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3189 return (regno
< REGPARM_MAX
3190 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3191 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3192 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3193 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3198 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3203 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3204 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3207 /* RAX is used as hidden argument to va_arg functions. */
3210 for (i
= 0; i
< REGPARM_MAX
; i
++)
3211 if (regno
== x86_64_int_parameter_registers
[i
])
3216 /* Return if we do not know how to pass TYPE solely in registers. */
3219 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3221 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3224 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3225 The layout_type routine is crafty and tries to trick us into passing
3226 currently unsupported vector types on the stack by using TImode. */
3227 return (!TARGET_64BIT
&& mode
== TImode
3228 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3231 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3232 for a call to a function whose data type is FNTYPE.
3233 For a library call, FNTYPE is 0. */
3236 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3237 tree fntype
, /* tree ptr for function decl */
3238 rtx libname
, /* SYMBOL_REF of library name or 0 */
3241 static CUMULATIVE_ARGS zero_cum
;
3242 tree param
, next_param
;
3244 if (TARGET_DEBUG_ARG
)
3246 fprintf (stderr
, "\ninit_cumulative_args (");
3248 fprintf (stderr
, "fntype code = %s, ret code = %s",
3249 tree_code_name
[(int) TREE_CODE (fntype
)],
3250 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3252 fprintf (stderr
, "no fntype");
3255 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3260 /* Set up the number of registers to use for passing arguments. */
3261 cum
->nregs
= ix86_regparm
;
3263 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3265 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3266 cum
->warn_sse
= true;
3267 cum
->warn_mmx
= true;
3268 cum
->maybe_vaarg
= false;
3270 /* Use ecx and edx registers if function has fastcall attribute,
3271 else look for regparm information. */
3272 if (fntype
&& !TARGET_64BIT
)
3274 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3280 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3283 /* Set up the number of SSE registers used for passing SFmode
3284 and DFmode arguments. Warn for mismatching ABI. */
3285 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3287 /* Determine if this function has variable arguments. This is
3288 indicated by the last argument being 'void_type_mode' if there
3289 are no variable arguments. If there are variable arguments, then
3290 we won't pass anything in registers in 32-bit mode. */
3292 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3294 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3295 param
!= 0; param
= next_param
)
3297 next_param
= TREE_CHAIN (param
);
3298 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3308 cum
->float_in_sse
= 0;
3310 cum
->maybe_vaarg
= true;
3314 if ((!fntype
&& !libname
)
3315 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3316 cum
->maybe_vaarg
= true;
3318 if (TARGET_DEBUG_ARG
)
3319 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3324 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3325 But in the case of vector types, it is some vector mode.
3327 When we have only some of our vector isa extensions enabled, then there
3328 are some modes for which vector_mode_supported_p is false. For these
3329 modes, the generic vector support in gcc will choose some non-vector mode
3330 in order to implement the type. By computing the natural mode, we'll
3331 select the proper ABI location for the operand and not depend on whatever
3332 the middle-end decides to do with these vector types. */
3334 static enum machine_mode
3335 type_natural_mode (tree type
)
3337 enum machine_mode mode
= TYPE_MODE (type
);
3339 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3341 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3342 if ((size
== 8 || size
== 16)
3343 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3344 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3346 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3348 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3349 mode
= MIN_MODE_VECTOR_FLOAT
;
3351 mode
= MIN_MODE_VECTOR_INT
;
3353 /* Get the mode which has this inner mode and number of units. */
3354 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3355 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3356 && GET_MODE_INNER (mode
) == innermode
)
3366 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3367 this may not agree with the mode that the type system has chosen for the
3368 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3369 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3372 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3377 if (orig_mode
!= BLKmode
)
3378 tmp
= gen_rtx_REG (orig_mode
, regno
);
3381 tmp
= gen_rtx_REG (mode
, regno
);
3382 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3383 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3389 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3390 of this code is to classify each 8bytes of incoming argument by the register
3391 class and assign registers accordingly. */
3393 /* Return the union class of CLASS1 and CLASS2.
3394 See the x86-64 PS ABI for details. */
3396 static enum x86_64_reg_class
3397 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3399 /* Rule #1: If both classes are equal, this is the resulting class. */
3400 if (class1
== class2
)
3403 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3405 if (class1
== X86_64_NO_CLASS
)
3407 if (class2
== X86_64_NO_CLASS
)
3410 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3411 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3412 return X86_64_MEMORY_CLASS
;
3414 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3415 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3416 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3417 return X86_64_INTEGERSI_CLASS
;
3418 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3419 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3420 return X86_64_INTEGER_CLASS
;
3422 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3424 if (class1
== X86_64_X87_CLASS
3425 || class1
== X86_64_X87UP_CLASS
3426 || class1
== X86_64_COMPLEX_X87_CLASS
3427 || class2
== X86_64_X87_CLASS
3428 || class2
== X86_64_X87UP_CLASS
3429 || class2
== X86_64_COMPLEX_X87_CLASS
)
3430 return X86_64_MEMORY_CLASS
;
3432 /* Rule #6: Otherwise class SSE is used. */
3433 return X86_64_SSE_CLASS
;
3436 /* Classify the argument of type TYPE and mode MODE.
3437 CLASSES will be filled by the register class used to pass each word
3438 of the operand. The number of words is returned. In case the parameter
3439 should be passed in memory, 0 is returned. As a special case for zero
3440 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3442 BIT_OFFSET is used internally for handling records and specifies offset
3443 of the offset in bits modulo 256 to avoid overflow cases.
3445 See the x86-64 PS ABI for details.
3449 classify_argument (enum machine_mode mode
, tree type
,
3450 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3452 HOST_WIDE_INT bytes
=
3453 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3454 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3456 /* Variable sized entities are always passed/returned in memory. */
3460 if (mode
!= VOIDmode
3461 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3464 if (type
&& AGGREGATE_TYPE_P (type
))
3468 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3470 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3474 for (i
= 0; i
< words
; i
++)
3475 classes
[i
] = X86_64_NO_CLASS
;
3477 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3478 signalize memory class, so handle it as special case. */
3481 classes
[0] = X86_64_NO_CLASS
;
3485 /* Classify each field of record and merge classes. */
3486 switch (TREE_CODE (type
))
3489 /* And now merge the fields of structure. */
3490 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3492 if (TREE_CODE (field
) == FIELD_DECL
)
3496 if (TREE_TYPE (field
) == error_mark_node
)
3499 /* Bitfields are always classified as integer. Handle them
3500 early, since later code would consider them to be
3501 misaligned integers. */
3502 if (DECL_BIT_FIELD (field
))
3504 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3505 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3506 + tree_low_cst (DECL_SIZE (field
), 0)
3509 merge_classes (X86_64_INTEGER_CLASS
,
3514 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3515 TREE_TYPE (field
), subclasses
,
3516 (int_bit_position (field
)
3517 + bit_offset
) % 256);
3520 for (i
= 0; i
< num
; i
++)
3523 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3525 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3533 /* Arrays are handled as small records. */
3536 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3537 TREE_TYPE (type
), subclasses
, bit_offset
);
3541 /* The partial classes are now full classes. */
3542 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3543 subclasses
[0] = X86_64_SSE_CLASS
;
3544 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3545 subclasses
[0] = X86_64_INTEGER_CLASS
;
3547 for (i
= 0; i
< words
; i
++)
3548 classes
[i
] = subclasses
[i
% num
];
3553 case QUAL_UNION_TYPE
:
3554 /* Unions are similar to RECORD_TYPE but offset is always 0.
3556 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3558 if (TREE_CODE (field
) == FIELD_DECL
)
3562 if (TREE_TYPE (field
) == error_mark_node
)
3565 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3566 TREE_TYPE (field
), subclasses
,
3570 for (i
= 0; i
< num
; i
++)
3571 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3580 /* Final merger cleanup. */
3581 for (i
= 0; i
< words
; i
++)
3583 /* If one class is MEMORY, everything should be passed in
3585 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3588 /* The X86_64_SSEUP_CLASS should be always preceded by
3589 X86_64_SSE_CLASS. */
3590 if (classes
[i
] == X86_64_SSEUP_CLASS
3591 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3592 classes
[i
] = X86_64_SSE_CLASS
;
3594 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3595 if (classes
[i
] == X86_64_X87UP_CLASS
3596 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3597 classes
[i
] = X86_64_SSE_CLASS
;
3602 /* Compute alignment needed. We align all types to natural boundaries with
3603 exception of XFmode that is aligned to 64bits. */
3604 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3606 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3609 mode_alignment
= 128;
3610 else if (mode
== XCmode
)
3611 mode_alignment
= 256;
3612 if (COMPLEX_MODE_P (mode
))
3613 mode_alignment
/= 2;
3614 /* Misaligned fields are always returned in memory. */
3615 if (bit_offset
% mode_alignment
)
3619 /* for V1xx modes, just use the base mode */
3620 if (VECTOR_MODE_P (mode
)
3621 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3622 mode
= GET_MODE_INNER (mode
);
3624 /* Classification of atomic types. */
3629 classes
[0] = X86_64_SSE_CLASS
;
3632 classes
[0] = X86_64_SSE_CLASS
;
3633 classes
[1] = X86_64_SSEUP_CLASS
;
3642 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3643 classes
[0] = X86_64_INTEGERSI_CLASS
;
3645 classes
[0] = X86_64_INTEGER_CLASS
;
3649 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3654 if (!(bit_offset
% 64))
3655 classes
[0] = X86_64_SSESF_CLASS
;
3657 classes
[0] = X86_64_SSE_CLASS
;
3660 classes
[0] = X86_64_SSEDF_CLASS
;
3663 classes
[0] = X86_64_X87_CLASS
;
3664 classes
[1] = X86_64_X87UP_CLASS
;
3667 classes
[0] = X86_64_SSE_CLASS
;
3668 classes
[1] = X86_64_SSEUP_CLASS
;
3671 classes
[0] = X86_64_SSE_CLASS
;
3674 classes
[0] = X86_64_SSEDF_CLASS
;
3675 classes
[1] = X86_64_SSEDF_CLASS
;
3678 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3681 /* This modes is larger than 16 bytes. */
3689 classes
[0] = X86_64_SSE_CLASS
;
3690 classes
[1] = X86_64_SSEUP_CLASS
;
3696 classes
[0] = X86_64_SSE_CLASS
;
3702 gcc_assert (VECTOR_MODE_P (mode
));
3707 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3709 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3710 classes
[0] = X86_64_INTEGERSI_CLASS
;
3712 classes
[0] = X86_64_INTEGER_CLASS
;
3713 classes
[1] = X86_64_INTEGER_CLASS
;
3714 return 1 + (bytes
> 8);
3718 /* Examine the argument and return set number of register required in each
3719 class. Return 0 iff parameter should be passed in memory. */
3721 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3722 int *int_nregs
, int *sse_nregs
)
3724 enum x86_64_reg_class
class[MAX_CLASSES
];
3725 int n
= classify_argument (mode
, type
, class, 0);
3731 for (n
--; n
>= 0; n
--)
3734 case X86_64_INTEGER_CLASS
:
3735 case X86_64_INTEGERSI_CLASS
:
3738 case X86_64_SSE_CLASS
:
3739 case X86_64_SSESF_CLASS
:
3740 case X86_64_SSEDF_CLASS
:
3743 case X86_64_NO_CLASS
:
3744 case X86_64_SSEUP_CLASS
:
3746 case X86_64_X87_CLASS
:
3747 case X86_64_X87UP_CLASS
:
3751 case X86_64_COMPLEX_X87_CLASS
:
3752 return in_return
? 2 : 0;
3753 case X86_64_MEMORY_CLASS
:
3759 /* Construct container for the argument used by GCC interface. See
3760 FUNCTION_ARG for the detailed description. */
3763 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3764 tree type
, int in_return
, int nintregs
, int nsseregs
,
3765 const int *intreg
, int sse_regno
)
3767 /* The following variables hold the static issued_error state. */
3768 static bool issued_sse_arg_error
;
3769 static bool issued_sse_ret_error
;
3770 static bool issued_x87_ret_error
;
3772 enum machine_mode tmpmode
;
3774 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3775 enum x86_64_reg_class
class[MAX_CLASSES
];
3779 int needed_sseregs
, needed_intregs
;
3780 rtx exp
[MAX_CLASSES
];
3783 n
= classify_argument (mode
, type
, class, 0);
3784 if (TARGET_DEBUG_ARG
)
3787 fprintf (stderr
, "Memory class\n");
3790 fprintf (stderr
, "Classes:");
3791 for (i
= 0; i
< n
; i
++)
3793 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3795 fprintf (stderr
, "\n");
3800 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3803 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3806 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3807 some less clueful developer tries to use floating-point anyway. */
3808 if (needed_sseregs
&& !TARGET_SSE
)
3812 if (!issued_sse_ret_error
)
3814 error ("SSE register return with SSE disabled");
3815 issued_sse_ret_error
= true;
3818 else if (!issued_sse_arg_error
)
3820 error ("SSE register argument with SSE disabled");
3821 issued_sse_arg_error
= true;
3826 /* Likewise, error if the ABI requires us to return values in the
3827 x87 registers and the user specified -mno-80387. */
3828 if (!TARGET_80387
&& in_return
)
3829 for (i
= 0; i
< n
; i
++)
3830 if (class[i
] == X86_64_X87_CLASS
3831 || class[i
] == X86_64_X87UP_CLASS
3832 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3834 if (!issued_x87_ret_error
)
3836 error ("x87 register return with x87 disabled");
3837 issued_x87_ret_error
= true;
3842 /* First construct simple cases. Avoid SCmode, since we want to use
3843 single register to pass this type. */
3844 if (n
== 1 && mode
!= SCmode
)
3847 case X86_64_INTEGER_CLASS
:
3848 case X86_64_INTEGERSI_CLASS
:
3849 return gen_rtx_REG (mode
, intreg
[0]);
3850 case X86_64_SSE_CLASS
:
3851 case X86_64_SSESF_CLASS
:
3852 case X86_64_SSEDF_CLASS
:
3853 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3854 case X86_64_X87_CLASS
:
3855 case X86_64_COMPLEX_X87_CLASS
:
3856 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3857 case X86_64_NO_CLASS
:
3858 /* Zero sized array, struct or class. */
3863 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3865 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3867 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3868 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3869 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3870 && class[1] == X86_64_INTEGER_CLASS
3871 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3872 && intreg
[0] + 1 == intreg
[1])
3873 return gen_rtx_REG (mode
, intreg
[0]);
3875 /* Otherwise figure out the entries of the PARALLEL. */
3876 for (i
= 0; i
< n
; i
++)
3880 case X86_64_NO_CLASS
:
3882 case X86_64_INTEGER_CLASS
:
3883 case X86_64_INTEGERSI_CLASS
:
3884 /* Merge TImodes on aligned occasions here too. */
3885 if (i
* 8 + 8 > bytes
)
3886 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3887 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3891 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3892 if (tmpmode
== BLKmode
)
3894 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3895 gen_rtx_REG (tmpmode
, *intreg
),
3899 case X86_64_SSESF_CLASS
:
3900 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3901 gen_rtx_REG (SFmode
,
3902 SSE_REGNO (sse_regno
)),
3906 case X86_64_SSEDF_CLASS
:
3907 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3908 gen_rtx_REG (DFmode
,
3909 SSE_REGNO (sse_regno
)),
3913 case X86_64_SSE_CLASS
:
3914 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3918 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3919 gen_rtx_REG (tmpmode
,
3920 SSE_REGNO (sse_regno
)),
3922 if (tmpmode
== TImode
)
3931 /* Empty aligned struct, union or class. */
3935 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3936 for (i
= 0; i
< nexps
; i
++)
3937 XVECEXP (ret
, 0, i
) = exp
[i
];
3941 /* Update the data in CUM to advance over an argument
3942 of mode MODE and data type TYPE.
3943 (TYPE is null for libcalls where that information may not be available.) */
3946 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3947 tree type
, int named
)
3950 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3951 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3954 mode
= type_natural_mode (type
);
3956 if (TARGET_DEBUG_ARG
)
3957 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3958 "mode=%s, named=%d)\n\n",
3959 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3960 GET_MODE_NAME (mode
), named
);
3964 int int_nregs
, sse_nregs
;
3965 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3966 cum
->words
+= words
;
3967 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3969 cum
->nregs
-= int_nregs
;
3970 cum
->sse_nregs
-= sse_nregs
;
3971 cum
->regno
+= int_nregs
;
3972 cum
->sse_regno
+= sse_nregs
;
3975 cum
->words
+= words
;
3993 cum
->words
+= words
;
3994 cum
->nregs
-= words
;
3995 cum
->regno
+= words
;
3997 if (cum
->nregs
<= 0)
4005 if (cum
->float_in_sse
< 2)
4008 if (cum
->float_in_sse
< 1)
4019 if (!type
|| !AGGREGATE_TYPE_P (type
))
4021 cum
->sse_words
+= words
;
4022 cum
->sse_nregs
-= 1;
4023 cum
->sse_regno
+= 1;
4024 if (cum
->sse_nregs
<= 0)
4036 if (!type
|| !AGGREGATE_TYPE_P (type
))
4038 cum
->mmx_words
+= words
;
4039 cum
->mmx_nregs
-= 1;
4040 cum
->mmx_regno
+= 1;
4041 if (cum
->mmx_nregs
<= 0)
4052 /* Define where to put the arguments to a function.
4053 Value is zero to push the argument on the stack,
4054 or a hard register in which to store the argument.
4056 MODE is the argument's machine mode.
4057 TYPE is the data type of the argument (as a tree).
4058 This is null for libcalls where that information may
4060 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4061 the preceding args and about the function being called.
4062 NAMED is nonzero if this argument is a named parameter
4063 (otherwise it is an extra parameter matching an ellipsis). */
4066 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
4067 tree type
, int named
)
4069 enum machine_mode mode
= orig_mode
;
4072 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
4073 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4074 static bool warnedsse
, warnedmmx
;
4076 /* To simplify the code below, represent vector types with a vector mode
4077 even if MMX/SSE are not active. */
4078 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4079 mode
= type_natural_mode (type
);
4081 /* Handle a hidden AL argument containing number of registers for varargs
4082 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4084 if (mode
== VOIDmode
)
4087 return GEN_INT (cum
->maybe_vaarg
4088 ? (cum
->sse_nregs
< 0
4096 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4098 &x86_64_int_parameter_registers
[cum
->regno
],
4103 /* For now, pass fp/complex values on the stack. */
4115 if (words
<= cum
->nregs
)
4117 int regno
= cum
->regno
;
4119 /* Fastcall allocates the first two DWORD (SImode) or
4120 smaller arguments to ECX and EDX. */
4123 if (mode
== BLKmode
|| mode
== DImode
)
4126 /* ECX not EAX is the first allocated register. */
4130 ret
= gen_rtx_REG (mode
, regno
);
4134 if (cum
->float_in_sse
< 2)
4137 if (cum
->float_in_sse
< 1)
4147 if (!type
|| !AGGREGATE_TYPE_P (type
))
4149 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4152 warning (0, "SSE vector argument without SSE enabled "
4156 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4157 cum
->sse_regno
+ FIRST_SSE_REG
);
4164 if (!type
|| !AGGREGATE_TYPE_P (type
))
4166 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4169 warning (0, "MMX vector argument without MMX enabled "
4173 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4174 cum
->mmx_regno
+ FIRST_MMX_REG
);
4179 if (TARGET_DEBUG_ARG
)
4182 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4183 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4186 print_simple_rtl (stderr
, ret
);
4188 fprintf (stderr
, ", stack");
4190 fprintf (stderr
, " )\n");
4196 /* A C expression that indicates when an argument must be passed by
4197 reference. If nonzero for an argument, a copy of that argument is
4198 made in memory and a pointer to the argument is passed instead of
4199 the argument itself. The pointer is passed in whatever way is
4200 appropriate for passing a pointer to that type. */
4203 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4204 enum machine_mode mode ATTRIBUTE_UNUSED
,
4205 tree type
, bool named ATTRIBUTE_UNUSED
)
4210 if (type
&& int_size_in_bytes (type
) == -1)
4212 if (TARGET_DEBUG_ARG
)
4213 fprintf (stderr
, "function_arg_pass_by_reference\n");
4220 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4221 ABI. Only called if TARGET_SSE. */
4223 contains_128bit_aligned_vector_p (tree type
)
4225 enum machine_mode mode
= TYPE_MODE (type
);
4226 if (SSE_REG_MODE_P (mode
)
4227 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4229 if (TYPE_ALIGN (type
) < 128)
4232 if (AGGREGATE_TYPE_P (type
))
4234 /* Walk the aggregates recursively. */
4235 switch (TREE_CODE (type
))
4239 case QUAL_UNION_TYPE
:
4243 /* Walk all the structure fields. */
4244 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4246 if (TREE_CODE (field
) == FIELD_DECL
4247 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4254 /* Just for use if some languages pass arrays by value. */
4255 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4266 /* Gives the alignment boundary, in bits, of an argument with the
4267 specified mode and type. */
4270 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4274 align
= TYPE_ALIGN (type
);
4276 align
= GET_MODE_ALIGNMENT (mode
);
4277 if (align
< PARM_BOUNDARY
)
4278 align
= PARM_BOUNDARY
;
4281 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4282 make an exception for SSE modes since these require 128bit
4285 The handling here differs from field_alignment. ICC aligns MMX
4286 arguments to 4 byte boundaries, while structure fields are aligned
4287 to 8 byte boundaries. */
4289 align
= PARM_BOUNDARY
;
4292 if (!SSE_REG_MODE_P (mode
))
4293 align
= PARM_BOUNDARY
;
4297 if (!contains_128bit_aligned_vector_p (type
))
4298 align
= PARM_BOUNDARY
;
4306 /* Return true if N is a possible register number of function value. */
4308 ix86_function_value_regno_p (int regno
)
4314 return ((regno
) == 0
4315 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4316 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4318 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4319 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4320 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4325 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4326 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4330 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4337 /* Define how to find the value returned by a function.
4338 VALTYPE is the data type of the value (as a tree).
4339 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4340 otherwise, FUNC is 0. */
4342 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4343 bool outgoing ATTRIBUTE_UNUSED
)
4345 enum machine_mode natmode
= type_natural_mode (valtype
);
4349 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4350 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4351 x86_64_int_return_registers
, 0);
4352 /* For zero sized structures, construct_container return NULL, but we
4353 need to keep rest of compiler happy by returning meaningful value. */
4355 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4360 tree fn
= NULL_TREE
, fntype
;
4362 && DECL_P (fntype_or_decl
))
4363 fn
= fntype_or_decl
;
4364 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4365 return gen_rtx_REG (TYPE_MODE (valtype
),
4366 ix86_value_regno (natmode
, fn
, fntype
));
4370 /* Return true iff type is returned in memory. */
4372 ix86_return_in_memory (tree type
)
4374 int needed_intregs
, needed_sseregs
, size
;
4375 enum machine_mode mode
= type_natural_mode (type
);
4378 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4380 if (mode
== BLKmode
)
4383 size
= int_size_in_bytes (type
);
4385 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4388 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4390 /* User-created vectors small enough to fit in EAX. */
4394 /* MMX/3dNow values are returned in MM0,
4395 except when it doesn't exist. */
4397 return (TARGET_MMX
? 0 : 1);
4399 /* SSE values are returned in XMM0, except when it doesn't exist. */
4401 return (TARGET_SSE
? 0 : 1);
4415 /* When returning SSE vector types, we have a choice of either
4416 (1) being abi incompatible with a -march switch, or
4417 (2) generating an error.
4418 Given no good solution, I think the safest thing is one warning.
4419 The user won't be able to use -Werror, but....
4421 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4422 called in response to actually generating a caller or callee that
4423 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4424 via aggregate_value_p for general type probing from tree-ssa. */
4427 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4429 static bool warnedsse
, warnedmmx
;
4433 /* Look at the return type of the function, not the function type. */
4434 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4436 if (!TARGET_SSE
&& !warnedsse
)
4439 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4442 warning (0, "SSE vector return without SSE enabled "
4447 if (!TARGET_MMX
&& !warnedmmx
)
4449 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4452 warning (0, "MMX vector return without MMX enabled "
4461 /* Define how to find the value returned by a library function
4462 assuming the value has mode MODE. */
4464 ix86_libcall_value (enum machine_mode mode
)
4478 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4481 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4485 return gen_rtx_REG (mode
, 0);
4489 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4492 /* Given a mode, return the register to use for a return value. */
4495 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4497 gcc_assert (!TARGET_64BIT
);
4499 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4500 we normally prevent this case when mmx is not available. However
4501 some ABIs may require the result to be returned like DImode. */
4502 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4503 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4505 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4506 we prevent this case when sse is not available. However some ABIs
4507 may require the result to be returned like integer TImode. */
4508 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4509 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4511 /* Decimal floating point values can go in %eax, unlike other float modes. */
4512 if (DECIMAL_FLOAT_MODE_P (mode
))
4515 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4516 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4519 /* Floating point return values in %st(0), except for local functions when
4520 SSE math is enabled or for functions with sseregparm attribute. */
4521 if ((func
|| fntype
)
4522 && (mode
== SFmode
|| mode
== DFmode
))
4524 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4525 if ((sse_level
>= 1 && mode
== SFmode
)
4526 || (sse_level
== 2 && mode
== DFmode
))
4527 return FIRST_SSE_REG
;
4530 return FIRST_FLOAT_REG
;
4533 /* Create the va_list data type. */
4536 ix86_build_builtin_va_list (void)
4538 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4540 /* For i386 we use plain pointer to argument area. */
4542 return build_pointer_type (char_type_node
);
4544 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4545 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4547 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4548 unsigned_type_node
);
4549 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4550 unsigned_type_node
);
4551 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4553 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4556 va_list_gpr_counter_field
= f_gpr
;
4557 va_list_fpr_counter_field
= f_fpr
;
4559 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4560 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4561 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4562 DECL_FIELD_CONTEXT (f_sav
) = record
;
4564 TREE_CHAIN (record
) = type_decl
;
4565 TYPE_NAME (record
) = type_decl
;
4566 TYPE_FIELDS (record
) = f_gpr
;
4567 TREE_CHAIN (f_gpr
) = f_fpr
;
4568 TREE_CHAIN (f_fpr
) = f_ovf
;
4569 TREE_CHAIN (f_ovf
) = f_sav
;
4571 layout_type (record
);
4573 /* The correct type is an array type of one element. */
4574 return build_array_type (record
, build_index_type (size_zero_node
));
4577 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4580 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4581 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4584 CUMULATIVE_ARGS next_cum
;
4585 rtx save_area
= NULL_RTX
, mem
;
4598 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4601 /* Indicate to allocate space on the stack for varargs save area. */
4602 ix86_save_varrargs_registers
= 1;
4604 cfun
->stack_alignment_needed
= 128;
4606 fntype
= TREE_TYPE (current_function_decl
);
4607 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4608 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4609 != void_type_node
));
4611 /* For varargs, we do not want to skip the dummy va_dcl argument.
4612 For stdargs, we do want to skip the last named argument. */
4615 function_arg_advance (&next_cum
, mode
, type
, 1);
4618 save_area
= frame_pointer_rtx
;
4620 set
= get_varargs_alias_set ();
4622 for (i
= next_cum
.regno
;
4624 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4627 mem
= gen_rtx_MEM (Pmode
,
4628 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4629 MEM_NOTRAP_P (mem
) = 1;
4630 set_mem_alias_set (mem
, set
);
4631 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4632 x86_64_int_parameter_registers
[i
]));
4635 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4637 /* Now emit code to save SSE registers. The AX parameter contains number
4638 of SSE parameter registers used to call this function. We use
4639 sse_prologue_save insn template that produces computed jump across
4640 SSE saves. We need some preparation work to get this working. */
4642 label
= gen_label_rtx ();
4643 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4645 /* Compute address to jump to :
4646 label - 5*eax + nnamed_sse_arguments*5 */
4647 tmp_reg
= gen_reg_rtx (Pmode
);
4648 nsse_reg
= gen_reg_rtx (Pmode
);
4649 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4650 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4651 gen_rtx_MULT (Pmode
, nsse_reg
,
4653 if (next_cum
.sse_regno
)
4656 gen_rtx_CONST (DImode
,
4657 gen_rtx_PLUS (DImode
,
4659 GEN_INT (next_cum
.sse_regno
* 4))));
4661 emit_move_insn (nsse_reg
, label_ref
);
4662 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4664 /* Compute address of memory block we save into. We always use pointer
4665 pointing 127 bytes after first byte to store - this is needed to keep
4666 instruction size limited by 4 bytes. */
4667 tmp_reg
= gen_reg_rtx (Pmode
);
4668 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4669 plus_constant (save_area
,
4670 8 * REGPARM_MAX
+ 127)));
4671 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4672 MEM_NOTRAP_P (mem
) = 1;
4673 set_mem_alias_set (mem
, set
);
4674 set_mem_align (mem
, BITS_PER_WORD
);
4676 /* And finally do the dirty job! */
4677 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4678 GEN_INT (next_cum
.sse_regno
), label
));
4683 /* Implement va_start. */
4686 ix86_va_start (tree valist
, rtx nextarg
)
4688 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4689 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4690 tree gpr
, fpr
, ovf
, sav
, t
;
4693 /* Only 64bit target needs something special. */
4696 std_expand_builtin_va_start (valist
, nextarg
);
4700 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4701 f_fpr
= TREE_CHAIN (f_gpr
);
4702 f_ovf
= TREE_CHAIN (f_fpr
);
4703 f_sav
= TREE_CHAIN (f_ovf
);
4705 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4706 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4707 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4708 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4709 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4711 /* Count number of gp and fp argument registers used. */
4712 words
= current_function_args_info
.words
;
4713 n_gpr
= current_function_args_info
.regno
;
4714 n_fpr
= current_function_args_info
.sse_regno
;
4716 if (TARGET_DEBUG_ARG
)
4717 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4718 (int) words
, (int) n_gpr
, (int) n_fpr
);
4720 if (cfun
->va_list_gpr_size
)
4722 type
= TREE_TYPE (gpr
);
4723 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4724 build_int_cst (type
, n_gpr
* 8));
4725 TREE_SIDE_EFFECTS (t
) = 1;
4726 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4729 if (cfun
->va_list_fpr_size
)
4731 type
= TREE_TYPE (fpr
);
4732 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4733 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4734 TREE_SIDE_EFFECTS (t
) = 1;
4735 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4738 /* Find the overflow area. */
4739 type
= TREE_TYPE (ovf
);
4740 t
= make_tree (type
, virtual_incoming_args_rtx
);
4742 t
= build2 (PLUS_EXPR
, type
, t
,
4743 build_int_cst (type
, words
* UNITS_PER_WORD
));
4744 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4745 TREE_SIDE_EFFECTS (t
) = 1;
4746 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4748 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4750 /* Find the register save area.
4751 Prologue of the function saves it right above the stack frame. */
4752 type
= TREE_TYPE (sav
);
4753 t
= make_tree (type
, frame_pointer_rtx
);
4754 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4755 TREE_SIDE_EFFECTS (t
) = 1;
4756 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4760 /* Implement va_arg. */
4763 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4765 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4766 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4767 tree gpr
, fpr
, ovf
, sav
, t
;
4769 tree lab_false
, lab_over
= NULL_TREE
;
4774 enum machine_mode nat_mode
;
4776 /* Only 64bit target needs something special. */
4778 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4780 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4781 f_fpr
= TREE_CHAIN (f_gpr
);
4782 f_ovf
= TREE_CHAIN (f_fpr
);
4783 f_sav
= TREE_CHAIN (f_ovf
);
4785 valist
= build_va_arg_indirect_ref (valist
);
4786 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4787 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4788 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4789 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4791 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4793 type
= build_pointer_type (type
);
4794 size
= int_size_in_bytes (type
);
4795 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4797 nat_mode
= type_natural_mode (type
);
4798 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4799 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4801 /* Pull the value out of the saved registers. */
4803 addr
= create_tmp_var (ptr_type_node
, "addr");
4804 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4808 int needed_intregs
, needed_sseregs
;
4810 tree int_addr
, sse_addr
;
4812 lab_false
= create_artificial_label ();
4813 lab_over
= create_artificial_label ();
4815 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4817 need_temp
= (!REG_P (container
)
4818 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4819 || TYPE_ALIGN (type
) > 128));
4821 /* In case we are passing structure, verify that it is consecutive block
4822 on the register save area. If not we need to do moves. */
4823 if (!need_temp
&& !REG_P (container
))
4825 /* Verify that all registers are strictly consecutive */
4826 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4830 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4832 rtx slot
= XVECEXP (container
, 0, i
);
4833 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4834 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4842 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4844 rtx slot
= XVECEXP (container
, 0, i
);
4845 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4846 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4858 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4859 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4860 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4861 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4864 /* First ensure that we fit completely in registers. */
4867 t
= build_int_cst (TREE_TYPE (gpr
),
4868 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4869 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4870 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4871 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4872 gimplify_and_add (t
, pre_p
);
4876 t
= build_int_cst (TREE_TYPE (fpr
),
4877 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4879 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4880 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4881 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4882 gimplify_and_add (t
, pre_p
);
4885 /* Compute index to start of area used for integer regs. */
4888 /* int_addr = gpr + sav; */
4889 t
= fold_convert (ptr_type_node
, gpr
);
4890 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4891 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4892 gimplify_and_add (t
, pre_p
);
4896 /* sse_addr = fpr + sav; */
4897 t
= fold_convert (ptr_type_node
, fpr
);
4898 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4899 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4900 gimplify_and_add (t
, pre_p
);
4905 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4908 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4909 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4910 gimplify_and_add (t
, pre_p
);
4912 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4914 rtx slot
= XVECEXP (container
, 0, i
);
4915 rtx reg
= XEXP (slot
, 0);
4916 enum machine_mode mode
= GET_MODE (reg
);
4917 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4918 tree addr_type
= build_pointer_type (piece_type
);
4921 tree dest_addr
, dest
;
4923 if (SSE_REGNO_P (REGNO (reg
)))
4925 src_addr
= sse_addr
;
4926 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4930 src_addr
= int_addr
;
4931 src_offset
= REGNO (reg
) * 8;
4933 src_addr
= fold_convert (addr_type
, src_addr
);
4934 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4935 size_int (src_offset
));
4936 src
= build_va_arg_indirect_ref (src_addr
);
4938 dest_addr
= fold_convert (addr_type
, addr
);
4939 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4940 size_int (INTVAL (XEXP (slot
, 1))));
4941 dest
= build_va_arg_indirect_ref (dest_addr
);
4943 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4944 gimplify_and_add (t
, pre_p
);
4950 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4951 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4952 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4953 gimplify_and_add (t
, pre_p
);
4957 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4958 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4959 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4960 gimplify_and_add (t
, pre_p
);
4963 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4964 gimplify_and_add (t
, pre_p
);
4966 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4967 append_to_statement_list (t
, pre_p
);
4970 /* ... otherwise out of the overflow area. */
4972 /* Care for on-stack alignment if needed. */
4973 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4974 || integer_zerop (TYPE_SIZE (type
)))
4978 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4979 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4980 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4981 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4982 build_int_cst (TREE_TYPE (t
), -align
));
4984 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4986 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4987 gimplify_and_add (t2
, pre_p
);
4989 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4990 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4991 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4992 gimplify_and_add (t
, pre_p
);
4996 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4997 append_to_statement_list (t
, pre_p
);
5000 ptrtype
= build_pointer_type (type
);
5001 addr
= fold_convert (ptrtype
, addr
);
5004 addr
= build_va_arg_indirect_ref (addr
);
5005 return build_va_arg_indirect_ref (addr
);
5008 /* Return nonzero if OPNUM's MEM should be matched
5009 in movabs* patterns. */
5012 ix86_check_movabs (rtx insn
, int opnum
)
5016 set
= PATTERN (insn
);
5017 if (GET_CODE (set
) == PARALLEL
)
5018 set
= XVECEXP (set
, 0, 0);
5019 gcc_assert (GET_CODE (set
) == SET
);
5020 mem
= XEXP (set
, opnum
);
5021 while (GET_CODE (mem
) == SUBREG
)
5022 mem
= SUBREG_REG (mem
);
5023 gcc_assert (MEM_P (mem
));
5024 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5027 /* Initialize the table of extra 80387 mathematical constants. */
5030 init_ext_80387_constants (void)
5032 static const char * cst
[5] =
5034 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5035 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5036 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5037 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5038 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5042 for (i
= 0; i
< 5; i
++)
5044 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5045 /* Ensure each constant is rounded to XFmode precision. */
5046 real_convert (&ext_80387_constants_table
[i
],
5047 XFmode
, &ext_80387_constants_table
[i
]);
5050 ext_80387_constants_init
= 1;
5053 /* Return true if the constant is something that can be loaded with
5054 a special instruction. */
5057 standard_80387_constant_p (rtx x
)
5061 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
5064 if (x
== CONST0_RTX (GET_MODE (x
)))
5066 if (x
== CONST1_RTX (GET_MODE (x
)))
5069 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5071 /* For XFmode constants, try to find a special 80387 instruction when
5072 optimizing for size or on those CPUs that benefit from them. */
5073 if (GET_MODE (x
) == XFmode
5074 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5078 if (! ext_80387_constants_init
)
5079 init_ext_80387_constants ();
5081 for (i
= 0; i
< 5; i
++)
5082 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5086 /* Load of the constant -0.0 or -1.0 will be split as
5087 fldz;fchs or fld1;fchs sequence. */
5088 if (real_isnegzero (&r
))
5090 if (real_identical (&r
, &dconstm1
))
5096 /* Return the opcode of the special instruction to be used to load
5100 standard_80387_constant_opcode (rtx x
)
5102 switch (standard_80387_constant_p (x
))
5126 /* Return the CONST_DOUBLE representing the 80387 constant that is
5127 loaded by the specified special instruction. The argument IDX
5128 matches the return value from standard_80387_constant_p. */
5131 standard_80387_constant_rtx (int idx
)
5135 if (! ext_80387_constants_init
)
5136 init_ext_80387_constants ();
5152 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5156 /* Return 1 if mode is a valid mode for sse. */
5158 standard_sse_mode_p (enum machine_mode mode
)
5175 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5178 standard_sse_constant_p (rtx x
)
5180 enum machine_mode mode
= GET_MODE (x
);
5182 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5184 if (vector_all_ones_operand (x
, mode
)
5185 && standard_sse_mode_p (mode
))
5186 return TARGET_SSE2
? 2 : -1;
5191 /* Return the opcode of the special instruction to be used to load
5195 standard_sse_constant_opcode (rtx insn
, rtx x
)
5197 switch (standard_sse_constant_p (x
))
5200 if (get_attr_mode (insn
) == MODE_V4SF
)
5201 return "xorps\t%0, %0";
5202 else if (get_attr_mode (insn
) == MODE_V2DF
)
5203 return "xorpd\t%0, %0";
5205 return "pxor\t%0, %0";
5207 return "pcmpeqd\t%0, %0";
5212 /* Returns 1 if OP contains a symbol reference */
5215 symbolic_reference_mentioned_p (rtx op
)
5220 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5223 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5224 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5230 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5231 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5235 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
/* NOTE(review): extraction-mangled fragment — the early `return 0;` lines
   after each guard are missing from this chunk.  */
5242 /* Return 1 if it is appropriate to emit `ret' instructions in the
5243 body of a function. Do this only if the epilogue is simple, needing a
5244 couple of insns. Prior to reloading, we can't tell how many registers
5245 must be saved, so return 0 then. Return 0 if there is no frame
5246 marker to de-allocate. */
5249 ix86_can_use_return_insn_p (void)
5251 struct ix86_frame frame
;
/* Before reload the save set is unknown; a frame pointer also forces a
   full epilogue.  */
5253 if (! reload_completed
|| frame_pointer_needed
)
5256 /* Don't allow more than 32 pop, since that's all we can do
5257 with one instruction. */
5258 if (current_function_pops_args
5259 && current_function_args_size
>= 32768)
/* A bare `ret` is OK only when nothing was allocated and no registers
   were saved.  */
5262 ix86_compute_frame_layout (&frame
);
5263 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
/* NOTE(review): extraction-mangled fragment — the `return 1;`/`return 0;`
   lines after each condition are missing from this chunk.  */
5266 /* Value should be nonzero if functions must have frame pointers.
5267 Zero means the frame pointer need not be set up (and parms may
5268 be accessed via the stack pointer) in functions that seem suitable. */
5271 ix86_frame_pointer_required (void)
5273 /* If we accessed previous frames, then the generated code expects
5274 to be able to access the saved ebp value in our frame. */
5275 if (cfun
->machine
->accesses_prev_frame
)
5278 /* Several x86 os'es need a frame pointer for other reasons,
5279 usually pertaining to setjmp. */
5280 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5283 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5284 the frame pointer by default. Turn it back on now if we've not
5285 got a leaf function. */
5286 if (TARGET_OMIT_LEAF_FRAME_POINTER
5287 && (!current_function_is_leaf
5288 || ix86_current_function_calls_tls_descriptor
))
/* Profiling presumably also forces a frame pointer — the consequent
   return line is missing here; confirm against the full source.  */
5291 if (current_function_profile
)
/* NOTE(review): extraction-mangled fragment — return type and braces are
   missing from this chunk.  Sets the per-function flag that
   ix86_frame_pointer_required reads above.  */
5297 /* Record that the current function accesses previous call frames. */
5300 ix86_setup_frame_addresses (void)
5302 cfun
->machine
->accesses_prev_frame
= 1;
5305 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5306 # define USE_HIDDEN_LINKONCE 1
5308 # define USE_HIDDEN_LINKONCE 0
5311 static int pic_labels_used
;
/* NOTE(review): extraction-mangled fragment — the `else` between the two
   naming strategies and the braces are missing from this chunk.  */
5313 /* Fills in the label name that should be used for a pc thunk for
5314 the given register. */
5317 get_pc_thunk_name (char name
[32], unsigned int regno
)
/* PC thunks are a 32-bit PIC mechanism only.  */
5319 gcc_assert (!TARGET_64BIT
);
/* Shared (comdat) thunk gets the well-known __i686.get_pc_thunk.<reg>
   name; otherwise an internal per-file label is generated.  */
5321 if (USE_HIDDEN_LINKONCE
)
5322 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5324 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5328 /* This function generates code for -fpic that loads %ebx with
5329 the return address of the caller and then returns. */
5332 ix86_file_end (void)
5337 for (regno
= 0; regno
< 8; ++regno
)
5341 if (! ((pic_labels_used
>> regno
) & 1))
5344 get_pc_thunk_name (name
, regno
);
5349 switch_to_section (darwin_sections
[text_coal_section
]);
5350 fputs ("\t.weak_definition\t", asm_out_file
);
5351 assemble_name (asm_out_file
, name
);
5352 fputs ("\n\t.private_extern\t", asm_out_file
);
5353 assemble_name (asm_out_file
, name
);
5354 fputs ("\n", asm_out_file
);
5355 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5359 if (USE_HIDDEN_LINKONCE
)
5363 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5365 TREE_PUBLIC (decl
) = 1;
5366 TREE_STATIC (decl
) = 1;
5367 DECL_ONE_ONLY (decl
) = 1;
5369 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5370 switch_to_section (get_named_section (decl
, NULL
, 0));
5372 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5373 fputs ("\t.hidden\t", asm_out_file
);
5374 assemble_name (asm_out_file
, name
);
5375 fputc ('\n', asm_out_file
);
5376 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5380 switch_to_section (text_section
);
5381 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5384 xops
[0] = gen_rtx_REG (SImode
, regno
);
5385 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5386 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5387 output_asm_insn ("ret", xops
);
5390 if (NEED_INDICATE_EXEC_STACK
)
5391 file_end_indicate_exec_stack ();
5394 /* Emit code for the SET_GOT patterns. */
5397 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5403 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5405 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5406 xops
[2] = gen_rtx_MEM (Pmode
,
5407 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5408 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5410 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5411 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5412 an unadorned address. */
5413 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5414 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5415 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5419 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5421 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5423 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5426 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5428 output_asm_insn ("call\t%a2", xops
);
5431 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5432 is what will be referenced by the Mach-O PIC subsystem. */
5434 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5437 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5438 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5441 output_asm_insn ("pop{l}\t%0", xops
);
5446 get_pc_thunk_name (name
, REGNO (dest
));
5447 pic_labels_used
|= 1 << REGNO (dest
);
5449 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5450 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5451 output_asm_insn ("call\t%X2", xops
);
5452 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5453 is what will be referenced by the Mach-O PIC subsystem. */
5456 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5458 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5459 CODE_LABEL_NUMBER (label
));
5466 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5467 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5469 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
/* NOTE(review): extraction-mangled fragment — the function signature line
   and parts of the returned SET expression (the MEM wrapper and the pushed
   operand) are missing from this chunk.  Builds a stack-push RTX using a
   PRE_DEC of the stack pointer.  */
5474 /* Generate an "push" pattern for input ARG. */
5479 return gen_rtx_SET (VOIDmode
,
5481 gen_rtx_PRE_DEC (Pmode
,
5482 stack_pointer_rtx
)),
/* NOTE(review): extraction-mangled fragment — the `return i;` inside the
   loop and surrounding braces are missing from this chunk.  */
5486 /* Return >= 0 if there is an unused call-clobbered register available
5487 for the entire function. */
5490 ix86_select_alt_pic_regnum (void)
/* Only safe in a leaf, unprofiled function that doesn't call a TLS
   descriptor — nothing else can clobber the chosen register.  */
5492 if (current_function_is_leaf
&& !current_function_profile
5493 && !ix86_current_function_calls_tls_descriptor
)
/* Scan registers 2..0 (presumably ecx/edx/eax order — confirm) for one
   that is never live.  */
5496 for (i
= 2; i
>= 0; --i
)
5497 if (!regs_ever_live
[i
])
/* No spare register found.  */
5501 return INVALID_REGNUM
;
/* NOTE(review): extraction-mangled fragment — several return statements,
   the EH-register scan loop header, and braces are missing.  */
5504 /* Return 1 if we need to save REGNO. */
5506 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
/* PIC register: needed whenever it is live, or profiling/eh_return/the
   constant pool force its use...  */
5508 if (pic_offset_table_rtx
5509 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5510 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5511 || current_function_profile
5512 || current_function_calls_eh_return
5513 || current_function_uses_const_pool
))
/* ...unless an unused call-clobbered register can host the PIC pointer
   instead, making a save unnecessary.  */
5515 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
/* eh_return data registers must be treated as saved on the normal path
   (the scan loop over EH_RETURN_DATA_REGNO is partially missing here).  */
5520 if (current_function_calls_eh_return
&& maybe_eh_return
)
5525 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5526 if (test
== INVALID_REGNUM
)
/* The register holding the realigned argument pointer must survive.  */
5533 if (cfun
->machine
->force_align_arg_pointer
5534 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
/* General rule: live, call-saved, not fixed, and not the hard frame
   pointer when that is already maintained by the prologue.  */
5537 return (regs_ever_live
[regno
]
5538 && !call_used_regs
[regno
]
5539 && !fixed_regs
[regno
]
5540 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
))
;
/* NOTE(review): extraction-mangled fragment — the counter declaration,
   its increment, and the return are missing from this chunk.  */
5543 /* Return number of registers to be saved on the stack. */
5546 ix86_nsaved_regs (void)
/* Count every hard register that ix86_save_reg says must be saved.  */
5551 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5552 if (ix86_save_reg (regno
, true))
/* NOTE(review): extraction-mangled fragment — the `else` introducing the
   stack-pointer branch and the braces are missing from this chunk.  */
5557 /* Return the offset between two registers, one to be eliminated, and the other
5558 its replacement, at the start of a routine. */
5561 ix86_initial_elimination_offset (int from
, int to
)
5563 struct ix86_frame frame
;
5564 ix86_compute_frame_layout (&frame
);
/* Eliminations toward the hard frame pointer read straight out of the
   computed layout.  */
5566 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5567 return frame
.hard_frame_pointer_offset
;
5568 else if (from
== FRAME_POINTER_REGNUM
5569 && to
== HARD_FRAME_POINTER_REGNUM
)
5570 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
/* Otherwise the only remaining legal target is the stack pointer.  */
5573 gcc_assert (to
== STACK_POINTER_REGNUM
);
5575 if (from
== ARG_POINTER_REGNUM
)
5576 return frame
.stack_pointer_offset
;
5578 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5579 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5583 /* Fill structure ix86_frame about frame of currently computed function. */
5586 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5588 HOST_WIDE_INT total_size
;
5589 unsigned int stack_alignment_needed
;
5590 HOST_WIDE_INT offset
;
5591 unsigned int preferred_alignment
;
5592 HOST_WIDE_INT size
= get_frame_size ();
5594 frame
->nregs
= ix86_nsaved_regs ();
5597 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5598 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5600 /* During reload iteration the amount of registers saved can change.
5601 Recompute the value as needed. Do not recompute when amount of registers
5602 didn't change as reload does multiple calls to the function and does not
5603 expect the decision to change within single iteration. */
5605 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5607 int count
= frame
->nregs
;
5609 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5610 /* The fast prologue uses move instead of push to save registers. This
5611 is significantly longer, but also executes faster as modern hardware
5612 can execute the moves in parallel, but can't do that for push/pop.
5614 Be careful about choosing what prologue to emit: When function takes
5615 many instructions to execute we may use slow version as well as in
5616 case function is known to be outside hot spot (this is known with
5617 feedback only). Weight the size of function by number of registers
5618 to save as it is cheap to use one or two push instructions but very
5619 slow to use many of them. */
5621 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5622 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5623 || (flag_branch_probabilities
5624 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5625 cfun
->machine
->use_fast_prologue_epilogue
= false;
5627 cfun
->machine
->use_fast_prologue_epilogue
5628 = !expensive_function_p (count
);
5630 if (TARGET_PROLOGUE_USING_MOVE
5631 && cfun
->machine
->use_fast_prologue_epilogue
)
5632 frame
->save_regs_using_mov
= true;
5634 frame
->save_regs_using_mov
= false;
5637 /* Skip return address and saved base pointer. */
5638 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5640 frame
->hard_frame_pointer_offset
= offset
;
5642 /* Do some sanity checking of stack_alignment_needed and
5643 preferred_alignment, since i386 port is the only using those features
5644 that may break easily. */
5646 gcc_assert (!size
|| stack_alignment_needed
);
5647 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5648 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5649 gcc_assert (stack_alignment_needed
5650 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5652 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5653 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5655 /* Register save area */
5656 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5659 if (ix86_save_varrargs_registers
)
5661 offset
+= X86_64_VARARGS_SIZE
;
5662 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5665 frame
->va_arg_size
= 0;
5667 /* Align start of frame for local function. */
5668 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5669 & -stack_alignment_needed
) - offset
;
5671 offset
+= frame
->padding1
;
5673 /* Frame pointer points here. */
5674 frame
->frame_pointer_offset
= offset
;
5678 /* Add outgoing arguments area. Can be skipped if we eliminated
5679 all the function calls as dead code.
5680 Skipping is however impossible when function calls alloca. Alloca
5681 expander assumes that last current_function_outgoing_args_size
5682 of stack frame are unused. */
5683 if (ACCUMULATE_OUTGOING_ARGS
5684 && (!current_function_is_leaf
|| current_function_calls_alloca
5685 || ix86_current_function_calls_tls_descriptor
))
5687 offset
+= current_function_outgoing_args_size
;
5688 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5691 frame
->outgoing_arguments_size
= 0;
5693 /* Align stack boundary. Only needed if we're calling another function
5695 if (!current_function_is_leaf
|| current_function_calls_alloca
5696 || ix86_current_function_calls_tls_descriptor
)
5697 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5698 & -preferred_alignment
) - offset
;
5700 frame
->padding2
= 0;
5702 offset
+= frame
->padding2
;
5704 /* We've reached end of stack frame. */
5705 frame
->stack_pointer_offset
= offset
;
5707 /* Size prologue needs to allocate. */
5708 frame
->to_allocate
=
5709 (size
+ frame
->padding1
+ frame
->padding2
5710 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5712 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5713 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5714 frame
->save_regs_using_mov
= false;
5716 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5717 && current_function_is_leaf
5718 && !ix86_current_function_calls_tls_descriptor
)
5720 frame
->red_zone_size
= frame
->to_allocate
;
5721 if (frame
->save_regs_using_mov
)
5722 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5723 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5724 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5727 frame
->red_zone_size
= 0;
5728 frame
->to_allocate
-= frame
->red_zone_size
;
5729 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5731 fprintf (stderr
, "\n");
5732 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5733 fprintf (stderr
, "size: %ld\n", (long)size
);
5734 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5735 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5736 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5737 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5738 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5739 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5740 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5741 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5742 (long)frame
->hard_frame_pointer_offset
);
5743 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5744 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5745 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5746 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
/* NOTE(review): extraction-mangled fragment — local declarations and
   braces are missing from this chunk.  */
5750 /* Emit code to save registers in the prologue. */
5753 ix86_emit_save_regs (void)
/* Push in descending regno order; each push is flagged frame-related so
   dwarf2 unwind info records the save.  */
5758 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5759 if (ix86_save_reg (regno
, true))
5761 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5762 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): extraction-mangled fragment — the adjust_address offset
   argument line and braces are missing from this chunk.  The header
   comment's "restored" appears to mean "saved" here (this is the save-side
   twin of ix86_emit_restore_regs_using_mov) — kept verbatim.  */
5766 /* Emit code to save registers using MOV insns. First register
5767 is restored from POINTER + OFFSET. */
5769 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
/* Store each to-be-saved register at successive word slots starting at
   POINTER + OFFSET; each store is frame-related for unwind info.  */
5774 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5775 if (ix86_save_reg (regno
, true))
5777 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5779 gen_rtx_REG (Pmode
, regno
));
5780 RTX_FRAME_RELATED_P (insn
) = 1;
5781 offset
+= UNITS_PER_WORD
;
/* NOTE(review): extraction-mangled fragment — the TARGET_64BIT test
   guarding the branch structure, the style checks, and braces are missing
   from this chunk.  */
5785 /* Expand prologue or epilogue stack adjustment.
5786 The pattern exist to put a dependency on all ebp-based memory accesses.
5787 STYLE should be negative if instructions should be marked as frame related,
5788 zero if %r11 register is live and cannot be freely used and positive
5792 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
/* 32-bit (presumably — the guard line is missing) and small 64-bit
   immediates use the direct adjust patterns.  */
5797 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5798 else if (x86_64_immediate_operand (offset
, DImode
))
5799 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5803 /* r11 is used by indirect sibcall return as well, set before the
5804 epilogue and used after the epilogue. ATM indirect sibcall
5805 shouldn't be used together with huge frame sizes in one
5806 function because of the frame_size check in sibcall.c. */
/* Huge 64-bit offsets: materialize the constant in r11 first.  */
5808 r11
= gen_rtx_REG (DImode
, R11_REG
);
5809 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5811 RTX_FRAME_RELATED_P (insn
) = 1;
5812 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5816 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): extraction-mangled fragment — braces and part of the
   "can't realign" comment are missing from this chunk.  */
5819 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5822 ix86_internal_arg_pointer (void)
/* Stack realignment is requested either by the force_align_arg_pointer
   attribute, the -mstackrealign option, or automatically for main().  */
5824 bool has_force_align_arg_pointer
=
5825 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5826 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5827 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5828 && DECL_NAME (current_function_decl
)
5829 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5830 && DECL_FILE_SCOPE_P (current_function_decl
))
5831 || ix86_force_align_arg_pointer
5832 || has_force_align_arg_pointer
)
5834 /* Nested functions can't realign the stack due to a register
5836 if (DECL_CONTEXT (current_function_decl
)
5837 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
/* Option-driven realignment degrades to a warning; attribute-driven
   realignment on a nested function is a hard error.  */
5839 if (ix86_force_align_arg_pointer
)
5840 warning (0, "-mstackrealign ignored for nested functions");
5841 if (has_force_align_arg_pointer
)
5842 error ("%s not supported for nested functions",
5843 ix86_force_align_arg_pointer_string
);
5844 return virtual_incoming_args_rtx
;
/* Pin the argument pointer in register 2 (ecx, presumably — confirm)
   and hand callers a pseudo copy of it.  */
5846 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5847 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
/* No realignment: the normal virtual incoming-args pointer suffices.  */
5850 return virtual_incoming_args_rtx
;
/* NOTE(review): extraction-mangled fragment — the switch header on the
   UNSPEC code, break statements, and the default case are missing.  */
5853 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5854 This is called from dwarf2out.c to emit call frame instructions
5855 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5857 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5859 rtx unspec
= SET_SRC (pattern
);
5860 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
/* Register-save note: record that operand 0 of the unspec was saved
   into the destination register.  */
5864 case UNSPEC_REG_SAVE
:
5865 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5866 SET_DEST (pattern
));
/* CFA definition: the destination register plus the constant operand
   becomes the new canonical frame address.  */
5868 case UNSPEC_DEF_CFA
:
5869 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5870 INTVAL (XVECEXP (unspec
, 0, 0)));
5877 /* Expand the prologue into a bunch of separate insns. */
5880 ix86_expand_prologue (void)
5884 struct ix86_frame frame
;
5885 HOST_WIDE_INT allocate
;
5887 ix86_compute_frame_layout (&frame
);
5889 if (cfun
->machine
->force_align_arg_pointer
)
5893 /* Grab the argument pointer. */
5894 x
= plus_constant (stack_pointer_rtx
, 4);
5895 y
= cfun
->machine
->force_align_arg_pointer
;
5896 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5897 RTX_FRAME_RELATED_P (insn
) = 1;
5899 /* The unwind info consists of two parts: install the fafp as the cfa,
5900 and record the fafp as the "save register" of the stack pointer.
5901 The later is there in order that the unwinder can see where it
5902 should restore the stack pointer across the and insn. */
5903 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5904 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5905 RTX_FRAME_RELATED_P (x
) = 1;
5906 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5908 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5909 RTX_FRAME_RELATED_P (y
) = 1;
5910 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5911 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5912 REG_NOTES (insn
) = x
;
5914 /* Align the stack. */
5915 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5918 /* And here we cheat like madmen with the unwind info. We force the
5919 cfa register back to sp+4, which is exactly what it was at the
5920 start of the function. Re-pushing the return address results in
5921 the return at the same spot relative to the cfa, and thus is
5922 correct wrt the unwind info. */
5923 x
= cfun
->machine
->force_align_arg_pointer
;
5924 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5925 insn
= emit_insn (gen_push (x
));
5926 RTX_FRAME_RELATED_P (insn
) = 1;
5929 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5930 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5931 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5932 REG_NOTES (insn
) = x
;
5935 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5936 slower on all targets. Also sdb doesn't like it. */
5938 if (frame_pointer_needed
)
5940 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5941 RTX_FRAME_RELATED_P (insn
) = 1;
5943 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5944 RTX_FRAME_RELATED_P (insn
) = 1;
5947 allocate
= frame
.to_allocate
;
5949 if (!frame
.save_regs_using_mov
)
5950 ix86_emit_save_regs ();
5952 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5954 /* When using red zone we may start register saving before allocating
5955 the stack frame saving one cycle of the prologue. */
5956 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5957 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5958 : stack_pointer_rtx
,
5959 -frame
.nregs
* UNITS_PER_WORD
);
5963 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5964 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5965 GEN_INT (-allocate
), -1);
5968 /* Only valid for Win32. */
5969 rtx eax
= gen_rtx_REG (SImode
, 0);
5970 bool eax_live
= ix86_eax_live_at_start_p ();
5973 gcc_assert (!TARGET_64BIT
);
5977 emit_insn (gen_push (eax
));
5981 emit_move_insn (eax
, GEN_INT (allocate
));
5983 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5984 RTX_FRAME_RELATED_P (insn
) = 1;
5985 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5986 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5987 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5988 t
, REG_NOTES (insn
));
5992 if (frame_pointer_needed
)
5993 t
= plus_constant (hard_frame_pointer_rtx
,
5996 - frame
.nregs
* UNITS_PER_WORD
);
5998 t
= plus_constant (stack_pointer_rtx
, allocate
);
5999 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
6003 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6005 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6006 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6008 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6009 -frame
.nregs
* UNITS_PER_WORD
);
6012 pic_reg_used
= false;
6013 if (pic_offset_table_rtx
6014 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
6015 || current_function_profile
))
6017 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6019 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6020 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
6022 pic_reg_used
= true;
6029 if (ix86_cmodel
== CM_LARGE_PIC
)
6031 rtx tmp_reg
= gen_rtx_REG (DImode
,
6032 FIRST_REX_INT_REG
+ 3 /* R11 */);
6033 rtx label
= gen_label_rtx ();
6035 LABEL_PRESERVE_P (label
) = 1;
6036 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6037 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6038 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6039 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6040 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6041 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6042 pic_offset_table_rtx
, tmp_reg
));
6045 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6048 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6050 /* Even with accurate pre-reload life analysis, we can wind up
6051 deleting all references to the pic register after reload.
6052 Consider if cross-jumping unifies two sides of a branch
6053 controlled by a comparison vs the only read from a global.
6054 In which case, allow the set_got to be deleted, though we're
6055 too late to do anything about the ebx save in the prologue. */
6056 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6059 /* Prevent function calls from be scheduled before the call to mcount.
6060 In the pic_reg_used case, make sure that the got load isn't deleted. */
6061 if (current_function_profile
)
6062 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
/* NOTE(review): extraction-mangled fragment — the r11 declaration, the
   offset reset after rebasing, and braces are missing from this chunk.  */
6065 /* Emit code to restore saved registers using MOV insns. First register
6066 is restored from POINTER + OFFSET. */
6068 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6069 int maybe_eh_return
)
6072 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6074 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6075 if (ix86_save_reg (regno
, maybe_eh_return
))
6077 /* Ensure that adjust_address won't be forced to produce pointer
6078 out of range allowed by x86-64 instruction set. */
6079 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
/* Offset doesn't fit a 32-bit displacement: rebase through r11
   (presumably resetting `offset` afterward — that line is missing
   here; confirm against the full source).  */
6083 r11
= gen_rtx_REG (DImode
, R11_REG
);
6084 emit_move_insn (r11
, GEN_INT (offset
));
6085 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6086 base_address
= gen_rtx_MEM (Pmode
, r11
);
/* Reload the register from its word slot and advance to the next.  */
6089 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6090 adjust_address (base_address
, Pmode
, offset
));
6091 offset
+= UNITS_PER_WORD
;
6095 /* Restore function stack, frame, and registers. */
6098 ix86_expand_epilogue (int style
)
6101 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6102 struct ix86_frame frame
;
6103 HOST_WIDE_INT offset
;
6105 ix86_compute_frame_layout (&frame
);
6107 /* Calculate start of saved registers relative to ebp. Special care
6108 must be taken for the normal return case of a function using
6109 eh_return: the eax and edx registers are marked as saved, but not
6110 restored along this path. */
6111 offset
= frame
.nregs
;
6112 if (current_function_calls_eh_return
&& style
!= 2)
6114 offset
*= -UNITS_PER_WORD
;
6116 /* If we're only restoring one register and sp is not valid then
6117 using a move instruction to restore the register since it's
6118 less work than reloading sp and popping the register.
6120 The default code result in stack adjustment using add/lea instruction,
6121 while this code results in LEAVE instruction (or discrete equivalent),
6122 so it is profitable in some other cases as well. Especially when there
6123 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6124 and there is exactly one register to pop. This heuristic may need some
6125 tuning in future. */
6126 if ((!sp_valid
&& frame
.nregs
<= 1)
6127 || (TARGET_EPILOGUE_USING_MOVE
6128 && cfun
->machine
->use_fast_prologue_epilogue
6129 && (frame
.nregs
> 1 || frame
.to_allocate
))
6130 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6131 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6132 && cfun
->machine
->use_fast_prologue_epilogue
6133 && frame
.nregs
== 1)
6134 || current_function_calls_eh_return
)
6136 /* Restore registers. We can use ebp or esp to address the memory
6137 locations. If both are available, default to ebp, since offsets
6138 are known to be small. Only exception is esp pointing directly to the
6139 end of block of saved registers, where we may simplify addressing
6142 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6143 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6144 frame
.to_allocate
, style
== 2);
6146 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6147 offset
, style
== 2);
6149 /* eh_return epilogues need %ecx added to the stack pointer. */
6152 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6154 if (frame_pointer_needed
)
6156 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6157 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6158 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6160 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6161 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6163 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6168 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6169 tmp
= plus_constant (tmp
, (frame
.to_allocate
6170 + frame
.nregs
* UNITS_PER_WORD
));
6171 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6174 else if (!frame_pointer_needed
)
6175 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6176 GEN_INT (frame
.to_allocate
6177 + frame
.nregs
* UNITS_PER_WORD
),
6179 /* If not an i386, mov & pop is faster than "leave". */
6180 else if (TARGET_USE_LEAVE
|| optimize_size
6181 || !cfun
->machine
->use_fast_prologue_epilogue
)
6182 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6185 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6186 hard_frame_pointer_rtx
,
6189 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6191 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6196 /* First step is to deallocate the stack frame so that we can
6197 pop the registers. */
6200 gcc_assert (frame_pointer_needed
);
6201 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6202 hard_frame_pointer_rtx
,
6203 GEN_INT (offset
), style
);
6205 else if (frame
.to_allocate
)
6206 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6207 GEN_INT (frame
.to_allocate
), style
);
6209 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6210 if (ix86_save_reg (regno
, false))
6213 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6215 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6217 if (frame_pointer_needed
)
6219 /* Leave results in shorter dependency chains on CPUs that are
6220 able to grok it fast. */
6221 if (TARGET_USE_LEAVE
)
6222 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6223 else if (TARGET_64BIT
)
6224 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6226 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6230 if (cfun
->machine
->force_align_arg_pointer
)
6232 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6233 cfun
->machine
->force_align_arg_pointer
,
6237 /* Sibcall epilogues don't want a return instruction. */
6241 if (current_function_pops_args
&& current_function_args_size
)
6243 rtx popc
= GEN_INT (current_function_pops_args
);
6245 /* i386 can only pop 64K bytes. If asked to pop more, pop
6246 return address, do explicit add, and jump indirectly to the
6249 if (current_function_pops_args
>= 65536)
6251 rtx ecx
= gen_rtx_REG (SImode
, 2);
6253 /* There is no "pascal" calling convention in 64bit ABI. */
6254 gcc_assert (!TARGET_64BIT
);
6256 emit_insn (gen_popsi1 (ecx
));
6257 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6258 emit_jump_insn (gen_return_indirect_internal (ecx
));
6261 emit_jump_insn (gen_return_pop_internal (popc
));
6264 emit_jump_insn (gen_return_internal ());
/* NOTE(review): extraction-mangled fragment — the TARGET_MACHO guard, the
   loop/conditional structure around the NOTE scan, and braces are missing
   from this chunk.  */
6267 /* Reset from the function's potential modifications. */
6270 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6271 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
/* The prologue may have retargeted the PIC pseudo at an alternate
   register; restore the canonical hard regno for the next function.  */
6273 if (pic_offset_table_rtx
)
6274 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6276 /* Mach-O doesn't support labels at the end of objects, so if
6277 it looks like we might want one, insert a NOP. */
/* Walk backward past notes (other than deleted-label notes) to see
   whether the function ends in a label-like insn.  */
6279 rtx insn
= get_last_insn ();
6282 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6283 insn
= PREV_INSN (insn
);
6287 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6288 fputs ("\tnop\n", file
);
6294 /* Extract the parts of an RTL expression that is a valid memory address
6295 for an instruction. Return 0 if the structure of the address is
6296 grossly off. Return -1 if the address contains ASHIFT, so it is not
6297 strictly valid, but still used for computing length of lea instruction. */
6300 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6302 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6303 rtx base_reg
, index_reg
;
6304 HOST_WIDE_INT scale
= 1;
6305 rtx scale_rtx
= NULL_RTX
;
6307 enum ix86_address_seg seg
= SEG_DEFAULT
;
6309 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6311 else if (GET_CODE (addr
) == PLUS
)
6321 addends
[n
++] = XEXP (op
, 1);
6324 while (GET_CODE (op
) == PLUS
);
6329 for (i
= n
; i
>= 0; --i
)
6332 switch (GET_CODE (op
))
6337 index
= XEXP (op
, 0);
6338 scale_rtx
= XEXP (op
, 1);
6342 if (XINT (op
, 1) == UNSPEC_TP
6343 && TARGET_TLS_DIRECT_SEG_REFS
6344 && seg
== SEG_DEFAULT
)
6345 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6374 else if (GET_CODE (addr
) == MULT
)
6376 index
= XEXP (addr
, 0); /* index*scale */
6377 scale_rtx
= XEXP (addr
, 1);
6379 else if (GET_CODE (addr
) == ASHIFT
)
6383 /* We're called for lea too, which implements ashift on occasion. */
6384 index
= XEXP (addr
, 0);
6385 tmp
= XEXP (addr
, 1);
6386 if (!CONST_INT_P (tmp
))
6388 scale
= INTVAL (tmp
);
6389 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6395 disp
= addr
; /* displacement */
6397 /* Extract the integral value of scale. */
6400 if (!CONST_INT_P (scale_rtx
))
6402 scale
= INTVAL (scale_rtx
);
6405 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6406 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6408 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6409 if (base_reg
&& index_reg
&& scale
== 1
6410 && (index_reg
== arg_pointer_rtx
6411 || index_reg
== frame_pointer_rtx
6412 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6415 tmp
= base
, base
= index
, index
= tmp
;
6416 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6419 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6420 if ((base_reg
== hard_frame_pointer_rtx
6421 || base_reg
== frame_pointer_rtx
6422 || base_reg
== arg_pointer_rtx
) && !disp
)
6425 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6426 Avoid this by transforming to [%esi+0]. */
6427 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6428 && base_reg
&& !index_reg
&& !disp
6430 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6433 /* Special case: encode reg+reg instead of reg*2. */
6434 if (!base
&& index
&& scale
&& scale
== 2)
6435 base
= index
, base_reg
= index_reg
, scale
= 1;
6437 /* Special case: scaling cannot be encoded without base or displacement. */
6438 if (!base
&& !disp
&& index
&& scale
!= 1)
6450 /* Return cost of the memory address x.
6451 For i386, it is better to use a complex address than let gcc copy
6452 the address into a reg and make a new pseudo. But not if the address
6453 requires to two regs - that would mean more pseudos with longer
6456 ix86_address_cost (rtx x
)
6458 struct ix86_address parts
;
6460 int ok
= ix86_decompose_address (x
, &parts
);
6464 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6465 parts
.base
= SUBREG_REG (parts
.base
);
6466 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6467 parts
.index
= SUBREG_REG (parts
.index
);
6469 /* More complex memory references are better. */
6470 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6472 if (parts
.seg
!= SEG_DEFAULT
)
6475 /* Attempt to minimize number of registers in the address. */
6477 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6479 && (!REG_P (parts
.index
)
6480 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6484 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6486 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6487 && parts
.base
!= parts
.index
)
6490 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6491 since it's predecode logic can't detect the length of instructions
6492 and it degenerates to vector decoded. Increase cost of such
6493 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6494 to split such addresses or even refuse such addresses at all.
6496 Following addressing modes are affected:
6501 The first and last case may be avoidable by explicitly coding the zero in
6502 memory address, but I don't have AMD-K6 machine handy to check this
6506 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6507 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6508 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6514 /* If X is a machine specific address (i.e. a symbol or label being
6515 referenced as a displacement from the GOT implemented using an
6516 UNSPEC), then return the base term. Otherwise return X. */
6519 ix86_find_base_term (rtx x
)
6525 if (GET_CODE (x
) != CONST
)
6528 if (GET_CODE (term
) == PLUS
6529 && (CONST_INT_P (XEXP (term
, 1))
6530 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6531 term
= XEXP (term
, 0);
6532 if (GET_CODE (term
) != UNSPEC
6533 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6536 term
= XVECEXP (term
, 0, 0);
6538 if (GET_CODE (term
) != SYMBOL_REF
6539 && GET_CODE (term
) != LABEL_REF
)
6545 term
= ix86_delegitimize_address (x
);
6547 if (GET_CODE (term
) != SYMBOL_REF
6548 && GET_CODE (term
) != LABEL_REF
)
6554 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6555 this is used for to form addresses to local data when -fPIC is in
6559 darwin_local_data_pic (rtx disp
)
6561 if (GET_CODE (disp
) == MINUS
)
6563 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6564 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6565 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6567 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6568 if (! strcmp (sym_name
, "<pic base>"))
6576 /* Determine if a given RTX is a valid constant. We already know this
6577 satisfies CONSTANT_P. */
6580 legitimate_constant_p (rtx x
)
6582 switch (GET_CODE (x
))
6587 if (GET_CODE (x
) == PLUS
)
6589 if (!CONST_INT_P (XEXP (x
, 1)))
6594 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6597 /* Only some unspecs are valid as "constants". */
6598 if (GET_CODE (x
) == UNSPEC
)
6599 switch (XINT (x
, 1))
6604 return TARGET_64BIT
;
6607 x
= XVECEXP (x
, 0, 0);
6608 return (GET_CODE (x
) == SYMBOL_REF
6609 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6611 x
= XVECEXP (x
, 0, 0);
6612 return (GET_CODE (x
) == SYMBOL_REF
6613 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6618 /* We must have drilled down to a symbol. */
6619 if (GET_CODE (x
) == LABEL_REF
)
6621 if (GET_CODE (x
) != SYMBOL_REF
)
6626 /* TLS symbols are never valid. */
6627 if (SYMBOL_REF_TLS_MODEL (x
))
6632 if (GET_MODE (x
) == TImode
6633 && x
!= CONST0_RTX (TImode
)
6639 if (x
== CONST0_RTX (GET_MODE (x
)))
6647 /* Otherwise we handle everything else in the move patterns. */
6651 /* Determine if it's legal to put X into the constant pool. This
6652 is not possible for the address of thread-local symbols, which
6653 is checked above. */
6656 ix86_cannot_force_const_mem (rtx x
)
6658 /* We can always put integral constants and vectors in memory. */
6659 switch (GET_CODE (x
))
6669 return !legitimate_constant_p (x
);
6672 /* Determine if a given RTX is a valid constant address. */
6675 constant_address_p (rtx x
)
6677 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6680 /* Nonzero if the constant value X is a legitimate general operand
6681 when generating PIC code. It is given that flag_pic is on and
6682 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6685 legitimate_pic_operand_p (rtx x
)
6689 switch (GET_CODE (x
))
6692 inner
= XEXP (x
, 0);
6693 if (GET_CODE (inner
) == PLUS
6694 && CONST_INT_P (XEXP (inner
, 1)))
6695 inner
= XEXP (inner
, 0);
6697 /* Only some unspecs are valid as "constants". */
6698 if (GET_CODE (inner
) == UNSPEC
)
6699 switch (XINT (inner
, 1))
6704 return TARGET_64BIT
;
6706 x
= XVECEXP (inner
, 0, 0);
6707 return (GET_CODE (x
) == SYMBOL_REF
6708 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6716 return legitimate_pic_address_disp_p (x
);
6723 /* Determine if a given CONST RTX is a valid memory displacement
6727 legitimate_pic_address_disp_p (rtx disp
)
6731 /* In 64bit mode we can allow direct addresses of symbols and labels
6732 when they are not dynamic symbols. */
6735 rtx op0
= disp
, op1
;
6737 switch (GET_CODE (disp
))
6743 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6745 op0
= XEXP (XEXP (disp
, 0), 0);
6746 op1
= XEXP (XEXP (disp
, 0), 1);
6747 if (!CONST_INT_P (op1
)
6748 || INTVAL (op1
) >= 16*1024*1024
6749 || INTVAL (op1
) < -16*1024*1024)
6751 if (GET_CODE (op0
) == LABEL_REF
)
6753 if (GET_CODE (op0
) != SYMBOL_REF
)
6758 /* TLS references should always be enclosed in UNSPEC. */
6759 if (SYMBOL_REF_TLS_MODEL (op0
))
6761 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6762 && ix86_cmodel
!= CM_LARGE_PIC
)
6770 if (GET_CODE (disp
) != CONST
)
6772 disp
= XEXP (disp
, 0);
6776 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6777 of GOT tables. We should not need these anyway. */
6778 if (GET_CODE (disp
) != UNSPEC
6779 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6780 && XINT (disp
, 1) != UNSPEC_GOTOFF
6781 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6784 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6785 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6791 if (GET_CODE (disp
) == PLUS
)
6793 if (!CONST_INT_P (XEXP (disp
, 1)))
6795 disp
= XEXP (disp
, 0);
6799 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6802 if (GET_CODE (disp
) != UNSPEC
)
6805 switch (XINT (disp
, 1))
6810 /* We need to check for both symbols and labels because VxWorks loads
6811 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6813 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6814 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6816 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6817 While ABI specify also 32bit relocation but we don't produce it in
6818 small PIC model at all. */
6819 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6820 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6822 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6824 case UNSPEC_GOTTPOFF
:
6825 case UNSPEC_GOTNTPOFF
:
6826 case UNSPEC_INDNTPOFF
:
6829 disp
= XVECEXP (disp
, 0, 0);
6830 return (GET_CODE (disp
) == SYMBOL_REF
6831 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6833 disp
= XVECEXP (disp
, 0, 0);
6834 return (GET_CODE (disp
) == SYMBOL_REF
6835 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6837 disp
= XVECEXP (disp
, 0, 0);
6838 return (GET_CODE (disp
) == SYMBOL_REF
6839 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6845 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6846 memory address for an instruction. The MODE argument is the machine mode
6847 for the MEM expression that wants to use this address.
6849 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6850 convert common non-canonical forms to canonical form so that they will
6854 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6856 struct ix86_address parts
;
6857 rtx base
, index
, disp
;
6858 HOST_WIDE_INT scale
;
6859 const char *reason
= NULL
;
6860 rtx reason_rtx
= NULL_RTX
;
6862 if (TARGET_DEBUG_ADDR
)
6865 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6866 GET_MODE_NAME (mode
), strict
);
6870 if (ix86_decompose_address (addr
, &parts
) <= 0)
6872 reason
= "decomposition failed";
6877 index
= parts
.index
;
6879 scale
= parts
.scale
;
6881 /* Validate base register.
6883 Don't allow SUBREG's that span more than a word here. It can lead to spill
6884 failures when the base is one word out of a two word structure, which is
6885 represented internally as a DImode int. */
6894 else if (GET_CODE (base
) == SUBREG
6895 && REG_P (SUBREG_REG (base
))
6896 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6898 reg
= SUBREG_REG (base
);
6901 reason
= "base is not a register";
6905 if (GET_MODE (base
) != Pmode
)
6907 reason
= "base is not in Pmode";
6911 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6912 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6914 reason
= "base is not valid";
6919 /* Validate index register.
6921 Don't allow SUBREG's that span more than a word here -- same as above. */
6930 else if (GET_CODE (index
) == SUBREG
6931 && REG_P (SUBREG_REG (index
))
6932 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6934 reg
= SUBREG_REG (index
);
6937 reason
= "index is not a register";
6941 if (GET_MODE (index
) != Pmode
)
6943 reason
= "index is not in Pmode";
6947 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6948 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6950 reason
= "index is not valid";
6955 /* Validate scale factor. */
6958 reason_rtx
= GEN_INT (scale
);
6961 reason
= "scale without index";
6965 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6967 reason
= "scale is not a valid multiplier";
6972 /* Validate displacement. */
6977 if (GET_CODE (disp
) == CONST
6978 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6979 switch (XINT (XEXP (disp
, 0), 1))
6981 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6982 used. While ABI specify also 32bit relocations, we don't produce
6983 them at all and use IP relative instead. */
6986 gcc_assert (flag_pic
);
6988 goto is_legitimate_pic
;
6989 reason
= "64bit address unspec";
6992 case UNSPEC_GOTPCREL
:
6993 gcc_assert (flag_pic
);
6994 goto is_legitimate_pic
;
6996 case UNSPEC_GOTTPOFF
:
6997 case UNSPEC_GOTNTPOFF
:
6998 case UNSPEC_INDNTPOFF
:
7004 reason
= "invalid address unspec";
7008 else if (SYMBOLIC_CONST (disp
)
7012 && MACHOPIC_INDIRECT
7013 && !machopic_operand_p (disp
)
7019 if (TARGET_64BIT
&& (index
|| base
))
7021 /* foo@dtpoff(%rX) is ok. */
7022 if (GET_CODE (disp
) != CONST
7023 || GET_CODE (XEXP (disp
, 0)) != PLUS
7024 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7025 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7026 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7027 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7029 reason
= "non-constant pic memory reference";
7033 else if (! legitimate_pic_address_disp_p (disp
))
7035 reason
= "displacement is an invalid pic construct";
7039 /* This code used to verify that a symbolic pic displacement
7040 includes the pic_offset_table_rtx register.
7042 While this is good idea, unfortunately these constructs may
7043 be created by "adds using lea" optimization for incorrect
7052 This code is nonsensical, but results in addressing
7053 GOT table with pic_offset_table_rtx base. We can't
7054 just refuse it easily, since it gets matched by
7055 "addsi3" pattern, that later gets split to lea in the
7056 case output register differs from input. While this
7057 can be handled by separate addsi pattern for this case
7058 that never results in lea, this seems to be easier and
7059 correct fix for crash to disable this test. */
7061 else if (GET_CODE (disp
) != LABEL_REF
7062 && !CONST_INT_P (disp
)
7063 && (GET_CODE (disp
) != CONST
7064 || !legitimate_constant_p (disp
))
7065 && (GET_CODE (disp
) != SYMBOL_REF
7066 || !legitimate_constant_p (disp
)))
7068 reason
= "displacement is not constant";
7071 else if (TARGET_64BIT
7072 && !x86_64_immediate_operand (disp
, VOIDmode
))
7074 reason
= "displacement is out of range";
7079 /* Everything looks valid. */
7080 if (TARGET_DEBUG_ADDR
)
7081 fprintf (stderr
, "Success.\n");
7085 if (TARGET_DEBUG_ADDR
)
7087 fprintf (stderr
, "Error: %s\n", reason
);
7088 debug_rtx (reason_rtx
);
7093 /* Return a unique alias set for the GOT. */
7095 static HOST_WIDE_INT
7096 ix86_GOT_alias_set (void)
7098 static HOST_WIDE_INT set
= -1;
7100 set
= new_alias_set ();
7104 /* Return a legitimate reference for ORIG (an address) using the
7105 register REG. If REG is 0, a new pseudo is generated.
7107 There are two types of references that must be handled:
7109 1. Global data references must load the address from the GOT, via
7110 the PIC reg. An insn is emitted to do this load, and the reg is
7113 2. Static data references, constant pool addresses, and code labels
7114 compute the address as an offset from the GOT, whose base is in
7115 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7116 differentiate them from global data objects. The returned
7117 address is the PIC reg + an unspec constant.
7119 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7120 reg also appears in the address. */
7123 legitimize_pic_address (rtx orig
, rtx reg
)
7130 if (TARGET_MACHO
&& !TARGET_64BIT
)
7133 reg
= gen_reg_rtx (Pmode
);
7134 /* Use the generic Mach-O PIC machinery. */
7135 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7139 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7141 else if (TARGET_64BIT
7142 && ix86_cmodel
!= CM_SMALL_PIC
7143 && gotoff_operand (addr
, Pmode
))
7146 /* This symbol may be referenced via a displacement from the PIC
7147 base address (@GOTOFF). */
7149 if (reload_in_progress
)
7150 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7151 if (GET_CODE (addr
) == CONST
)
7152 addr
= XEXP (addr
, 0);
7153 if (GET_CODE (addr
) == PLUS
)
7155 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7156 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7159 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7160 new = gen_rtx_CONST (Pmode
, new);
7162 tmpreg
= gen_reg_rtx (Pmode
);
7165 emit_move_insn (tmpreg
, new);
7169 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7170 tmpreg
, 1, OPTAB_DIRECT
);
7173 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7175 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7177 /* This symbol may be referenced via a displacement from the PIC
7178 base address (@GOTOFF). */
7180 if (reload_in_progress
)
7181 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7182 if (GET_CODE (addr
) == CONST
)
7183 addr
= XEXP (addr
, 0);
7184 if (GET_CODE (addr
) == PLUS
)
7186 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7187 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7190 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7191 new = gen_rtx_CONST (Pmode
, new);
7192 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7196 emit_move_insn (reg
, new);
7200 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7201 /* We can't use @GOTOFF for text labels on VxWorks;
7202 see gotoff_operand. */
7203 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7205 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7207 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7208 new = gen_rtx_CONST (Pmode
, new);
7209 new = gen_const_mem (Pmode
, new);
7210 set_mem_alias_set (new, ix86_GOT_alias_set ());
7213 reg
= gen_reg_rtx (Pmode
);
7214 /* Use directly gen_movsi, otherwise the address is loaded
7215 into register for CSE. We don't want to CSE this addresses,
7216 instead we CSE addresses from the GOT table, so skip this. */
7217 emit_insn (gen_movsi (reg
, new));
7222 /* This symbol must be referenced via a load from the
7223 Global Offset Table (@GOT). */
7225 if (reload_in_progress
)
7226 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7227 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7228 new = gen_rtx_CONST (Pmode
, new);
7230 new = force_reg (Pmode
, new);
7231 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7232 new = gen_const_mem (Pmode
, new);
7233 set_mem_alias_set (new, ix86_GOT_alias_set ());
7236 reg
= gen_reg_rtx (Pmode
);
7237 emit_move_insn (reg
, new);
7243 if (CONST_INT_P (addr
)
7244 && !x86_64_immediate_operand (addr
, VOIDmode
))
7248 emit_move_insn (reg
, addr
);
7252 new = force_reg (Pmode
, addr
);
7254 else if (GET_CODE (addr
) == CONST
)
7256 addr
= XEXP (addr
, 0);
7258 /* We must match stuff we generate before. Assume the only
7259 unspecs that can get here are ours. Not that we could do
7260 anything with them anyway.... */
7261 if (GET_CODE (addr
) == UNSPEC
7262 || (GET_CODE (addr
) == PLUS
7263 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7265 gcc_assert (GET_CODE (addr
) == PLUS
);
7267 if (GET_CODE (addr
) == PLUS
)
7269 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7271 /* Check first to see if this is a constant offset from a @GOTOFF
7272 symbol reference. */
7273 if (gotoff_operand (op0
, Pmode
)
7274 && CONST_INT_P (op1
))
7278 if (reload_in_progress
)
7279 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7280 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7282 new = gen_rtx_PLUS (Pmode
, new, op1
);
7283 new = gen_rtx_CONST (Pmode
, new);
7284 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7288 emit_move_insn (reg
, new);
7294 if (INTVAL (op1
) < -16*1024*1024
7295 || INTVAL (op1
) >= 16*1024*1024)
7297 if (!x86_64_immediate_operand (op1
, Pmode
))
7298 op1
= force_reg (Pmode
, op1
);
7299 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7305 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7306 new = legitimize_pic_address (XEXP (addr
, 1),
7307 base
== reg
? NULL_RTX
: reg
);
7309 if (CONST_INT_P (new))
7310 new = plus_constant (base
, INTVAL (new));
7313 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7315 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7316 new = XEXP (new, 1);
7318 new = gen_rtx_PLUS (Pmode
, base
, new);
7326 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7329 get_thread_pointer (int to_reg
)
7333 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7337 reg
= gen_reg_rtx (Pmode
);
7338 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7339 insn
= emit_insn (insn
);
7344 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7345 false if we expect this to be used for a memory address and true if
7346 we expect to load the address into a register. */
7349 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7351 rtx dest
, base
, off
, pic
, tp
;
7356 case TLS_MODEL_GLOBAL_DYNAMIC
:
7357 dest
= gen_reg_rtx (Pmode
);
7358 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7360 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7362 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7365 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7366 insns
= get_insns ();
7369 emit_libcall_block (insns
, dest
, rax
, x
);
7371 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7372 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7374 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7376 if (TARGET_GNU2_TLS
)
7378 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7380 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7384 case TLS_MODEL_LOCAL_DYNAMIC
:
7385 base
= gen_reg_rtx (Pmode
);
7386 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7388 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7390 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7393 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7394 insns
= get_insns ();
7397 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7398 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7399 emit_libcall_block (insns
, base
, rax
, note
);
7401 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7402 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7404 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7406 if (TARGET_GNU2_TLS
)
7408 rtx x
= ix86_tls_module_base ();
7410 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7411 gen_rtx_MINUS (Pmode
, x
, tp
));
7414 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7415 off
= gen_rtx_CONST (Pmode
, off
);
7417 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7419 if (TARGET_GNU2_TLS
)
7421 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7423 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7428 case TLS_MODEL_INITIAL_EXEC
:
7432 type
= UNSPEC_GOTNTPOFF
;
7436 if (reload_in_progress
)
7437 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7438 pic
= pic_offset_table_rtx
;
7439 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7441 else if (!TARGET_ANY_GNU_TLS
)
7443 pic
= gen_reg_rtx (Pmode
);
7444 emit_insn (gen_set_got (pic
));
7445 type
= UNSPEC_GOTTPOFF
;
7450 type
= UNSPEC_INDNTPOFF
;
7453 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7454 off
= gen_rtx_CONST (Pmode
, off
);
7456 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7457 off
= gen_const_mem (Pmode
, off
);
7458 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7460 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7462 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7463 off
= force_reg (Pmode
, off
);
7464 return gen_rtx_PLUS (Pmode
, base
, off
);
7468 base
= get_thread_pointer (true);
7469 dest
= gen_reg_rtx (Pmode
);
7470 emit_insn (gen_subsi3 (dest
, base
, off
));
7474 case TLS_MODEL_LOCAL_EXEC
:
7475 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7476 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7477 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7478 off
= gen_rtx_CONST (Pmode
, off
);
7480 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7482 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7483 return gen_rtx_PLUS (Pmode
, base
, off
);
7487 base
= get_thread_pointer (true);
7488 dest
= gen_reg_rtx (Pmode
);
7489 emit_insn (gen_subsi3 (dest
, base
, off
));
7500 /* Try machine-dependent ways of modifying an illegitimate address
7501 to be legitimate. If we find one, return the new, valid address.
7502 This macro is used in only one place: `memory_address' in explow.c.
7504 OLDX is the address as it was before break_out_memory_refs was called.
7505 In some cases it is useful to look at this to decide what needs to be done.
7507 MODE and WIN are passed so that this macro can use
7508 GO_IF_LEGITIMATE_ADDRESS.
7510 It is always safe for this macro to do nothing. It exists to recognize
7511 opportunities to optimize the output.
7513 For the 80386, we handle X+REG by loading X into a register R and
7514 using R+REG. R will go in a general reg and indexing will be used.
7515 However, if REG is a broken-out memory address or multiplication,
7516 nothing needs to be done because REG can certainly go in a general reg.
7518 When -fpic is used, special handling is needed for symbolic references.
7519 See comments by legitimize_pic_address in i386.c for details. */
7522 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7527 if (TARGET_DEBUG_ADDR
)
7529 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7530 GET_MODE_NAME (mode
));
7534 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7536 return legitimize_tls_address (x
, log
, false);
7537 if (GET_CODE (x
) == CONST
7538 && GET_CODE (XEXP (x
, 0)) == PLUS
7539 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7540 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7542 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7543 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7546 if (flag_pic
&& SYMBOLIC_CONST (x
))
7547 return legitimize_pic_address (x
, 0);
7549 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7550 if (GET_CODE (x
) == ASHIFT
7551 && CONST_INT_P (XEXP (x
, 1))
7552 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7555 log
= INTVAL (XEXP (x
, 1));
7556 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7557 GEN_INT (1 << log
));
7560 if (GET_CODE (x
) == PLUS
)
7562 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7564 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7565 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7566 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7569 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7570 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7571 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7572 GEN_INT (1 << log
));
7575 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7576 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7577 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7580 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7581 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7582 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7583 GEN_INT (1 << log
));
7586 /* Put multiply first if it isn't already. */
7587 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7589 rtx tmp
= XEXP (x
, 0);
7590 XEXP (x
, 0) = XEXP (x
, 1);
7595 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7596 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7597 created by virtual register instantiation, register elimination, and
7598 similar optimizations. */
7599 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7602 x
= gen_rtx_PLUS (Pmode
,
7603 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7604 XEXP (XEXP (x
, 1), 0)),
7605 XEXP (XEXP (x
, 1), 1));
7609 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7610 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7611 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7612 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7613 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7614 && CONSTANT_P (XEXP (x
, 1)))
7617 rtx other
= NULL_RTX
;
7619 if (CONST_INT_P (XEXP (x
, 1)))
7621 constant
= XEXP (x
, 1);
7622 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7624 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7626 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7627 other
= XEXP (x
, 1);
7635 x
= gen_rtx_PLUS (Pmode
,
7636 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7637 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7638 plus_constant (other
, INTVAL (constant
)));
7642 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7645 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7648 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7651 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7654 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7658 && REG_P (XEXP (x
, 1))
7659 && REG_P (XEXP (x
, 0)))
7662 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7665 x
= legitimize_pic_address (x
, 0);
7668 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7671 if (REG_P (XEXP (x
, 0)))
7673 rtx temp
= gen_reg_rtx (Pmode
);
7674 rtx val
= force_operand (XEXP (x
, 1), temp
);
7676 emit_move_insn (temp
, val
);
7682 else if (REG_P (XEXP (x
, 1)))
7684 rtx temp
= gen_reg_rtx (Pmode
);
7685 rtx val
= force_operand (XEXP (x
, 0), temp
);
7687 emit_move_insn (temp
, val
);
7697 /* Print an integer constant expression in assembler syntax. Addition
7698 and subtraction are the only arithmetic that may appear in these
7699 expressions. FILE is the stdio stream to write to, X is the rtx, and
7700 CODE is the operand print code from the output string. */
7703 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7707 switch (GET_CODE (x
))
7710 gcc_assert (flag_pic
);
7715 if (! TARGET_MACHO
|| TARGET_64BIT
)
7716 output_addr_const (file
, x
);
7719 const char *name
= XSTR (x
, 0);
7721 /* Mark the decl as referenced so that cgraph will output the function. */
7722 if (SYMBOL_REF_DECL (x
))
7723 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7726 if (MACHOPIC_INDIRECT
7727 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7728 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7730 assemble_name (file
, name
);
7732 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7733 fputs ("@PLT", file
);
7740 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7741 assemble_name (asm_out_file
, buf
);
7745 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7749 /* This used to output parentheses around the expression,
7750 but that does not work on the 386 (either ATT or BSD assembler). */
7751 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7755 if (GET_MODE (x
) == VOIDmode
)
7757 /* We can use %d if the number is <32 bits and positive. */
7758 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7759 fprintf (file
, "0x%lx%08lx",
7760 (unsigned long) CONST_DOUBLE_HIGH (x
),
7761 (unsigned long) CONST_DOUBLE_LOW (x
));
7763 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7766 /* We can't handle floating point constants;
7767 PRINT_OPERAND must handle them. */
7768 output_operand_lossage ("floating constant misused");
7772 /* Some assemblers need integer constants to appear first. */
7773 if (CONST_INT_P (XEXP (x
, 0)))
7775 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7777 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7781 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7782 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7784 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7790 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7791 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7793 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7795 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7799 gcc_assert (XVECLEN (x
, 0) == 1);
7800 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7801 switch (XINT (x
, 1))
7804 fputs ("@GOT", file
);
7807 fputs ("@GOTOFF", file
);
7810 fputs ("@PLTOFF", file
);
7812 case UNSPEC_GOTPCREL
:
7813 fputs ("@GOTPCREL(%rip)", file
);
7815 case UNSPEC_GOTTPOFF
:
7816 /* FIXME: This might be @TPOFF in Sun ld too. */
7817 fputs ("@GOTTPOFF", file
);
7820 fputs ("@TPOFF", file
);
7824 fputs ("@TPOFF", file
);
7826 fputs ("@NTPOFF", file
);
7829 fputs ("@DTPOFF", file
);
7831 case UNSPEC_GOTNTPOFF
:
7833 fputs ("@GOTTPOFF(%rip)", file
);
7835 fputs ("@GOTNTPOFF", file
);
7837 case UNSPEC_INDNTPOFF
:
7838 fputs ("@INDNTPOFF", file
);
7841 output_operand_lossage ("invalid UNSPEC as operand");
7847 output_operand_lossage ("invalid expression as operand");
7851 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7852 We need to emit DTP-relative relocations. */
7855 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7857 fputs (ASM_LONG
, file
);
7858 output_addr_const (file
, x
);
7859 fputs ("@DTPOFF", file
);
7865 fputs (", 0", file
);
7872 /* In the name of slightly smaller debug output, and to cater to
7873 general assembler lossage, recognize PIC+GOTOFF and turn it back
7874 into a direct symbol reference.
7876 On Darwin, this is necessary to avoid a crash, because Darwin
7877 has a different PIC label for each routine but the DWARF debugging
7878 information is not associated with any particular routine, so it's
7879 necessary to remove references to the PIC label from RTL stored by
7880 the DWARF output code. */
7883 ix86_delegitimize_address (rtx orig_x
)
7886 /* reg_addend is NULL or a multiple of some register. */
7887 rtx reg_addend
= NULL_RTX
;
7888 /* const_addend is NULL or a const_int. */
7889 rtx const_addend
= NULL_RTX
;
7890 /* This is the result, or NULL. */
7891 rtx result
= NULL_RTX
;
7898 if (GET_CODE (x
) != CONST
7899 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7900 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7903 return XVECEXP (XEXP (x
, 0), 0, 0);
7906 if (GET_CODE (x
) != PLUS
7907 || GET_CODE (XEXP (x
, 1)) != CONST
)
7910 if (REG_P (XEXP (x
, 0))
7911 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7912 /* %ebx + GOT/GOTOFF */
7914 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7916 /* %ebx + %reg * scale + GOT/GOTOFF */
7917 reg_addend
= XEXP (x
, 0);
7918 if (REG_P (XEXP (reg_addend
, 0))
7919 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7920 reg_addend
= XEXP (reg_addend
, 1);
7921 else if (REG_P (XEXP (reg_addend
, 1))
7922 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7923 reg_addend
= XEXP (reg_addend
, 0);
7926 if (!REG_P (reg_addend
)
7927 && GET_CODE (reg_addend
) != MULT
7928 && GET_CODE (reg_addend
) != ASHIFT
)
7934 x
= XEXP (XEXP (x
, 1), 0);
7935 if (GET_CODE (x
) == PLUS
7936 && CONST_INT_P (XEXP (x
, 1)))
7938 const_addend
= XEXP (x
, 1);
7942 if (GET_CODE (x
) == UNSPEC
7943 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7944 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7945 result
= XVECEXP (x
, 0, 0);
7947 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7949 result
= XEXP (x
, 0);
7955 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7957 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7962 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7967 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7969 enum rtx_code second_code
, bypass_code
;
7970 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7971 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7972 code
= ix86_fp_compare_code_to_integer (code
);
7976 code
= reverse_condition (code
);
7987 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7991 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7992 Those same assemblers have the same but opposite lossage on cmov. */
7993 gcc_assert (mode
== CCmode
);
7994 suffix
= fp
? "nbe" : "a";
8014 gcc_assert (mode
== CCmode
);
8036 gcc_assert (mode
== CCmode
);
8037 suffix
= fp
? "nb" : "ae";
8040 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8044 gcc_assert (mode
== CCmode
);
8048 suffix
= fp
? "u" : "p";
8051 suffix
= fp
? "nu" : "np";
8056 fputs (suffix
, file
);
8059 /* Print the name of register X to FILE based on its machine mode and number.
8060 If CODE is 'w', pretend the mode is HImode.
8061 If CODE is 'b', pretend the mode is QImode.
8062 If CODE is 'k', pretend the mode is SImode.
8063 If CODE is 'q', pretend the mode is DImode.
8064 If CODE is 'h', pretend the reg is the 'high' byte register.
8065 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8068 print_reg (rtx x
, int code
, FILE *file
)
8070 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8071 && REGNO (x
) != FRAME_POINTER_REGNUM
8072 && REGNO (x
) != FLAGS_REG
8073 && REGNO (x
) != FPSR_REG
8074 && REGNO (x
) != FPCR_REG
);
8076 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8079 if (code
== 'w' || MMX_REG_P (x
))
8081 else if (code
== 'b')
8083 else if (code
== 'k')
8085 else if (code
== 'q')
8087 else if (code
== 'y')
8089 else if (code
== 'h')
8092 code
= GET_MODE_SIZE (GET_MODE (x
));
8094 /* Irritatingly, AMD extended registers use different naming convention
8095 from the normal registers. */
8096 if (REX_INT_REG_P (x
))
8098 gcc_assert (TARGET_64BIT
);
8102 error ("extended registers have no high halves");
8105 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8108 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8111 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8114 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8117 error ("unsupported operand size for extended register");
8125 if (STACK_TOP_P (x
))
8127 fputs ("st(0)", file
);
8134 if (! ANY_FP_REG_P (x
))
8135 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8140 fputs (hi_reg_name
[REGNO (x
)], file
);
8143 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8145 fputs (qi_reg_name
[REGNO (x
)], file
);
8148 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8150 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8157 /* Locate some local-dynamic symbol still in use by this function
8158 so that we can print its name in some tls_local_dynamic_base
8162 get_some_local_dynamic_name (void)
8166 if (cfun
->machine
->some_ld_name
)
8167 return cfun
->machine
->some_ld_name
;
8169 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8171 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8172 return cfun
->machine
->some_ld_name
;
8178 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8182 if (GET_CODE (x
) == SYMBOL_REF
8183 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8185 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8193 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8194 C -- print opcode suffix for set/cmov insn.
8195 c -- like C, but print reversed condition
8196 F,f -- likewise, but for floating-point.
8197 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8199 R -- print the prefix for register names.
8200 z -- print the opcode suffix for the size of the current operand.
8201 * -- print a star (in certain assembler syntax)
8202 A -- print an absolute memory reference.
8203 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8204 s -- print a shift double count, followed by the assemblers argument
8206 b -- print the QImode name of the register for the indicated operand.
8207 %b0 would print %al if operands[0] is reg 0.
8208 w -- likewise, print the HImode name of the register.
8209 k -- likewise, print the SImode name of the register.
8210 q -- likewise, print the DImode name of the register.
8211 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8212 y -- print "st(0)" instead of "st" as a register.
8213 D -- print condition for SSE cmp instruction.
8214 P -- if PIC, print an @PLT suffix.
8215 X -- don't print any sort of PIC '@' suffix for a symbol.
8216 & -- print some in-use local-dynamic symbol name.
8217 H -- print a memory address offset by 8; used for sse high-parts
8221 print_operand (FILE *file
, rtx x
, int code
)
8228 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8233 assemble_name (file
, get_some_local_dynamic_name ());
8237 switch (ASSEMBLER_DIALECT
)
8244 /* Intel syntax. For absolute addresses, registers should not
8245 be surrounded by braces. */
8249 PRINT_OPERAND (file
, x
, 0);
8259 PRINT_OPERAND (file
, x
, 0);
8264 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8269 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8274 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8279 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8284 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8289 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8294 /* 387 opcodes don't get size suffixes if the operands are
8296 if (STACK_REG_P (x
))
8299 /* Likewise if using Intel opcodes. */
8300 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8303 /* This is the size of op from size of operand. */
8304 switch (GET_MODE_SIZE (GET_MODE (x
)))
8311 #ifdef HAVE_GAS_FILDS_FISTS
8317 if (GET_MODE (x
) == SFmode
)
8332 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8334 #ifdef GAS_MNEMONICS
8360 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8362 PRINT_OPERAND (file
, x
, 0);
8368 /* Little bit of braindamage here. The SSE compare instructions
8369 does use completely different names for the comparisons that the
8370 fp conditional moves. */
8371 switch (GET_CODE (x
))
8386 fputs ("unord", file
);
8390 fputs ("neq", file
);
8394 fputs ("nlt", file
);
8398 fputs ("nle", file
);
8401 fputs ("ord", file
);
8408 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8409 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8411 switch (GET_MODE (x
))
8413 case HImode
: putc ('w', file
); break;
8415 case SFmode
: putc ('l', file
); break;
8417 case DFmode
: putc ('q', file
); break;
8418 default: gcc_unreachable ();
8425 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8428 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8429 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8432 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8435 /* Like above, but reverse condition */
8437 /* Check to see if argument to %c is really a constant
8438 and not a condition code which needs to be reversed. */
8439 if (!COMPARISON_P (x
))
8441 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8444 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8447 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8448 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8451 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8455 /* It doesn't actually matter what mode we use here, as we're
8456 only going to use this for printing. */
8457 x
= adjust_address_nv (x
, DImode
, 8);
8464 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8467 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8470 int pred_val
= INTVAL (XEXP (x
, 0));
8472 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8473 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8475 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8476 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8478 /* Emit hints only in the case default branch prediction
8479 heuristics would fail. */
8480 if (taken
!= cputaken
)
8482 /* We use 3e (DS) prefix for taken branches and
8483 2e (CS) prefix for not taken branches. */
8485 fputs ("ds ; ", file
);
8487 fputs ("cs ; ", file
);
8494 output_operand_lossage ("invalid operand code '%c'", code
);
8499 print_reg (x
, code
, file
);
8503 /* No `byte ptr' prefix for call instructions. */
8504 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8507 switch (GET_MODE_SIZE (GET_MODE (x
)))
8509 case 1: size
= "BYTE"; break;
8510 case 2: size
= "WORD"; break;
8511 case 4: size
= "DWORD"; break;
8512 case 8: size
= "QWORD"; break;
8513 case 12: size
= "XWORD"; break;
8514 case 16: size
= "XMMWORD"; break;
8519 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8522 else if (code
== 'w')
8524 else if (code
== 'k')
8528 fputs (" PTR ", file
);
8532 /* Avoid (%rip) for call operands. */
8533 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8534 && !CONST_INT_P (x
))
8535 output_addr_const (file
, x
);
8536 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8537 output_operand_lossage ("invalid constraints for operand");
8542 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8547 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8548 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8550 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8552 fprintf (file
, "0x%08lx", l
);
8555 /* These float cases don't actually occur as immediate operands. */
8556 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8560 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8561 fprintf (file
, "%s", dstr
);
8564 else if (GET_CODE (x
) == CONST_DOUBLE
8565 && GET_MODE (x
) == XFmode
)
8569 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8570 fprintf (file
, "%s", dstr
);
8575 /* We have patterns that allow zero sets of memory, for instance.
8576 In 64-bit mode, we should probably support all 8-byte vectors,
8577 since we can in fact encode that into an immediate. */
8578 if (GET_CODE (x
) == CONST_VECTOR
)
8580 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8586 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8588 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8591 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8592 || GET_CODE (x
) == LABEL_REF
)
8594 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8597 fputs ("OFFSET FLAT:", file
);
8600 if (CONST_INT_P (x
))
8601 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8603 output_pic_addr_const (file
, x
, code
);
8605 output_addr_const (file
, x
);
8609 /* Print a memory operand whose address is ADDR. */
8612 print_operand_address (FILE *file
, rtx addr
)
8614 struct ix86_address parts
;
8615 rtx base
, index
, disp
;
8617 int ok
= ix86_decompose_address (addr
, &parts
);
8622 index
= parts
.index
;
8624 scale
= parts
.scale
;
8632 if (USER_LABEL_PREFIX
[0] == 0)
8634 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8640 if (!base
&& !index
)
8642 /* Displacement only requires special attention. */
8644 if (CONST_INT_P (disp
))
8646 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8648 if (USER_LABEL_PREFIX
[0] == 0)
8650 fputs ("ds:", file
);
8652 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8655 output_pic_addr_const (file
, disp
, 0);
8657 output_addr_const (file
, disp
);
8659 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8662 if (GET_CODE (disp
) == CONST
8663 && GET_CODE (XEXP (disp
, 0)) == PLUS
8664 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8665 disp
= XEXP (XEXP (disp
, 0), 0);
8666 if (GET_CODE (disp
) == LABEL_REF
8667 || (GET_CODE (disp
) == SYMBOL_REF
8668 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8669 fputs ("(%rip)", file
);
8674 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8679 output_pic_addr_const (file
, disp
, 0);
8680 else if (GET_CODE (disp
) == LABEL_REF
)
8681 output_asm_label (disp
);
8683 output_addr_const (file
, disp
);
8688 print_reg (base
, 0, file
);
8692 print_reg (index
, 0, file
);
8694 fprintf (file
, ",%d", scale
);
8700 rtx offset
= NULL_RTX
;
8704 /* Pull out the offset of a symbol; print any symbol itself. */
8705 if (GET_CODE (disp
) == CONST
8706 && GET_CODE (XEXP (disp
, 0)) == PLUS
8707 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8709 offset
= XEXP (XEXP (disp
, 0), 1);
8710 disp
= gen_rtx_CONST (VOIDmode
,
8711 XEXP (XEXP (disp
, 0), 0));
8715 output_pic_addr_const (file
, disp
, 0);
8716 else if (GET_CODE (disp
) == LABEL_REF
)
8717 output_asm_label (disp
);
8718 else if (CONST_INT_P (disp
))
8721 output_addr_const (file
, disp
);
8727 print_reg (base
, 0, file
);
8730 if (INTVAL (offset
) >= 0)
8732 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8736 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8743 print_reg (index
, 0, file
);
8745 fprintf (file
, "*%d", scale
);
8753 output_addr_const_extra (FILE *file
, rtx x
)
8757 if (GET_CODE (x
) != UNSPEC
)
8760 op
= XVECEXP (x
, 0, 0);
8761 switch (XINT (x
, 1))
8763 case UNSPEC_GOTTPOFF
:
8764 output_addr_const (file
, op
);
8765 /* FIXME: This might be @TPOFF in Sun ld. */
8766 fputs ("@GOTTPOFF", file
);
8769 output_addr_const (file
, op
);
8770 fputs ("@TPOFF", file
);
8773 output_addr_const (file
, op
);
8775 fputs ("@TPOFF", file
);
8777 fputs ("@NTPOFF", file
);
8780 output_addr_const (file
, op
);
8781 fputs ("@DTPOFF", file
);
8783 case UNSPEC_GOTNTPOFF
:
8784 output_addr_const (file
, op
);
8786 fputs ("@GOTTPOFF(%rip)", file
);
8788 fputs ("@GOTNTPOFF", file
);
8790 case UNSPEC_INDNTPOFF
:
8791 output_addr_const (file
, op
);
8792 fputs ("@INDNTPOFF", file
);
8802 /* Split one or more DImode RTL references into pairs of SImode
8803 references. The RTL can be REG, offsettable MEM, integer constant, or
8804 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8805 split and "num" is its length. lo_half and hi_half are output arrays
8806 that parallel "operands". */
8809 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8813 rtx op
= operands
[num
];
8815 /* simplify_subreg refuse to split volatile memory addresses,
8816 but we still have to handle it. */
8819 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8820 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8824 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8825 GET_MODE (op
) == VOIDmode
8826 ? DImode
: GET_MODE (op
), 0);
8827 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8828 GET_MODE (op
) == VOIDmode
8829 ? DImode
: GET_MODE (op
), 4);
8833 /* Split one or more TImode RTL references into pairs of DImode
8834 references. The RTL can be REG, offsettable MEM, integer constant, or
8835 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8836 split and "num" is its length. lo_half and hi_half are output arrays
8837 that parallel "operands". */
8840 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8844 rtx op
= operands
[num
];
8846 /* simplify_subreg refuse to split volatile memory addresses, but we
8847 still have to handle it. */
8850 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8851 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8855 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8856 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8861 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8862 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8863 is the expression of the binary operation. The output may either be
8864 emitted here, or returned to the caller, like all output_* functions.
8866 There is no guarantee that the operands are the same mode, as they
8867 might be within FLOAT or FLOAT_EXTEND expressions. */
8869 #ifndef SYSV386_COMPAT
8870 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8871 wants to fix the assemblers because that causes incompatibility
8872 with gcc. No-one wants to fix gcc because that causes
8873 incompatibility with assemblers... You can use the option of
8874 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8875 #define SYSV386_COMPAT 1
8879 output_387_binary_op (rtx insn
, rtx
*operands
)
8881 static char buf
[30];
8884 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8886 #ifdef ENABLE_CHECKING
8887 /* Even if we do not want to check the inputs, this documents input
8888 constraints. Which helps in understanding the following code. */
8889 if (STACK_REG_P (operands
[0])
8890 && ((REG_P (operands
[1])
8891 && REGNO (operands
[0]) == REGNO (operands
[1])
8892 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8893 || (REG_P (operands
[2])
8894 && REGNO (operands
[0]) == REGNO (operands
[2])
8895 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8896 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8899 gcc_assert (is_sse
);
8902 switch (GET_CODE (operands
[3]))
8905 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8906 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8914 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8915 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8923 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8924 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8932 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8933 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8947 if (GET_MODE (operands
[0]) == SFmode
)
8948 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8950 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8955 switch (GET_CODE (operands
[3]))
8959 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8961 rtx temp
= operands
[2];
8962 operands
[2] = operands
[1];
8966 /* know operands[0] == operands[1]. */
8968 if (MEM_P (operands
[2]))
8974 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8976 if (STACK_TOP_P (operands
[0]))
8977 /* How is it that we are storing to a dead operand[2]?
8978 Well, presumably operands[1] is dead too. We can't
8979 store the result to st(0) as st(0) gets popped on this
8980 instruction. Instead store to operands[2] (which I
8981 think has to be st(1)). st(1) will be popped later.
8982 gcc <= 2.8.1 didn't have this check and generated
8983 assembly code that the Unixware assembler rejected. */
8984 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8986 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8990 if (STACK_TOP_P (operands
[0]))
8991 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8993 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8998 if (MEM_P (operands
[1]))
9004 if (MEM_P (operands
[2]))
9010 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9013 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9014 derived assemblers, confusingly reverse the direction of
9015 the operation for fsub{r} and fdiv{r} when the
9016 destination register is not st(0). The Intel assembler
9017 doesn't have this brain damage. Read !SYSV386_COMPAT to
9018 figure out what the hardware really does. */
9019 if (STACK_TOP_P (operands
[0]))
9020 p
= "{p\t%0, %2|rp\t%2, %0}";
9022 p
= "{rp\t%2, %0|p\t%0, %2}";
9024 if (STACK_TOP_P (operands
[0]))
9025 /* As above for fmul/fadd, we can't store to st(0). */
9026 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9028 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9033 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9036 if (STACK_TOP_P (operands
[0]))
9037 p
= "{rp\t%0, %1|p\t%1, %0}";
9039 p
= "{p\t%1, %0|rp\t%0, %1}";
9041 if (STACK_TOP_P (operands
[0]))
9042 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9044 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9049 if (STACK_TOP_P (operands
[0]))
9051 if (STACK_TOP_P (operands
[1]))
9052 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9054 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9057 else if (STACK_TOP_P (operands
[1]))
9060 p
= "{\t%1, %0|r\t%0, %1}";
9062 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9068 p
= "{r\t%2, %0|\t%0, %2}";
9070 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9083 /* Return needed mode for entity in optimize_mode_switching pass. */
9086 ix86_mode_needed (int entity
, rtx insn
)
9088 enum attr_i387_cw mode
;
9090 /* The mode UNINITIALIZED is used to store control word after a
9091 function call or ASM pattern. The mode ANY specify that function
9092 has no requirements on the control word and make no changes in the
9093 bits we are interested in. */
9096 || (NONJUMP_INSN_P (insn
)
9097 && (asm_noperands (PATTERN (insn
)) >= 0
9098 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9099 return I387_CW_UNINITIALIZED
;
9101 if (recog_memoized (insn
) < 0)
9104 mode
= get_attr_i387_cw (insn
);
9109 if (mode
== I387_CW_TRUNC
)
9114 if (mode
== I387_CW_FLOOR
)
9119 if (mode
== I387_CW_CEIL
)
9124 if (mode
== I387_CW_MASK_PM
)
9135 /* Output code to initialize control word copies used by trunc?f?i and
9136 rounding patterns. CURRENT_MODE is set to current control word,
9137 while NEW_MODE is set to new control word. */
9140 emit_i387_cw_initialization (int mode
)
9142 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9147 rtx reg
= gen_reg_rtx (HImode
);
9149 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9150 emit_move_insn (reg
, copy_rtx (stored_mode
));
9152 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9157 /* round toward zero (truncate) */
9158 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9159 slot
= SLOT_CW_TRUNC
;
9163 /* round down toward -oo */
9164 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9165 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9166 slot
= SLOT_CW_FLOOR
;
9170 /* round up toward +oo */
9171 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9172 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9173 slot
= SLOT_CW_CEIL
;
9176 case I387_CW_MASK_PM
:
9177 /* mask precision exception for nearbyint() */
9178 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9179 slot
= SLOT_CW_MASK_PM
;
9191 /* round toward zero (truncate) */
9192 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9193 slot
= SLOT_CW_TRUNC
;
9197 /* round down toward -oo */
9198 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9199 slot
= SLOT_CW_FLOOR
;
9203 /* round up toward +oo */
9204 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9205 slot
= SLOT_CW_CEIL
;
9208 case I387_CW_MASK_PM
:
9209 /* mask precision exception for nearbyint() */
9210 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9211 slot
= SLOT_CW_MASK_PM
;
9219 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9221 new_mode
= assign_386_stack_local (HImode
, slot
);
9222 emit_move_insn (new_mode
, reg
);
9225 /* Output code for INSN to convert a float to a signed int. OPERANDS
9226 are the insn operands. The output may be [HSD]Imode and the input
9227 operand may be [SDX]Fmode. */
9230 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9232 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9233 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9234 int round_mode
= get_attr_i387_cw (insn
);
9236 /* Jump through a hoop or two for DImode, since the hardware has no
9237 non-popping instruction. We used to do this a different way, but
9238 that was somewhat fragile and broke with post-reload splitters. */
9239 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9240 output_asm_insn ("fld\t%y1", operands
);
9242 gcc_assert (STACK_TOP_P (operands
[1]));
9243 gcc_assert (MEM_P (operands
[0]));
9246 output_asm_insn ("fisttp%z0\t%0", operands
);
9249 if (round_mode
!= I387_CW_ANY
)
9250 output_asm_insn ("fldcw\t%3", operands
);
9251 if (stack_top_dies
|| dimode_p
)
9252 output_asm_insn ("fistp%z0\t%0", operands
);
9254 output_asm_insn ("fist%z0\t%0", operands
);
9255 if (round_mode
!= I387_CW_ANY
)
9256 output_asm_insn ("fldcw\t%2", operands
);
9262 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9263 have the values zero or one, indicates the ffreep insn's operand
9264 from the OPERANDS array. */
9267 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9269 if (TARGET_USE_FFREEP
)
9270 #if HAVE_AS_IX86_FFREEP
9271 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9274 static char retval
[] = ".word\t0xc_df";
9275 int regno
= REGNO (operands
[opno
]);
9277 gcc_assert (FP_REGNO_P (regno
));
9279 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9284 return opno
? "fstp\t%y1" : "fstp\t%y0";
9288 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9289 should be used. UNORDERED_P is true when fucom should be used. */
9292 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9295 rtx cmp_op0
, cmp_op1
;
9296 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9300 cmp_op0
= operands
[0];
9301 cmp_op1
= operands
[1];
9305 cmp_op0
= operands
[1];
9306 cmp_op1
= operands
[2];
9311 if (GET_MODE (operands
[0]) == SFmode
)
9313 return "ucomiss\t{%1, %0|%0, %1}";
9315 return "comiss\t{%1, %0|%0, %1}";
9318 return "ucomisd\t{%1, %0|%0, %1}";
9320 return "comisd\t{%1, %0|%0, %1}";
9323 gcc_assert (STACK_TOP_P (cmp_op0
));
9325 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9327 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9331 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9332 return output_387_ffreep (operands
, 1);
9335 return "ftst\n\tfnstsw\t%0";
9338 if (STACK_REG_P (cmp_op1
)
9340 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9341 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9343 /* If both the top of the 387 stack dies, and the other operand
9344 is also a stack register that dies, then this must be a
9345 `fcompp' float compare */
9349 /* There is no double popping fcomi variant. Fortunately,
9350 eflags is immune from the fstp's cc clobbering. */
9352 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9354 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9355 return output_387_ffreep (operands
, 0);
9360 return "fucompp\n\tfnstsw\t%0";
9362 return "fcompp\n\tfnstsw\t%0";
9367 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9369 static const char * const alt
[16] =
9371 "fcom%z2\t%y2\n\tfnstsw\t%0",
9372 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9373 "fucom%z2\t%y2\n\tfnstsw\t%0",
9374 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9376 "ficom%z2\t%y2\n\tfnstsw\t%0",
9377 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9381 "fcomi\t{%y1, %0|%0, %y1}",
9382 "fcomip\t{%y1, %0|%0, %y1}",
9383 "fucomi\t{%y1, %0|%0, %y1}",
9384 "fucomip\t{%y1, %0|%0, %y1}",
9395 mask
= eflags_p
<< 3;
9396 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9397 mask
|= unordered_p
<< 1;
9398 mask
|= stack_top_dies
;
9400 gcc_assert (mask
< 16);
9409 ix86_output_addr_vec_elt (FILE *file
, int value
)
9411 const char *directive
= ASM_LONG
;
9415 directive
= ASM_QUAD
;
9417 gcc_assert (!TARGET_64BIT
);
9420 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9424 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9426 const char *directive
= ASM_LONG
;
9429 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9430 directive
= ASM_QUAD
;
9432 gcc_assert (!TARGET_64BIT
);
9434 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9435 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9436 fprintf (file
, "%s%s%d-%s%d\n",
9437 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9438 else if (HAVE_AS_GOTOFF_IN_DATA
)
9439 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9441 else if (TARGET_MACHO
)
9443 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9444 machopic_output_function_base_name (file
);
9445 fprintf(file
, "\n");
9449 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9450 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9453 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9457 ix86_expand_clear (rtx dest
)
9461 /* We play register width games, which are only valid after reload. */
9462 gcc_assert (reload_completed
);
9464 /* Avoid HImode and its attendant prefix byte. */
9465 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9466 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9468 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9470 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9471 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9473 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9474 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9480 /* X is an unchanging MEM. If it is a constant pool reference, return
9481 the constant pool rtx, else NULL. */
9484 maybe_get_pool_constant (rtx x
)
9486 x
= ix86_delegitimize_address (XEXP (x
, 0));
9488 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9489 return get_pool_constant (x
);
9495 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9497 int strict
= (reload_in_progress
|| reload_completed
);
9499 enum tls_model model
;
9504 if (GET_CODE (op1
) == SYMBOL_REF
)
9506 model
= SYMBOL_REF_TLS_MODEL (op1
);
9509 op1
= legitimize_tls_address (op1
, model
, true);
9510 op1
= force_operand (op1
, op0
);
9515 else if (GET_CODE (op1
) == CONST
9516 && GET_CODE (XEXP (op1
, 0)) == PLUS
9517 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9519 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9522 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9523 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9524 op1
= force_operand (op1
, NULL
);
9525 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9526 op0
, 1, OPTAB_DIRECT
);
9532 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9534 if (TARGET_MACHO
&& !TARGET_64BIT
)
9539 rtx temp
= ((reload_in_progress
9540 || ((op0
&& REG_P (op0
))
9542 ? op0
: gen_reg_rtx (Pmode
));
9543 op1
= machopic_indirect_data_reference (op1
, temp
);
9544 op1
= machopic_legitimize_pic_address (op1
, mode
,
9545 temp
== op1
? 0 : temp
);
9547 else if (MACHOPIC_INDIRECT
)
9548 op1
= machopic_indirect_data_reference (op1
, 0);
9556 op1
= force_reg (Pmode
, op1
);
9557 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9559 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9560 op1
= legitimize_pic_address (op1
, reg
);
9569 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9570 || !push_operand (op0
, mode
))
9572 op1
= force_reg (mode
, op1
);
9574 if (push_operand (op0
, mode
)
9575 && ! general_no_elim_operand (op1
, mode
))
9576 op1
= copy_to_mode_reg (mode
, op1
);
9578 /* Force large constants in 64bit compilation into register
9579 to get them CSEed. */
9580 if (TARGET_64BIT
&& mode
== DImode
9581 && immediate_operand (op1
, mode
)
9582 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9583 && !register_operand (op0
, mode
)
9584 && optimize
&& !reload_completed
&& !reload_in_progress
)
9585 op1
= copy_to_mode_reg (mode
, op1
);
9587 if (FLOAT_MODE_P (mode
))
9589 /* If we are loading a floating point constant to a register,
9590 force the value to memory now, since we'll get better code
9591 out the back end. */
9595 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9597 op1
= validize_mem (force_const_mem (mode
, op1
));
9598 if (!register_operand (op0
, mode
))
9600 rtx temp
= gen_reg_rtx (mode
);
9601 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9602 emit_move_insn (op0
, temp
);
9609 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9613 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9615 rtx op0
= operands
[0], op1
= operands
[1];
9617 /* Force constants other than zero into memory. We do not know how
9618 the instructions used to build constants modify the upper 64 bits
9619 of the register, once we have that information we may be able
9620 to handle some of them more efficiently. */
9621 if ((reload_in_progress
| reload_completed
) == 0
9622 && register_operand (op0
, mode
)
9624 && standard_sse_constant_p (op1
) <= 0)
9625 op1
= validize_mem (force_const_mem (mode
, op1
));
9627 /* Make operand1 a register if it isn't already. */
9629 && !register_operand (op0
, mode
)
9630 && !register_operand (op1
, mode
))
9632 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9636 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9639 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9640 straight to ix86_expand_vector_move. */
9641 /* Code generation for scalar reg-reg moves of single and double precision data:
9642 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9646 if (x86_sse_partial_reg_dependency == true)
9651 Code generation for scalar loads of double precision data:
9652 if (x86_sse_split_regs == true)
9653 movlpd mem, reg (gas syntax)
9657 Code generation for unaligned packed loads of single precision data
9658 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9659 if (x86_sse_unaligned_move_optimal)
9662 if (x86_sse_partial_reg_dependency == true)
9674 Code generation for unaligned packed loads of double precision data
9675 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9676 if (x86_sse_unaligned_move_optimal)
9679 if (x86_sse_split_regs == true)
9692 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9701 /* If we're optimizing for size, movups is the smallest. */
9704 op0
= gen_lowpart (V4SFmode
, op0
);
9705 op1
= gen_lowpart (V4SFmode
, op1
);
9706 emit_insn (gen_sse_movups (op0
, op1
));
9710 /* ??? If we have typed data, then it would appear that using
9711 movdqu is the only way to get unaligned data loaded with
9713 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9715 op0
= gen_lowpart (V16QImode
, op0
);
9716 op1
= gen_lowpart (V16QImode
, op1
);
9717 emit_insn (gen_sse2_movdqu (op0
, op1
));
9721 if (TARGET_SSE2
&& mode
== V2DFmode
)
9725 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9727 op0
= gen_lowpart (V2DFmode
, op0
);
9728 op1
= gen_lowpart (V2DFmode
, op1
);
9729 emit_insn (gen_sse2_movupd (op0
, op1
));
9733 /* When SSE registers are split into halves, we can avoid
9734 writing to the top half twice. */
9735 if (TARGET_SSE_SPLIT_REGS
)
9737 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9742 /* ??? Not sure about the best option for the Intel chips.
9743 The following would seem to satisfy; the register is
9744 entirely cleared, breaking the dependency chain. We
9745 then store to the upper half, with a dependency depth
9746 of one. A rumor has it that Intel recommends two movsd
9747 followed by an unpacklpd, but this is unconfirmed. And
9748 given that the dependency depth of the unpacklpd would
9749 still be one, I'm not sure why this would be better. */
9750 zero
= CONST0_RTX (V2DFmode
);
9753 m
= adjust_address (op1
, DFmode
, 0);
9754 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9755 m
= adjust_address (op1
, DFmode
, 8);
9756 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9760 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9762 op0
= gen_lowpart (V4SFmode
, op0
);
9763 op1
= gen_lowpart (V4SFmode
, op1
);
9764 emit_insn (gen_sse_movups (op0
, op1
));
9768 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9769 emit_move_insn (op0
, CONST0_RTX (mode
));
9771 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9773 if (mode
!= V4SFmode
)
9774 op0
= gen_lowpart (V4SFmode
, op0
);
9775 m
= adjust_address (op1
, V2SFmode
, 0);
9776 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9777 m
= adjust_address (op1
, V2SFmode
, 8);
9778 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9781 else if (MEM_P (op0
))
9783 /* If we're optimizing for size, movups is the smallest. */
9786 op0
= gen_lowpart (V4SFmode
, op0
);
9787 op1
= gen_lowpart (V4SFmode
, op1
);
9788 emit_insn (gen_sse_movups (op0
, op1
));
9792 /* ??? Similar to above, only less clear because of quote
9793 typeless stores unquote. */
9794 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9795 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9797 op0
= gen_lowpart (V16QImode
, op0
);
9798 op1
= gen_lowpart (V16QImode
, op1
);
9799 emit_insn (gen_sse2_movdqu (op0
, op1
));
9803 if (TARGET_SSE2
&& mode
== V2DFmode
)
9805 m
= adjust_address (op0
, DFmode
, 0);
9806 emit_insn (gen_sse2_storelpd (m
, op1
));
9807 m
= adjust_address (op0
, DFmode
, 8);
9808 emit_insn (gen_sse2_storehpd (m
, op1
));
9812 if (mode
!= V4SFmode
)
9813 op1
= gen_lowpart (V4SFmode
, op1
);
9814 m
= adjust_address (op0
, V2SFmode
, 0);
9815 emit_insn (gen_sse_storelps (m
, op1
));
9816 m
= adjust_address (op0
, V2SFmode
, 8);
9817 emit_insn (gen_sse_storehps (m
, op1
));
9824 /* Expand a push in MODE. This is some mode for which we do not support
9825 proper push instructions, at least from the registers that we expect
9826 the value to live in. */
9829 ix86_expand_push (enum machine_mode mode
, rtx x
)
9833 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9834 GEN_INT (-GET_MODE_SIZE (mode
)),
9835 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9836 if (tmp
!= stack_pointer_rtx
)
9837 emit_move_insn (stack_pointer_rtx
, tmp
);
9839 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9840 emit_move_insn (tmp
, x
);
9843 /* Helper function of ix86_fixup_binary_operands to canonicalize
9844 operand order. Returns true if the operands should be swapped. */
9847 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9850 rtx dst
= operands
[0];
9851 rtx src1
= operands
[1];
9852 rtx src2
= operands
[2];
9854 /* If the operation is not commutative, we can't do anything. */
9855 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9858 /* Highest priority is that src1 should match dst. */
9859 if (rtx_equal_p (dst
, src1
))
9861 if (rtx_equal_p (dst
, src2
))
9864 /* Next highest priority is that immediate constants come second. */
9865 if (immediate_operand (src2
, mode
))
9867 if (immediate_operand (src1
, mode
))
9870 /* Lowest priority is that memory references should come second. */
9880 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9881 destination to use for the operation. If different from the true
9882 destination in operands[0], a copy operation will be required. */
9885 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9888 rtx dst
= operands
[0];
9889 rtx src1
= operands
[1];
9890 rtx src2
= operands
[2];
9892 /* Canonicalize operand order. */
9893 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9900 /* Both source operands cannot be in memory. */
9901 if (MEM_P (src1
) && MEM_P (src2
))
9903 /* Optimization: Only read from memory once. */
9904 if (rtx_equal_p (src1
, src2
))
9906 src2
= force_reg (mode
, src2
);
9910 src2
= force_reg (mode
, src2
);
9913 /* If the destination is memory, and we do not have matching source
9914 operands, do things in registers. */
9915 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9916 dst
= gen_reg_rtx (mode
);
9918 /* Source 1 cannot be a constant. */
9919 if (CONSTANT_P (src1
))
9920 src1
= force_reg (mode
, src1
);
9922 /* Source 1 cannot be a non-matching memory. */
9923 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9924 src1
= force_reg (mode
, src1
);
9931 /* Similarly, but assume that the destination has already been
9935 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9936 enum machine_mode mode
, rtx operands
[])
9938 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9939 gcc_assert (dst
== operands
[0]);
9942 /* Attempt to expand a binary operator. Make the expansion closer to the
9943 actual machine, then just general_operand, which will allow 3 separate
9944 memory references (one output, two input) in a single insn. */
9947 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9950 rtx src1
, src2
, dst
, op
, clob
;
9952 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9956 /* Emit the instruction. */
9958 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9959 if (reload_in_progress
)
9961 /* Reload doesn't know about the flags register, and doesn't know that
9962 it doesn't want to clobber it. We can only do this with PLUS. */
9963 gcc_assert (code
== PLUS
);
9968 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9969 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9972 /* Fix up the destination if needed. */
9973 if (dst
!= operands
[0])
9974 emit_move_insn (operands
[0], dst
);
9977 /* Return TRUE or FALSE depending on whether the binary operator meets the
9978 appropriate constraints. */
9981 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9984 rtx dst
= operands
[0];
9985 rtx src1
= operands
[1];
9986 rtx src2
= operands
[2];
9988 /* Both source operands cannot be in memory. */
9989 if (MEM_P (src1
) && MEM_P (src2
))
9992 /* Canonicalize operand order for commutative operators. */
9993 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10000 /* If the destination is memory, we must have a matching source operand. */
10001 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10004 /* Source 1 cannot be a constant. */
10005 if (CONSTANT_P (src1
))
10008 /* Source 1 cannot be a non-matching memory. */
10009 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10015 /* Attempt to expand a unary operator. Make the expansion closer to the
10016 actual machine, then just general_operand, which will allow 2 separate
10017 memory references (one output, one input) in a single insn. */
10020 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10023 int matching_memory
;
10024 rtx src
, dst
, op
, clob
;
10029 /* If the destination is memory, and we do not have matching source
10030 operands, do things in registers. */
10031 matching_memory
= 0;
10034 if (rtx_equal_p (dst
, src
))
10035 matching_memory
= 1;
10037 dst
= gen_reg_rtx (mode
);
10040 /* When source operand is memory, destination must match. */
10041 if (MEM_P (src
) && !matching_memory
)
10042 src
= force_reg (mode
, src
);
10044 /* Emit the instruction. */
10046 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10047 if (reload_in_progress
|| code
== NOT
)
10049 /* Reload doesn't know about the flags register, and doesn't know that
10050 it doesn't want to clobber it. */
10051 gcc_assert (code
== NOT
);
10056 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10057 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10060 /* Fix up the destination if needed. */
10061 if (dst
!= operands
[0])
10062 emit_move_insn (operands
[0], dst
);
10065 /* Return TRUE or FALSE depending on whether the unary operator meets the
10066 appropriate constraints. */
10069 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10070 enum machine_mode mode ATTRIBUTE_UNUSED
,
10071 rtx operands
[2] ATTRIBUTE_UNUSED
)
10073 /* If one of operands is memory, source and destination must match. */
10074 if ((MEM_P (operands
[0])
10075 || MEM_P (operands
[1]))
10076 && ! rtx_equal_p (operands
[0], operands
[1]))
10081 /* Post-reload splitter for converting an SF or DFmode value in an
10082 SSE register into an unsigned SImode. */
10085 ix86_split_convert_uns_si_sse (rtx operands
[])
10087 enum machine_mode vecmode
;
10088 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10090 large
= operands
[1];
10091 zero_or_two31
= operands
[2];
10092 input
= operands
[3];
10093 two31
= operands
[4];
10094 vecmode
= GET_MODE (large
);
10095 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10097 /* Load up the value into the low element. We must ensure that the other
10098 elements are valid floats -- zero is the easiest such value. */
10101 if (vecmode
== V4SFmode
)
10102 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10104 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10108 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10109 emit_move_insn (value
, CONST0_RTX (vecmode
));
10110 if (vecmode
== V4SFmode
)
10111 emit_insn (gen_sse_movss (value
, value
, input
));
10113 emit_insn (gen_sse2_movsd (value
, value
, input
));
10116 emit_move_insn (large
, two31
);
10117 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10119 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10120 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10122 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10123 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10125 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10126 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10128 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10129 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10131 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10132 if (vecmode
== V4SFmode
)
10133 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10135 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10138 emit_insn (gen_xorv4si3 (value
, value
, large
));
10141 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10142 Expects the 64-bit DImode to be supplied in a pair of integral
10143 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10144 -mfpmath=sse, !optimize_size only. */
10147 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10149 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10150 rtx int_xmm
, fp_xmm
;
10151 rtx biases
, exponents
;
10154 int_xmm
= gen_reg_rtx (V4SImode
);
10155 if (TARGET_INTER_UNIT_MOVES
)
10156 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10157 else if (TARGET_SSE_SPLIT_REGS
)
10159 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10160 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10164 x
= gen_reg_rtx (V2DImode
);
10165 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10166 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10169 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10170 gen_rtvec (4, GEN_INT (0x43300000UL
),
10171 GEN_INT (0x45300000UL
),
10172 const0_rtx
, const0_rtx
));
10173 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10175 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10176 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10178 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10179 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10180 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10181 (0x1.0p84 + double(fp_value_hi_xmm)).
10182 Note these exponents differ by 32. */
10184 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10186 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10187 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10188 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10189 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10190 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10191 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10192 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10193 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10194 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10196 /* Add the upper and lower DFmode values together. */
10198 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10201 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10202 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10203 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10206 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10209 /* Convert an unsigned SImode value into a DFmode. Only currently used
10210 for SSE, but applicable anywhere. */
10213 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10215 REAL_VALUE_TYPE TWO31r
;
10218 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10219 NULL
, 1, OPTAB_DIRECT
);
10221 fp
= gen_reg_rtx (DFmode
);
10222 emit_insn (gen_floatsidf2 (fp
, x
));
10224 real_ldexp (&TWO31r
, &dconst1
, 31);
10225 x
= const_double_from_real_value (TWO31r
, DFmode
);
10227 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10229 emit_move_insn (target
, x
);
10232 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10233 32-bit mode; otherwise we have a direct convert instruction. */
10236 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10238 REAL_VALUE_TYPE TWO32r
;
10239 rtx fp_lo
, fp_hi
, x
;
10241 fp_lo
= gen_reg_rtx (DFmode
);
10242 fp_hi
= gen_reg_rtx (DFmode
);
10244 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10246 real_ldexp (&TWO32r
, &dconst1
, 32);
10247 x
= const_double_from_real_value (TWO32r
, DFmode
);
10248 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10250 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10252 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10255 emit_move_insn (target
, x
);
10258 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10259 For x86_32, -mfpmath=sse, !optimize_size only. */
10261 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10263 REAL_VALUE_TYPE ONE16r
;
10264 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10266 real_ldexp (&ONE16r
, &dconst1
, 16);
10267 x
= const_double_from_real_value (ONE16r
, SFmode
);
10268 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10269 NULL
, 0, OPTAB_DIRECT
);
10270 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10271 NULL
, 0, OPTAB_DIRECT
);
10272 fp_hi
= gen_reg_rtx (SFmode
);
10273 fp_lo
= gen_reg_rtx (SFmode
);
10274 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10275 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10276 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10278 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10280 if (!rtx_equal_p (target
, fp_hi
))
10281 emit_move_insn (target
, fp_hi
);
10284 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10285 then replicate the value for all elements of the vector
10289 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10296 v
= gen_rtvec (4, value
, value
, value
, value
);
10298 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10299 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10300 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10304 v
= gen_rtvec (2, value
, value
);
10306 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10307 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10310 gcc_unreachable ();
10314 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10315 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10316 true, then replicate the mask for all elements of the vector register.
10317 If INVERT is true, then create a mask excluding the sign bit. */
10320 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10322 enum machine_mode vec_mode
;
10323 HOST_WIDE_INT hi
, lo
;
10328 /* Find the sign bit, sign extended to 2*HWI. */
10329 if (mode
== SFmode
)
10330 lo
= 0x80000000, hi
= lo
< 0;
10331 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10332 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10334 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10337 lo
= ~lo
, hi
= ~hi
;
10339 /* Force this value into the low part of a fp vector constant. */
10340 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10341 mask
= gen_lowpart (mode
, mask
);
10343 v
= ix86_build_const_vector (mode
, vect
, mask
);
10344 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10345 return force_reg (vec_mode
, v
);
10348 /* Generate code for floating point ABS or NEG. */
10351 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10354 rtx mask
, set
, use
, clob
, dst
, src
;
10355 bool matching_memory
;
10356 bool use_sse
= false;
10357 bool vector_mode
= VECTOR_MODE_P (mode
);
10358 enum machine_mode elt_mode
= mode
;
10362 elt_mode
= GET_MODE_INNER (mode
);
10365 else if (TARGET_SSE_MATH
)
10366 use_sse
= SSE_FLOAT_MODE_P (mode
);
10368 /* NEG and ABS performed with SSE use bitwise mask operations.
10369 Create the appropriate mask now. */
10371 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10378 /* If the destination is memory, and we don't have matching source
10379 operands or we're using the x87, do things in registers. */
10380 matching_memory
= false;
10383 if (use_sse
&& rtx_equal_p (dst
, src
))
10384 matching_memory
= true;
10386 dst
= gen_reg_rtx (mode
);
10388 if (MEM_P (src
) && !matching_memory
)
10389 src
= force_reg (mode
, src
);
10393 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10394 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10399 set
= gen_rtx_fmt_e (code
, mode
, src
);
10400 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10403 use
= gen_rtx_USE (VOIDmode
, mask
);
10404 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10405 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10406 gen_rtvec (3, set
, use
, clob
)));
10412 if (dst
!= operands
[0])
10413 emit_move_insn (operands
[0], dst
);
10416 /* Expand a copysign operation. Special case operand 0 being a constant. */
10419 ix86_expand_copysign (rtx operands
[])
10421 enum machine_mode mode
, vmode
;
10422 rtx dest
, op0
, op1
, mask
, nmask
;
10424 dest
= operands
[0];
10428 mode
= GET_MODE (dest
);
10429 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10431 if (GET_CODE (op0
) == CONST_DOUBLE
)
10435 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10436 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10438 if (op0
== CONST0_RTX (mode
))
10439 op0
= CONST0_RTX (vmode
);
10442 if (mode
== SFmode
)
10443 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10444 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10446 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10447 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10450 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10452 if (mode
== SFmode
)
10453 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10455 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10459 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10460 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10462 if (mode
== SFmode
)
10463 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10465 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10469 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10470 be a constant, and so has already been expanded into a vector constant. */
10473 ix86_split_copysign_const (rtx operands
[])
10475 enum machine_mode mode
, vmode
;
10476 rtx dest
, op0
, op1
, mask
, x
;
10478 dest
= operands
[0];
10481 mask
= operands
[3];
10483 mode
= GET_MODE (dest
);
10484 vmode
= GET_MODE (mask
);
10486 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10487 x
= gen_rtx_AND (vmode
, dest
, mask
);
10488 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10490 if (op0
!= CONST0_RTX (vmode
))
10492 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10493 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10497 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10498 so we have to do two masks. */
10501 ix86_split_copysign_var (rtx operands
[])
10503 enum machine_mode mode
, vmode
;
10504 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10506 dest
= operands
[0];
10507 scratch
= operands
[1];
10510 nmask
= operands
[4];
10511 mask
= operands
[5];
10513 mode
= GET_MODE (dest
);
10514 vmode
= GET_MODE (mask
);
10516 if (rtx_equal_p (op0
, op1
))
10518 /* Shouldn't happen often (it's useless, obviously), but when it does
10519 we'd generate incorrect code if we continue below. */
10520 emit_move_insn (dest
, op0
);
10524 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10526 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10528 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10529 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10532 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10533 x
= gen_rtx_NOT (vmode
, dest
);
10534 x
= gen_rtx_AND (vmode
, x
, op0
);
10535 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10539 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10541 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10543 else /* alternative 2,4 */
10545 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10546 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10547 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10549 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10551 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10553 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10554 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10556 else /* alternative 3,4 */
10558 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10560 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10561 x
= gen_rtx_AND (vmode
, dest
, op0
);
10563 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10566 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10567 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10570 /* Return TRUE or FALSE depending on whether the first SET in INSN
10571 has source and destination with matching CC modes, and that the
10572 CC mode is at least as constrained as REQ_MODE. */
10575 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10578 enum machine_mode set_mode
;
10580 set
= PATTERN (insn
);
10581 if (GET_CODE (set
) == PARALLEL
)
10582 set
= XVECEXP (set
, 0, 0);
10583 gcc_assert (GET_CODE (set
) == SET
);
10584 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10586 set_mode
= GET_MODE (SET_DEST (set
));
10590 if (req_mode
!= CCNOmode
10591 && (req_mode
!= CCmode
10592 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10596 if (req_mode
== CCGCmode
)
10600 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10604 if (req_mode
== CCZmode
)
10611 gcc_unreachable ();
10614 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10617 /* Generate insn patterns to do an integer compare of OPERANDS. */
10620 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10622 enum machine_mode cmpmode
;
10625 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10626 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10628 /* This is very simple, but making the interface the same as in the
10629 FP case makes the rest of the code easier. */
10630 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10631 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10633 /* Return the test that should be put into the flags user, i.e.
10634 the bcc, scc, or cmov instruction. */
10635 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10638 /* Figure out whether to use ordered or unordered fp comparisons.
10639 Return the appropriate mode to use. */
10642 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10644 /* ??? In order to make all comparisons reversible, we do all comparisons
10645 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10646 all forms trapping and nontrapping comparisons, we can make inequality
10647 comparisons trapping again, since it results in better code when using
10648 FCOM based compares. */
10649 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10653 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10655 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10656 return ix86_fp_compare_mode (code
);
10659 /* Only zero flag is needed. */
10660 case EQ
: /* ZF=0 */
10661 case NE
: /* ZF!=0 */
10663 /* Codes needing carry flag. */
10664 case GEU
: /* CF=0 */
10665 case GTU
: /* CF=0 & ZF=0 */
10666 case LTU
: /* CF=1 */
10667 case LEU
: /* CF=1 | ZF=1 */
10669 /* Codes possibly doable only with sign flag when
10670 comparing against zero. */
10671 case GE
: /* SF=OF or SF=0 */
10672 case LT
: /* SF<>OF or SF=1 */
10673 if (op1
== const0_rtx
)
10676 /* For other cases Carry flag is not required. */
10678 /* Codes doable only with sign flag when comparing
10679 against zero, but we miss jump instruction for it
10680 so we need to use relational tests against overflow
10681 that thus needs to be zero. */
10682 case GT
: /* ZF=0 & SF=OF */
10683 case LE
: /* ZF=1 | SF<>OF */
10684 if (op1
== const0_rtx
)
10688 /* strcmp pattern do (use flags) and combine may ask us for proper
10693 gcc_unreachable ();
10697 /* Return the fixed registers used for condition codes. */
10700 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10707 /* If two condition code modes are compatible, return a condition code
10708 mode which is compatible with both. Otherwise, return
10711 static enum machine_mode
10712 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10717 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10720 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10721 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10727 gcc_unreachable ();
10749 /* These are only compatible with themselves, which we already
10755 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10758 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10760 enum rtx_code swapped_code
= swap_condition (code
);
10761 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10762 || (ix86_fp_comparison_cost (swapped_code
)
10763 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10766 /* Swap, force into registers, or otherwise massage the two operands
10767 to a fp comparison. The operands are updated in place; the new
10768 comparison code is returned. */
10770 static enum rtx_code
10771 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10773 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10774 rtx op0
= *pop0
, op1
= *pop1
;
10775 enum machine_mode op_mode
= GET_MODE (op0
);
10776 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10778 /* All of the unordered compare instructions only work on registers.
10779 The same is true of the fcomi compare instructions. The XFmode
10780 compare instructions require registers except when comparing
10781 against zero or when converting operand 1 from fixed point to
10785 && (fpcmp_mode
== CCFPUmode
10786 || (op_mode
== XFmode
10787 && ! (standard_80387_constant_p (op0
) == 1
10788 || standard_80387_constant_p (op1
) == 1)
10789 && GET_CODE (op1
) != FLOAT
)
10790 || ix86_use_fcomi_compare (code
)))
10792 op0
= force_reg (op_mode
, op0
);
10793 op1
= force_reg (op_mode
, op1
);
10797 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10798 things around if they appear profitable, otherwise force op0
10799 into a register. */
10801 if (standard_80387_constant_p (op0
) == 0
10803 && ! (standard_80387_constant_p (op1
) == 0
10807 tmp
= op0
, op0
= op1
, op1
= tmp
;
10808 code
= swap_condition (code
);
10812 op0
= force_reg (op_mode
, op0
);
10814 if (CONSTANT_P (op1
))
10816 int tmp
= standard_80387_constant_p (op1
);
10818 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10822 op1
= force_reg (op_mode
, op1
);
10825 op1
= force_reg (op_mode
, op1
);
10829 /* Try to rearrange the comparison to make it cheaper. */
10830 if (ix86_fp_comparison_cost (code
)
10831 > ix86_fp_comparison_cost (swap_condition (code
))
10832 && (REG_P (op1
) || !no_new_pseudos
))
10835 tmp
= op0
, op0
= op1
, op1
= tmp
;
10836 code
= swap_condition (code
);
10838 op0
= force_reg (op_mode
, op0
);
10846 /* Convert comparison codes we use to represent FP comparison to integer
10847 code that will result in proper branch. Return UNKNOWN if no such code
10851 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10880 /* Split comparison code CODE into comparisons we can do using branch
10881 instructions. BYPASS_CODE is comparison code for branch that will
10882 branch around FIRST_CODE and SECOND_CODE. If some of branches
10883 is not required, set value to UNKNOWN.
10884 We never require more than two branches. */
10887 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10888 enum rtx_code
*first_code
,
10889 enum rtx_code
*second_code
)
10891 *first_code
= code
;
10892 *bypass_code
= UNKNOWN
;
10893 *second_code
= UNKNOWN
;
10895 /* The fcomi comparison sets flags as follows:
10905 case GT
: /* GTU - CF=0 & ZF=0 */
10906 case GE
: /* GEU - CF=0 */
10907 case ORDERED
: /* PF=0 */
10908 case UNORDERED
: /* PF=1 */
10909 case UNEQ
: /* EQ - ZF=1 */
10910 case UNLT
: /* LTU - CF=1 */
10911 case UNLE
: /* LEU - CF=1 | ZF=1 */
10912 case LTGT
: /* EQ - ZF=0 */
10914 case LT
: /* LTU - CF=1 - fails on unordered */
10915 *first_code
= UNLT
;
10916 *bypass_code
= UNORDERED
;
10918 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10919 *first_code
= UNLE
;
10920 *bypass_code
= UNORDERED
;
10922 case EQ
: /* EQ - ZF=1 - fails on unordered */
10923 *first_code
= UNEQ
;
10924 *bypass_code
= UNORDERED
;
10926 case NE
: /* NE - ZF=0 - fails on unordered */
10927 *first_code
= LTGT
;
10928 *second_code
= UNORDERED
;
10930 case UNGE
: /* GEU - CF=0 - fails on unordered */
10932 *second_code
= UNORDERED
;
10934 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10936 *second_code
= UNORDERED
;
10939 gcc_unreachable ();
10941 if (!TARGET_IEEE_FP
)
10943 *second_code
= UNKNOWN
;
10944 *bypass_code
= UNKNOWN
;
10948 /* Return cost of comparison done fcom + arithmetics operations on AX.
10949 All following functions do use number of instructions as a cost metrics.
10950 In future this should be tweaked to compute bytes for optimize_size and
10951 take into account performance of various instructions on various CPUs. */
10953 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10955 if (!TARGET_IEEE_FP
)
10957 /* The cost of code output by ix86_expand_fp_compare. */
10981 gcc_unreachable ();
10985 /* Return cost of comparison done using fcomi operation.
10986 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10988 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10990 enum rtx_code bypass_code
, first_code
, second_code
;
10991 /* Return arbitrarily high cost when instruction is not supported - this
10992 prevents gcc from using it. */
10995 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10996 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10999 /* Return cost of comparison done using sahf operation.
11000 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11002 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
11004 enum rtx_code bypass_code
, first_code
, second_code
;
11005 /* Return arbitrarily high cost when instruction is not preferred - this
11006 avoids gcc from using it. */
11007 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11009 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11010 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11013 /* Compute cost of the comparison done using any method.
11014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11016 ix86_fp_comparison_cost (enum rtx_code code
)
11018 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11021 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11022 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11024 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11025 if (min
> sahf_cost
)
11027 if (min
> fcomi_cost
)
11032 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11035 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11036 rtx
*second_test
, rtx
*bypass_test
)
11038 enum machine_mode fpcmp_mode
, intcmp_mode
;
11040 int cost
= ix86_fp_comparison_cost (code
);
11041 enum rtx_code bypass_code
, first_code
, second_code
;
11043 fpcmp_mode
= ix86_fp_compare_mode (code
);
11044 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11047 *second_test
= NULL_RTX
;
11049 *bypass_test
= NULL_RTX
;
11051 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11053 /* Do fcomi/sahf based test when profitable. */
11054 if ((TARGET_CMOVE
|| TARGET_SAHF
)
11055 && (bypass_code
== UNKNOWN
|| bypass_test
)
11056 && (second_code
== UNKNOWN
|| second_test
)
11057 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
11061 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11062 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11068 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11069 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11071 scratch
= gen_reg_rtx (HImode
);
11072 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11073 emit_insn (gen_x86_sahf_1 (scratch
));
11076 /* The FP codes work out to act like unsigned. */
11077 intcmp_mode
= fpcmp_mode
;
11079 if (bypass_code
!= UNKNOWN
)
11080 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11081 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11083 if (second_code
!= UNKNOWN
)
11084 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11085 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11090 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11091 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11092 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11094 scratch
= gen_reg_rtx (HImode
);
11095 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11097 /* In the unordered case, we have to check C2 for NaN's, which
11098 doesn't happen to work out to anything nice combination-wise.
11099 So do some bit twiddling on the value we've got in AH to come
11100 up with an appropriate set of condition codes. */
11102 intcmp_mode
= CCNOmode
;
11107 if (code
== GT
|| !TARGET_IEEE_FP
)
11109 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11114 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11115 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11116 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11117 intcmp_mode
= CCmode
;
11123 if (code
== LT
&& TARGET_IEEE_FP
)
11125 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11126 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11127 intcmp_mode
= CCmode
;
11132 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11138 if (code
== GE
|| !TARGET_IEEE_FP
)
11140 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11145 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11146 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11153 if (code
== LE
&& TARGET_IEEE_FP
)
11155 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11156 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11157 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11158 intcmp_mode
= CCmode
;
11163 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11169 if (code
== EQ
&& TARGET_IEEE_FP
)
11171 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11172 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11173 intcmp_mode
= CCmode
;
11178 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11185 if (code
== NE
&& TARGET_IEEE_FP
)
11187 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11188 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11194 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11200 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11204 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11209 gcc_unreachable ();
11213 /* Return the test that should be put into the flags user, i.e.
11214 the bcc, scc, or cmov instruction. */
11215 return gen_rtx_fmt_ee (code
, VOIDmode
,
11216 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11221 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11224 op0
= ix86_compare_op0
;
11225 op1
= ix86_compare_op1
;
11228 *second_test
= NULL_RTX
;
11230 *bypass_test
= NULL_RTX
;
11232 if (ix86_compare_emitted
)
11234 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11235 ix86_compare_emitted
= NULL_RTX
;
11237 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11238 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11239 second_test
, bypass_test
);
11241 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11246 /* Return true if the CODE will result in nontrivial jump sequence. */
11248 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11250 enum rtx_code bypass_code
, first_code
, second_code
;
11253 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11254 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11258 ix86_expand_branch (enum rtx_code code
, rtx label
)
11262 /* If we have emitted a compare insn, go straight to simple.
11263 ix86_expand_compare won't emit anything if ix86_compare_emitted
11265 if (ix86_compare_emitted
)
11268 switch (GET_MODE (ix86_compare_op0
))
11274 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11275 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11276 gen_rtx_LABEL_REF (VOIDmode
, label
),
11278 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11287 enum rtx_code bypass_code
, first_code
, second_code
;
11289 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11290 &ix86_compare_op1
);
11292 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11294 /* Check whether we will use the natural sequence with one jump. If
11295 so, we can expand jump early. Otherwise delay expansion by
11296 creating compound insn to not confuse optimizers. */
11297 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11300 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11301 gen_rtx_LABEL_REF (VOIDmode
, label
),
11302 pc_rtx
, NULL_RTX
, NULL_RTX
);
11306 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11307 ix86_compare_op0
, ix86_compare_op1
);
11308 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11309 gen_rtx_LABEL_REF (VOIDmode
, label
),
11311 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11313 use_fcomi
= ix86_use_fcomi_compare (code
);
11314 vec
= rtvec_alloc (3 + !use_fcomi
);
11315 RTVEC_ELT (vec
, 0) = tmp
;
11317 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11319 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11322 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11324 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11333 /* Expand DImode branch into multiple compare+branch. */
11335 rtx lo
[2], hi
[2], label2
;
11336 enum rtx_code code1
, code2
, code3
;
11337 enum machine_mode submode
;
11339 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11341 tmp
= ix86_compare_op0
;
11342 ix86_compare_op0
= ix86_compare_op1
;
11343 ix86_compare_op1
= tmp
;
11344 code
= swap_condition (code
);
11346 if (GET_MODE (ix86_compare_op0
) == DImode
)
11348 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11349 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11354 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11355 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11359 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11360 avoid two branches. This costs one extra insn, so disable when
11361 optimizing for size. */
11363 if ((code
== EQ
|| code
== NE
)
11365 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11370 if (hi
[1] != const0_rtx
)
11371 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11372 NULL_RTX
, 0, OPTAB_WIDEN
);
11375 if (lo
[1] != const0_rtx
)
11376 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11377 NULL_RTX
, 0, OPTAB_WIDEN
);
11379 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11380 NULL_RTX
, 0, OPTAB_WIDEN
);
11382 ix86_compare_op0
= tmp
;
11383 ix86_compare_op1
= const0_rtx
;
11384 ix86_expand_branch (code
, label
);
11388 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11389 op1 is a constant and the low word is zero, then we can just
11390 examine the high word. */
11392 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11395 case LT
: case LTU
: case GE
: case GEU
:
11396 ix86_compare_op0
= hi
[0];
11397 ix86_compare_op1
= hi
[1];
11398 ix86_expand_branch (code
, label
);
11404 /* Otherwise, we need two or three jumps. */
11406 label2
= gen_label_rtx ();
11409 code2
= swap_condition (code
);
11410 code3
= unsigned_condition (code
);
11414 case LT
: case GT
: case LTU
: case GTU
:
11417 case LE
: code1
= LT
; code2
= GT
; break;
11418 case GE
: code1
= GT
; code2
= LT
; break;
11419 case LEU
: code1
= LTU
; code2
= GTU
; break;
11420 case GEU
: code1
= GTU
; code2
= LTU
; break;
11422 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11423 case NE
: code2
= UNKNOWN
; break;
11426 gcc_unreachable ();
11431 * if (hi(a) < hi(b)) goto true;
11432 * if (hi(a) > hi(b)) goto false;
11433 * if (lo(a) < lo(b)) goto true;
11437 ix86_compare_op0
= hi
[0];
11438 ix86_compare_op1
= hi
[1];
11440 if (code1
!= UNKNOWN
)
11441 ix86_expand_branch (code1
, label
);
11442 if (code2
!= UNKNOWN
)
11443 ix86_expand_branch (code2
, label2
);
11445 ix86_compare_op0
= lo
[0];
11446 ix86_compare_op1
= lo
[1];
11447 ix86_expand_branch (code3
, label
);
11449 if (code2
!= UNKNOWN
)
11450 emit_label (label2
);
11455 gcc_unreachable ();
11459 /* Split branch based on floating point condition. */
11461 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11462 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11464 rtx second
, bypass
;
11465 rtx label
= NULL_RTX
;
11467 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11470 if (target2
!= pc_rtx
)
11473 code
= reverse_condition_maybe_unordered (code
);
11478 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11479 tmp
, &second
, &bypass
);
11481 /* Remove pushed operand from stack. */
11483 ix86_free_from_memory (GET_MODE (pushed
));
11485 if (split_branch_probability
>= 0)
11487 /* Distribute the probabilities across the jumps.
11488 Assume the BYPASS and SECOND to be always test
11490 probability
= split_branch_probability
;
11492 /* Value of 1 is low enough to make no need for probability
11493 to be updated. Later we may run some experiments and see
11494 if unordered values are more frequent in practice. */
11496 bypass_probability
= 1;
11498 second_probability
= 1;
11500 if (bypass
!= NULL_RTX
)
11502 label
= gen_label_rtx ();
11503 i
= emit_jump_insn (gen_rtx_SET
11505 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11507 gen_rtx_LABEL_REF (VOIDmode
,
11510 if (bypass_probability
>= 0)
11512 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11513 GEN_INT (bypass_probability
),
11516 i
= emit_jump_insn (gen_rtx_SET
11518 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11519 condition
, target1
, target2
)));
11520 if (probability
>= 0)
11522 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11523 GEN_INT (probability
),
11525 if (second
!= NULL_RTX
)
11527 i
= emit_jump_insn (gen_rtx_SET
11529 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11531 if (second_probability
>= 0)
11533 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11534 GEN_INT (second_probability
),
11537 if (label
!= NULL_RTX
)
11538 emit_label (label
);
11542 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11544 rtx ret
, tmp
, tmpreg
, equiv
;
11545 rtx second_test
, bypass_test
;
11547 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11548 return 0; /* FAIL */
11550 gcc_assert (GET_MODE (dest
) == QImode
);
11552 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11553 PUT_MODE (ret
, QImode
);
11558 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11559 if (bypass_test
|| second_test
)
11561 rtx test
= second_test
;
11563 rtx tmp2
= gen_reg_rtx (QImode
);
11566 gcc_assert (!second_test
);
11567 test
= bypass_test
;
11569 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11571 PUT_MODE (test
, QImode
);
11572 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11575 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11577 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11580 /* Attach a REG_EQUAL note describing the comparison result. */
11581 if (ix86_compare_op0
&& ix86_compare_op1
)
11583 equiv
= simplify_gen_relational (code
, QImode
,
11584 GET_MODE (ix86_compare_op0
),
11585 ix86_compare_op0
, ix86_compare_op1
);
11586 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11589 return 1; /* DONE */
11592 /* Expand comparison setting or clearing carry flag. Return true when
11593 successful and set pop for the operation. */
11595 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11597 enum machine_mode mode
=
11598 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11600 /* Do not handle DImode compares that go through special path. Also we can't
11601 deal with FP compares yet. This is possible to add. */
11602 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11604 if (FLOAT_MODE_P (mode
))
11606 rtx second_test
= NULL
, bypass_test
= NULL
;
11607 rtx compare_op
, compare_seq
;
11609 /* Shortcut: following common codes never translate into carry flag compares. */
11610 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11611 || code
== ORDERED
|| code
== UNORDERED
)
11614 /* These comparisons require zero flag; swap operands so they won't. */
11615 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11616 && !TARGET_IEEE_FP
)
11621 code
= swap_condition (code
);
11624 /* Try to expand the comparison and verify that we end up with carry flag
11625 based comparison. This is fails to be true only when we decide to expand
11626 comparison using arithmetic that is not too common scenario. */
11628 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11629 &second_test
, &bypass_test
);
11630 compare_seq
= get_insns ();
11633 if (second_test
|| bypass_test
)
11635 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11636 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11637 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11639 code
= GET_CODE (compare_op
);
11640 if (code
!= LTU
&& code
!= GEU
)
11642 emit_insn (compare_seq
);
11646 if (!INTEGRAL_MODE_P (mode
))
11654 /* Convert a==0 into (unsigned)a<1. */
11657 if (op1
!= const0_rtx
)
11660 code
= (code
== EQ
? LTU
: GEU
);
11663 /* Convert a>b into b<a or a>=b-1. */
11666 if (CONST_INT_P (op1
))
11668 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11669 /* Bail out on overflow. We still can swap operands but that
11670 would force loading of the constant into register. */
11671 if (op1
== const0_rtx
11672 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11674 code
= (code
== GTU
? GEU
: LTU
);
11681 code
= (code
== GTU
? LTU
: GEU
);
11685 /* Convert a>=0 into (unsigned)a<0x80000000. */
11688 if (mode
== DImode
|| op1
!= const0_rtx
)
11690 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11691 code
= (code
== LT
? GEU
: LTU
);
11695 if (mode
== DImode
|| op1
!= constm1_rtx
)
11697 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11698 code
= (code
== LE
? GEU
: LTU
);
11704 /* Swapping operands may cause constant to appear as first operand. */
11705 if (!nonimmediate_operand (op0
, VOIDmode
))
11707 if (no_new_pseudos
)
11709 op0
= force_reg (mode
, op0
);
11711 ix86_compare_op0
= op0
;
11712 ix86_compare_op1
= op1
;
11713 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11714 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11719 ix86_expand_int_movcc (rtx operands
[])
11721 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11722 rtx compare_seq
, compare_op
;
11723 rtx second_test
, bypass_test
;
11724 enum machine_mode mode
= GET_MODE (operands
[0]);
11725 bool sign_bit_compare_p
= false;;
11728 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11729 compare_seq
= get_insns ();
11732 compare_code
= GET_CODE (compare_op
);
11734 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11735 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11736 sign_bit_compare_p
= true;
11738 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11739 HImode insns, we'd be swallowed in word prefix ops. */
11741 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11742 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11743 && CONST_INT_P (operands
[2])
11744 && CONST_INT_P (operands
[3]))
11746 rtx out
= operands
[0];
11747 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11748 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11749 HOST_WIDE_INT diff
;
11752 /* Sign bit compares are better done using shifts than we do by using
11754 if (sign_bit_compare_p
11755 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11756 ix86_compare_op1
, &compare_op
))
11758 /* Detect overlap between destination and compare sources. */
11761 if (!sign_bit_compare_p
)
11763 bool fpcmp
= false;
11765 compare_code
= GET_CODE (compare_op
);
11767 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11768 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11771 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11774 /* To simplify rest of code, restrict to the GEU case. */
11775 if (compare_code
== LTU
)
11777 HOST_WIDE_INT tmp
= ct
;
11780 compare_code
= reverse_condition (compare_code
);
11781 code
= reverse_condition (code
);
11786 PUT_CODE (compare_op
,
11787 reverse_condition_maybe_unordered
11788 (GET_CODE (compare_op
)));
11790 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11794 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11795 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11796 tmp
= gen_reg_rtx (mode
);
11798 if (mode
== DImode
)
11799 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11801 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11805 if (code
== GT
|| code
== GE
)
11806 code
= reverse_condition (code
);
11809 HOST_WIDE_INT tmp
= ct
;
11814 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11815 ix86_compare_op1
, VOIDmode
, 0, -1);
11828 tmp
= expand_simple_binop (mode
, PLUS
,
11830 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11841 tmp
= expand_simple_binop (mode
, IOR
,
11843 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11845 else if (diff
== -1 && ct
)
11855 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11857 tmp
= expand_simple_binop (mode
, PLUS
,
11858 copy_rtx (tmp
), GEN_INT (cf
),
11859 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11867 * andl cf - ct, dest
11877 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11880 tmp
= expand_simple_binop (mode
, AND
,
11882 gen_int_mode (cf
- ct
, mode
),
11883 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11885 tmp
= expand_simple_binop (mode
, PLUS
,
11886 copy_rtx (tmp
), GEN_INT (ct
),
11887 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11890 if (!rtx_equal_p (tmp
, out
))
11891 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11893 return 1; /* DONE */
11899 tmp
= ct
, ct
= cf
, cf
= tmp
;
11901 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11903 /* We may be reversing unordered compare to normal compare, that
11904 is not valid in general (we may convert non-trapping condition
11905 to trapping one), however on i386 we currently emit all
11906 comparisons unordered. */
11907 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11908 code
= reverse_condition_maybe_unordered (code
);
11912 compare_code
= reverse_condition (compare_code
);
11913 code
= reverse_condition (code
);
11917 compare_code
= UNKNOWN
;
11918 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11919 && CONST_INT_P (ix86_compare_op1
))
11921 if (ix86_compare_op1
== const0_rtx
11922 && (code
== LT
|| code
== GE
))
11923 compare_code
= code
;
11924 else if (ix86_compare_op1
== constm1_rtx
)
11928 else if (code
== GT
)
11933 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11934 if (compare_code
!= UNKNOWN
11935 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11936 && (cf
== -1 || ct
== -1))
11938 /* If lea code below could be used, only optimize
11939 if it results in a 2 insn sequence. */
11941 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11942 || diff
== 3 || diff
== 5 || diff
== 9)
11943 || (compare_code
== LT
&& ct
== -1)
11944 || (compare_code
== GE
&& cf
== -1))
11947 * notl op1 (if necessary)
11955 code
= reverse_condition (code
);
11958 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11959 ix86_compare_op1
, VOIDmode
, 0, -1);
11961 out
= expand_simple_binop (mode
, IOR
,
11963 out
, 1, OPTAB_DIRECT
);
11964 if (out
!= operands
[0])
11965 emit_move_insn (operands
[0], out
);
11967 return 1; /* DONE */
11972 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11973 || diff
== 3 || diff
== 5 || diff
== 9)
11974 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11976 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11982 * lea cf(dest*(ct-cf)),dest
11986 * This also catches the degenerate setcc-only case.
11992 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11993 ix86_compare_op1
, VOIDmode
, 0, 1);
11996 /* On x86_64 the lea instruction operates on Pmode, so we need
11997 to get arithmetics done in proper mode to match. */
11999 tmp
= copy_rtx (out
);
12003 out1
= copy_rtx (out
);
12004 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12008 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12014 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12017 if (!rtx_equal_p (tmp
, out
))
12020 out
= force_operand (tmp
, copy_rtx (out
));
12022 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12024 if (!rtx_equal_p (out
, operands
[0]))
12025 emit_move_insn (operands
[0], copy_rtx (out
));
12027 return 1; /* DONE */
12031 * General case: Jumpful:
12032 * xorl dest,dest cmpl op1, op2
12033 * cmpl op1, op2 movl ct, dest
12034 * setcc dest jcc 1f
12035 * decl dest movl cf, dest
12036 * andl (cf-ct),dest 1:
12039 * Size 20. Size 14.
12041 * This is reasonably steep, but branch mispredict costs are
12042 * high on modern cpus, so consider failing only if optimizing
12046 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12047 && BRANCH_COST
>= 2)
12053 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
12054 /* We may be reversing unordered compare to normal compare,
12055 that is not valid in general (we may convert non-trapping
12056 condition to trapping one), however on i386 we currently
12057 emit all comparisons unordered. */
12058 code
= reverse_condition_maybe_unordered (code
);
12061 code
= reverse_condition (code
);
12062 if (compare_code
!= UNKNOWN
)
12063 compare_code
= reverse_condition (compare_code
);
12067 if (compare_code
!= UNKNOWN
)
12069 /* notl op1 (if needed)
12074 For x < 0 (resp. x <= -1) there will be no notl,
12075 so if possible swap the constants to get rid of the
12077 True/false will be -1/0 while code below (store flag
12078 followed by decrement) is 0/-1, so the constants need
12079 to be exchanged once more. */
12081 if (compare_code
== GE
|| !cf
)
12083 code
= reverse_condition (code
);
12088 HOST_WIDE_INT tmp
= cf
;
12093 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12094 ix86_compare_op1
, VOIDmode
, 0, -1);
12098 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12099 ix86_compare_op1
, VOIDmode
, 0, 1);
12101 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12102 copy_rtx (out
), 1, OPTAB_DIRECT
);
12105 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12106 gen_int_mode (cf
- ct
, mode
),
12107 copy_rtx (out
), 1, OPTAB_DIRECT
);
12109 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12110 copy_rtx (out
), 1, OPTAB_DIRECT
);
12111 if (!rtx_equal_p (out
, operands
[0]))
12112 emit_move_insn (operands
[0], copy_rtx (out
));
12114 return 1; /* DONE */
12118 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12120 /* Try a few things more with specific constants and a variable. */
12123 rtx var
, orig_out
, out
, tmp
;
12125 if (BRANCH_COST
<= 2)
12126 return 0; /* FAIL */
12128 /* If one of the two operands is an interesting constant, load a
12129 constant with the above and mask it in with a logical operation. */
12131 if (CONST_INT_P (operands
[2]))
12134 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12135 operands
[3] = constm1_rtx
, op
= and_optab
;
12136 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12137 operands
[3] = const0_rtx
, op
= ior_optab
;
12139 return 0; /* FAIL */
12141 else if (CONST_INT_P (operands
[3]))
12144 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12145 operands
[2] = constm1_rtx
, op
= and_optab
;
12146 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12147 operands
[2] = const0_rtx
, op
= ior_optab
;
12149 return 0; /* FAIL */
12152 return 0; /* FAIL */
12154 orig_out
= operands
[0];
12155 tmp
= gen_reg_rtx (mode
);
12158 /* Recurse to get the constant loaded. */
12159 if (ix86_expand_int_movcc (operands
) == 0)
12160 return 0; /* FAIL */
12162 /* Mask in the interesting variable. */
12163 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12165 if (!rtx_equal_p (out
, orig_out
))
12166 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12168 return 1; /* DONE */
12172 * For comparison with above,
12182 if (! nonimmediate_operand (operands
[2], mode
))
12183 operands
[2] = force_reg (mode
, operands
[2]);
12184 if (! nonimmediate_operand (operands
[3], mode
))
12185 operands
[3] = force_reg (mode
, operands
[3]);
12187 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12189 rtx tmp
= gen_reg_rtx (mode
);
12190 emit_move_insn (tmp
, operands
[3]);
12193 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12195 rtx tmp
= gen_reg_rtx (mode
);
12196 emit_move_insn (tmp
, operands
[2]);
12200 if (! register_operand (operands
[2], VOIDmode
)
12202 || ! register_operand (operands
[3], VOIDmode
)))
12203 operands
[2] = force_reg (mode
, operands
[2]);
12206 && ! register_operand (operands
[3], VOIDmode
))
12207 operands
[3] = force_reg (mode
, operands
[3]);
12209 emit_insn (compare_seq
);
12210 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12211 gen_rtx_IF_THEN_ELSE (mode
,
12212 compare_op
, operands
[2],
12215 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12216 gen_rtx_IF_THEN_ELSE (mode
,
12218 copy_rtx (operands
[3]),
12219 copy_rtx (operands
[0]))));
12221 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12222 gen_rtx_IF_THEN_ELSE (mode
,
12224 copy_rtx (operands
[2]),
12225 copy_rtx (operands
[0]))));
12227 return 1; /* DONE */
12230 /* Swap, force into registers, or otherwise massage the two operands
12231 to an sse comparison with a mask result. Thus we differ a bit from
12232 ix86_prepare_fp_compare_args which expects to produce a flags result.
12234 The DEST operand exists to help determine whether to commute commutative
12235 operators. The POP0/POP1 operands are updated in place. The new
12236 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): extraction-damaged fragment -- each original source line is
   split across several physical lines and many interior lines (switch
   arms, braces) are missing; the embedded decimal tokens ("12238" etc.)
   are remnants of the original file's line numbers.  Restore from a
   pristine i386.c before editing.
   From the visible text this appears to canonicalize the operands of an
   SSE floating-point comparison (possibly swapping them via
   swap_condition) and return the adjusted comparison code, with
   unsupported codes (the LTGT comment below) presumably yielding
   UNKNOWN -- TODO confirm against the original source.  */
12238 static enum rtx_code
12239 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12240 rtx
*pop0
, rtx
*pop1
)
12248 /* We have no LTGT as an operator. We could implement it with
12249 NE & ORDERED, but this requires an extra temporary. It's
12250 not clear that it's worth it. */
12257 /* These are supported directly. */
12264 /* For commutative operators, try to canonicalize the destination
12265 operand to be first in the comparison - this helps reload to
12266 avoid extra moves. */
12267 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12275 /* These are not supported directly. Swap the comparison operands
12276 to transform into something that is supported. */
12280 code
= swap_condition (code
);
12284 gcc_unreachable ();
12290 /* Detect conditional moves that exactly match min/max operational
12291 semantics. Note that this is IEEE safe, as long as we don't
12292 interchange the operands.
12294 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12295 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): extraction-damaged fragment -- single source lines are
   split across several physical lines and many interior lines are
   missing; the decimal tokens ("12298" etc.) are original line-number
   remnants.  Restore from a pristine i386.c before editing.
   Visible code suggests: detect a conditional move matching min/max
   semantics and emit it; when finite-math/unsafe-math are not both
   enabled it wraps the operands in UNSPEC_IEEE_MIN/UNSPEC_IEEE_MAX,
   otherwise it emits a plain SMIN/SMAX rtx -- the return-type line and
   several branches are not visible here.  */
12298 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12299 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12301 enum machine_mode mode
;
12307 else if (code
== UNGE
)
12310 if_true
= if_false
;
12316 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12318 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12323 mode
= GET_MODE (dest
);
12325 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12326 but MODE may be a vector mode and thus not appropriate. */
12327 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12329 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12332 if_true
= force_reg (mode
, if_true
);
12333 v
= gen_rtvec (2, if_true
, if_false
);
12334 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12338 code
= is_min
? SMIN
: SMAX
;
12339 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12342 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12346 /* Expand an sse vector comparison. Return the register with the result. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: emit an SSE vector comparison
   DEST = CMP_OP0 <code> CMP_OP1, forcing operands into registers,
   allocating a fresh destination register when DEST overlaps an operand,
   and presumably returning the result register (return statement not
   visible) -- TODO confirm.  */
12349 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12350 rtx op_true
, rtx op_false
)
12352 enum machine_mode mode
= GET_MODE (dest
);
12355 cmp_op0
= force_reg (mode
, cmp_op0
);
12356 if (!nonimmediate_operand (cmp_op1
, mode
))
12357 cmp_op1
= force_reg (mode
, cmp_op1
);
12360 || reg_overlap_mentioned_p (dest
, op_true
)
12361 || reg_overlap_mentioned_p (dest
, op_false
))
12362 dest
= gen_reg_rtx (mode
);
12364 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12365 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12370 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12371 operations. This is used for both scalar and vector conditional moves. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: expand DEST = CMP ? OP_TRUE : OP_FALSE with
   AND/NOT/IOR logic -- a single AND when one arm is the zero constant,
   otherwise (op_true & cmp) | (op_false & ~cmp) through temporaries
   t2/t3.  */
12374 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12376 enum machine_mode mode
= GET_MODE (dest
);
12379 if (op_false
== CONST0_RTX (mode
))
12381 op_true
= force_reg (mode
, op_true
);
12382 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12383 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12385 else if (op_true
== CONST0_RTX (mode
))
12387 op_false
= force_reg (mode
, op_false
);
12388 x
= gen_rtx_NOT (mode
, cmp
);
12389 x
= gen_rtx_AND (mode
, x
, op_false
);
12390 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12394 op_true
= force_reg (mode
, op_true
);
12395 op_false
= force_reg (mode
, op_false
);
12397 t2
= gen_reg_rtx (mode
);
12399 t3
= gen_reg_rtx (mode
);
12403 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12404 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12406 x
= gen_rtx_NOT (mode
, cmp
);
12407 x
= gen_rtx_AND (mode
, x
, op_false
);
12408 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12410 x
= gen_rtx_IOR (mode
, t3
, t2
);
12411 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12415 /* Expand a floating-point conditional move. Return true if successful. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: expand a floating-point conditional move.  The
   SSE-math path routes through ix86_prepare_sse_fp_compare_args /
   ix86_expand_sse_fp_minmax / ix86_expand_sse_cmp / ix86_expand_sse_movcc;
   the FCMOV path builds compare_op via ix86_expand_compare, falls back to
   a setcc-produced QImode flag when the comparison is not an
   fcmov_comparison_operator, and emits up to three IF_THEN_ELSE sets for
   the main, bypass and second tests.  Return statements are not visible
   here -- TODO confirm against the original.  */
12418 ix86_expand_fp_movcc (rtx operands
[])
12420 enum machine_mode mode
= GET_MODE (operands
[0]);
12421 enum rtx_code code
= GET_CODE (operands
[1]);
12422 rtx tmp
, compare_op
, second_test
, bypass_test
;
12424 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12426 enum machine_mode cmode
;
12428 /* Since we've no cmove for sse registers, don't force bad register
12429 allocation just to gain access to it. Deny movcc when the
12430 comparison mode doesn't match the move mode. */
12431 cmode
= GET_MODE (ix86_compare_op0
);
12432 if (cmode
== VOIDmode
)
12433 cmode
= GET_MODE (ix86_compare_op1
);
12437 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12439 &ix86_compare_op1
);
12440 if (code
== UNKNOWN
)
12443 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12444 ix86_compare_op1
, operands
[2],
12448 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12449 ix86_compare_op1
, operands
[2], operands
[3]);
12450 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12454 /* The floating point conditional move instructions don't directly
12455 support conditions resulting from a signed integer comparison. */
12457 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12459 /* The floating point conditional move instructions don't directly
12460 support signed integer comparisons. */
12462 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12464 gcc_assert (!second_test
&& !bypass_test
);
12465 tmp
= gen_reg_rtx (QImode
);
12466 ix86_expand_setcc (code
, tmp
);
12468 ix86_compare_op0
= tmp
;
12469 ix86_compare_op1
= const0_rtx
;
12470 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12472 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12474 tmp
= gen_reg_rtx (mode
);
12475 emit_move_insn (tmp
, operands
[3]);
12478 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12480 tmp
= gen_reg_rtx (mode
);
12481 emit_move_insn (tmp
, operands
[2]);
12485 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12486 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12487 operands
[2], operands
[3])));
12489 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12490 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12491 operands
[3], operands
[0])));
12493 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12494 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12495 operands
[2], operands
[0])));
12500 /* Expand a floating-point vector conditional move; a vcond operation
12501 rather than a movcc operation. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: expand a floating-point vector vcond --
   canonicalize the comparison operands (operands[4]/operands[5]), try
   the min/max shortcut, otherwise emit an SSE compare followed by the
   AND/IOR-based conditional move.  Return paths are not visible.  */
12504 ix86_expand_fp_vcond (rtx operands
[])
12506 enum rtx_code code
= GET_CODE (operands
[3]);
12509 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12510 &operands
[4], &operands
[5]);
12511 if (code
== UNKNOWN
)
12514 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12515 operands
[5], operands
[1], operands
[2]))
12518 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12519 operands
[1], operands
[2]);
12520 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12524 /* Expand a signed integral vector conditional move. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines including most switch arms, embedded original line
   numbers).  Restore from a pristine i386.c before editing.
   Visible code suggests: expand a signed-integral vector conditional
   move by canonicalizing the comparison to EQ/GT/GTU, then emulating the
   unsupported unsigned compare either with a modulo subtraction plus
   sign-bit AND/XOR trick (v4si path) or with an unsigned saturating
   subtraction (US_MINUS), before handing off to ix86_expand_sse_cmp /
   ix86_expand_sse_movcc; `negate` selects which operand pair ordering is
   used.  */
12527 ix86_expand_int_vcond (rtx operands
[])
12529 enum machine_mode mode
= GET_MODE (operands
[0]);
12530 enum rtx_code code
= GET_CODE (operands
[3]);
12531 bool negate
= false;
12534 cop0
= operands
[4];
12535 cop1
= operands
[5];
12537 /* Canonicalize the comparison to EQ, GT, GTU. */
12548 code
= reverse_condition (code
);
12554 code
= reverse_condition (code
);
12560 code
= swap_condition (code
);
12561 x
= cop0
, cop0
= cop1
, cop1
= x
;
12565 gcc_unreachable ();
12568 /* Unsigned parallel compare is not supported by the hardware. Play some
12569 tricks to turn this into a signed comparison against 0. */
12572 cop0
= force_reg (mode
, cop0
);
12580 /* Perform a parallel modulo subtraction. */
12581 t1
= gen_reg_rtx (mode
);
12582 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12584 /* Extract the original sign bit of op0. */
12585 mask
= GEN_INT (-0x80000000);
12586 mask
= gen_rtx_CONST_VECTOR (mode
,
12587 gen_rtvec (4, mask
, mask
, mask
, mask
));
12588 mask
= force_reg (mode
, mask
);
12589 t2
= gen_reg_rtx (mode
);
12590 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12592 /* XOR it back into the result of the subtraction. This results
12593 in the sign bit set iff we saw unsigned underflow. */
12594 x
= gen_reg_rtx (mode
);
12595 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12603 /* Perform a parallel unsigned saturating subtraction. */
12604 x
= gen_reg_rtx (mode
);
12605 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12606 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12613 gcc_unreachable ();
12617 cop1
= CONST0_RTX (mode
);
12620 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12621 operands
[1+negate
], operands
[2-negate
]);
12623 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12624 operands
[2-negate
]);
12628 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12629 true if we should do zero extension, else sign extension. HIGH_P is
12630 true if we want the N/2 high elements, else the low elements. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines including the mode switch's case labels, embedded
   original line numbers).  Restore from a pristine i386.c before
   editing.
   Visible code suggests: widen operands[1] to the next wider vector
   element type via the interleave-high/low patterns; the extension
   vector `se` is zero for unsigned_p, otherwise a sign mask built by
   comparing 0 > operands[1].  The unsigned_p/high_p selection logic is
   only partially visible.  */
12633 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12635 enum machine_mode imode
= GET_MODE (operands
[1]);
12636 rtx (*unpack
)(rtx
, rtx
, rtx
);
12643 unpack
= gen_vec_interleave_highv16qi
;
12645 unpack
= gen_vec_interleave_lowv16qi
;
12649 unpack
= gen_vec_interleave_highv8hi
;
12651 unpack
= gen_vec_interleave_lowv8hi
;
12655 unpack
= gen_vec_interleave_highv4si
;
12657 unpack
= gen_vec_interleave_lowv4si
;
12660 gcc_unreachable ();
12663 dest
= gen_lowpart (imode
, operands
[0]);
12666 se
= force_reg (imode
, CONST0_RTX (imode
));
12668 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12669 operands
[1], pc_rtx
, pc_rtx
);
12671 emit_insn (unpack (dest
, operands
[1], se
));
12674 /* Expand conditional increment or decrement using adb/sbb instructions.
12675 The default case using setcc followed by the conditional move can be
12676 done by generic code. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines including both switch statements' case labels, embedded
   original line numbers).  Restore from a pristine i386.c before
   editing.
   Visible code suggests: expand a conditional increment/decrement using
   adc/sbb -- operands[3] must be +/-1, the comparison must reduce to a
   carry-flag test (ix86_expand_carry_flag_compare), FP condition codes
   are mapped to integer ones and possibly reversed, and the (code==LTU)
   vs (operands[3]==constm1_rtx) parity selects subtract-with-borrow vs
   add-with-carry per mode.  Returns 1 on success ("DONE"); failure
   returns are not visible.  */
12678 ix86_expand_int_addcc (rtx operands
[])
12680 enum rtx_code code
= GET_CODE (operands
[1]);
12682 rtx val
= const0_rtx
;
12683 bool fpcmp
= false;
12684 enum machine_mode mode
= GET_MODE (operands
[0]);
12686 if (operands
[3] != const1_rtx
12687 && operands
[3] != constm1_rtx
)
12689 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12690 ix86_compare_op1
, &compare_op
))
12692 code
= GET_CODE (compare_op
);
12694 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12695 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12698 code
= ix86_fp_compare_code_to_integer (code
);
12705 PUT_CODE (compare_op
,
12706 reverse_condition_maybe_unordered
12707 (GET_CODE (compare_op
)));
12709 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12711 PUT_MODE (compare_op
, mode
);
12713 /* Construct either adc or sbb insn. */
12714 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12716 switch (GET_MODE (operands
[0]))
12719 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12722 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12725 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12728 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12731 gcc_unreachable ();
12736 switch (GET_MODE (operands
[0]))
12739 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12742 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12745 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12748 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12751 gcc_unreachable ();
12754 return 1; /* DONE */
12758 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12759 works for floating pointer parameters and nonoffsetable memories.
12760 For pushes, it returns just stack offsets; the values will be saved
12761 in the right order. Maximally three parts are generated. */
/* NOTE(review): extraction-damaged fragment (split lines, many missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: split OPERAND into 2 or 3 word-sized PARTS
   (SImode words on 32-bit, DImode words on 64-bit; XFmode gets 3 parts).
   Handles constant-pool references (looked through to immediates),
   non-offsettable pushes (all parts alias the push operand), CONST_VECTOR
   punned to an integer mode, registers (only after reload), offsettable
   memories (adjust_address at offsets 4/8), and CONST_DOUBLE split via
   the REAL_VALUE / real_to_target conversions.  The returned part count
   is not visible here -- TODO confirm it returns `size`.  */
12764 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12769 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12771 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12773 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12774 gcc_assert (size
>= 2 && size
<= 3);
12776 /* Optimize constant pool reference to immediates. This is used by fp
12777 moves, that force all constants to memory to allow combining. */
12778 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12780 rtx tmp
= maybe_get_pool_constant (operand
);
12785 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12787 /* The only non-offsetable memories we handle are pushes. */
12788 int ok
= push_operand (operand
, VOIDmode
);
12792 operand
= copy_rtx (operand
);
12793 PUT_MODE (operand
, Pmode
);
12794 parts
[0] = parts
[1] = parts
[2] = operand
;
12798 if (GET_CODE (operand
) == CONST_VECTOR
)
12800 enum machine_mode imode
= int_mode_for_mode (mode
);
12801 /* Caution: if we looked through a constant pool memory above,
12802 the operand may actually have a different mode now. That's
12803 ok, since we want to pun this all the way back to an integer. */
12804 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12805 gcc_assert (operand
!= NULL
);
12811 if (mode
== DImode
)
12812 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12815 if (REG_P (operand
))
12817 gcc_assert (reload_completed
);
12818 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12819 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12821 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12823 else if (offsettable_memref_p (operand
))
12825 operand
= adjust_address (operand
, SImode
, 0);
12826 parts
[0] = operand
;
12827 parts
[1] = adjust_address (operand
, SImode
, 4);
12829 parts
[2] = adjust_address (operand
, SImode
, 8);
12831 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12836 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12840 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12841 parts
[2] = gen_int_mode (l
[2], SImode
);
12844 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12847 gcc_unreachable ();
12849 parts
[1] = gen_int_mode (l
[1], SImode
);
12850 parts
[0] = gen_int_mode (l
[0], SImode
);
12853 gcc_unreachable ();
12858 if (mode
== TImode
)
12859 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12860 if (mode
== XFmode
|| mode
== TFmode
)
12862 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12863 if (REG_P (operand
))
12865 gcc_assert (reload_completed
);
12866 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12867 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12869 else if (offsettable_memref_p (operand
))
12871 operand
= adjust_address (operand
, DImode
, 0);
12872 parts
[0] = operand
;
12873 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12875 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12880 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12881 real_to_target (l
, &r
, mode
);
12883 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12884 if (HOST_BITS_PER_WIDE_INT
>= 64)
12887 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12888 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12891 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12893 if (upper_mode
== SImode
)
12894 parts
[1] = gen_int_mode (l
[2], SImode
);
12895 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12898 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12899 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12902 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12905 gcc_unreachable ();
12912 /* Emit insns to perform a move or push of DI, DF, and XF values.
12913 Return false when normal moves are needed; true when all required
12914 insns have been emitted. Operands 2-4 contain the input values
12915 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): extraction-damaged fragment (split lines, many missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: split a multi-word move (DI/DF/XF etc.) into
   word-sized moves.  Handles: 8-byte moves on 64-bit targets directly;
   constant-pool sources looked through to immediates; pushes (including
   source operands that live on the stack and the 64-bit "no 32-bit push"
   retyping to DImode); destination/source register collisions resolved
   by reordering parts or by an lea into the last part; and, when
   optimizing for size, local un-CSE of repeated nonzero constants.
   The part arrays feed operands[2..7] in an order chosen so no source
   word is clobbered before it is copied.  */
12918 ix86_split_long_move (rtx operands
[])
12923 int collisions
= 0;
12924 enum machine_mode mode
= GET_MODE (operands
[0]);
12926 /* The DFmode expanders may ask us to move double.
12927 For 64bit target this is single move. By hiding the fact
12928 here we simplify i386.md splitters. */
12929 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12931 /* Optimize constant pool reference to immediates. This is used by
12932 fp moves, that force all constants to memory to allow combining. */
12934 if (MEM_P (operands
[1])
12935 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12936 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12937 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12938 if (push_operand (operands
[0], VOIDmode
))
12940 operands
[0] = copy_rtx (operands
[0]);
12941 PUT_MODE (operands
[0], Pmode
);
12944 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12945 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12946 emit_move_insn (operands
[0], operands
[1]);
12950 /* The only non-offsettable memory we handle is push. */
12951 if (push_operand (operands
[0], VOIDmode
))
12954 gcc_assert (!MEM_P (operands
[0])
12955 || offsettable_memref_p (operands
[0]));
12957 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12958 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12960 /* When emitting push, take care for source operands on the stack. */
12961 if (push
&& MEM_P (operands
[1])
12962 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12965 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12966 XEXP (part
[1][2], 0));
12967 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12968 XEXP (part
[1][1], 0));
12971 /* We need to do copy in the right order in case an address register
12972 of the source overlaps the destination. */
12973 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12975 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12977 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12980 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12983 /* Collision in the middle part can be handled by reordering. */
12984 if (collisions
== 1 && nparts
== 3
12985 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12988 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12989 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12992 /* If there are more collisions, we can't handle it by reordering.
12993 Do an lea to the last part and use only one colliding move. */
12994 else if (collisions
> 1)
13000 base
= part
[0][nparts
- 1];
13002 /* Handle the case when the last part isn't valid for lea.
13003 Happens in 64-bit mode storing the 12-byte XFmode. */
13004 if (GET_MODE (base
) != Pmode
)
13005 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13007 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13008 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13009 part
[1][1] = replace_equiv_address (part
[1][1],
13010 plus_constant (base
, UNITS_PER_WORD
));
13012 part
[1][2] = replace_equiv_address (part
[1][2],
13013 plus_constant (base
, 8));
13023 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13024 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13025 emit_move_insn (part
[0][2], part
[1][2]);
13030 /* In 64bit mode we don't have 32bit push available. In case this is
13031 register, it is OK - we will just use larger counterpart. We also
13032 retype memory - these comes from attempt to avoid REX prefix on
13033 moving of second half of TFmode value. */
13034 if (GET_MODE (part
[1][1]) == SImode
)
13036 switch (GET_CODE (part
[1][1]))
13039 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13043 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13047 gcc_unreachable ();
13050 if (GET_MODE (part
[1][0]) == SImode
)
13051 part
[1][0] = part
[1][1];
13054 emit_move_insn (part
[0][1], part
[1][1]);
13055 emit_move_insn (part
[0][0], part
[1][0]);
13059 /* Choose correct order to not overwrite the source before it is copied. */
13060 if ((REG_P (part
[0][0])
13061 && REG_P (part
[1][1])
13062 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13064 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13066 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13070 operands
[2] = part
[0][2];
13071 operands
[3] = part
[0][1];
13072 operands
[4] = part
[0][0];
13073 operands
[5] = part
[1][2];
13074 operands
[6] = part
[1][1];
13075 operands
[7] = part
[1][0];
13079 operands
[2] = part
[0][1];
13080 operands
[3] = part
[0][0];
13081 operands
[5] = part
[1][1];
13082 operands
[6] = part
[1][0];
13089 operands
[2] = part
[0][0];
13090 operands
[3] = part
[0][1];
13091 operands
[4] = part
[0][2];
13092 operands
[5] = part
[1][0];
13093 operands
[6] = part
[1][1];
13094 operands
[7] = part
[1][2];
13098 operands
[2] = part
[0][0];
13099 operands
[3] = part
[0][1];
13100 operands
[5] = part
[1][0];
13101 operands
[6] = part
[1][1];
13105 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13108 if (CONST_INT_P (operands
[5])
13109 && operands
[5] != const0_rtx
13110 && REG_P (operands
[2]))
13112 if (CONST_INT_P (operands
[6])
13113 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13114 operands
[6] = operands
[2];
13117 && CONST_INT_P (operands
[7])
13118 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13119 operands
[7] = operands
[2];
13123 && CONST_INT_P (operands
[6])
13124 && operands
[6] != const0_rtx
13125 && REG_P (operands
[3])
13126 && CONST_INT_P (operands
[7])
13127 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13128 operands
[7] = operands
[3];
13131 emit_move_insn (operands
[2], operands
[5]);
13132 emit_move_insn (operands
[3], operands
[6]);
13134 emit_move_insn (operands
[4], operands
[7]);
13139 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13140 left shift by a constant, either using a single shift or
13141 a sequence of add instructions. */
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: emit a left shift of OPERAND by constant COUNT,
   as a single add when COUNT==1 (implied), as COUNT repeated adds when
   not optimizing for size and the add sequence is cheaper than a
   constant shift (per ix86_cost), otherwise as one ashl insn.  */
13144 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13148 emit_insn ((mode
== DImode
13150 : gen_adddi3
) (operand
, operand
, operand
));
13152 else if (!optimize_size
13153 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13156 for (i
=0; i
<count
; i
++)
13158 emit_insn ((mode
== DImode
13160 : gen_adddi3
) (operand
, operand
, operand
));
13164 emit_insn ((mode
== DImode
13166 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
/* NOTE(review): extraction-damaged fragment (split lines, many missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: split a double-word left shift (DImode on
   32-bit, TImode on 64-bit; single_width = 32 or 64).  Constant counts
   are handled by word moves plus shld and ix86_expand_ashl_const.
   Special cases for a constant-1 source (1 << N via setcc into low/high
   halves, or a bit-extract of the shift count) and a constant -1 source
   (shifting ones needs no shld).  The variable-count tail uses shld plus
   a shift-adjust pattern, with a cmove-based adjustment when a scratch
   register is available.  */
13170 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13172 rtx low
[2], high
[2];
13174 const int single_width
= mode
== DImode
? 32 : 64;
13176 if (CONST_INT_P (operands
[2]))
13178 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13179 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13181 if (count
>= single_width
)
13183 emit_move_insn (high
[0], low
[1]);
13184 emit_move_insn (low
[0], const0_rtx
);
13186 if (count
> single_width
)
13187 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13191 if (!rtx_equal_p (operands
[0], operands
[1]))
13192 emit_move_insn (operands
[0], operands
[1]);
13193 emit_insn ((mode
== DImode
13195 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13196 ix86_expand_ashl_const (low
[0], count
, mode
);
13201 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13203 if (operands
[1] == const1_rtx
)
13205 /* Assuming we've chosen a QImode capable registers, then 1 << N
13206 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13207 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13209 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13211 ix86_expand_clear (low
[0]);
13212 ix86_expand_clear (high
[0]);
13213 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13215 d
= gen_lowpart (QImode
, low
[0]);
13216 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13217 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13218 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13220 d
= gen_lowpart (QImode
, high
[0]);
13221 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13222 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13223 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13226 /* Otherwise, we can get the same results by manually performing
13227 a bit extract operation on bit 5/6, and then performing the two
13228 shifts. The two methods of getting 0/1 into low/high are exactly
13229 the same size. Avoiding the shift in the bit extract case helps
13230 pentium4 a bit; no one else seems to care much either way. */
13235 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13236 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13238 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13239 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13241 emit_insn ((mode
== DImode
13243 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13244 emit_insn ((mode
== DImode
13246 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13247 emit_move_insn (low
[0], high
[0]);
13248 emit_insn ((mode
== DImode
13250 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13253 emit_insn ((mode
== DImode
13255 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13256 emit_insn ((mode
== DImode
13258 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13262 if (operands
[1] == constm1_rtx
)
13264 /* For -1 << N, we can avoid the shld instruction, because we
13265 know that we're shifting 0...31/63 ones into a -1. */
13266 emit_move_insn (low
[0], constm1_rtx
);
13268 emit_move_insn (high
[0], low
[0]);
13270 emit_move_insn (high
[0], constm1_rtx
);
13274 if (!rtx_equal_p (operands
[0], operands
[1]))
13275 emit_move_insn (operands
[0], operands
[1]);
13277 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13278 emit_insn ((mode
== DImode
13280 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13283 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13285 if (TARGET_CMOVE
&& scratch
)
13287 ix86_expand_clear (scratch
);
13288 emit_insn ((mode
== DImode
13289 ? gen_x86_shift_adj_1
13290 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13293 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: split a double-word arithmetic right shift.
   Constant counts: the full-width-minus-one case broadcasts the sign,
   counts >= single_width move the high word down and sign-fill the high
   half, smaller counts use shrd plus an ashr of the high word.  The
   variable-count tail uses shrd/ashr plus a shift-adjust pattern, with a
   sign-extended scratch when cmove is available.  */
13297 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13299 rtx low
[2], high
[2];
13301 const int single_width
= mode
== DImode
? 32 : 64;
13303 if (CONST_INT_P (operands
[2]))
13305 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13306 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13308 if (count
== single_width
* 2 - 1)
13310 emit_move_insn (high
[0], high
[1]);
13311 emit_insn ((mode
== DImode
13313 : gen_ashrdi3
) (high
[0], high
[0],
13314 GEN_INT (single_width
- 1)));
13315 emit_move_insn (low
[0], high
[0]);
13318 else if (count
>= single_width
)
13320 emit_move_insn (low
[0], high
[1]);
13321 emit_move_insn (high
[0], low
[0]);
13322 emit_insn ((mode
== DImode
13324 : gen_ashrdi3
) (high
[0], high
[0],
13325 GEN_INT (single_width
- 1)));
13326 if (count
> single_width
)
13327 emit_insn ((mode
== DImode
13329 : gen_ashrdi3
) (low
[0], low
[0],
13330 GEN_INT (count
- single_width
)));
13334 if (!rtx_equal_p (operands
[0], operands
[1]))
13335 emit_move_insn (operands
[0], operands
[1]);
13336 emit_insn ((mode
== DImode
13338 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13339 emit_insn ((mode
== DImode
13341 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13346 if (!rtx_equal_p (operands
[0], operands
[1]))
13347 emit_move_insn (operands
[0], operands
[1]);
13349 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13351 emit_insn ((mode
== DImode
13353 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13354 emit_insn ((mode
== DImode
13356 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13358 if (TARGET_CMOVE
&& scratch
)
13360 emit_move_insn (scratch
, high
[0]);
13361 emit_insn ((mode
== DImode
13363 : gen_ashrdi3
) (scratch
, scratch
,
13364 GEN_INT (single_width
- 1)));
13365 emit_insn ((mode
== DImode
13366 ? gen_x86_shift_adj_1
13367 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13371 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
/* NOTE(review): extraction-damaged fragment (split lines, missing
   interior lines, embedded original line numbers).  Restore from a
   pristine i386.c before editing.
   Visible code suggests: split a double-word logical right shift.
   Constant counts >= single_width move the high word down and clear the
   high half; smaller counts use shrd plus an lshr of the high word.
   The variable-count tail mirrors ix86_split_ashl with the adjust
   pattern's arguments reversed (see the "Heh" comment), using a zeroed
   scratch when cmove is available.  */
13376 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13378 rtx low
[2], high
[2];
13380 const int single_width
= mode
== DImode
? 32 : 64;
13382 if (CONST_INT_P (operands
[2]))
13384 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13385 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13387 if (count
>= single_width
)
13389 emit_move_insn (low
[0], high
[1]);
13390 ix86_expand_clear (high
[0]);
13392 if (count
> single_width
)
13393 emit_insn ((mode
== DImode
13395 : gen_lshrdi3
) (low
[0], low
[0],
13396 GEN_INT (count
- single_width
)));
13400 if (!rtx_equal_p (operands
[0], operands
[1]))
13401 emit_move_insn (operands
[0], operands
[1]);
13402 emit_insn ((mode
== DImode
13404 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13405 emit_insn ((mode
== DImode
13407 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13412 if (!rtx_equal_p (operands
[0], operands
[1]))
13413 emit_move_insn (operands
[0], operands
[1]);
13415 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13417 emit_insn ((mode
== DImode
13419 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13420 emit_insn ((mode
== DImode
13422 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13424 /* Heh. By reversing the arguments, we can reuse this pattern. */
13425 if (TARGET_CMOVE
&& scratch
)
13427 ix86_expand_clear (scratch
);
13428 emit_insn ((mode
== DImode
13429 ? gen_x86_shift_adj_1
13430 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13434 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13438 /* Predict just emitted jump instruction to be taken with probability PROB. */
13440 predict_jump (int prob
)
13442 rtx insn
= get_last_insn ();
13443 gcc_assert (JUMP_P (insn
));
13445 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13450 /* Helper function for the string operations below. Dest VARIABLE whether
13451 it is aligned to VALUE bytes. If true, jump to the label. */
13453 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13455 rtx label
= gen_label_rtx ();
13456 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13457 if (GET_MODE (variable
) == DImode
)
13458 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13460 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13461 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13464 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13466 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13470 /* Adjust COUNTER by the VALUE. */
13472 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13474 if (GET_MODE (countreg
) == DImode
)
13475 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13477 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13480 /* Zero extend possibly SImode EXP to Pmode register. */
13482 ix86_zero_extend_to_Pmode (rtx exp
)
13485 if (GET_MODE (exp
) == VOIDmode
)
13486 return force_reg (Pmode
, exp
);
13487 if (GET_MODE (exp
) == Pmode
)
13488 return copy_to_mode_reg (Pmode
, exp
);
13489 r
= gen_reg_rtx (Pmode
);
13490 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13494 /* Divide COUNTREG by SCALE. */
13496 scale_counter (rtx countreg
, int scale
)
13499 rtx piece_size_mask
;
13503 if (CONST_INT_P (countreg
))
13504 return GEN_INT (INTVAL (countreg
) / scale
);
13505 gcc_assert (REG_P (countreg
));
13507 piece_size_mask
= GEN_INT (scale
- 1);
13508 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13509 GEN_INT (exact_log2 (scale
)),
13510 NULL
, 1, OPTAB_DIRECT
);
13514 /* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
13515 for constant loop counts. */
13517 static enum machine_mode
13518 counter_mode (rtx count_exp
)
13520 if (GET_MODE (count_exp
) != VOIDmode
)
13521 return GET_MODE (count_exp
);
13522 if (GET_CODE (count_exp
) != CONST_INT
)
13524 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13529 /* When SRCPTR is non-NULL, output simple loop to move memory
13530 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13531 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13532 equivalent loop to set memory by VALUE (supposed to be in MODE).
13534 The size is rounded down to whole number of chunk size moved at once.
13535 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13539 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13540 rtx destptr
, rtx srcptr
, rtx value
,
13541 rtx count
, enum machine_mode mode
, int unroll
,
13544 rtx out_label
, top_label
, iter
, tmp
;
13545 enum machine_mode iter_mode
= counter_mode (count
);
13546 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13547 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13553 top_label
= gen_label_rtx ();
13554 out_label
= gen_label_rtx ();
13555 iter
= gen_reg_rtx (iter_mode
);
13557 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13558 NULL
, 1, OPTAB_DIRECT
);
13559 /* Those two should combine. */
13560 if (piece_size
== const1_rtx
)
13562 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13564 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13566 emit_move_insn (iter
, const0_rtx
);
13568 emit_label (top_label
);
13570 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13571 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13572 destmem
= change_address (destmem
, mode
, x_addr
);
13576 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13577 srcmem
= change_address (srcmem
, mode
, y_addr
);
13579 /* When unrolling for chips that reorder memory reads and writes,
13580 we can save registers by using single temporary.
13581 Also using 4 temporaries is overkill in 32bit mode. */
13582 if (!TARGET_64BIT
&& 0)
13584 for (i
= 0; i
< unroll
; i
++)
13589 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13591 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13593 emit_move_insn (destmem
, srcmem
);
13599 gcc_assert (unroll
<= 4);
13600 for (i
= 0; i
< unroll
; i
++)
13602 tmpreg
[i
] = gen_reg_rtx (mode
);
13606 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13608 emit_move_insn (tmpreg
[i
], srcmem
);
13610 for (i
= 0; i
< unroll
; i
++)
13615 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13617 emit_move_insn (destmem
, tmpreg
[i
]);
13622 for (i
= 0; i
< unroll
; i
++)
13626 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13627 emit_move_insn (destmem
, value
);
13630 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13631 true, OPTAB_LIB_WIDEN
);
13633 emit_move_insn (iter
, tmp
);
13635 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13637 if (expected_size
!= -1)
13639 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13640 if (expected_size
== 0)
13642 else if (expected_size
> REG_BR_PROB_BASE
)
13643 predict_jump (REG_BR_PROB_BASE
- 1);
13645 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13648 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13649 iter
= ix86_zero_extend_to_Pmode (iter
);
13650 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13651 true, OPTAB_LIB_WIDEN
);
13652 if (tmp
!= destptr
)
13653 emit_move_insn (destptr
, tmp
);
13656 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13657 true, OPTAB_LIB_WIDEN
);
13659 emit_move_insn (srcptr
, tmp
);
13661 emit_label (out_label
);
13664 /* Output "rep; mov" instruction.
13665 Arguments have same meaning as for previous function */
13667 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13668 rtx destptr
, rtx srcptr
,
13670 enum machine_mode mode
)
13676 /* If the size is known, it is shorter to use rep movs. */
13677 if (mode
== QImode
&& CONST_INT_P (count
)
13678 && !(INTVAL (count
) & 3))
13681 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13682 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13683 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13684 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13685 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13686 if (mode
!= QImode
)
13688 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13689 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13690 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13691 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13692 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13693 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13697 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13698 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13700 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13704 /* Output "rep; stos" instruction.
13705 Arguments have same meaning as for previous function */
13707 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13709 enum machine_mode mode
)
13714 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13715 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13716 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13717 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13718 if (mode
!= QImode
)
13720 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13721 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13722 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13725 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13726 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13730 emit_strmov (rtx destmem
, rtx srcmem
,
13731 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13733 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13734 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13735 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13738 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13740 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13741 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13744 if (CONST_INT_P (count
))
13746 HOST_WIDE_INT countval
= INTVAL (count
);
13749 if ((countval
& 0x10) && max_size
> 16)
13753 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13754 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13757 gcc_unreachable ();
13760 if ((countval
& 0x08) && max_size
> 8)
13763 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13766 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13767 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13771 if ((countval
& 0x04) && max_size
> 4)
13773 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13776 if ((countval
& 0x02) && max_size
> 2)
13778 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13781 if ((countval
& 0x01) && max_size
> 1)
13783 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13790 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13791 count
, 1, OPTAB_DIRECT
);
13792 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13793 count
, QImode
, 1, 4);
13797 /* When there are stringops, we can cheaply increase dest and src pointers.
13798 Otherwise we save code size by maintaining offset (zero is readily
13799 available from preceding rep operation) and using x86 addressing modes.
13801 if (TARGET_SINGLE_STRINGOP
)
13805 rtx label
= ix86_expand_aligntest (count
, 4, true);
13806 src
= change_address (srcmem
, SImode
, srcptr
);
13807 dest
= change_address (destmem
, SImode
, destptr
);
13808 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13809 emit_label (label
);
13810 LABEL_NUSES (label
) = 1;
13814 rtx label
= ix86_expand_aligntest (count
, 2, true);
13815 src
= change_address (srcmem
, HImode
, srcptr
);
13816 dest
= change_address (destmem
, HImode
, destptr
);
13817 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13818 emit_label (label
);
13819 LABEL_NUSES (label
) = 1;
13823 rtx label
= ix86_expand_aligntest (count
, 1, true);
13824 src
= change_address (srcmem
, QImode
, srcptr
);
13825 dest
= change_address (destmem
, QImode
, destptr
);
13826 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13827 emit_label (label
);
13828 LABEL_NUSES (label
) = 1;
13833 rtx offset
= force_reg (Pmode
, const0_rtx
);
13838 rtx label
= ix86_expand_aligntest (count
, 4, true);
13839 src
= change_address (srcmem
, SImode
, srcptr
);
13840 dest
= change_address (destmem
, SImode
, destptr
);
13841 emit_move_insn (dest
, src
);
13842 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13843 true, OPTAB_LIB_WIDEN
);
13845 emit_move_insn (offset
, tmp
);
13846 emit_label (label
);
13847 LABEL_NUSES (label
) = 1;
13851 rtx label
= ix86_expand_aligntest (count
, 2, true);
13852 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13853 src
= change_address (srcmem
, HImode
, tmp
);
13854 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13855 dest
= change_address (destmem
, HImode
, tmp
);
13856 emit_move_insn (dest
, src
);
13857 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13858 true, OPTAB_LIB_WIDEN
);
13860 emit_move_insn (offset
, tmp
);
13861 emit_label (label
);
13862 LABEL_NUSES (label
) = 1;
13866 rtx label
= ix86_expand_aligntest (count
, 1, true);
13867 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13868 src
= change_address (srcmem
, QImode
, tmp
);
13869 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13870 dest
= change_address (destmem
, QImode
, tmp
);
13871 emit_move_insn (dest
, src
);
13872 emit_label (label
);
13873 LABEL_NUSES (label
) = 1;
13878 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13880 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13881 rtx count
, int max_size
)
13884 expand_simple_binop (counter_mode (count
), AND
, count
,
13885 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13886 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13887 gen_lowpart (QImode
, value
), count
, QImode
,
13891 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13893 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13897 if (CONST_INT_P (count
))
13899 HOST_WIDE_INT countval
= INTVAL (count
);
13902 if ((countval
& 0x10) && max_size
> 16)
13906 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13907 emit_insn (gen_strset (destptr
, dest
, value
));
13908 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13909 emit_insn (gen_strset (destptr
, dest
, value
));
13912 gcc_unreachable ();
13915 if ((countval
& 0x08) && max_size
> 8)
13919 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13920 emit_insn (gen_strset (destptr
, dest
, value
));
13924 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13925 emit_insn (gen_strset (destptr
, dest
, value
));
13926 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13927 emit_insn (gen_strset (destptr
, dest
, value
));
13931 if ((countval
& 0x04) && max_size
> 4)
13933 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13934 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13937 if ((countval
& 0x02) && max_size
> 2)
13939 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13940 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13943 if ((countval
& 0x01) && max_size
> 1)
13945 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13946 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13953 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13958 rtx label
= ix86_expand_aligntest (count
, 16, true);
13961 dest
= change_address (destmem
, DImode
, destptr
);
13962 emit_insn (gen_strset (destptr
, dest
, value
));
13963 emit_insn (gen_strset (destptr
, dest
, value
));
13967 dest
= change_address (destmem
, SImode
, destptr
);
13968 emit_insn (gen_strset (destptr
, dest
, value
));
13969 emit_insn (gen_strset (destptr
, dest
, value
));
13970 emit_insn (gen_strset (destptr
, dest
, value
));
13971 emit_insn (gen_strset (destptr
, dest
, value
));
13973 emit_label (label
);
13974 LABEL_NUSES (label
) = 1;
13978 rtx label
= ix86_expand_aligntest (count
, 8, true);
13981 dest
= change_address (destmem
, DImode
, destptr
);
13982 emit_insn (gen_strset (destptr
, dest
, value
));
13986 dest
= change_address (destmem
, SImode
, destptr
);
13987 emit_insn (gen_strset (destptr
, dest
, value
));
13988 emit_insn (gen_strset (destptr
, dest
, value
));
13990 emit_label (label
);
13991 LABEL_NUSES (label
) = 1;
13995 rtx label
= ix86_expand_aligntest (count
, 4, true);
13996 dest
= change_address (destmem
, SImode
, destptr
);
13997 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13998 emit_label (label
);
13999 LABEL_NUSES (label
) = 1;
14003 rtx label
= ix86_expand_aligntest (count
, 2, true);
14004 dest
= change_address (destmem
, HImode
, destptr
);
14005 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14006 emit_label (label
);
14007 LABEL_NUSES (label
) = 1;
14011 rtx label
= ix86_expand_aligntest (count
, 1, true);
14012 dest
= change_address (destmem
, QImode
, destptr
);
14013 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14014 emit_label (label
);
14015 LABEL_NUSES (label
) = 1;
14019 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14020 DESIRED_ALIGNMENT. */
14022 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14023 rtx destptr
, rtx srcptr
, rtx count
,
14024 int align
, int desired_alignment
)
14026 if (align
<= 1 && desired_alignment
> 1)
14028 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14029 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14030 destmem
= change_address (destmem
, QImode
, destptr
);
14031 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14032 ix86_adjust_counter (count
, 1);
14033 emit_label (label
);
14034 LABEL_NUSES (label
) = 1;
14036 if (align
<= 2 && desired_alignment
> 2)
14038 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14039 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14040 destmem
= change_address (destmem
, HImode
, destptr
);
14041 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14042 ix86_adjust_counter (count
, 2);
14043 emit_label (label
);
14044 LABEL_NUSES (label
) = 1;
14046 if (align
<= 4 && desired_alignment
> 4)
14048 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14049 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14050 destmem
= change_address (destmem
, SImode
, destptr
);
14051 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14052 ix86_adjust_counter (count
, 4);
14053 emit_label (label
);
14054 LABEL_NUSES (label
) = 1;
14056 gcc_assert (desired_alignment
<= 8);
14059 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14060 DESIRED_ALIGNMENT. */
14062 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14063 int align
, int desired_alignment
)
14065 if (align
<= 1 && desired_alignment
> 1)
14067 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14068 destmem
= change_address (destmem
, QImode
, destptr
);
14069 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14070 ix86_adjust_counter (count
, 1);
14071 emit_label (label
);
14072 LABEL_NUSES (label
) = 1;
14074 if (align
<= 2 && desired_alignment
> 2)
14076 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14077 destmem
= change_address (destmem
, HImode
, destptr
);
14078 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14079 ix86_adjust_counter (count
, 2);
14080 emit_label (label
);
14081 LABEL_NUSES (label
) = 1;
14083 if (align
<= 4 && desired_alignment
> 4)
14085 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14086 destmem
= change_address (destmem
, SImode
, destptr
);
14087 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14088 ix86_adjust_counter (count
, 4);
14089 emit_label (label
);
14090 LABEL_NUSES (label
) = 1;
14092 gcc_assert (desired_alignment
<= 8);
14095 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14096 static enum stringop_alg
14097 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14098 int *dynamic_check
)
14100 const struct stringop_algs
* algs
;
14102 *dynamic_check
= -1;
14104 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14106 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14107 if (stringop_alg
!= no_stringop
)
14108 return stringop_alg
;
14109 /* rep; movq or rep; movl is the smallest variant. */
14110 else if (optimize_size
)
14112 if (!count
|| (count
& 3))
14113 return rep_prefix_1_byte
;
14115 return rep_prefix_4_byte
;
14117 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14119 else if (expected_size
!= -1 && expected_size
< 4)
14120 return loop_1_byte
;
14121 else if (expected_size
!= -1)
14124 enum stringop_alg alg
= libcall
;
14125 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14127 gcc_assert (algs
->size
[i
].max
);
14128 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14130 if (algs
->size
[i
].alg
!= libcall
)
14131 alg
= algs
->size
[i
].alg
;
14132 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14133 last non-libcall inline algorithm. */
14134 if (TARGET_INLINE_ALL_STRINGOPS
)
14136 /* When the current size is best to be copied by a libcall,
14137 but we are still forced to inline, run the heuristic bellow
14138 that will pick code for medium sized blocks. */
14139 if (alg
!= libcall
)
14144 return algs
->size
[i
].alg
;
14147 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14149 /* When asked to inline the call anyway, try to pick meaningful choice.
14150 We look for maximal size of block that is faster to copy by hand and
14151 take blocks of at most of that size guessing that average size will
14152 be roughly half of the block.
14154 If this turns out to be bad, we might simply specify the preferred
14155 choice in ix86_costs. */
14156 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14157 && algs
->unknown_size
== libcall
)
14160 enum stringop_alg alg
;
14163 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14164 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14165 max
= algs
->size
[i
].max
;
14168 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14169 gcc_assert (*dynamic_check
== -1);
14170 gcc_assert (alg
!= libcall
);
14171 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14172 *dynamic_check
= max
;
14175 return algs
->unknown_size
;
14178 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14179 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14181 decide_alignment (int align
,
14182 enum stringop_alg alg
,
14185 int desired_align
= 0;
14189 gcc_unreachable ();
14191 case unrolled_loop
:
14192 desired_align
= GET_MODE_SIZE (Pmode
);
14194 case rep_prefix_8_byte
:
14197 case rep_prefix_4_byte
:
14198 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14199 copying whole cacheline at once. */
14200 if (TARGET_PENTIUMPRO
)
14205 case rep_prefix_1_byte
:
14206 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14207 copying whole cacheline at once. */
14208 if (TARGET_PENTIUMPRO
)
14222 if (desired_align
< align
)
14223 desired_align
= align
;
14224 if (expected_size
!= -1 && expected_size
< 4)
14225 desired_align
= align
;
14226 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14239 /* Expand string move (memcpy) operation. Use i386 string operations when
14240 profitable. expand_clrmem contains similar code. The code depends upon
14241 architecture, block size and alignment, but always has the same
14244 1) Prologue guard: Conditional that jumps up to epilogues for small
14245 blocks that can be handled by epilogue alone. This is faster but
14246 also needed for correctness, since prologue assume the block is larger
14247 than the desired alignment.
14249 Optional dynamic check for size and libcall for large
14250 blocks is emitted here too, with -minline-stringops-dynamically.
14252 2) Prologue: copy first few bytes in order to get destination aligned
14253 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14254 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14255 We emit either a jump tree on power of two sized blocks, or a byte loop.
14257 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14258 with specified algorithm.
14260 4) Epilogue: code copying tail of the block that is too small to be
14261 handled by main body (or up to size guarded by prologue guard). */
14264 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14265 rtx expected_align_exp
, rtx expected_size_exp
)
14271 rtx jump_around_label
= NULL
;
14272 HOST_WIDE_INT align
= 1;
14273 unsigned HOST_WIDE_INT count
= 0;
14274 HOST_WIDE_INT expected_size
= -1;
14275 int size_needed
= 0, epilogue_size_needed
;
14276 int desired_align
= 0;
14277 enum stringop_alg alg
;
14280 if (CONST_INT_P (align_exp
))
14281 align
= INTVAL (align_exp
);
14282 /* i386 can do misaligned access on reasonably increased cost. */
14283 if (CONST_INT_P (expected_align_exp
)
14284 && INTVAL (expected_align_exp
) > align
)
14285 align
= INTVAL (expected_align_exp
);
14286 if (CONST_INT_P (count_exp
))
14287 count
= expected_size
= INTVAL (count_exp
);
14288 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14289 expected_size
= INTVAL (expected_size_exp
);
14291 /* Step 0: Decide on preferred algorithm, desired alignment and
14292 size of chunks to be copied by main loop. */
14294 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14295 desired_align
= decide_alignment (align
, alg
, expected_size
);
14297 if (!TARGET_ALIGN_STRINGOPS
)
14298 align
= desired_align
;
14300 if (alg
== libcall
)
14302 gcc_assert (alg
!= no_stringop
);
14304 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14305 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14306 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14311 gcc_unreachable ();
14313 size_needed
= GET_MODE_SIZE (Pmode
);
14315 case unrolled_loop
:
14316 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14318 case rep_prefix_8_byte
:
14321 case rep_prefix_4_byte
:
14324 case rep_prefix_1_byte
:
14330 epilogue_size_needed
= size_needed
;
14332 /* Step 1: Prologue guard. */
14334 /* Alignment code needs count to be in register. */
14335 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14337 enum machine_mode mode
= SImode
;
14338 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14340 count_exp
= force_reg (mode
, count_exp
);
14342 gcc_assert (desired_align
>= 1 && align
>= 1);
14344 /* Ensure that alignment prologue won't copy past end of block. */
14345 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14347 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14348 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14349 Make sure it is power of 2. */
14350 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14352 label
= gen_label_rtx ();
14353 emit_cmp_and_jump_insns (count_exp
,
14354 GEN_INT (epilogue_size_needed
),
14355 LTU
, 0, counter_mode (count_exp
), 1, label
);
14356 if (GET_CODE (count_exp
) == CONST_INT
)
14358 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14359 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14361 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14363 /* Emit code to decide on runtime whether library call or inline should be
14365 if (dynamic_check
!= -1)
14367 rtx hot_label
= gen_label_rtx ();
14368 jump_around_label
= gen_label_rtx ();
14369 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14370 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14371 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14372 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14373 emit_jump (jump_around_label
);
14374 emit_label (hot_label
);
14377 /* Step 2: Alignment prologue. */
14379 if (desired_align
> align
)
14381 /* Except for the first move in epilogue, we no longer know
14382 constant offset in aliasing info. It don't seems to worth
14383 the pain to maintain it for the first move, so throw away
14385 src
= change_address (src
, BLKmode
, srcreg
);
14386 dst
= change_address (dst
, BLKmode
, destreg
);
14387 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14390 if (label
&& size_needed
== 1)
14392 emit_label (label
);
14393 LABEL_NUSES (label
) = 1;
14397 /* Step 3: Main loop. */
14403 gcc_unreachable ();
14405 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14406 count_exp
, QImode
, 1, expected_size
);
14409 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14410 count_exp
, Pmode
, 1, expected_size
);
14412 case unrolled_loop
:
14413 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14414 registers for 4 temporaries anyway. */
14415 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14416 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14419 case rep_prefix_8_byte
:
14420 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14423 case rep_prefix_4_byte
:
14424 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14427 case rep_prefix_1_byte
:
14428 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14432 /* Adjust properly the offset of src and dest memory for aliasing. */
14433 if (CONST_INT_P (count_exp
))
14435 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14436 (count
/ size_needed
) * size_needed
);
14437 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14438 (count
/ size_needed
) * size_needed
);
14442 src
= change_address (src
, BLKmode
, srcreg
);
14443 dst
= change_address (dst
, BLKmode
, destreg
);
14446 /* Step 4: Epilogue to copy the remaining bytes. */
14450 /* When the main loop is done, COUNT_EXP might hold original count,
14451 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14452 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14453 bytes. Compensate if needed. */
14455 if (size_needed
< epilogue_size_needed
)
14458 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14459 GEN_INT (size_needed
- 1), count_exp
, 1,
14461 if (tmp
!= count_exp
)
14462 emit_move_insn (count_exp
, tmp
);
14464 emit_label (label
);
14465 LABEL_NUSES (label
) = 1;
14468 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14469 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14470 epilogue_size_needed
);
14471 if (jump_around_label
)
14472 emit_label (jump_around_label
);
14476 /* Helper function for memcpy. For QImode value 0xXY produce
14477 0xXYXYXYXY of wide specified by MODE. This is essentially
14478 a * 0x10101010, but we can do slightly better than
14479 synth_mult by unwinding the sequence by hand on CPUs with
14482 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14484 enum machine_mode valmode
= GET_MODE (val
);
14486 int nops
= mode
== DImode
? 3 : 2;
14488 gcc_assert (mode
== SImode
|| mode
== DImode
);
14489 if (val
== const0_rtx
)
14490 return copy_to_mode_reg (mode
, const0_rtx
);
14491 if (CONST_INT_P (val
))
14493 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14497 if (mode
== DImode
)
14498 v
|= (v
<< 16) << 16;
14499 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14502 if (valmode
== VOIDmode
)
14504 if (valmode
!= QImode
)
14505 val
= gen_lowpart (QImode
, val
);
14506 if (mode
== QImode
)
14508 if (!TARGET_PARTIAL_REG_STALL
)
14510 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14511 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14512 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14513 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14515 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14516 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14517 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14522 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14524 if (!TARGET_PARTIAL_REG_STALL
)
14525 if (mode
== SImode
)
14526 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14528 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14531 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14532 NULL
, 1, OPTAB_DIRECT
);
14534 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14536 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14537 NULL
, 1, OPTAB_DIRECT
);
14538 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14539 if (mode
== SImode
)
14541 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14542 NULL
, 1, OPTAB_DIRECT
);
14543 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14548 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14549 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14550 alignment from ALIGN to DESIRED_ALIGN. */
14552 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14557 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14558 promoted_val
= promote_duplicated_reg (DImode
, val
);
14559 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14560 promoted_val
= promote_duplicated_reg (SImode
, val
);
14561 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14562 promoted_val
= promote_duplicated_reg (HImode
, val
);
14564 promoted_val
= val
;
14566 return promoted_val
;
14569 /* Expand string clear operation (bzero). Use i386 string operations when
14570 profitable. See expand_movmem comment for explanation of individual
14571 steps performed. */
14573 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14574 rtx expected_align_exp
, rtx expected_size_exp
)
14579 rtx jump_around_label
= NULL
;
14580 HOST_WIDE_INT align
= 1;
14581 unsigned HOST_WIDE_INT count
= 0;
14582 HOST_WIDE_INT expected_size
= -1;
14583 int size_needed
= 0, epilogue_size_needed
;
14584 int desired_align
= 0;
14585 enum stringop_alg alg
;
14586 rtx promoted_val
= NULL
;
14587 bool force_loopy_epilogue
= false;
14590 if (CONST_INT_P (align_exp
))
14591 align
= INTVAL (align_exp
);
14592 /* i386 can do misaligned access on reasonably increased cost. */
14593 if (CONST_INT_P (expected_align_exp
)
14594 && INTVAL (expected_align_exp
) > align
)
14595 align
= INTVAL (expected_align_exp
);
14596 if (CONST_INT_P (count_exp
))
14597 count
= expected_size
= INTVAL (count_exp
);
14598 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14599 expected_size
= INTVAL (expected_size_exp
);
14601 /* Step 0: Decide on preferred algorithm, desired alignment and
14602 size of chunks to be copied by main loop. */
14604 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14605 desired_align
= decide_alignment (align
, alg
, expected_size
);
14607 if (!TARGET_ALIGN_STRINGOPS
)
14608 align
= desired_align
;
14610 if (alg
== libcall
)
14612 gcc_assert (alg
!= no_stringop
);
14614 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14615 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14620 gcc_unreachable ();
14622 size_needed
= GET_MODE_SIZE (Pmode
);
14624 case unrolled_loop
:
14625 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14627 case rep_prefix_8_byte
:
14630 case rep_prefix_4_byte
:
14633 case rep_prefix_1_byte
:
14638 epilogue_size_needed
= size_needed
;
14640 /* Step 1: Prologue guard. */
14642 /* Alignment code needs count to be in register. */
14643 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14645 enum machine_mode mode
= SImode
;
14646 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14648 count_exp
= force_reg (mode
, count_exp
);
14650 /* Do the cheap promotion to allow better CSE across the
14651 main loop and epilogue (ie one load of the big constant in the
14652 front of all code. */
14653 if (CONST_INT_P (val_exp
))
14654 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14655 desired_align
, align
);
14656 /* Ensure that alignment prologue won't copy past end of block. */
14657 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14659 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14660 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14661 Make sure it is power of 2. */
14662 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14664 /* To improve performance of small blocks, we jump around the VAL
14665 promoting mode. This mean that if the promoted VAL is not constant,
14666 we might not use it in the epilogue and have to use byte
14668 if (epilogue_size_needed
> 2 && !promoted_val
)
14669 force_loopy_epilogue
= true;
14670 label
= gen_label_rtx ();
14671 emit_cmp_and_jump_insns (count_exp
,
14672 GEN_INT (epilogue_size_needed
),
14673 LTU
, 0, counter_mode (count_exp
), 1, label
);
14674 if (GET_CODE (count_exp
) == CONST_INT
)
14676 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14677 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14679 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14681 if (dynamic_check
!= -1)
14683 rtx hot_label
= gen_label_rtx ();
14684 jump_around_label
= gen_label_rtx ();
14685 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14686 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14687 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14688 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14689 emit_jump (jump_around_label
);
14690 emit_label (hot_label
);
14693 /* Step 2: Alignment prologue. */
14695 /* Do the expensive promotion once we branched off the small blocks. */
14697 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14698 desired_align
, align
);
14699 gcc_assert (desired_align
>= 1 && align
>= 1);
14701 if (desired_align
> align
)
14703 /* Except for the first move in epilogue, we no longer know
14704 constant offset in aliasing info. It don't seems to worth
14705 the pain to maintain it for the first move, so throw away
14707 dst
= change_address (dst
, BLKmode
, destreg
);
14708 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14711 if (label
&& size_needed
== 1)
14713 emit_label (label
);
14714 LABEL_NUSES (label
) = 1;
14718 /* Step 3: Main loop. */
14724 gcc_unreachable ();
14726 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14727 count_exp
, QImode
, 1, expected_size
);
14730 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14731 count_exp
, Pmode
, 1, expected_size
);
14733 case unrolled_loop
:
14734 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14735 count_exp
, Pmode
, 4, expected_size
);
14737 case rep_prefix_8_byte
:
14738 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14741 case rep_prefix_4_byte
:
14742 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14745 case rep_prefix_1_byte
:
14746 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14750 /* Adjust properly the offset of src and dest memory for aliasing. */
14751 if (CONST_INT_P (count_exp
))
14752 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14753 (count
/ size_needed
) * size_needed
);
14755 dst
= change_address (dst
, BLKmode
, destreg
);
14757 /* Step 4: Epilogue to copy the remaining bytes. */
14761 /* When the main loop is done, COUNT_EXP might hold original count,
14762 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14763 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14764 bytes. Compensate if needed. */
14766 if (size_needed
< desired_align
- align
)
14769 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14770 GEN_INT (size_needed
- 1), count_exp
, 1,
14772 size_needed
= desired_align
- align
+ 1;
14773 if (tmp
!= count_exp
)
14774 emit_move_insn (count_exp
, tmp
);
14776 emit_label (label
);
14777 LABEL_NUSES (label
) = 1;
14779 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14781 if (force_loopy_epilogue
)
14782 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14785 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14788 if (jump_around_label
)
14789 emit_label (jump_around_label
);
14793 /* Expand strlen. */
14795 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14797 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14799 /* The generic case of strlen expander is long. Avoid it's
14800 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14802 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14803 && !TARGET_INLINE_ALL_STRINGOPS
14805 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14808 addr
= force_reg (Pmode
, XEXP (src
, 0));
14809 scratch1
= gen_reg_rtx (Pmode
);
14811 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14814 /* Well it seems that some optimizer does not combine a call like
14815 foo(strlen(bar), strlen(bar));
14816 when the move and the subtraction is done here. It does calculate
14817 the length just once when these instructions are done inside of
14818 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14819 often used and I use one fewer register for the lifetime of
14820 output_strlen_unroll() this is better. */
14822 emit_move_insn (out
, addr
);
14824 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14826 /* strlensi_unroll_1 returns the address of the zero at the end of
14827 the string, like memchr(), so compute the length by subtracting
14828 the start address. */
14830 emit_insn (gen_subdi3 (out
, out
, addr
));
14832 emit_insn (gen_subsi3 (out
, out
, addr
));
14837 scratch2
= gen_reg_rtx (Pmode
);
14838 scratch3
= gen_reg_rtx (Pmode
);
14839 scratch4
= force_reg (Pmode
, constm1_rtx
);
14841 emit_move_insn (scratch3
, addr
);
14842 eoschar
= force_reg (QImode
, eoschar
);
14844 src
= replace_equiv_address_nv (src
, scratch3
);
14846 /* If .md starts supporting :P, this can be done in .md. */
14847 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14848 scratch4
), UNSPEC_SCAS
);
14849 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14852 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14853 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14857 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14858 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14864 /* Expand the appropriate insns for doing strlen if not just doing
14867 out = result, initialized with the start address
14868 align_rtx = alignment of the address.
14869 scratch = scratch register, initialized with the startaddress when
14870 not aligned, otherwise undefined
14872 This is just the body. It needs the initializations mentioned above and
14873 some address computing at the end. These things are done in i386.md. */
14876 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14880 rtx align_2_label
= NULL_RTX
;
14881 rtx align_3_label
= NULL_RTX
;
14882 rtx align_4_label
= gen_label_rtx ();
14883 rtx end_0_label
= gen_label_rtx ();
14885 rtx tmpreg
= gen_reg_rtx (SImode
);
14886 rtx scratch
= gen_reg_rtx (SImode
);
14890 if (CONST_INT_P (align_rtx
))
14891 align
= INTVAL (align_rtx
);
14893 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14895 /* Is there a known alignment and is it less than 4? */
14898 rtx scratch1
= gen_reg_rtx (Pmode
);
14899 emit_move_insn (scratch1
, out
);
14900 /* Is there a known alignment and is it not 2? */
14903 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14904 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14906 /* Leave just the 3 lower bits. */
14907 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14908 NULL_RTX
, 0, OPTAB_WIDEN
);
14910 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14911 Pmode
, 1, align_4_label
);
14912 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14913 Pmode
, 1, align_2_label
);
14914 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14915 Pmode
, 1, align_3_label
);
14919 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14920 check if is aligned to 4 - byte. */
14922 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14923 NULL_RTX
, 0, OPTAB_WIDEN
);
14925 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14926 Pmode
, 1, align_4_label
);
14929 mem
= change_address (src
, QImode
, out
);
14931 /* Now compare the bytes. */
14933 /* Compare the first n unaligned byte on a byte per byte basis. */
14934 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14935 QImode
, 1, end_0_label
);
14937 /* Increment the address. */
14939 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14941 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14943 /* Not needed with an alignment of 2 */
14946 emit_label (align_2_label
);
14948 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14952 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14954 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14956 emit_label (align_3_label
);
14959 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14963 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14965 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14968 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14969 align this loop. It gives only huge programs, but does not help to
14971 emit_label (align_4_label
);
14973 mem
= change_address (src
, SImode
, out
);
14974 emit_move_insn (scratch
, mem
);
14976 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14978 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14980 /* This formula yields a nonzero result iff one of the bytes is zero.
14981 This saves three branches inside loop and many cycles. */
14983 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14984 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14985 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14986 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14987 gen_int_mode (0x80808080, SImode
)));
14988 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14993 rtx reg
= gen_reg_rtx (SImode
);
14994 rtx reg2
= gen_reg_rtx (Pmode
);
14995 emit_move_insn (reg
, tmpreg
);
14996 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14998 /* If zero is not in the first two bytes, move two bytes forward. */
14999 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15000 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15001 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15002 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15003 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15006 /* Emit lea manually to avoid clobbering of flags. */
15007 emit_insn (gen_rtx_SET (SImode
, reg2
,
15008 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15010 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15011 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15012 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15013 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15020 rtx end_2_label
= gen_label_rtx ();
15021 /* Is zero in the first two bytes? */
15023 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15024 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15025 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15026 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15027 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15029 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15030 JUMP_LABEL (tmp
) = end_2_label
;
15032 /* Not in the first two. Move two bytes forward. */
15033 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15035 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15037 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15039 emit_label (end_2_label
);
15043 /* Avoid branch in fixing the byte. */
15044 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15045 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15046 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
15048 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15050 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15052 emit_label (end_0_label
);
15055 /* For given symbol (function) construct code to compute address of it's PLT
15056 entry in large x86-64 PIC model. */
15058 construct_plt_address (rtx symbol
)
15060 rtx tmp
= gen_reg_rtx (Pmode
);
15061 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15063 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15064 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15066 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15067 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15072 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15073 rtx callarg2 ATTRIBUTE_UNUSED
,
15074 rtx pop
, int sibcall
)
15076 rtx use
= NULL
, call
;
15078 if (pop
== const0_rtx
)
15080 gcc_assert (!TARGET_64BIT
|| !pop
);
15082 if (TARGET_MACHO
&& !TARGET_64BIT
)
15085 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15086 fnaddr
= machopic_indirect_call_target (fnaddr
);
15091 /* Static functions and indirect calls don't need the pic register. */
15092 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15093 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15094 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15095 use_reg (&use
, pic_offset_table_rtx
);
15098 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15100 rtx al
= gen_rtx_REG (QImode
, 0);
15101 emit_move_insn (al
, callarg2
);
15102 use_reg (&use
, al
);
15105 if (ix86_cmodel
== CM_LARGE_PIC
15106 && GET_CODE (fnaddr
) == MEM
15107 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15108 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15109 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15110 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15112 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15113 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15115 if (sibcall
&& TARGET_64BIT
15116 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15119 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15120 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15121 emit_move_insn (fnaddr
, addr
);
15122 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15125 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15127 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15130 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15131 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15132 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15135 call
= emit_call_insn (call
);
15137 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15141 /* Clear stack slot assignments remembered from previous functions.
15142 This is called from INIT_EXPANDERS once before RTL is emitted for each
15145 static struct machine_function
*
15146 ix86_init_machine_status (void)
15148 struct machine_function
*f
;
15150 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
15151 f
->use_fast_prologue_epilogue_nregs
= -1;
15152 f
->tls_descriptor_call_expanded_p
= 0;
15157 /* Return a MEM corresponding to a stack slot with mode MODE.
15158 Allocate a new slot if necessary.
15160 The RTL for a function can have several slots available: N is
15161 which slot to use. */
15164 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15166 struct stack_local_entry
*s
;
15168 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15170 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15171 if (s
->mode
== mode
&& s
->n
== n
)
15172 return copy_rtx (s
->rtl
);
15174 s
= (struct stack_local_entry
*)
15175 ggc_alloc (sizeof (struct stack_local_entry
));
15178 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15180 s
->next
= ix86_stack_locals
;
15181 ix86_stack_locals
= s
;
15185 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15187 static GTY(()) rtx ix86_tls_symbol
;
15189 ix86_tls_get_addr (void)
15192 if (!ix86_tls_symbol
)
15194 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15195 (TARGET_ANY_GNU_TLS
15197 ? "___tls_get_addr"
15198 : "__tls_get_addr");
15201 return ix86_tls_symbol
;
15204 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15206 static GTY(()) rtx ix86_tls_module_base_symbol
;
15208 ix86_tls_module_base (void)
15211 if (!ix86_tls_module_base_symbol
)
15213 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15214 "_TLS_MODULE_BASE_");
15215 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15216 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15219 return ix86_tls_module_base_symbol
;
15222 /* Calculate the length of the memory address in the instruction
15223 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15226 memory_address_length (rtx addr
)
15228 struct ix86_address parts
;
15229 rtx base
, index
, disp
;
15233 if (GET_CODE (addr
) == PRE_DEC
15234 || GET_CODE (addr
) == POST_INC
15235 || GET_CODE (addr
) == PRE_MODIFY
15236 || GET_CODE (addr
) == POST_MODIFY
)
15239 ok
= ix86_decompose_address (addr
, &parts
);
15242 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15243 parts
.base
= SUBREG_REG (parts
.base
);
15244 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15245 parts
.index
= SUBREG_REG (parts
.index
);
15248 index
= parts
.index
;
15253 - esp as the base always wants an index,
15254 - ebp as the base always wants a displacement. */
15256 /* Register Indirect. */
15257 if (base
&& !index
&& !disp
)
15259 /* esp (for its index) and ebp (for its displacement) need
15260 the two-byte modrm form. */
15261 if (addr
== stack_pointer_rtx
15262 || addr
== arg_pointer_rtx
15263 || addr
== frame_pointer_rtx
15264 || addr
== hard_frame_pointer_rtx
)
15268 /* Direct Addressing. */
15269 else if (disp
&& !base
&& !index
)
15274 /* Find the length of the displacement constant. */
15277 if (base
&& satisfies_constraint_K (disp
))
15282 /* ebp always wants a displacement. */
15283 else if (base
== hard_frame_pointer_rtx
)
15286 /* An index requires the two-byte modrm form.... */
15288 /* ...like esp, which always wants an index. */
15289 || base
== stack_pointer_rtx
15290 || base
== arg_pointer_rtx
15291 || base
== frame_pointer_rtx
)
15298 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15299 is set, expect that insn have 8bit immediate alternative. */
15301 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15305 extract_insn_cached (insn
);
15306 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15307 if (CONSTANT_P (recog_data
.operand
[i
]))
15310 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15314 switch (get_attr_mode (insn
))
15325 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15330 fatal_insn ("unknown insn mode", insn
);
15336 /* Compute default value for "length_address" attribute. */
15338 ix86_attr_length_address_default (rtx insn
)
15342 if (get_attr_type (insn
) == TYPE_LEA
)
15344 rtx set
= PATTERN (insn
);
15346 if (GET_CODE (set
) == PARALLEL
)
15347 set
= XVECEXP (set
, 0, 0);
15349 gcc_assert (GET_CODE (set
) == SET
);
15351 return memory_address_length (SET_SRC (set
));
15354 extract_insn_cached (insn
);
15355 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15356 if (MEM_P (recog_data
.operand
[i
]))
15358 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15364 /* Return the maximum number of instructions a cpu can issue. */
15367 ix86_issue_rate (void)
15371 case PROCESSOR_PENTIUM
:
15375 case PROCESSOR_PENTIUMPRO
:
15376 case PROCESSOR_PENTIUM4
:
15377 case PROCESSOR_ATHLON
:
15379 case PROCESSOR_AMDFAM10
:
15380 case PROCESSOR_NOCONA
:
15381 case PROCESSOR_GENERIC32
:
15382 case PROCESSOR_GENERIC64
:
15385 case PROCESSOR_CORE2
:
15393 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15394 by DEP_INSN and nothing set by DEP_INSN. */
15397 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15401 /* Simplify the test for uninteresting insns. */
15402 if (insn_type
!= TYPE_SETCC
15403 && insn_type
!= TYPE_ICMOV
15404 && insn_type
!= TYPE_FCMOV
15405 && insn_type
!= TYPE_IBR
)
15408 if ((set
= single_set (dep_insn
)) != 0)
15410 set
= SET_DEST (set
);
15413 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15414 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15415 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15416 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15418 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15419 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15424 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15427 /* This test is true if the dependent insn reads the flags but
15428 not any other potentially set register. */
15429 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15432 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15438 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15439 address with operands set by DEP_INSN. */
15442 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15446 if (insn_type
== TYPE_LEA
15449 addr
= PATTERN (insn
);
15451 if (GET_CODE (addr
) == PARALLEL
)
15452 addr
= XVECEXP (addr
, 0, 0);
15454 gcc_assert (GET_CODE (addr
) == SET
);
15456 addr
= SET_SRC (addr
);
15461 extract_insn_cached (insn
);
15462 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15463 if (MEM_P (recog_data
.operand
[i
]))
15465 addr
= XEXP (recog_data
.operand
[i
], 0);
15472 return modified_in_p (addr
, dep_insn
);
15476 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15478 enum attr_type insn_type
, dep_insn_type
;
15479 enum attr_memory memory
;
15481 int dep_insn_code_number
;
15483 /* Anti and output dependencies have zero cost on all CPUs. */
15484 if (REG_NOTE_KIND (link
) != 0)
15487 dep_insn_code_number
= recog_memoized (dep_insn
);
15489 /* If we can't recognize the insns, we can't really do anything. */
15490 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15493 insn_type
= get_attr_type (insn
);
15494 dep_insn_type
= get_attr_type (dep_insn
);
15498 case PROCESSOR_PENTIUM
:
15499 /* Address Generation Interlock adds a cycle of latency. */
15500 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15503 /* ??? Compares pair with jump/setcc. */
15504 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15507 /* Floating point stores require value to be ready one cycle earlier. */
15508 if (insn_type
== TYPE_FMOV
15509 && get_attr_memory (insn
) == MEMORY_STORE
15510 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15514 case PROCESSOR_PENTIUMPRO
:
15515 memory
= get_attr_memory (insn
);
15517 /* INT->FP conversion is expensive. */
15518 if (get_attr_fp_int_src (dep_insn
))
15521 /* There is one cycle extra latency between an FP op and a store. */
15522 if (insn_type
== TYPE_FMOV
15523 && (set
= single_set (dep_insn
)) != NULL_RTX
15524 && (set2
= single_set (insn
)) != NULL_RTX
15525 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15526 && MEM_P (SET_DEST (set2
)))
15529 /* Show ability of reorder buffer to hide latency of load by executing
15530 in parallel with previous instruction in case
15531 previous instruction is not needed to compute the address. */
15532 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15533 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15535 /* Claim moves to take one cycle, as core can issue one load
15536 at time and the next load can start cycle later. */
15537 if (dep_insn_type
== TYPE_IMOV
15538 || dep_insn_type
== TYPE_FMOV
)
15546 memory
= get_attr_memory (insn
);
15548 /* The esp dependency is resolved before the instruction is really
15550 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15551 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15554 /* INT->FP conversion is expensive. */
15555 if (get_attr_fp_int_src (dep_insn
))
15558 /* Show ability of reorder buffer to hide latency of load by executing
15559 in parallel with previous instruction in case
15560 previous instruction is not needed to compute the address. */
15561 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15562 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15564 /* Claim moves to take one cycle, as core can issue one load
15565 at time and the next load can start cycle later. */
15566 if (dep_insn_type
== TYPE_IMOV
15567 || dep_insn_type
== TYPE_FMOV
)
15576 case PROCESSOR_ATHLON
:
15578 case PROCESSOR_AMDFAM10
:
15579 case PROCESSOR_GENERIC32
:
15580 case PROCESSOR_GENERIC64
:
15581 memory
= get_attr_memory (insn
);
15583 /* Show ability of reorder buffer to hide latency of load by executing
15584 in parallel with previous instruction in case
15585 previous instruction is not needed to compute the address. */
15586 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15587 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15589 enum attr_unit unit
= get_attr_unit (insn
);
15592 /* Because of the difference between the length of integer and
15593 floating unit pipeline preparation stages, the memory operands
15594 for floating point are cheaper.
15596 ??? For Athlon it the difference is most probably 2. */
15597 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15600 loadcost
= TARGET_ATHLON
? 2 : 0;
15602 if (cost
>= loadcost
)
15615 /* How many alternative schedules to try. This should be as wide as the
15616 scheduling freedom in the DFA, but no wider. Making this value too
15617 large results extra work for the scheduler. */
15620 ia32_multipass_dfa_lookahead (void)
15622 if (ix86_tune
== PROCESSOR_PENTIUM
)
15625 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15626 || ix86_tune
== PROCESSOR_K6
)
15634 /* Compute the alignment given to a constant that is being placed in memory.
15635 EXP is the constant and ALIGN is the alignment that the object would
15637 The value of this function is used instead of that alignment to align
15641 ix86_constant_alignment (tree exp
, int align
)
15643 if (TREE_CODE (exp
) == REAL_CST
)
15645 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15647 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15650 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15651 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15652 return BITS_PER_WORD
;
15657 /* Compute the alignment for a static variable.
15658 TYPE is the data type, and ALIGN is the alignment that
15659 the object would ordinarily have. The value of this function is used
15660 instead of that alignment to align the object. */
15663 ix86_data_alignment (tree type
, int align
)
15665 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15667 if (AGGREGATE_TYPE_P (type
)
15668 && TYPE_SIZE (type
)
15669 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15670 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15671 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15672 && align
< max_align
)
15675 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15676 to 16byte boundary. */
15679 if (AGGREGATE_TYPE_P (type
)
15680 && TYPE_SIZE (type
)
15681 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15682 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15683 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15687 if (TREE_CODE (type
) == ARRAY_TYPE
)
15689 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15691 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15694 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15697 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15699 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15702 else if ((TREE_CODE (type
) == RECORD_TYPE
15703 || TREE_CODE (type
) == UNION_TYPE
15704 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15705 && TYPE_FIELDS (type
))
15707 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15709 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15712 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15713 || TREE_CODE (type
) == INTEGER_TYPE
)
15715 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15717 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15724 /* Compute the alignment for a local variable.
15725 TYPE is the data type, and ALIGN is the alignment that
15726 the object would ordinarily have. The value of this macro is used
15727 instead of that alignment to align the object. */
15730 ix86_local_alignment (tree type
, int align
)
15732 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15733 to 16byte boundary. */
15736 if (AGGREGATE_TYPE_P (type
)
15737 && TYPE_SIZE (type
)
15738 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15739 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15740 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15743 if (TREE_CODE (type
) == ARRAY_TYPE
)
15745 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15747 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15750 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15752 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15754 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15757 else if ((TREE_CODE (type
) == RECORD_TYPE
15758 || TREE_CODE (type
) == UNION_TYPE
15759 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15760 && TYPE_FIELDS (type
))
15762 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15764 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15767 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15768 || TREE_CODE (type
) == INTEGER_TYPE
)
15771 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15773 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15779 /* Emit RTL insns to initialize the variable parts of a trampoline.
15780 FNADDR is an RTX for the address of the function's pure code.
15781 CXT is an RTX for the static chain value for the function. */
15783 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15787 /* Compute offset from the end of the jmp to the target function. */
15788 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15789 plus_constant (tramp
, 10),
15790 NULL_RTX
, 1, OPTAB_DIRECT
);
15791 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15792 gen_int_mode (0xb9, QImode
));
15793 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15794 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15795 gen_int_mode (0xe9, QImode
));
15796 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15801 /* Try to load address using shorter movl instead of movabs.
15802 We may want to support movq for kernel mode, but kernel does not use
15803 trampolines at the moment. */
15804 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15806 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15807 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15808 gen_int_mode (0xbb41, HImode
));
15809 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15810 gen_lowpart (SImode
, fnaddr
));
15815 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15816 gen_int_mode (0xbb49, HImode
));
15817 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15821 /* Load static chain using movabs to r10. */
15822 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15823 gen_int_mode (0xba49, HImode
));
15824 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15827 /* Jump to the r11 */
15828 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15829 gen_int_mode (0xff49, HImode
));
15830 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15831 gen_int_mode (0xe3, QImode
));
15833 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15836 #ifdef ENABLE_EXECUTE_STACK
15837 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15838 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15842 /* Codes for all the SSE/MMX builtins. */
15845 IX86_BUILTIN_ADDPS
,
15846 IX86_BUILTIN_ADDSS
,
15847 IX86_BUILTIN_DIVPS
,
15848 IX86_BUILTIN_DIVSS
,
15849 IX86_BUILTIN_MULPS
,
15850 IX86_BUILTIN_MULSS
,
15851 IX86_BUILTIN_SUBPS
,
15852 IX86_BUILTIN_SUBSS
,
15854 IX86_BUILTIN_CMPEQPS
,
15855 IX86_BUILTIN_CMPLTPS
,
15856 IX86_BUILTIN_CMPLEPS
,
15857 IX86_BUILTIN_CMPGTPS
,
15858 IX86_BUILTIN_CMPGEPS
,
15859 IX86_BUILTIN_CMPNEQPS
,
15860 IX86_BUILTIN_CMPNLTPS
,
15861 IX86_BUILTIN_CMPNLEPS
,
15862 IX86_BUILTIN_CMPNGTPS
,
15863 IX86_BUILTIN_CMPNGEPS
,
15864 IX86_BUILTIN_CMPORDPS
,
15865 IX86_BUILTIN_CMPUNORDPS
,
15866 IX86_BUILTIN_CMPEQSS
,
15867 IX86_BUILTIN_CMPLTSS
,
15868 IX86_BUILTIN_CMPLESS
,
15869 IX86_BUILTIN_CMPNEQSS
,
15870 IX86_BUILTIN_CMPNLTSS
,
15871 IX86_BUILTIN_CMPNLESS
,
15872 IX86_BUILTIN_CMPNGTSS
,
15873 IX86_BUILTIN_CMPNGESS
,
15874 IX86_BUILTIN_CMPORDSS
,
15875 IX86_BUILTIN_CMPUNORDSS
,
15877 IX86_BUILTIN_COMIEQSS
,
15878 IX86_BUILTIN_COMILTSS
,
15879 IX86_BUILTIN_COMILESS
,
15880 IX86_BUILTIN_COMIGTSS
,
15881 IX86_BUILTIN_COMIGESS
,
15882 IX86_BUILTIN_COMINEQSS
,
15883 IX86_BUILTIN_UCOMIEQSS
,
15884 IX86_BUILTIN_UCOMILTSS
,
15885 IX86_BUILTIN_UCOMILESS
,
15886 IX86_BUILTIN_UCOMIGTSS
,
15887 IX86_BUILTIN_UCOMIGESS
,
15888 IX86_BUILTIN_UCOMINEQSS
,
15890 IX86_BUILTIN_CVTPI2PS
,
15891 IX86_BUILTIN_CVTPS2PI
,
15892 IX86_BUILTIN_CVTSI2SS
,
15893 IX86_BUILTIN_CVTSI642SS
,
15894 IX86_BUILTIN_CVTSS2SI
,
15895 IX86_BUILTIN_CVTSS2SI64
,
15896 IX86_BUILTIN_CVTTPS2PI
,
15897 IX86_BUILTIN_CVTTSS2SI
,
15898 IX86_BUILTIN_CVTTSS2SI64
,
15900 IX86_BUILTIN_MAXPS
,
15901 IX86_BUILTIN_MAXSS
,
15902 IX86_BUILTIN_MINPS
,
15903 IX86_BUILTIN_MINSS
,
15905 IX86_BUILTIN_LOADUPS
,
15906 IX86_BUILTIN_STOREUPS
,
15907 IX86_BUILTIN_MOVSS
,
15909 IX86_BUILTIN_MOVHLPS
,
15910 IX86_BUILTIN_MOVLHPS
,
15911 IX86_BUILTIN_LOADHPS
,
15912 IX86_BUILTIN_LOADLPS
,
15913 IX86_BUILTIN_STOREHPS
,
15914 IX86_BUILTIN_STORELPS
,
15916 IX86_BUILTIN_MASKMOVQ
,
15917 IX86_BUILTIN_MOVMSKPS
,
15918 IX86_BUILTIN_PMOVMSKB
,
15920 IX86_BUILTIN_MOVNTPS
,
15921 IX86_BUILTIN_MOVNTQ
,
15923 IX86_BUILTIN_LOADDQU
,
15924 IX86_BUILTIN_STOREDQU
,
15926 IX86_BUILTIN_PACKSSWB
,
15927 IX86_BUILTIN_PACKSSDW
,
15928 IX86_BUILTIN_PACKUSWB
,
15930 IX86_BUILTIN_PADDB
,
15931 IX86_BUILTIN_PADDW
,
15932 IX86_BUILTIN_PADDD
,
15933 IX86_BUILTIN_PADDQ
,
15934 IX86_BUILTIN_PADDSB
,
15935 IX86_BUILTIN_PADDSW
,
15936 IX86_BUILTIN_PADDUSB
,
15937 IX86_BUILTIN_PADDUSW
,
15938 IX86_BUILTIN_PSUBB
,
15939 IX86_BUILTIN_PSUBW
,
15940 IX86_BUILTIN_PSUBD
,
15941 IX86_BUILTIN_PSUBQ
,
15942 IX86_BUILTIN_PSUBSB
,
15943 IX86_BUILTIN_PSUBSW
,
15944 IX86_BUILTIN_PSUBUSB
,
15945 IX86_BUILTIN_PSUBUSW
,
15948 IX86_BUILTIN_PANDN
,
15952 IX86_BUILTIN_PAVGB
,
15953 IX86_BUILTIN_PAVGW
,
15955 IX86_BUILTIN_PCMPEQB
,
15956 IX86_BUILTIN_PCMPEQW
,
15957 IX86_BUILTIN_PCMPEQD
,
15958 IX86_BUILTIN_PCMPGTB
,
15959 IX86_BUILTIN_PCMPGTW
,
15960 IX86_BUILTIN_PCMPGTD
,
15962 IX86_BUILTIN_PMADDWD
,
15964 IX86_BUILTIN_PMAXSW
,
15965 IX86_BUILTIN_PMAXUB
,
15966 IX86_BUILTIN_PMINSW
,
15967 IX86_BUILTIN_PMINUB
,
15969 IX86_BUILTIN_PMULHUW
,
15970 IX86_BUILTIN_PMULHW
,
15971 IX86_BUILTIN_PMULLW
,
15973 IX86_BUILTIN_PSADBW
,
15974 IX86_BUILTIN_PSHUFW
,
15976 IX86_BUILTIN_PSLLW
,
15977 IX86_BUILTIN_PSLLD
,
15978 IX86_BUILTIN_PSLLQ
,
15979 IX86_BUILTIN_PSRAW
,
15980 IX86_BUILTIN_PSRAD
,
15981 IX86_BUILTIN_PSRLW
,
15982 IX86_BUILTIN_PSRLD
,
15983 IX86_BUILTIN_PSRLQ
,
15984 IX86_BUILTIN_PSLLWI
,
15985 IX86_BUILTIN_PSLLDI
,
15986 IX86_BUILTIN_PSLLQI
,
15987 IX86_BUILTIN_PSRAWI
,
15988 IX86_BUILTIN_PSRADI
,
15989 IX86_BUILTIN_PSRLWI
,
15990 IX86_BUILTIN_PSRLDI
,
15991 IX86_BUILTIN_PSRLQI
,
15993 IX86_BUILTIN_PUNPCKHBW
,
15994 IX86_BUILTIN_PUNPCKHWD
,
15995 IX86_BUILTIN_PUNPCKHDQ
,
15996 IX86_BUILTIN_PUNPCKLBW
,
15997 IX86_BUILTIN_PUNPCKLWD
,
15998 IX86_BUILTIN_PUNPCKLDQ
,
16000 IX86_BUILTIN_SHUFPS
,
16002 IX86_BUILTIN_RCPPS
,
16003 IX86_BUILTIN_RCPSS
,
16004 IX86_BUILTIN_RSQRTPS
,
16005 IX86_BUILTIN_RSQRTSS
,
16006 IX86_BUILTIN_SQRTPS
,
16007 IX86_BUILTIN_SQRTSS
,
16009 IX86_BUILTIN_UNPCKHPS
,
16010 IX86_BUILTIN_UNPCKLPS
,
16012 IX86_BUILTIN_ANDPS
,
16013 IX86_BUILTIN_ANDNPS
,
16015 IX86_BUILTIN_XORPS
,
16018 IX86_BUILTIN_LDMXCSR
,
16019 IX86_BUILTIN_STMXCSR
,
16020 IX86_BUILTIN_SFENCE
,
16022 /* 3DNow! Original */
16023 IX86_BUILTIN_FEMMS
,
16024 IX86_BUILTIN_PAVGUSB
,
16025 IX86_BUILTIN_PF2ID
,
16026 IX86_BUILTIN_PFACC
,
16027 IX86_BUILTIN_PFADD
,
16028 IX86_BUILTIN_PFCMPEQ
,
16029 IX86_BUILTIN_PFCMPGE
,
16030 IX86_BUILTIN_PFCMPGT
,
16031 IX86_BUILTIN_PFMAX
,
16032 IX86_BUILTIN_PFMIN
,
16033 IX86_BUILTIN_PFMUL
,
16034 IX86_BUILTIN_PFRCP
,
16035 IX86_BUILTIN_PFRCPIT1
,
16036 IX86_BUILTIN_PFRCPIT2
,
16037 IX86_BUILTIN_PFRSQIT1
,
16038 IX86_BUILTIN_PFRSQRT
,
16039 IX86_BUILTIN_PFSUB
,
16040 IX86_BUILTIN_PFSUBR
,
16041 IX86_BUILTIN_PI2FD
,
16042 IX86_BUILTIN_PMULHRW
,
16044 /* 3DNow! Athlon Extensions */
16045 IX86_BUILTIN_PF2IW
,
16046 IX86_BUILTIN_PFNACC
,
16047 IX86_BUILTIN_PFPNACC
,
16048 IX86_BUILTIN_PI2FW
,
16049 IX86_BUILTIN_PSWAPDSI
,
16050 IX86_BUILTIN_PSWAPDSF
,
16053 IX86_BUILTIN_ADDPD
,
16054 IX86_BUILTIN_ADDSD
,
16055 IX86_BUILTIN_DIVPD
,
16056 IX86_BUILTIN_DIVSD
,
16057 IX86_BUILTIN_MULPD
,
16058 IX86_BUILTIN_MULSD
,
16059 IX86_BUILTIN_SUBPD
,
16060 IX86_BUILTIN_SUBSD
,
16062 IX86_BUILTIN_CMPEQPD
,
16063 IX86_BUILTIN_CMPLTPD
,
16064 IX86_BUILTIN_CMPLEPD
,
16065 IX86_BUILTIN_CMPGTPD
,
16066 IX86_BUILTIN_CMPGEPD
,
16067 IX86_BUILTIN_CMPNEQPD
,
16068 IX86_BUILTIN_CMPNLTPD
,
16069 IX86_BUILTIN_CMPNLEPD
,
16070 IX86_BUILTIN_CMPNGTPD
,
16071 IX86_BUILTIN_CMPNGEPD
,
16072 IX86_BUILTIN_CMPORDPD
,
16073 IX86_BUILTIN_CMPUNORDPD
,
16074 IX86_BUILTIN_CMPNEPD
,
16075 IX86_BUILTIN_CMPEQSD
,
16076 IX86_BUILTIN_CMPLTSD
,
16077 IX86_BUILTIN_CMPLESD
,
16078 IX86_BUILTIN_CMPNEQSD
,
16079 IX86_BUILTIN_CMPNLTSD
,
16080 IX86_BUILTIN_CMPNLESD
,
16081 IX86_BUILTIN_CMPORDSD
,
16082 IX86_BUILTIN_CMPUNORDSD
,
16083 IX86_BUILTIN_CMPNESD
,
16085 IX86_BUILTIN_COMIEQSD
,
16086 IX86_BUILTIN_COMILTSD
,
16087 IX86_BUILTIN_COMILESD
,
16088 IX86_BUILTIN_COMIGTSD
,
16089 IX86_BUILTIN_COMIGESD
,
16090 IX86_BUILTIN_COMINEQSD
,
16091 IX86_BUILTIN_UCOMIEQSD
,
16092 IX86_BUILTIN_UCOMILTSD
,
16093 IX86_BUILTIN_UCOMILESD
,
16094 IX86_BUILTIN_UCOMIGTSD
,
16095 IX86_BUILTIN_UCOMIGESD
,
16096 IX86_BUILTIN_UCOMINEQSD
,
16098 IX86_BUILTIN_MAXPD
,
16099 IX86_BUILTIN_MAXSD
,
16100 IX86_BUILTIN_MINPD
,
16101 IX86_BUILTIN_MINSD
,
16103 IX86_BUILTIN_ANDPD
,
16104 IX86_BUILTIN_ANDNPD
,
16106 IX86_BUILTIN_XORPD
,
16108 IX86_BUILTIN_SQRTPD
,
16109 IX86_BUILTIN_SQRTSD
,
16111 IX86_BUILTIN_UNPCKHPD
,
16112 IX86_BUILTIN_UNPCKLPD
,
16114 IX86_BUILTIN_SHUFPD
,
16116 IX86_BUILTIN_LOADUPD
,
16117 IX86_BUILTIN_STOREUPD
,
16118 IX86_BUILTIN_MOVSD
,
16120 IX86_BUILTIN_LOADHPD
,
16121 IX86_BUILTIN_LOADLPD
,
16123 IX86_BUILTIN_CVTDQ2PD
,
16124 IX86_BUILTIN_CVTDQ2PS
,
16126 IX86_BUILTIN_CVTPD2DQ
,
16127 IX86_BUILTIN_CVTPD2PI
,
16128 IX86_BUILTIN_CVTPD2PS
,
16129 IX86_BUILTIN_CVTTPD2DQ
,
16130 IX86_BUILTIN_CVTTPD2PI
,
16132 IX86_BUILTIN_CVTPI2PD
,
16133 IX86_BUILTIN_CVTSI2SD
,
16134 IX86_BUILTIN_CVTSI642SD
,
16136 IX86_BUILTIN_CVTSD2SI
,
16137 IX86_BUILTIN_CVTSD2SI64
,
16138 IX86_BUILTIN_CVTSD2SS
,
16139 IX86_BUILTIN_CVTSS2SD
,
16140 IX86_BUILTIN_CVTTSD2SI
,
16141 IX86_BUILTIN_CVTTSD2SI64
,
16143 IX86_BUILTIN_CVTPS2DQ
,
16144 IX86_BUILTIN_CVTPS2PD
,
16145 IX86_BUILTIN_CVTTPS2DQ
,
16147 IX86_BUILTIN_MOVNTI
,
16148 IX86_BUILTIN_MOVNTPD
,
16149 IX86_BUILTIN_MOVNTDQ
,
16152 IX86_BUILTIN_MASKMOVDQU
,
16153 IX86_BUILTIN_MOVMSKPD
,
16154 IX86_BUILTIN_PMOVMSKB128
,
16156 IX86_BUILTIN_PACKSSWB128
,
16157 IX86_BUILTIN_PACKSSDW128
,
16158 IX86_BUILTIN_PACKUSWB128
,
16160 IX86_BUILTIN_PADDB128
,
16161 IX86_BUILTIN_PADDW128
,
16162 IX86_BUILTIN_PADDD128
,
16163 IX86_BUILTIN_PADDQ128
,
16164 IX86_BUILTIN_PADDSB128
,
16165 IX86_BUILTIN_PADDSW128
,
16166 IX86_BUILTIN_PADDUSB128
,
16167 IX86_BUILTIN_PADDUSW128
,
16168 IX86_BUILTIN_PSUBB128
,
16169 IX86_BUILTIN_PSUBW128
,
16170 IX86_BUILTIN_PSUBD128
,
16171 IX86_BUILTIN_PSUBQ128
,
16172 IX86_BUILTIN_PSUBSB128
,
16173 IX86_BUILTIN_PSUBSW128
,
16174 IX86_BUILTIN_PSUBUSB128
,
16175 IX86_BUILTIN_PSUBUSW128
,
16177 IX86_BUILTIN_PAND128
,
16178 IX86_BUILTIN_PANDN128
,
16179 IX86_BUILTIN_POR128
,
16180 IX86_BUILTIN_PXOR128
,
16182 IX86_BUILTIN_PAVGB128
,
16183 IX86_BUILTIN_PAVGW128
,
16185 IX86_BUILTIN_PCMPEQB128
,
16186 IX86_BUILTIN_PCMPEQW128
,
16187 IX86_BUILTIN_PCMPEQD128
,
16188 IX86_BUILTIN_PCMPGTB128
,
16189 IX86_BUILTIN_PCMPGTW128
,
16190 IX86_BUILTIN_PCMPGTD128
,
16192 IX86_BUILTIN_PMADDWD128
,
16194 IX86_BUILTIN_PMAXSW128
,
16195 IX86_BUILTIN_PMAXUB128
,
16196 IX86_BUILTIN_PMINSW128
,
16197 IX86_BUILTIN_PMINUB128
,
16199 IX86_BUILTIN_PMULUDQ
,
16200 IX86_BUILTIN_PMULUDQ128
,
16201 IX86_BUILTIN_PMULHUW128
,
16202 IX86_BUILTIN_PMULHW128
,
16203 IX86_BUILTIN_PMULLW128
,
16205 IX86_BUILTIN_PSADBW128
,
16206 IX86_BUILTIN_PSHUFHW
,
16207 IX86_BUILTIN_PSHUFLW
,
16208 IX86_BUILTIN_PSHUFD
,
16210 IX86_BUILTIN_PSLLW128
,
16211 IX86_BUILTIN_PSLLD128
,
16212 IX86_BUILTIN_PSLLQ128
,
16213 IX86_BUILTIN_PSRAW128
,
16214 IX86_BUILTIN_PSRAD128
,
16215 IX86_BUILTIN_PSRLW128
,
16216 IX86_BUILTIN_PSRLD128
,
16217 IX86_BUILTIN_PSRLQ128
,
16218 IX86_BUILTIN_PSLLDQI128
,
16219 IX86_BUILTIN_PSLLWI128
,
16220 IX86_BUILTIN_PSLLDI128
,
16221 IX86_BUILTIN_PSLLQI128
,
16222 IX86_BUILTIN_PSRAWI128
,
16223 IX86_BUILTIN_PSRADI128
,
16224 IX86_BUILTIN_PSRLDQI128
,
16225 IX86_BUILTIN_PSRLWI128
,
16226 IX86_BUILTIN_PSRLDI128
,
16227 IX86_BUILTIN_PSRLQI128
,
16229 IX86_BUILTIN_PUNPCKHBW128
,
16230 IX86_BUILTIN_PUNPCKHWD128
,
16231 IX86_BUILTIN_PUNPCKHDQ128
,
16232 IX86_BUILTIN_PUNPCKHQDQ128
,
16233 IX86_BUILTIN_PUNPCKLBW128
,
16234 IX86_BUILTIN_PUNPCKLWD128
,
16235 IX86_BUILTIN_PUNPCKLDQ128
,
16236 IX86_BUILTIN_PUNPCKLQDQ128
,
16238 IX86_BUILTIN_CLFLUSH
,
16239 IX86_BUILTIN_MFENCE
,
16240 IX86_BUILTIN_LFENCE
,
16242 /* Prescott New Instructions. */
16243 IX86_BUILTIN_ADDSUBPS
,
16244 IX86_BUILTIN_HADDPS
,
16245 IX86_BUILTIN_HSUBPS
,
16246 IX86_BUILTIN_MOVSHDUP
,
16247 IX86_BUILTIN_MOVSLDUP
,
16248 IX86_BUILTIN_ADDSUBPD
,
16249 IX86_BUILTIN_HADDPD
,
16250 IX86_BUILTIN_HSUBPD
,
16251 IX86_BUILTIN_LDDQU
,
16253 IX86_BUILTIN_MONITOR
,
16254 IX86_BUILTIN_MWAIT
,
16257 IX86_BUILTIN_PHADDW
,
16258 IX86_BUILTIN_PHADDD
,
16259 IX86_BUILTIN_PHADDSW
,
16260 IX86_BUILTIN_PHSUBW
,
16261 IX86_BUILTIN_PHSUBD
,
16262 IX86_BUILTIN_PHSUBSW
,
16263 IX86_BUILTIN_PMADDUBSW
,
16264 IX86_BUILTIN_PMULHRSW
,
16265 IX86_BUILTIN_PSHUFB
,
16266 IX86_BUILTIN_PSIGNB
,
16267 IX86_BUILTIN_PSIGNW
,
16268 IX86_BUILTIN_PSIGND
,
16269 IX86_BUILTIN_PALIGNR
,
16270 IX86_BUILTIN_PABSB
,
16271 IX86_BUILTIN_PABSW
,
16272 IX86_BUILTIN_PABSD
,
16274 IX86_BUILTIN_PHADDW128
,
16275 IX86_BUILTIN_PHADDD128
,
16276 IX86_BUILTIN_PHADDSW128
,
16277 IX86_BUILTIN_PHSUBW128
,
16278 IX86_BUILTIN_PHSUBD128
,
16279 IX86_BUILTIN_PHSUBSW128
,
16280 IX86_BUILTIN_PMADDUBSW128
,
16281 IX86_BUILTIN_PMULHRSW128
,
16282 IX86_BUILTIN_PSHUFB128
,
16283 IX86_BUILTIN_PSIGNB128
,
16284 IX86_BUILTIN_PSIGNW128
,
16285 IX86_BUILTIN_PSIGND128
,
16286 IX86_BUILTIN_PALIGNR128
,
16287 IX86_BUILTIN_PABSB128
,
16288 IX86_BUILTIN_PABSW128
,
16289 IX86_BUILTIN_PABSD128
,
16291 /* AMDFAM10 - SSE4A New Instructions. */
16292 IX86_BUILTIN_MOVNTSD
,
16293 IX86_BUILTIN_MOVNTSS
,
16294 IX86_BUILTIN_EXTRQI
,
16295 IX86_BUILTIN_EXTRQ
,
16296 IX86_BUILTIN_INSERTQI
,
16297 IX86_BUILTIN_INSERTQ
,
16299 IX86_BUILTIN_VEC_INIT_V2SI
,
16300 IX86_BUILTIN_VEC_INIT_V4HI
,
16301 IX86_BUILTIN_VEC_INIT_V8QI
,
16302 IX86_BUILTIN_VEC_EXT_V2DF
,
16303 IX86_BUILTIN_VEC_EXT_V2DI
,
16304 IX86_BUILTIN_VEC_EXT_V4SF
,
16305 IX86_BUILTIN_VEC_EXT_V4SI
,
16306 IX86_BUILTIN_VEC_EXT_V8HI
,
16307 IX86_BUILTIN_VEC_EXT_V2SI
,
16308 IX86_BUILTIN_VEC_EXT_V4HI
,
16309 IX86_BUILTIN_VEC_SET_V8HI
,
16310 IX86_BUILTIN_VEC_SET_V4HI
,
/* Table for the ix86 builtin decls, indexed by IX86_BUILTIN_* code.
   Entries for builtins not enabled by def_builtin remain NULL_TREE
   (static storage is zero-initialized).  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16318 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16319 * if the target_flags include one of MASK. Stores the function decl
16320 * in the ix86_builtins array.
16321 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16324 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16326 tree decl
= NULL_TREE
;
16328 if (mask
& target_flags
16329 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16331 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16333 ix86_builtins
[(int) code
] = decl
;
16339 /* Like def_builtin, but also marks the function decl "const". */
16342 def_builtin_const (int mask
, const char *name
, tree type
,
16343 enum ix86_builtins code
)
16345 tree decl
= def_builtin (mask
, name
, type
, code
);
16347 TREE_READONLY (decl
) = 1;
16351 /* Bits for builtin_description.flag. */
16353 /* Set when we don't support the comparison natively, and should
16354 swap_comparison in order to support it. */
16355 #define BUILTIN_DESC_SWAP_OPERANDS 1
16357 struct builtin_description
16359 const unsigned int mask
;
16360 const enum insn_code icode
;
16361 const char *const name
;
16362 const enum ix86_builtins code
;
16363 const enum rtx_code comparison
;
16364 const unsigned int flag
;
16367 static const struct builtin_description bdesc_comi
[] =
16369 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16370 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16371 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16372 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16373 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16374 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16375 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16376 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16377 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16378 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16379 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16380 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16381 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16382 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16383 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16384 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16385 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16386 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16387 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16388 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16389 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16390 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16391 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16392 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16395 static const struct builtin_description bdesc_2arg
[] =
16398 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16399 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16400 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16401 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16402 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16403 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16404 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16405 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16407 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16408 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16409 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16410 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16411 BUILTIN_DESC_SWAP_OPERANDS
},
16412 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16413 BUILTIN_DESC_SWAP_OPERANDS
},
16414 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16415 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16416 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16417 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16418 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16419 BUILTIN_DESC_SWAP_OPERANDS
},
16420 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16421 BUILTIN_DESC_SWAP_OPERANDS
},
16422 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16423 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16424 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16425 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16426 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16427 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16428 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16429 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16430 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16431 BUILTIN_DESC_SWAP_OPERANDS
},
16432 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16433 BUILTIN_DESC_SWAP_OPERANDS
},
16434 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16436 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16437 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16438 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16439 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16441 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16442 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16443 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16444 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16446 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16447 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16448 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16449 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16450 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16453 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16454 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16455 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16456 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16457 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16458 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16459 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16460 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16462 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16463 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16464 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16465 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16466 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16467 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16468 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16469 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16471 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16472 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16473 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16475 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16476 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16477 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16478 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16480 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16481 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16483 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16484 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16485 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16486 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16487 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16488 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16490 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16491 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16492 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16493 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16495 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16496 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16497 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16498 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16499 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16500 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16503 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16504 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16505 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16507 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16508 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16509 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16511 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16512 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16513 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16514 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16515 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16516 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16518 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16519 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16520 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16521 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16522 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16523 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16525 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16526 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16527 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16528 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16530 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16531 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16534 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16535 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16536 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16537 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16538 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16539 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16540 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16541 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16543 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16544 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16545 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16546 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16547 BUILTIN_DESC_SWAP_OPERANDS
},
16548 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16549 BUILTIN_DESC_SWAP_OPERANDS
},
16550 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16551 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16552 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16553 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16554 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16555 BUILTIN_DESC_SWAP_OPERANDS
},
16556 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16557 BUILTIN_DESC_SWAP_OPERANDS
},
16558 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16559 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16560 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16561 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16562 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16563 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16564 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16565 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16566 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16568 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16569 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16570 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16571 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16573 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16574 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16575 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16576 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16578 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16579 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16580 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16583 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16584 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16585 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16586 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16587 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16588 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16589 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16590 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16592 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16593 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16594 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16595 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16596 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16597 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16598 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16599 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16601 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16602 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16604 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16605 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16606 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16607 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16609 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16610 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16612 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16613 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16614 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16615 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16616 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16617 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16619 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16620 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16621 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16622 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16624 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16625 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16626 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16627 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16628 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16629 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16630 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16631 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16633 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16634 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16635 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16637 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16638 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16640 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16641 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16643 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16644 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16645 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16647 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16648 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16649 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16651 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16652 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16654 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16656 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16657 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16658 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16659 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16662 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16663 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16664 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16665 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16666 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16667 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16670 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16671 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16672 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16673 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16674 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16675 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16676 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16677 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16678 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16679 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16680 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16681 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16682 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16683 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16684 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16685 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16686 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16687 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16688 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16689 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16690 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16691 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16692 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16693 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16696 static const struct builtin_description bdesc_1arg
[] =
16698 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16699 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16701 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16702 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16703 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16705 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16706 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16707 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16708 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16709 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16710 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16712 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16713 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16715 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16717 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16718 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16720 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16721 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16722 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16723 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16724 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16726 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16728 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16729 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16730 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16731 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16733 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16734 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16735 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16738 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16739 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16742 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16743 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16744 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16745 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16746 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16747 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16751 ix86_init_builtins (void)
16754 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
16761 ix86_init_mmx_sse_builtins (void)
16763 const struct builtin_description
* d
;
16766 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16767 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16768 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16769 tree V2DI_type_node
16770 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16771 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16772 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16773 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16774 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16775 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16776 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16778 tree pchar_type_node
= build_pointer_type (char_type_node
);
16779 tree pcchar_type_node
= build_pointer_type (
16780 build_type_variant (char_type_node
, 1, 0));
16781 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16782 tree pcfloat_type_node
= build_pointer_type (
16783 build_type_variant (float_type_node
, 1, 0));
16784 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16785 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16786 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16789 tree int_ftype_v4sf_v4sf
16790 = build_function_type_list (integer_type_node
,
16791 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16792 tree v4si_ftype_v4sf_v4sf
16793 = build_function_type_list (V4SI_type_node
,
16794 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16795 /* MMX/SSE/integer conversions. */
16796 tree int_ftype_v4sf
16797 = build_function_type_list (integer_type_node
,
16798 V4SF_type_node
, NULL_TREE
);
16799 tree int64_ftype_v4sf
16800 = build_function_type_list (long_long_integer_type_node
,
16801 V4SF_type_node
, NULL_TREE
);
16802 tree int_ftype_v8qi
16803 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16804 tree v4sf_ftype_v4sf_int
16805 = build_function_type_list (V4SF_type_node
,
16806 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16807 tree v4sf_ftype_v4sf_int64
16808 = build_function_type_list (V4SF_type_node
,
16809 V4SF_type_node
, long_long_integer_type_node
,
16811 tree v4sf_ftype_v4sf_v2si
16812 = build_function_type_list (V4SF_type_node
,
16813 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16815 /* Miscellaneous. */
16816 tree v8qi_ftype_v4hi_v4hi
16817 = build_function_type_list (V8QI_type_node
,
16818 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16819 tree v4hi_ftype_v2si_v2si
16820 = build_function_type_list (V4HI_type_node
,
16821 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16822 tree v4sf_ftype_v4sf_v4sf_int
16823 = build_function_type_list (V4SF_type_node
,
16824 V4SF_type_node
, V4SF_type_node
,
16825 integer_type_node
, NULL_TREE
);
16826 tree v2si_ftype_v4hi_v4hi
16827 = build_function_type_list (V2SI_type_node
,
16828 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16829 tree v4hi_ftype_v4hi_int
16830 = build_function_type_list (V4HI_type_node
,
16831 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16832 tree v4hi_ftype_v4hi_di
16833 = build_function_type_list (V4HI_type_node
,
16834 V4HI_type_node
, long_long_unsigned_type_node
,
16836 tree v2si_ftype_v2si_di
16837 = build_function_type_list (V2SI_type_node
,
16838 V2SI_type_node
, long_long_unsigned_type_node
,
16840 tree void_ftype_void
16841 = build_function_type (void_type_node
, void_list_node
);
16842 tree void_ftype_unsigned
16843 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16844 tree void_ftype_unsigned_unsigned
16845 = build_function_type_list (void_type_node
, unsigned_type_node
,
16846 unsigned_type_node
, NULL_TREE
);
16847 tree void_ftype_pcvoid_unsigned_unsigned
16848 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16849 unsigned_type_node
, unsigned_type_node
,
16851 tree unsigned_ftype_void
16852 = build_function_type (unsigned_type_node
, void_list_node
);
16853 tree v2si_ftype_v4sf
16854 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16855 /* Loads/stores. */
16856 tree void_ftype_v8qi_v8qi_pchar
16857 = build_function_type_list (void_type_node
,
16858 V8QI_type_node
, V8QI_type_node
,
16859 pchar_type_node
, NULL_TREE
);
16860 tree v4sf_ftype_pcfloat
16861 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16862 /* @@@ the type is bogus */
16863 tree v4sf_ftype_v4sf_pv2si
16864 = build_function_type_list (V4SF_type_node
,
16865 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16866 tree void_ftype_pv2si_v4sf
16867 = build_function_type_list (void_type_node
,
16868 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16869 tree void_ftype_pfloat_v4sf
16870 = build_function_type_list (void_type_node
,
16871 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16872 tree void_ftype_pdi_di
16873 = build_function_type_list (void_type_node
,
16874 pdi_type_node
, long_long_unsigned_type_node
,
16876 tree void_ftype_pv2di_v2di
16877 = build_function_type_list (void_type_node
,
16878 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16879 /* Normal vector unops. */
16880 tree v4sf_ftype_v4sf
16881 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16882 tree v16qi_ftype_v16qi
16883 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16884 tree v8hi_ftype_v8hi
16885 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16886 tree v4si_ftype_v4si
16887 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16888 tree v8qi_ftype_v8qi
16889 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16890 tree v4hi_ftype_v4hi
16891 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16893 /* Normal vector binops. */
16894 tree v4sf_ftype_v4sf_v4sf
16895 = build_function_type_list (V4SF_type_node
,
16896 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16897 tree v8qi_ftype_v8qi_v8qi
16898 = build_function_type_list (V8QI_type_node
,
16899 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16900 tree v4hi_ftype_v4hi_v4hi
16901 = build_function_type_list (V4HI_type_node
,
16902 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16903 tree v2si_ftype_v2si_v2si
16904 = build_function_type_list (V2SI_type_node
,
16905 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16906 tree di_ftype_di_di
16907 = build_function_type_list (long_long_unsigned_type_node
,
16908 long_long_unsigned_type_node
,
16909 long_long_unsigned_type_node
, NULL_TREE
);
16911 tree di_ftype_di_di_int
16912 = build_function_type_list (long_long_unsigned_type_node
,
16913 long_long_unsigned_type_node
,
16914 long_long_unsigned_type_node
,
16915 integer_type_node
, NULL_TREE
);
16917 tree v2si_ftype_v2sf
16918 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16919 tree v2sf_ftype_v2si
16920 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16921 tree v2si_ftype_v2si
16922 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16923 tree v2sf_ftype_v2sf
16924 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16925 tree v2sf_ftype_v2sf_v2sf
16926 = build_function_type_list (V2SF_type_node
,
16927 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16928 tree v2si_ftype_v2sf_v2sf
16929 = build_function_type_list (V2SI_type_node
,
16930 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16931 tree pint_type_node
= build_pointer_type (integer_type_node
);
16932 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16933 tree pcdouble_type_node
= build_pointer_type (
16934 build_type_variant (double_type_node
, 1, 0));
16935 tree int_ftype_v2df_v2df
16936 = build_function_type_list (integer_type_node
,
16937 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16939 tree void_ftype_pcvoid
16940 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16941 tree v4sf_ftype_v4si
16942 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16943 tree v4si_ftype_v4sf
16944 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16945 tree v2df_ftype_v4si
16946 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16947 tree v4si_ftype_v2df
16948 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16949 tree v2si_ftype_v2df
16950 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16951 tree v4sf_ftype_v2df
16952 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16953 tree v2df_ftype_v2si
16954 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16955 tree v2df_ftype_v4sf
16956 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16957 tree int_ftype_v2df
16958 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16959 tree int64_ftype_v2df
16960 = build_function_type_list (long_long_integer_type_node
,
16961 V2DF_type_node
, NULL_TREE
);
16962 tree v2df_ftype_v2df_int
16963 = build_function_type_list (V2DF_type_node
,
16964 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16965 tree v2df_ftype_v2df_int64
16966 = build_function_type_list (V2DF_type_node
,
16967 V2DF_type_node
, long_long_integer_type_node
,
16969 tree v4sf_ftype_v4sf_v2df
16970 = build_function_type_list (V4SF_type_node
,
16971 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16972 tree v2df_ftype_v2df_v4sf
16973 = build_function_type_list (V2DF_type_node
,
16974 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16975 tree v2df_ftype_v2df_v2df_int
16976 = build_function_type_list (V2DF_type_node
,
16977 V2DF_type_node
, V2DF_type_node
,
16980 tree v2df_ftype_v2df_pcdouble
16981 = build_function_type_list (V2DF_type_node
,
16982 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16983 tree void_ftype_pdouble_v2df
16984 = build_function_type_list (void_type_node
,
16985 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16986 tree void_ftype_pint_int
16987 = build_function_type_list (void_type_node
,
16988 pint_type_node
, integer_type_node
, NULL_TREE
);
16989 tree void_ftype_v16qi_v16qi_pchar
16990 = build_function_type_list (void_type_node
,
16991 V16QI_type_node
, V16QI_type_node
,
16992 pchar_type_node
, NULL_TREE
);
16993 tree v2df_ftype_pcdouble
16994 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16995 tree v2df_ftype_v2df_v2df
16996 = build_function_type_list (V2DF_type_node
,
16997 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16998 tree v16qi_ftype_v16qi_v16qi
16999 = build_function_type_list (V16QI_type_node
,
17000 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17001 tree v8hi_ftype_v8hi_v8hi
17002 = build_function_type_list (V8HI_type_node
,
17003 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17004 tree v4si_ftype_v4si_v4si
17005 = build_function_type_list (V4SI_type_node
,
17006 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17007 tree v2di_ftype_v2di_v2di
17008 = build_function_type_list (V2DI_type_node
,
17009 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17010 tree v2di_ftype_v2df_v2df
17011 = build_function_type_list (V2DI_type_node
,
17012 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17013 tree v2df_ftype_v2df
17014 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17015 tree v2di_ftype_v2di_int
17016 = build_function_type_list (V2DI_type_node
,
17017 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17018 tree v2di_ftype_v2di_v2di_int
17019 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17020 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17021 tree v4si_ftype_v4si_int
17022 = build_function_type_list (V4SI_type_node
,
17023 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17024 tree v8hi_ftype_v8hi_int
17025 = build_function_type_list (V8HI_type_node
,
17026 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17027 tree v8hi_ftype_v8hi_v2di
17028 = build_function_type_list (V8HI_type_node
,
17029 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
17030 tree v4si_ftype_v4si_v2di
17031 = build_function_type_list (V4SI_type_node
,
17032 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
17033 tree v4si_ftype_v8hi_v8hi
17034 = build_function_type_list (V4SI_type_node
,
17035 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17036 tree di_ftype_v8qi_v8qi
17037 = build_function_type_list (long_long_unsigned_type_node
,
17038 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17039 tree di_ftype_v2si_v2si
17040 = build_function_type_list (long_long_unsigned_type_node
,
17041 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17042 tree v2di_ftype_v16qi_v16qi
17043 = build_function_type_list (V2DI_type_node
,
17044 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17045 tree v2di_ftype_v4si_v4si
17046 = build_function_type_list (V2DI_type_node
,
17047 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17048 tree int_ftype_v16qi
17049 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17050 tree v16qi_ftype_pcchar
17051 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17052 tree void_ftype_pchar_v16qi
17053 = build_function_type_list (void_type_node
,
17054 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17056 tree v2di_ftype_v2di_unsigned_unsigned
17057 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17058 unsigned_type_node
, unsigned_type_node
,
17060 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17061 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17062 unsigned_type_node
, unsigned_type_node
,
17064 tree v2di_ftype_v2di_v16qi
17065 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17069 tree float128_type
;
17072 /* The __float80 type. */
17073 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17074 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17078 /* The __float80 type. */
17079 float80_type
= make_node (REAL_TYPE
);
17080 TYPE_PRECISION (float80_type
) = 80;
17081 layout_type (float80_type
);
17082 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
17087 float128_type
= make_node (REAL_TYPE
);
17088 TYPE_PRECISION (float128_type
) = 128;
17089 layout_type (float128_type
);
17090 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
17093 /* Add all builtins that are more or less simple operations on two
17095 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17097 /* Use one of the operands; the target can have a different mode for
17098 mask-generating compares. */
17099 enum machine_mode mode
;
17104 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17109 type
= v16qi_ftype_v16qi_v16qi
;
17112 type
= v8hi_ftype_v8hi_v8hi
;
17115 type
= v4si_ftype_v4si_v4si
;
17118 type
= v2di_ftype_v2di_v2di
;
17121 type
= v2df_ftype_v2df_v2df
;
17124 type
= v4sf_ftype_v4sf_v4sf
;
17127 type
= v8qi_ftype_v8qi_v8qi
;
17130 type
= v4hi_ftype_v4hi_v4hi
;
17133 type
= v2si_ftype_v2si_v2si
;
17136 type
= di_ftype_di_di
;
17140 gcc_unreachable ();
17143 /* Override for comparisons. */
17144 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17145 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17146 type
= v4si_ftype_v4sf_v4sf
;
17148 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17149 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17150 type
= v2di_ftype_v2df_v2df
;
17152 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17155 /* Add all builtins that are more or less simple operations on 1 operand. */
17156 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17158 enum machine_mode mode
;
17163 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17168 type
= v16qi_ftype_v16qi
;
17171 type
= v8hi_ftype_v8hi
;
17174 type
= v4si_ftype_v4si
;
17177 type
= v2df_ftype_v2df
;
17180 type
= v4sf_ftype_v4sf
;
17183 type
= v8qi_ftype_v8qi
;
17186 type
= v4hi_ftype_v4hi
;
17189 type
= v2si_ftype_v2si
;
17196 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17199 /* Add the remaining MMX insns with somewhat more complicated types. */
17200 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
17201 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
17202 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
17203 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
17205 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
17206 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
17207 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
17209 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17210 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17212 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17213 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17215 /* comi/ucomi insns. */
17216 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17217 if (d
->mask
== MASK_SSE2
)
17218 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17220 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17222 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17223 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17224 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17226 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17227 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17228 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17229 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17230 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17231 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17232 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17233 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17234 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17235 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17236 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17238 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17240 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17241 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17243 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17244 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17245 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17246 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17248 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17249 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17250 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17251 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17253 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17255 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17257 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17258 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17259 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17260 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17261 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17262 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17264 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17266 /* Original 3DNow! */
17267 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17268 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17269 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17270 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17271 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17272 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17273 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17274 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17275 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17276 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17277 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17278 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17279 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17280 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17281 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17282 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17283 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17284 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17285 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17286 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17288 /* 3DNow! extension as used in the Athlon CPU. */
17289 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17290 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17291 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17292 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17293 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17294 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17297 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17299 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17300 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17302 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17303 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17305 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17306 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17307 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17308 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17309 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17311 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17312 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17313 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17314 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17316 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17317 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17319 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17321 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17322 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17324 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17325 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17326 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17327 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17328 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17330 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17332 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17333 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17334 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17335 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17337 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17338 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17339 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17341 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17342 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17343 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17344 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17346 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17347 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17348 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17350 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17351 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17353 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17354 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17356 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17357 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17358 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17360 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17361 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17362 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17364 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17365 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17367 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17368 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17369 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17370 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17372 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17373 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17374 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17375 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17377 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17378 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17380 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17382 /* Prescott New Instructions. */
17383 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17384 void_ftype_pcvoid_unsigned_unsigned
,
17385 IX86_BUILTIN_MONITOR
);
17386 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17387 void_ftype_unsigned_unsigned
,
17388 IX86_BUILTIN_MWAIT
);
17389 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17390 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17393 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17394 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17395 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17396 IX86_BUILTIN_PALIGNR
);
17398 /* AMDFAM10 SSE4A New built-ins */
17399 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17400 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17401 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17402 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17403 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17404 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17405 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17406 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17407 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17408 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17409 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17410 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17412 /* Access to the vec_init patterns. */
17413 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17414 integer_type_node
, NULL_TREE
);
17415 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17416 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17418 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17419 short_integer_type_node
,
17420 short_integer_type_node
,
17421 short_integer_type_node
, NULL_TREE
);
17422 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17423 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17425 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17426 char_type_node
, char_type_node
,
17427 char_type_node
, char_type_node
,
17428 char_type_node
, char_type_node
,
17429 char_type_node
, NULL_TREE
);
17430 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17431 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17433 /* Access to the vec_extract patterns. */
17434 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17435 integer_type_node
, NULL_TREE
);
17436 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17437 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17439 ftype
= build_function_type_list (long_long_integer_type_node
,
17440 V2DI_type_node
, integer_type_node
,
17442 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17443 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17445 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17446 integer_type_node
, NULL_TREE
);
17447 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17448 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17450 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17451 integer_type_node
, NULL_TREE
);
17452 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17453 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17455 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17456 integer_type_node
, NULL_TREE
);
17457 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17458 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17460 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17461 integer_type_node
, NULL_TREE
);
17462 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17463 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17465 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17466 integer_type_node
, NULL_TREE
);
17467 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17468 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17470 /* Access to the vec_set patterns. */
17471 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17473 integer_type_node
, NULL_TREE
);
17474 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17475 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17477 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17479 integer_type_node
, NULL_TREE
);
17480 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17481 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17484 /* Errors in the source file can cause expand_expr to return const0_rtx
17485 where we expect a vector. To avoid crashing, use one of the vector
17486 clear instructions. */
17488 safe_vector_operand (rtx x
, enum machine_mode mode
)
17490 if (x
== const0_rtx
)
17491 x
= CONST0_RTX (mode
);
17495 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17498 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17501 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17502 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17503 rtx op0
= expand_normal (arg0
);
17504 rtx op1
= expand_normal (arg1
);
17505 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17506 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17507 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17509 if (VECTOR_MODE_P (mode0
))
17510 op0
= safe_vector_operand (op0
, mode0
);
17511 if (VECTOR_MODE_P (mode1
))
17512 op1
= safe_vector_operand (op1
, mode1
);
17514 if (optimize
|| !target
17515 || GET_MODE (target
) != tmode
17516 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17517 target
= gen_reg_rtx (tmode
);
17519 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17521 rtx x
= gen_reg_rtx (V4SImode
);
17522 emit_insn (gen_sse2_loadd (x
, op1
));
17523 op1
= gen_lowpart (TImode
, x
);
17526 /* The insn must want input operands in the same modes as the
17528 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17529 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17531 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17532 op0
= copy_to_mode_reg (mode0
, op0
);
17533 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17534 op1
= copy_to_mode_reg (mode1
, op1
);
17536 /* ??? Using ix86_fixup_binary_operands is problematic when
17537 we've got mismatched modes. Fake it. */
17543 if (tmode
== mode0
&& tmode
== mode1
)
17545 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17549 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17551 op0
= force_reg (mode0
, op0
);
17552 op1
= force_reg (mode1
, op1
);
17553 target
= gen_reg_rtx (tmode
);
17556 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17563 /* Subroutine of ix86_expand_builtin to take care of stores. */
17566 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17569 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17570 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17571 rtx op0
= expand_normal (arg0
);
17572 rtx op1
= expand_normal (arg1
);
17573 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17574 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17576 if (VECTOR_MODE_P (mode1
))
17577 op1
= safe_vector_operand (op1
, mode1
);
17579 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17580 op1
= copy_to_mode_reg (mode1
, op1
);
17582 pat
= GEN_FCN (icode
) (op0
, op1
);
17588 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17591 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17592 rtx target
, int do_load
)
17595 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17596 rtx op0
= expand_normal (arg0
);
17597 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17598 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17600 if (optimize
|| !target
17601 || GET_MODE (target
) != tmode
17602 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17603 target
= gen_reg_rtx (tmode
);
17605 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17608 if (VECTOR_MODE_P (mode0
))
17609 op0
= safe_vector_operand (op0
, mode0
);
17611 if ((optimize
&& !register_operand (op0
, mode0
))
17612 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17613 op0
= copy_to_mode_reg (mode0
, op0
);
17616 pat
= GEN_FCN (icode
) (target
, op0
);
17623 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17624 sqrtss, rsqrtss, rcpss. */
17627 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17630 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17631 rtx op1
, op0
= expand_normal (arg0
);
17632 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17633 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17635 if (optimize
|| !target
17636 || GET_MODE (target
) != tmode
17637 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17638 target
= gen_reg_rtx (tmode
);
17640 if (VECTOR_MODE_P (mode0
))
17641 op0
= safe_vector_operand (op0
, mode0
);
17643 if ((optimize
&& !register_operand (op0
, mode0
))
17644 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17645 op0
= copy_to_mode_reg (mode0
, op0
);
17648 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17649 op1
= copy_to_mode_reg (mode0
, op1
);
17651 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17658 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17661 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17665 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17666 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17667 rtx op0
= expand_normal (arg0
);
17668 rtx op1
= expand_normal (arg1
);
17670 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17671 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17672 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17673 enum rtx_code comparison
= d
->comparison
;
17675 if (VECTOR_MODE_P (mode0
))
17676 op0
= safe_vector_operand (op0
, mode0
);
17677 if (VECTOR_MODE_P (mode1
))
17678 op1
= safe_vector_operand (op1
, mode1
);
17680 /* Swap operands if we have a comparison that isn't available in
17682 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17684 rtx tmp
= gen_reg_rtx (mode1
);
17685 emit_move_insn (tmp
, op1
);
17690 if (optimize
|| !target
17691 || GET_MODE (target
) != tmode
17692 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17693 target
= gen_reg_rtx (tmode
);
17695 if ((optimize
&& !register_operand (op0
, mode0
))
17696 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17697 op0
= copy_to_mode_reg (mode0
, op0
);
17698 if ((optimize
&& !register_operand (op1
, mode1
))
17699 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17700 op1
= copy_to_mode_reg (mode1
, op1
);
17702 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17703 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17710 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17713 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17717 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17718 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17719 rtx op0
= expand_normal (arg0
);
17720 rtx op1
= expand_normal (arg1
);
17722 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17723 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17724 enum rtx_code comparison
= d
->comparison
;
17726 if (VECTOR_MODE_P (mode0
))
17727 op0
= safe_vector_operand (op0
, mode0
);
17728 if (VECTOR_MODE_P (mode1
))
17729 op1
= safe_vector_operand (op1
, mode1
);
17731 /* Swap operands if we have a comparison that isn't available in
17733 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17740 target
= gen_reg_rtx (SImode
);
17741 emit_move_insn (target
, const0_rtx
);
17742 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17744 if ((optimize
&& !register_operand (op0
, mode0
))
17745 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17746 op0
= copy_to_mode_reg (mode0
, op0
);
17747 if ((optimize
&& !register_operand (op1
, mode1
))
17748 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17749 op1
= copy_to_mode_reg (mode1
, op1
);
17751 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17752 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17756 emit_insn (gen_rtx_SET (VOIDmode
,
17757 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17758 gen_rtx_fmt_ee (comparison
, QImode
,
17762 return SUBREG_REG (target
);
17765 /* Return the integer constant in ARG. Constrain it to be in the range
17766 of the subparts of VEC_TYPE; issue an error if not. */
17769 get_element_number (tree vec_type
, tree arg
)
17771 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17773 if (!host_integerp (arg
, 1)
17774 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17776 error ("selector must be an integer constant in the range 0..%wi", max
);
17783 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17784 ix86_expand_vector_init. We DO have language-level syntax for this, in
17785 the form of (type){ init-list }. Except that since we can't place emms
17786 instructions from inside the compiler, we can't allow the use of MMX
17787 registers unless the user explicitly asks for it. So we do *not* define
17788 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17789 we have builtins invoked by mmintrin.h that gives us license to emit
17790 these sorts of instructions. */
17793 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17795 enum machine_mode tmode
= TYPE_MODE (type
);
17796 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17797 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17798 rtvec v
= rtvec_alloc (n_elt
);
17800 gcc_assert (VECTOR_MODE_P (tmode
));
17801 gcc_assert (call_expr_nargs (exp
) == n_elt
);
17803 for (i
= 0; i
< n_elt
; ++i
)
17805 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17806 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17809 if (!target
|| !register_operand (target
, tmode
))
17810 target
= gen_reg_rtx (tmode
);
17812 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17816 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17817 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17818 had a language-level syntax for referencing vector elements. */
17821 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17823 enum machine_mode tmode
, mode0
;
17828 arg0
= CALL_EXPR_ARG (exp
, 0);
17829 arg1
= CALL_EXPR_ARG (exp
, 1);
17831 op0
= expand_normal (arg0
);
17832 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17834 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17835 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17836 gcc_assert (VECTOR_MODE_P (mode0
));
17838 op0
= force_reg (mode0
, op0
);
17840 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17841 target
= gen_reg_rtx (tmode
);
17843 ix86_expand_vector_extract (true, target
, op0
, elt
);
17848 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17849 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17850 a language-level syntax for referencing vector elements. */
17853 ix86_expand_vec_set_builtin (tree exp
)
17855 enum machine_mode tmode
, mode1
;
17856 tree arg0
, arg1
, arg2
;
17860 arg0
= CALL_EXPR_ARG (exp
, 0);
17861 arg1
= CALL_EXPR_ARG (exp
, 1);
17862 arg2
= CALL_EXPR_ARG (exp
, 2);
17864 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17865 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17866 gcc_assert (VECTOR_MODE_P (tmode
));
17868 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17869 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17870 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17872 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17873 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17875 op0
= force_reg (tmode
, op0
);
17876 op1
= force_reg (mode1
, op1
);
17878 ix86_expand_vector_set (true, op0
, op1
, elt
);
17883 /* Expand an expression EXP that calls a built-in function,
17884 with result going to TARGET if that's convenient
17885 (and in mode MODE if that's convenient).
17886 SUBTARGET may be used as the target for computing one of EXP's operands.
17887 IGNORE is nonzero if the value is to be ignored. */
17890 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17891 enum machine_mode mode ATTRIBUTE_UNUSED
,
17892 int ignore ATTRIBUTE_UNUSED
)
17894 const struct builtin_description
*d
;
17896 enum insn_code icode
;
17897 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17898 tree arg0
, arg1
, arg2
, arg3
;
17899 rtx op0
, op1
, op2
, op3
, pat
;
17900 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17901 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17905 case IX86_BUILTIN_EMMS
:
17906 emit_insn (gen_mmx_emms ());
17909 case IX86_BUILTIN_SFENCE
:
17910 emit_insn (gen_sse_sfence ());
17913 case IX86_BUILTIN_MASKMOVQ
:
17914 case IX86_BUILTIN_MASKMOVDQU
:
17915 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17916 ? CODE_FOR_mmx_maskmovq
17917 : CODE_FOR_sse2_maskmovdqu
);
17918 /* Note the arg order is different from the operand order. */
17919 arg1
= CALL_EXPR_ARG (exp
, 0);
17920 arg2
= CALL_EXPR_ARG (exp
, 1);
17921 arg0
= CALL_EXPR_ARG (exp
, 2);
17922 op0
= expand_normal (arg0
);
17923 op1
= expand_normal (arg1
);
17924 op2
= expand_normal (arg2
);
17925 mode0
= insn_data
[icode
].operand
[0].mode
;
17926 mode1
= insn_data
[icode
].operand
[1].mode
;
17927 mode2
= insn_data
[icode
].operand
[2].mode
;
17929 op0
= force_reg (Pmode
, op0
);
17930 op0
= gen_rtx_MEM (mode1
, op0
);
17932 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17933 op0
= copy_to_mode_reg (mode0
, op0
);
17934 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17935 op1
= copy_to_mode_reg (mode1
, op1
);
17936 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17937 op2
= copy_to_mode_reg (mode2
, op2
);
17938 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17944 case IX86_BUILTIN_SQRTSS
:
17945 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17946 case IX86_BUILTIN_RSQRTSS
:
17947 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17948 case IX86_BUILTIN_RCPSS
:
17949 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17951 case IX86_BUILTIN_LOADUPS
:
17952 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17954 case IX86_BUILTIN_STOREUPS
:
17955 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17957 case IX86_BUILTIN_LOADHPS
:
17958 case IX86_BUILTIN_LOADLPS
:
17959 case IX86_BUILTIN_LOADHPD
:
17960 case IX86_BUILTIN_LOADLPD
:
17961 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17962 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17963 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17964 : CODE_FOR_sse2_loadlpd
);
17965 arg0
= CALL_EXPR_ARG (exp
, 0);
17966 arg1
= CALL_EXPR_ARG (exp
, 1);
17967 op0
= expand_normal (arg0
);
17968 op1
= expand_normal (arg1
);
17969 tmode
= insn_data
[icode
].operand
[0].mode
;
17970 mode0
= insn_data
[icode
].operand
[1].mode
;
17971 mode1
= insn_data
[icode
].operand
[2].mode
;
17973 op0
= force_reg (mode0
, op0
);
17974 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17975 if (optimize
|| target
== 0
17976 || GET_MODE (target
) != tmode
17977 || !register_operand (target
, tmode
))
17978 target
= gen_reg_rtx (tmode
);
17979 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17985 case IX86_BUILTIN_STOREHPS
:
17986 case IX86_BUILTIN_STORELPS
:
17987 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17988 : CODE_FOR_sse_storelps
);
17989 arg0
= CALL_EXPR_ARG (exp
, 0);
17990 arg1
= CALL_EXPR_ARG (exp
, 1);
17991 op0
= expand_normal (arg0
);
17992 op1
= expand_normal (arg1
);
17993 mode0
= insn_data
[icode
].operand
[0].mode
;
17994 mode1
= insn_data
[icode
].operand
[1].mode
;
17996 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17997 op1
= force_reg (mode1
, op1
);
17999 pat
= GEN_FCN (icode
) (op0
, op1
);
18005 case IX86_BUILTIN_MOVNTPS
:
18006 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
18007 case IX86_BUILTIN_MOVNTQ
:
18008 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
18010 case IX86_BUILTIN_LDMXCSR
:
18011 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
18012 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18013 emit_move_insn (target
, op0
);
18014 emit_insn (gen_sse_ldmxcsr (target
));
18017 case IX86_BUILTIN_STMXCSR
:
18018 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18019 emit_insn (gen_sse_stmxcsr (target
));
18020 return copy_to_mode_reg (SImode
, target
);
18022 case IX86_BUILTIN_SHUFPS
:
18023 case IX86_BUILTIN_SHUFPD
:
18024 icode
= (fcode
== IX86_BUILTIN_SHUFPS
18025 ? CODE_FOR_sse_shufps
18026 : CODE_FOR_sse2_shufpd
);
18027 arg0
= CALL_EXPR_ARG (exp
, 0);
18028 arg1
= CALL_EXPR_ARG (exp
, 1);
18029 arg2
= CALL_EXPR_ARG (exp
, 2);
18030 op0
= expand_normal (arg0
);
18031 op1
= expand_normal (arg1
);
18032 op2
= expand_normal (arg2
);
18033 tmode
= insn_data
[icode
].operand
[0].mode
;
18034 mode0
= insn_data
[icode
].operand
[1].mode
;
18035 mode1
= insn_data
[icode
].operand
[2].mode
;
18036 mode2
= insn_data
[icode
].operand
[3].mode
;
18038 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18039 op0
= copy_to_mode_reg (mode0
, op0
);
18040 if ((optimize
&& !register_operand (op1
, mode1
))
18041 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18042 op1
= copy_to_mode_reg (mode1
, op1
);
18043 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18045 /* @@@ better error message */
18046 error ("mask must be an immediate");
18047 return gen_reg_rtx (tmode
);
18049 if (optimize
|| target
== 0
18050 || GET_MODE (target
) != tmode
18051 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18052 target
= gen_reg_rtx (tmode
);
18053 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18059 case IX86_BUILTIN_PSHUFW
:
18060 case IX86_BUILTIN_PSHUFD
:
18061 case IX86_BUILTIN_PSHUFHW
:
18062 case IX86_BUILTIN_PSHUFLW
:
18063 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
18064 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
18065 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
18066 : CODE_FOR_mmx_pshufw
);
18067 arg0
= CALL_EXPR_ARG (exp
, 0);
18068 arg1
= CALL_EXPR_ARG (exp
, 1);
18069 op0
= expand_normal (arg0
);
18070 op1
= expand_normal (arg1
);
18071 tmode
= insn_data
[icode
].operand
[0].mode
;
18072 mode1
= insn_data
[icode
].operand
[1].mode
;
18073 mode2
= insn_data
[icode
].operand
[2].mode
;
18075 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18076 op0
= copy_to_mode_reg (mode1
, op0
);
18077 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18079 /* @@@ better error message */
18080 error ("mask must be an immediate");
18084 || GET_MODE (target
) != tmode
18085 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18086 target
= gen_reg_rtx (tmode
);
18087 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18093 case IX86_BUILTIN_PSLLDQI128
:
18094 case IX86_BUILTIN_PSRLDQI128
:
18095 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
18096 : CODE_FOR_sse2_lshrti3
);
18097 arg0
= CALL_EXPR_ARG (exp
, 0);
18098 arg1
= CALL_EXPR_ARG (exp
, 1);
18099 op0
= expand_normal (arg0
);
18100 op1
= expand_normal (arg1
);
18101 tmode
= insn_data
[icode
].operand
[0].mode
;
18102 mode1
= insn_data
[icode
].operand
[1].mode
;
18103 mode2
= insn_data
[icode
].operand
[2].mode
;
18105 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18107 op0
= copy_to_reg (op0
);
18108 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18110 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18112 error ("shift must be an immediate");
18115 target
= gen_reg_rtx (V2DImode
);
18116 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
18122 case IX86_BUILTIN_FEMMS
:
18123 emit_insn (gen_mmx_femms ());
18126 case IX86_BUILTIN_PAVGUSB
:
18127 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
18129 case IX86_BUILTIN_PF2ID
:
18130 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
18132 case IX86_BUILTIN_PFACC
:
18133 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
18135 case IX86_BUILTIN_PFADD
:
18136 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
18138 case IX86_BUILTIN_PFCMPEQ
:
18139 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
18141 case IX86_BUILTIN_PFCMPGE
:
18142 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
18144 case IX86_BUILTIN_PFCMPGT
:
18145 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
18147 case IX86_BUILTIN_PFMAX
:
18148 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
18150 case IX86_BUILTIN_PFMIN
:
18151 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
18153 case IX86_BUILTIN_PFMUL
:
18154 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
18156 case IX86_BUILTIN_PFRCP
:
18157 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
18159 case IX86_BUILTIN_PFRCPIT1
:
18160 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
18162 case IX86_BUILTIN_PFRCPIT2
:
18163 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
18165 case IX86_BUILTIN_PFRSQIT1
:
18166 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
18168 case IX86_BUILTIN_PFRSQRT
:
18169 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
18171 case IX86_BUILTIN_PFSUB
:
18172 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
18174 case IX86_BUILTIN_PFSUBR
:
18175 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
18177 case IX86_BUILTIN_PI2FD
:
18178 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
18180 case IX86_BUILTIN_PMULHRW
:
18181 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
18183 case IX86_BUILTIN_PF2IW
:
18184 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18186 case IX86_BUILTIN_PFNACC
:
18187 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18189 case IX86_BUILTIN_PFPNACC
:
18190 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18192 case IX86_BUILTIN_PI2FW
:
18193 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18195 case IX86_BUILTIN_PSWAPDSI
:
18196 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18198 case IX86_BUILTIN_PSWAPDSF
:
18199 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18201 case IX86_BUILTIN_SQRTSD
:
18202 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18203 case IX86_BUILTIN_LOADUPD
:
18204 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18205 case IX86_BUILTIN_STOREUPD
:
18206 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18208 case IX86_BUILTIN_MFENCE
:
18209 emit_insn (gen_sse2_mfence ());
18211 case IX86_BUILTIN_LFENCE
:
18212 emit_insn (gen_sse2_lfence ());
18215 case IX86_BUILTIN_CLFLUSH
:
18216 arg0
= CALL_EXPR_ARG (exp
, 0);
18217 op0
= expand_normal (arg0
);
18218 icode
= CODE_FOR_sse2_clflush
;
18219 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18220 op0
= copy_to_mode_reg (Pmode
, op0
);
18222 emit_insn (gen_sse2_clflush (op0
));
18225 case IX86_BUILTIN_MOVNTPD
:
18226 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18227 case IX86_BUILTIN_MOVNTDQ
:
18228 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18229 case IX86_BUILTIN_MOVNTI
:
18230 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18232 case IX86_BUILTIN_LOADDQU
:
18233 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18234 case IX86_BUILTIN_STOREDQU
:
18235 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18237 case IX86_BUILTIN_MONITOR
:
18238 arg0
= CALL_EXPR_ARG (exp
, 0);
18239 arg1
= CALL_EXPR_ARG (exp
, 1);
18240 arg2
= CALL_EXPR_ARG (exp
, 2);
18241 op0
= expand_normal (arg0
);
18242 op1
= expand_normal (arg1
);
18243 op2
= expand_normal (arg2
);
18245 op0
= copy_to_mode_reg (Pmode
, op0
);
18247 op1
= copy_to_mode_reg (SImode
, op1
);
18249 op2
= copy_to_mode_reg (SImode
, op2
);
18251 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18253 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18256 case IX86_BUILTIN_MWAIT
:
18257 arg0
= CALL_EXPR_ARG (exp
, 0);
18258 arg1
= CALL_EXPR_ARG (exp
, 1);
18259 op0
= expand_normal (arg0
);
18260 op1
= expand_normal (arg1
);
18262 op0
= copy_to_mode_reg (SImode
, op0
);
18264 op1
= copy_to_mode_reg (SImode
, op1
);
18265 emit_insn (gen_sse3_mwait (op0
, op1
));
18268 case IX86_BUILTIN_LDDQU
:
18269 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18272 case IX86_BUILTIN_PALIGNR
:
18273 case IX86_BUILTIN_PALIGNR128
:
18274 if (fcode
== IX86_BUILTIN_PALIGNR
)
18276 icode
= CODE_FOR_ssse3_palignrdi
;
18281 icode
= CODE_FOR_ssse3_palignrti
;
18284 arg0
= CALL_EXPR_ARG (exp
, 0);
18285 arg1
= CALL_EXPR_ARG (exp
, 1);
18286 arg2
= CALL_EXPR_ARG (exp
, 2);
18287 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18288 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18289 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18290 tmode
= insn_data
[icode
].operand
[0].mode
;
18291 mode1
= insn_data
[icode
].operand
[1].mode
;
18292 mode2
= insn_data
[icode
].operand
[2].mode
;
18293 mode3
= insn_data
[icode
].operand
[3].mode
;
18295 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18297 op0
= copy_to_reg (op0
);
18298 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18300 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18302 op1
= copy_to_reg (op1
);
18303 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18305 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18307 error ("shift must be an immediate");
18310 target
= gen_reg_rtx (mode
);
18311 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18318 case IX86_BUILTIN_MOVNTSD
:
18319 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18321 case IX86_BUILTIN_MOVNTSS
:
18322 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18324 case IX86_BUILTIN_INSERTQ
:
18325 case IX86_BUILTIN_EXTRQ
:
18326 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18327 ? CODE_FOR_sse4a_extrq
18328 : CODE_FOR_sse4a_insertq
);
18329 arg0
= CALL_EXPR_ARG (exp
, 0);
18330 arg1
= CALL_EXPR_ARG (exp
, 1);
18331 op0
= expand_normal (arg0
);
18332 op1
= expand_normal (arg1
);
18333 tmode
= insn_data
[icode
].operand
[0].mode
;
18334 mode1
= insn_data
[icode
].operand
[1].mode
;
18335 mode2
= insn_data
[icode
].operand
[2].mode
;
18336 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18337 op0
= copy_to_mode_reg (mode1
, op0
);
18338 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18339 op1
= copy_to_mode_reg (mode2
, op1
);
18340 if (optimize
|| target
== 0
18341 || GET_MODE (target
) != tmode
18342 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18343 target
= gen_reg_rtx (tmode
);
18344 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18350 case IX86_BUILTIN_EXTRQI
:
18351 icode
= CODE_FOR_sse4a_extrqi
;
18352 arg0
= CALL_EXPR_ARG (exp
, 0);
18353 arg1
= CALL_EXPR_ARG (exp
, 1);
18354 arg2
= CALL_EXPR_ARG (exp
, 2);
18355 op0
= expand_normal (arg0
);
18356 op1
= expand_normal (arg1
);
18357 op2
= expand_normal (arg2
);
18358 tmode
= insn_data
[icode
].operand
[0].mode
;
18359 mode1
= insn_data
[icode
].operand
[1].mode
;
18360 mode2
= insn_data
[icode
].operand
[2].mode
;
18361 mode3
= insn_data
[icode
].operand
[3].mode
;
18362 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18363 op0
= copy_to_mode_reg (mode1
, op0
);
18364 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18366 error ("index mask must be an immediate");
18367 return gen_reg_rtx (tmode
);
18369 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18371 error ("length mask must be an immediate");
18372 return gen_reg_rtx (tmode
);
18374 if (optimize
|| target
== 0
18375 || GET_MODE (target
) != tmode
18376 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18377 target
= gen_reg_rtx (tmode
);
18378 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18384 case IX86_BUILTIN_INSERTQI
:
18385 icode
= CODE_FOR_sse4a_insertqi
;
18386 arg0
= CALL_EXPR_ARG (exp
, 0);
18387 arg1
= CALL_EXPR_ARG (exp
, 1);
18388 arg2
= CALL_EXPR_ARG (exp
, 2);
18389 arg3
= CALL_EXPR_ARG (exp
, 3);
18390 op0
= expand_normal (arg0
);
18391 op1
= expand_normal (arg1
);
18392 op2
= expand_normal (arg2
);
18393 op3
= expand_normal (arg3
);
18394 tmode
= insn_data
[icode
].operand
[0].mode
;
18395 mode1
= insn_data
[icode
].operand
[1].mode
;
18396 mode2
= insn_data
[icode
].operand
[2].mode
;
18397 mode3
= insn_data
[icode
].operand
[3].mode
;
18398 mode4
= insn_data
[icode
].operand
[4].mode
;
18400 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18401 op0
= copy_to_mode_reg (mode1
, op0
);
18403 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18404 op1
= copy_to_mode_reg (mode2
, op1
);
18406 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18408 error ("index mask must be an immediate");
18409 return gen_reg_rtx (tmode
);
18411 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18413 error ("length mask must be an immediate");
18414 return gen_reg_rtx (tmode
);
18416 if (optimize
|| target
== 0
18417 || GET_MODE (target
) != tmode
18418 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18419 target
= gen_reg_rtx (tmode
);
18420 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18426 case IX86_BUILTIN_VEC_INIT_V2SI
:
18427 case IX86_BUILTIN_VEC_INIT_V4HI
:
18428 case IX86_BUILTIN_VEC_INIT_V8QI
:
18429 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18431 case IX86_BUILTIN_VEC_EXT_V2DF
:
18432 case IX86_BUILTIN_VEC_EXT_V2DI
:
18433 case IX86_BUILTIN_VEC_EXT_V4SF
:
18434 case IX86_BUILTIN_VEC_EXT_V4SI
:
18435 case IX86_BUILTIN_VEC_EXT_V8HI
:
18436 case IX86_BUILTIN_VEC_EXT_V2SI
:
18437 case IX86_BUILTIN_VEC_EXT_V4HI
:
18438 return ix86_expand_vec_ext_builtin (exp
, target
);
18440 case IX86_BUILTIN_VEC_SET_V8HI
:
18441 case IX86_BUILTIN_VEC_SET_V4HI
:
18442 return ix86_expand_vec_set_builtin (exp
);
18448 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18449 if (d
->code
== fcode
)
18451 /* Compares are treated specially. */
18452 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18453 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18454 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18455 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18456 return ix86_expand_sse_compare (d
, exp
, target
);
18458 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18461 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18462 if (d
->code
== fcode
)
18463 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18465 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18466 if (d
->code
== fcode
)
18467 return ix86_expand_sse_comi (d
, exp
, target
);
18469 gcc_unreachable ();
18472 /* Returns a function decl for a vectorized version of the builtin function
18473 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18474 if it is not available. */
18477 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18480 enum machine_mode in_mode
, out_mode
;
18483 if (TREE_CODE (type_out
) != VECTOR_TYPE
18484 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18487 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18488 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18489 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18490 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18494 case BUILT_IN_SQRT
:
18495 if (out_mode
== DFmode
&& out_n
== 2
18496 && in_mode
== DFmode
&& in_n
== 2)
18497 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18500 case BUILT_IN_SQRTF
:
18501 if (out_mode
== SFmode
&& out_n
== 4
18502 && in_mode
== SFmode
&& in_n
== 4)
18503 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18506 case BUILT_IN_LRINTF
:
18507 if (out_mode
== SImode
&& out_n
== 4
18508 && in_mode
== SFmode
&& in_n
== 4)
18509 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18519 /* Returns a decl of a function that implements conversion of the
18520 input vector of type TYPE, or NULL_TREE if it is not available. */
18523 ix86_builtin_conversion (enum tree_code code
, tree type
)
18525 if (TREE_CODE (type
) != VECTOR_TYPE
)
18531 switch (TYPE_MODE (type
))
18534 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
18539 case FIX_TRUNC_EXPR
:
18540 switch (TYPE_MODE (type
))
18543 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18553 /* Store OPERAND to the memory after reload is completed. This means
18554 that we can't easily use assign_stack_local. */
18556 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18560 gcc_assert (reload_completed
);
18561 if (TARGET_RED_ZONE
)
18563 result
= gen_rtx_MEM (mode
,
18564 gen_rtx_PLUS (Pmode
,
18566 GEN_INT (-RED_ZONE_SIZE
)));
18567 emit_move_insn (result
, operand
);
18569 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18575 operand
= gen_lowpart (DImode
, operand
);
18579 gen_rtx_SET (VOIDmode
,
18580 gen_rtx_MEM (DImode
,
18581 gen_rtx_PRE_DEC (DImode
,
18582 stack_pointer_rtx
)),
18586 gcc_unreachable ();
18588 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18597 split_di (&operand
, 1, operands
, operands
+ 1);
18599 gen_rtx_SET (VOIDmode
,
18600 gen_rtx_MEM (SImode
,
18601 gen_rtx_PRE_DEC (Pmode
,
18602 stack_pointer_rtx
)),
18605 gen_rtx_SET (VOIDmode
,
18606 gen_rtx_MEM (SImode
,
18607 gen_rtx_PRE_DEC (Pmode
,
18608 stack_pointer_rtx
)),
18613 /* Store HImodes as SImodes. */
18614 operand
= gen_lowpart (SImode
, operand
);
18618 gen_rtx_SET (VOIDmode
,
18619 gen_rtx_MEM (GET_MODE (operand
),
18620 gen_rtx_PRE_DEC (SImode
,
18621 stack_pointer_rtx
)),
18625 gcc_unreachable ();
18627 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18632 /* Free operand from the memory. */
18634 ix86_free_from_memory (enum machine_mode mode
)
18636 if (!TARGET_RED_ZONE
)
18640 if (mode
== DImode
|| TARGET_64BIT
)
18644 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18645 to pop or add instruction if registers are available. */
18646 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18647 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18652 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18653 QImode must go into class Q_REGS.
18654 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18655 movdf to do mem-to-mem moves through integer regs. */
18657 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18659 enum machine_mode mode
= GET_MODE (x
);
18661 /* We're only allowed to return a subclass of CLASS. Many of the
18662 following checks fail for NO_REGS, so eliminate that early. */
18663 if (class == NO_REGS
)
18666 /* All classes can load zeros. */
18667 if (x
== CONST0_RTX (mode
))
18670 /* Force constants into memory if we are loading a (nonzero) constant into
18671 an MMX or SSE register. This is because there are no MMX/SSE instructions
18672 to load from a constant. */
18674 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18677 /* Prefer SSE regs only, if we can use them for math. */
18678 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18679 return SSE_CLASS_P (class) ? class : NO_REGS
;
18681 /* Floating-point constants need more complex checks. */
18682 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18684 /* General regs can load everything. */
18685 if (reg_class_subset_p (class, GENERAL_REGS
))
18688 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18689 zero above. We only want to wind up preferring 80387 registers if
18690 we plan on doing computation with them. */
18692 && standard_80387_constant_p (x
))
18694 /* Limit class to non-sse. */
18695 if (class == FLOAT_SSE_REGS
)
18697 if (class == FP_TOP_SSE_REGS
)
18699 if (class == FP_SECOND_SSE_REGS
)
18700 return FP_SECOND_REG
;
18701 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18708 /* Generally when we see PLUS here, it's the function invariant
18709 (plus soft-fp const_int). Which can only be computed into general
18711 if (GET_CODE (x
) == PLUS
)
18712 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18714 /* QImode constants are easy to load, but non-constant QImode data
18715 must go into Q_REGS. */
18716 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18718 if (reg_class_subset_p (class, Q_REGS
))
18720 if (reg_class_subset_p (Q_REGS
, class))
18728 /* Discourage putting floating-point values in SSE registers unless
18729 SSE math is being used, and likewise for the 387 registers. */
18731 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18733 enum machine_mode mode
= GET_MODE (x
);
18735 /* Restrict the output reload class to the register bank that we are doing
18736 math on. If we would like not to return a subset of CLASS, reject this
18737 alternative: if reload cannot do this, it will still use its choice. */
18738 mode
= GET_MODE (x
);
18739 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18740 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18742 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18744 if (class == FP_TOP_SSE_REGS
)
18746 else if (class == FP_SECOND_SSE_REGS
)
18747 return FP_SECOND_REG
;
18749 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18755 /* If we are copying between general and FP registers, we need a memory
18756 location. The same is true for SSE and MMX registers.
18758 The macro can't work reliably when one of the CLASSES is class containing
18759 registers from multiple units (SSE, MMX, integer). We avoid this by never
18760 combining those units in single alternative in the machine description.
18761 Ensure that this constraint holds to avoid unexpected surprises.
18763 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18764 enforce these sanity checks. */
18767 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18768 enum machine_mode mode
, int strict
)
18770 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18771 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18772 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18773 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18774 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18775 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18777 gcc_assert (!strict
);
18781 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18784 /* ??? This is a lie. We do have moves between mmx/general, and for
18785 mmx/sse2. But by saying we need secondary memory we discourage the
18786 register allocator from using the mmx registers unless needed. */
18787 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18790 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18792 /* SSE1 doesn't have any direct moves from other classes. */
18796 /* If the target says that inter-unit moves are more expensive
18797 than moving through memory, then don't generate them. */
18798 if (!TARGET_INTER_UNIT_MOVES
)
18801 /* Between SSE and general, we have moves no larger than word size. */
18802 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18809 /* Return true if the registers in CLASS cannot represent the change from
18810 modes FROM to TO. */
18813 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18814 enum reg_class
class)
18819 /* x87 registers can't do subreg at all, as all values are reformatted
18820 to extended precision. */
18821 if (MAYBE_FLOAT_CLASS_P (class))
18824 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18826 /* Vector registers do not support QI or HImode loads. If we don't
18827 disallow a change to these modes, reload will assume it's ok to
18828 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18829 the vec_dupv4hi pattern. */
18830 if (GET_MODE_SIZE (from
) < 4)
18833 /* Vector registers do not support subreg with nonzero offsets, which
18834 are otherwise valid for integer registers. Since we can't see
18835 whether we have a nonzero offset from here, prohibit all
18836 nonparadoxical subregs changing size. */
18837 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18844 /* Return the cost of moving data from a register in class CLASS1 to
18845 one in class CLASS2.
18847 It is not required that the cost always equal 2 when FROM is the same as TO;
18848 on some machines it is expensive to move between registers if they are not
18849 general registers. */
18852 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18853 enum reg_class class2
)
18855 /* In case we require secondary memory, compute cost of the store followed
18856 by load. In order to avoid bad register allocation choices, we need
18857 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18859 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
18863 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18864 MEMORY_MOVE_COST (mode
, class1
, 1));
18865 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18866 MEMORY_MOVE_COST (mode
, class2
, 1));
18868 /* In case of copying from general_purpose_register we may emit multiple
18869 stores followed by single load causing memory size mismatch stall.
18870 Count this as arbitrarily high cost of 20. */
18871 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18874 /* In the case of FP/MMX moves, the registers actually overlap, and we
18875 have to switch modes in order to treat them differently. */
18876 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18877 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18883 /* Moves between SSE/MMX and integer unit are expensive. */
18884 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18885 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18886 return ix86_cost
->mmxsse_to_integer
;
18887 if (MAYBE_FLOAT_CLASS_P (class1
))
18888 return ix86_cost
->fp_move
;
18889 if (MAYBE_SSE_CLASS_P (class1
))
18890 return ix86_cost
->sse_move
;
18891 if (MAYBE_MMX_CLASS_P (class1
))
18892 return ix86_cost
->mmx_move
;
18896 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18899 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18901 /* Flags and only flags can only hold CCmode values. */
18902 if (CC_REGNO_P (regno
))
18903 return GET_MODE_CLASS (mode
) == MODE_CC
;
18904 if (GET_MODE_CLASS (mode
) == MODE_CC
18905 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18906 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18908 if (FP_REGNO_P (regno
))
18909 return VALID_FP_MODE_P (mode
);
18910 if (SSE_REGNO_P (regno
))
18912 /* We implement the move patterns for all vector modes into and
18913 out of SSE registers, even when no operation instructions
18915 return (VALID_SSE_REG_MODE (mode
)
18916 || VALID_SSE2_REG_MODE (mode
)
18917 || VALID_MMX_REG_MODE (mode
)
18918 || VALID_MMX_REG_MODE_3DNOW (mode
));
18920 if (MMX_REGNO_P (regno
))
18922 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18923 so if the register is available at all, then we can move data of
18924 the given mode into or out of it. */
18925 return (VALID_MMX_REG_MODE (mode
)
18926 || VALID_MMX_REG_MODE_3DNOW (mode
));
18929 if (mode
== QImode
)
18931 /* Take care for QImode values - they can be in non-QI regs,
18932 but then they do cause partial register stalls. */
18933 if (regno
< 4 || TARGET_64BIT
)
18935 if (!TARGET_PARTIAL_REG_STALL
)
18937 return reload_in_progress
|| reload_completed
;
18939 /* We handle both integer and floats in the general purpose registers. */
18940 else if (VALID_INT_MODE_P (mode
))
18942 else if (VALID_FP_MODE_P (mode
))
18944 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18945 on to use that value in smaller contexts, this can easily force a
18946 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18947 supporting DImode, allow it. */
18948 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18954 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18955 tieable integer mode. */
18958 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18967 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18970 return TARGET_64BIT
;
18977 /* Return true if MODE1 is accessible in a register that can hold MODE2
18978 without copying. That is, all register classes that can hold MODE2
18979 can also hold MODE1. */
18982 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18984 if (mode1
== mode2
)
18987 if (ix86_tieable_integer_mode_p (mode1
)
18988 && ix86_tieable_integer_mode_p (mode2
))
18991 /* MODE2 being XFmode implies fp stack or general regs, which means we
18992 can tie any smaller floating point modes to it. Note that we do not
18993 tie this with TFmode. */
18994 if (mode2
== XFmode
)
18995 return mode1
== SFmode
|| mode1
== DFmode
;
18997 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18998 that we can tie it with SFmode. */
18999 if (mode2
== DFmode
)
19000 return mode1
== SFmode
;
19002 /* If MODE2 is only appropriate for an SSE register, then tie with
19003 any other mode acceptable to SSE registers. */
19004 if (GET_MODE_SIZE (mode2
) == 16
19005 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
19006 return (GET_MODE_SIZE (mode1
) == 16
19007 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
19009 /* If MODE2 is appropriate for an MMX register, then tie
19010 with any other mode acceptable to MMX registers. */
19011 if (GET_MODE_SIZE (mode2
) == 8
19012 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
19013 return (GET_MODE_SIZE (mode1
) == 8
19014 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
19019 /* Return the cost of moving data of mode M between a
19020 register and memory. A value of 2 is the default; this cost is
19021 relative to those in `REGISTER_MOVE_COST'.
19023 If moving between registers and memory is more expensive than
19024 between two registers, you should define this macro to express the
19027 Model also increased moving costs of QImode registers in non
19031 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
19033 if (FLOAT_CLASS_P (class))
19050 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
19052 if (SSE_CLASS_P (class))
19055 switch (GET_MODE_SIZE (mode
))
19069 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
19071 if (MMX_CLASS_P (class))
19074 switch (GET_MODE_SIZE (mode
))
19085 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
19087 switch (GET_MODE_SIZE (mode
))
19091 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
19092 : ix86_cost
->movzbl_load
);
19094 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
19095 : ix86_cost
->int_store
[0] + 4);
19098 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
19100 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19101 if (mode
== TFmode
)
19103 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
19104 * (((int) GET_MODE_SIZE (mode
)
19105 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
19109 /* Compute a (partial) cost for rtx X. Return true if the complete
19110 cost has been computed, and false if subexpressions should be
19111 scanned. In either case, *TOTAL contains the cost result. */
19114 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
19116 enum machine_mode mode
= GET_MODE (x
);
19124 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
19126 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
19128 else if (flag_pic
&& SYMBOLIC_CONST (x
)
19130 || (!GET_CODE (x
) != LABEL_REF
19131 && (GET_CODE (x
) != SYMBOL_REF
19132 || !SYMBOL_REF_LOCAL_P (x
)))))
19139 if (mode
== VOIDmode
)
19142 switch (standard_80387_constant_p (x
))
19147 default: /* Other constants */
19152 /* Start with (MEM (SYMBOL_REF)), since that's where
19153 it'll probably end up. Add a penalty for size. */
19154 *total
= (COSTS_N_INSNS (1)
19155 + (flag_pic
!= 0 && !TARGET_64BIT
)
19156 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
19162 /* The zero extensions is often completely free on x86_64, so make
19163 it as cheap as possible. */
19164 if (TARGET_64BIT
&& mode
== DImode
19165 && GET_MODE (XEXP (x
, 0)) == SImode
)
19167 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19168 *total
= ix86_cost
->add
;
19170 *total
= ix86_cost
->movzx
;
19174 *total
= ix86_cost
->movsx
;
19178 if (CONST_INT_P (XEXP (x
, 1))
19179 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
19181 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19184 *total
= ix86_cost
->add
;
19187 if ((value
== 2 || value
== 3)
19188 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19190 *total
= ix86_cost
->lea
;
19200 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19202 if (CONST_INT_P (XEXP (x
, 1)))
19204 if (INTVAL (XEXP (x
, 1)) > 32)
19205 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19207 *total
= ix86_cost
->shift_const
* 2;
19211 if (GET_CODE (XEXP (x
, 1)) == AND
)
19212 *total
= ix86_cost
->shift_var
* 2;
19214 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19219 if (CONST_INT_P (XEXP (x
, 1)))
19220 *total
= ix86_cost
->shift_const
;
19222 *total
= ix86_cost
->shift_var
;
19227 if (FLOAT_MODE_P (mode
))
19229 *total
= ix86_cost
->fmul
;
19234 rtx op0
= XEXP (x
, 0);
19235 rtx op1
= XEXP (x
, 1);
19237 if (CONST_INT_P (XEXP (x
, 1)))
19239 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19240 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19244 /* This is arbitrary. */
19247 /* Compute costs correctly for widening multiplication. */
19248 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19249 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19250 == GET_MODE_SIZE (mode
))
19252 int is_mulwiden
= 0;
19253 enum machine_mode inner_mode
= GET_MODE (op0
);
19255 if (GET_CODE (op0
) == GET_CODE (op1
))
19256 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19257 else if (CONST_INT_P (op1
))
19259 if (GET_CODE (op0
) == SIGN_EXTEND
)
19260 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19263 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19267 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19270 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19271 + nbits
* ix86_cost
->mult_bit
19272 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19281 if (FLOAT_MODE_P (mode
))
19282 *total
= ix86_cost
->fdiv
;
19284 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19288 if (FLOAT_MODE_P (mode
))
19289 *total
= ix86_cost
->fadd
;
19290 else if (GET_MODE_CLASS (mode
) == MODE_INT
19291 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19293 if (GET_CODE (XEXP (x
, 0)) == PLUS
19294 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19295 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19296 && CONSTANT_P (XEXP (x
, 1)))
19298 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19299 if (val
== 2 || val
== 4 || val
== 8)
19301 *total
= ix86_cost
->lea
;
19302 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19303 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19305 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19309 else if (GET_CODE (XEXP (x
, 0)) == MULT
19310 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19312 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19313 if (val
== 2 || val
== 4 || val
== 8)
19315 *total
= ix86_cost
->lea
;
19316 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19317 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19321 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19323 *total
= ix86_cost
->lea
;
19324 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19325 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19326 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19333 if (FLOAT_MODE_P (mode
))
19335 *total
= ix86_cost
->fadd
;
19343 if (!TARGET_64BIT
&& mode
== DImode
)
19345 *total
= (ix86_cost
->add
* 2
19346 + (rtx_cost (XEXP (x
, 0), outer_code
)
19347 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19348 + (rtx_cost (XEXP (x
, 1), outer_code
)
19349 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19355 if (FLOAT_MODE_P (mode
))
19357 *total
= ix86_cost
->fchs
;
19363 if (!TARGET_64BIT
&& mode
== DImode
)
19364 *total
= ix86_cost
->add
* 2;
19366 *total
= ix86_cost
->add
;
19370 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19371 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19372 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19373 && XEXP (x
, 1) == const0_rtx
)
19375 /* This kind of construct is implemented using test[bwl].
19376 Treat it as if we had an AND. */
19377 *total
= (ix86_cost
->add
19378 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19379 + rtx_cost (const1_rtx
, outer_code
));
19385 if (!TARGET_SSE_MATH
19387 || (mode
== DFmode
&& !TARGET_SSE2
))
19392 if (FLOAT_MODE_P (mode
))
19393 *total
= ix86_cost
->fabs
;
19397 if (FLOAT_MODE_P (mode
))
19398 *total
= ix86_cost
->fsqrt
;
19402 if (XINT (x
, 1) == UNSPEC_TP
)
19413 static int current_machopic_label_num
;
19415 /* Given a symbol name and its associated stub, write out the
19416 definition of the stub. */
19419 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19421 unsigned int length
;
19422 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19423 int label
= ++current_machopic_label_num
;
19425 /* For 64-bit we shouldn't get here. */
19426 gcc_assert (!TARGET_64BIT
);
19428 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19429 symb
= (*targetm
.strip_name_encoding
) (symb
);
19431 length
= strlen (stub
);
19432 binder_name
= alloca (length
+ 32);
19433 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19435 length
= strlen (symb
);
19436 symbol_name
= alloca (length
+ 32);
19437 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19439 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19442 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19444 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19446 fprintf (file
, "%s:\n", stub
);
19447 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19451 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19452 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19453 fprintf (file
, "\tjmp\t*%%edx\n");
19456 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19458 fprintf (file
, "%s:\n", binder_name
);
19462 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19463 fprintf (file
, "\tpushl\t%%eax\n");
19466 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19468 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19470 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19471 fprintf (file
, "%s:\n", lazy_ptr_name
);
19472 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19473 fprintf (file
, "\t.long %s\n", binder_name
);
19477 darwin_x86_file_end (void)
19479 darwin_file_end ();
19482 #endif /* TARGET_MACHO */
19484 /* Order the registers for register allocator. */
19487 x86_order_regs_for_local_alloc (void)
19492 /* First allocate the local general purpose registers. */
19493 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19494 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19495 reg_alloc_order
[pos
++] = i
;
19497 /* Global general purpose registers. */
19498 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19499 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19500 reg_alloc_order
[pos
++] = i
;
19502 /* x87 registers come first in case we are doing FP math
19504 if (!TARGET_SSE_MATH
)
19505 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19506 reg_alloc_order
[pos
++] = i
;
19508 /* SSE registers. */
19509 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19510 reg_alloc_order
[pos
++] = i
;
19511 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19512 reg_alloc_order
[pos
++] = i
;
19514 /* x87 registers. */
19515 if (TARGET_SSE_MATH
)
19516 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19517 reg_alloc_order
[pos
++] = i
;
19519 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19520 reg_alloc_order
[pos
++] = i
;
19522 /* Initialize the rest of array as we do not allocate some registers
19524 while (pos
< FIRST_PSEUDO_REGISTER
)
19525 reg_alloc_order
[pos
++] = 0;
19528 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19529 struct attribute_spec.handler. */
19531 ix86_handle_struct_attribute (tree
*node
, tree name
,
19532 tree args ATTRIBUTE_UNUSED
,
19533 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19536 if (DECL_P (*node
))
19538 if (TREE_CODE (*node
) == TYPE_DECL
)
19539 type
= &TREE_TYPE (*node
);
19544 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19545 || TREE_CODE (*type
) == UNION_TYPE
)))
19547 warning (OPT_Wattributes
, "%qs attribute ignored",
19548 IDENTIFIER_POINTER (name
));
19549 *no_add_attrs
= true;
19552 else if ((is_attribute_p ("ms_struct", name
)
19553 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19554 || ((is_attribute_p ("gcc_struct", name
)
19555 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19557 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19558 IDENTIFIER_POINTER (name
));
19559 *no_add_attrs
= true;
19566 ix86_ms_bitfield_layout_p (tree record_type
)
19568 return (TARGET_MS_BITFIELD_LAYOUT
&&
19569 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19570 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19573 /* Returns an expression indicating where the this parameter is
19574 located on entry to the FUNCTION. */
19577 x86_this_parameter (tree function
)
19579 tree type
= TREE_TYPE (function
);
19583 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19584 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19587 if (ix86_function_regparm (type
, function
) > 0)
19591 parm
= TYPE_ARG_TYPES (type
);
19592 /* Figure out whether or not the function has a variable number of
19594 for (; parm
; parm
= TREE_CHAIN (parm
))
19595 if (TREE_VALUE (parm
) == void_type_node
)
19597 /* If not, the this parameter is in the first argument. */
19601 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19603 return gen_rtx_REG (SImode
, regno
);
19607 if (aggregate_value_p (TREE_TYPE (type
), type
))
19608 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19610 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19613 /* Determine whether x86_output_mi_thunk can succeed. */
19616 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19617 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19618 HOST_WIDE_INT vcall_offset
, tree function
)
19620 /* 64-bit can handle anything. */
19624 /* For 32-bit, everything's fine if we have one free register. */
19625 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19628 /* Need a free register for vcall_offset. */
19632 /* Need a free register for GOT references. */
19633 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19636 /* Otherwise ok. */
19640 /* Output the assembler code for a thunk function. THUNK_DECL is the
19641 declaration for the thunk function itself, FUNCTION is the decl for
19642 the target function. DELTA is an immediate constant offset to be
19643 added to THIS. If VCALL_OFFSET is nonzero, the word at
19644 *(*this + vcall_offset) should be added to THIS. */
19647 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19648 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19649 HOST_WIDE_INT vcall_offset
, tree function
)
19652 rtx
this = x86_this_parameter (function
);
19655 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19656 pull it in now and let DELTA benefit. */
19659 else if (vcall_offset
)
19661 /* Put the this parameter into %eax. */
19663 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19664 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19667 this_reg
= NULL_RTX
;
19669 /* Adjust the this parameter by a fixed constant. */
19672 xops
[0] = GEN_INT (delta
);
19673 xops
[1] = this_reg
? this_reg
: this;
19676 if (!x86_64_general_operand (xops
[0], DImode
))
19678 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19680 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19684 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19687 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19690 /* Adjust the this parameter by a value stored in the vtable. */
19694 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19697 int tmp_regno
= 2 /* ECX */;
19698 if (lookup_attribute ("fastcall",
19699 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19700 tmp_regno
= 0 /* EAX */;
19701 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19704 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19707 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19709 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19711 /* Adjust the this parameter. */
19712 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19713 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19715 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19716 xops
[0] = GEN_INT (vcall_offset
);
19718 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19719 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19721 xops
[1] = this_reg
;
19723 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19725 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19728 /* If necessary, drop THIS back to its stack slot. */
19729 if (this_reg
&& this_reg
!= this)
19731 xops
[0] = this_reg
;
19733 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19736 xops
[0] = XEXP (DECL_RTL (function
), 0);
19739 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19740 output_asm_insn ("jmp\t%P0", xops
);
19743 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19744 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19745 tmp
= gen_rtx_MEM (QImode
, tmp
);
19747 output_asm_insn ("jmp\t%A0", xops
);
19752 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19753 output_asm_insn ("jmp\t%P0", xops
);
19758 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19759 tmp
= (gen_rtx_SYMBOL_REF
19761 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19762 tmp
= gen_rtx_MEM (QImode
, tmp
);
19764 output_asm_insn ("jmp\t%0", xops
);
19767 #endif /* TARGET_MACHO */
19769 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19770 output_set_got (tmp
, NULL_RTX
);
19773 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19774 output_asm_insn ("jmp\t{*}%1", xops
);
19780 x86_file_start (void)
19782 default_file_start ();
19784 darwin_file_start ();
19786 if (X86_FILE_START_VERSION_DIRECTIVE
)
19787 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19788 if (X86_FILE_START_FLTUSED
)
19789 fputs ("\t.global\t__fltused\n", asm_out_file
);
19790 if (ix86_asm_dialect
== ASM_INTEL
)
19791 fputs ("\t.intel_syntax\n", asm_out_file
);
19795 x86_field_alignment (tree field
, int computed
)
19797 enum machine_mode mode
;
19798 tree type
= TREE_TYPE (field
);
19800 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19802 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19803 ? get_inner_array_type (type
) : type
);
19804 if (mode
== DFmode
|| mode
== DCmode
19805 || GET_MODE_CLASS (mode
) == MODE_INT
19806 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19807 return MIN (32, computed
);
19811 /* Output assembler code to FILE to increment profiler label # LABELNO
19812 for profiling a function entry. */
19814 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19819 #ifndef NO_PROFILE_COUNTERS
19820 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19822 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19826 #ifndef NO_PROFILE_COUNTERS
19827 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19829 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19833 #ifndef NO_PROFILE_COUNTERS
19834 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19835 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19837 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19841 #ifndef NO_PROFILE_COUNTERS
19842 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19843 PROFILE_COUNT_REGISTER
);
19845 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19849 /* We don't have exact information about the insn sizes, but we may assume
19850 quite safely that we are informed about all 1 byte insns and memory
19851 address sizes. This is enough to eliminate unnecessary padding in
19855 min_insn_size (rtx insn
)
19859 if (!INSN_P (insn
) || !active_insn_p (insn
))
19862 /* Discard alignments we've emit and jump instructions. */
19863 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19864 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19867 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19868 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19871 /* Important case - calls are always 5 bytes.
19872 It is common to have many calls in the row. */
19874 && symbolic_reference_mentioned_p (PATTERN (insn
))
19875 && !SIBLING_CALL_P (insn
))
19877 if (get_attr_length (insn
) <= 1)
19880 /* For normal instructions we may rely on the sizes of addresses
19881 and the presence of symbol to require 4 bytes of encoding.
19882 This is not the case for jumps where references are PC relative. */
19883 if (!JUMP_P (insn
))
19885 l
= get_attr_length_address (insn
);
19886 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19895 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19899 ix86_avoid_jump_misspredicts (void)
19901 rtx insn
, start
= get_insns ();
19902 int nbytes
= 0, njumps
= 0;
19905 /* Look for all minimal intervals of instructions containing 4 jumps.
19906 The intervals are bounded by START and INSN. NBYTES is the total
19907 size of instructions in the interval including INSN and not including
19908 START. When the NBYTES is smaller than 16 bytes, it is possible
19909 that the end of START and INSN ends up in the same 16byte page.
19911 The smallest offset in the page INSN can start is the case where START
19912 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19913 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19915 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19918 nbytes
+= min_insn_size (insn
);
19920 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19921 INSN_UID (insn
), min_insn_size (insn
));
19923 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19924 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19932 start
= NEXT_INSN (start
);
19933 if ((JUMP_P (start
)
19934 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19935 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19937 njumps
--, isjump
= 1;
19940 nbytes
-= min_insn_size (start
);
19942 gcc_assert (njumps
>= 0);
19944 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19945 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19947 if (njumps
== 3 && isjump
&& nbytes
< 16)
19949 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19952 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19953 INSN_UID (insn
), padsize
);
19954 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19959 /* AMD Athlon works faster
19960 when RET is not destination of conditional jump or directly preceded
19961 by other jump instruction. We avoid the penalty by inserting NOP just
19962 before the RET instructions in such cases. */
19964 ix86_pad_returns (void)
19969 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19971 basic_block bb
= e
->src
;
19972 rtx ret
= BB_END (bb
);
19974 bool replace
= false;
19976 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19977 || !maybe_hot_bb_p (bb
))
19979 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19980 if (active_insn_p (prev
) || LABEL_P (prev
))
19982 if (prev
&& LABEL_P (prev
))
19987 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19988 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19989 && !(e
->flags
& EDGE_FALLTHRU
))
19994 prev
= prev_active_insn (ret
);
19996 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19999 /* Empty functions get branch mispredict even when the jump destination
20000 is not visible to us. */
20001 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
20006 emit_insn_before (gen_return_internal_long (), ret
);
20012 /* Implement machine specific optimizations. We implement padding of returns
20013 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20017 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
20018 ix86_pad_returns ();
20019 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
20020 ix86_avoid_jump_misspredicts ();
20023 /* Return nonzero when QImode register that must be represented via REX prefix
20026 x86_extended_QIreg_mentioned_p (rtx insn
)
20029 extract_insn_cached (insn
);
20030 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20031 if (REG_P (recog_data
.operand
[i
])
20032 && REGNO (recog_data
.operand
[i
]) >= 4)
20037 /* Return nonzero when P points to register encoded via REX prefix.
20038 Called via for_each_rtx. */
20040 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
20042 unsigned int regno
;
20045 regno
= REGNO (*p
);
20046 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
20049 /* Return true when INSN mentions register that must be encoded using REX
20052 x86_extended_reg_mentioned_p (rtx insn
)
20054 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
20057 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20058 optabs would emit if we didn't have TFmode patterns. */
20061 x86_emit_floatuns (rtx operands
[2])
20063 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
20064 enum machine_mode mode
, inmode
;
20066 inmode
= GET_MODE (operands
[1]);
20067 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20070 in
= force_reg (inmode
, operands
[1]);
20071 mode
= GET_MODE (out
);
20072 neglab
= gen_label_rtx ();
20073 donelab
= gen_label_rtx ();
20074 f0
= gen_reg_rtx (mode
);
20076 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20078 expand_float (out
, in
, 0);
20080 emit_jump_insn (gen_jump (donelab
));
20083 emit_label (neglab
);
20085 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20087 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20089 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20091 expand_float (f0
, i0
, 0);
20093 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20095 emit_label (donelab
);
20098 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20099 with all elements equal to VAR. Return true if successful. */
20102 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
20103 rtx target
, rtx val
)
20105 enum machine_mode smode
, wsmode
, wvmode
;
20120 val
= force_reg (GET_MODE_INNER (mode
), val
);
20121 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20122 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20128 if (TARGET_SSE
|| TARGET_3DNOW_A
)
20130 val
= gen_lowpart (SImode
, val
);
20131 x
= gen_rtx_TRUNCATE (HImode
, val
);
20132 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
20133 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20155 /* Extend HImode to SImode using a paradoxical SUBREG. */
20156 tmp1
= gen_reg_rtx (SImode
);
20157 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20158 /* Insert the SImode value as low element of V4SImode vector. */
20159 tmp2
= gen_reg_rtx (V4SImode
);
20160 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20161 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20162 CONST0_RTX (V4SImode
),
20164 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20165 /* Cast the V4SImode vector back to a V8HImode vector. */
20166 tmp1
= gen_reg_rtx (V8HImode
);
20167 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
20168 /* Duplicate the low short through the whole low SImode word. */
20169 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
20170 /* Cast the V8HImode vector back to a V4SImode vector. */
20171 tmp2
= gen_reg_rtx (V4SImode
);
20172 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20173 /* Replicate the low element of the V4SImode vector. */
20174 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20175 /* Cast the V2SImode back to V8HImode, and store in target. */
20176 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20187 /* Extend QImode to SImode using a paradoxical SUBREG. */
20188 tmp1
= gen_reg_rtx (SImode
);
20189 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20190 /* Insert the SImode value as low element of V4SImode vector. */
20191 tmp2
= gen_reg_rtx (V4SImode
);
20192 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20193 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20194 CONST0_RTX (V4SImode
),
20196 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20197 /* Cast the V4SImode vector back to a V16QImode vector. */
20198 tmp1
= gen_reg_rtx (V16QImode
);
20199 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20200 /* Duplicate the low byte through the whole low SImode word. */
20201 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20202 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20203 /* Cast the V16QImode vector back to a V4SImode vector. */
20204 tmp2
= gen_reg_rtx (V4SImode
);
20205 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20206 /* Replicate the low element of the V4SImode vector. */
20207 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20208 /* Cast the V2SImode back to V16QImode, and store in target. */
20209 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20217 /* Replicate the value once into the next wider mode and recurse. */
20218 val
= convert_modes (wsmode
, smode
, val
, true);
20219 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20220 GEN_INT (GET_MODE_BITSIZE (smode
)),
20221 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20222 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20224 x
= gen_reg_rtx (wvmode
);
20225 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20226 gcc_unreachable ();
20227 emit_move_insn (target
, gen_lowpart (mode
, x
));
20235 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20236 whose ONE_VAR element is VAR, and other elements are zero. Return true
20240 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20241 rtx target
, rtx var
, int one_var
)
20243 enum machine_mode vsimode
;
20259 var
= force_reg (GET_MODE_INNER (mode
), var
);
20260 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20261 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20266 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20267 new_target
= gen_reg_rtx (mode
);
20269 new_target
= target
;
20270 var
= force_reg (GET_MODE_INNER (mode
), var
);
20271 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20272 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20273 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20276 /* We need to shuffle the value to the correct position, so
20277 create a new pseudo to store the intermediate result. */
20279 /* With SSE2, we can use the integer shuffle insns. */
20280 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20282 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20284 GEN_INT (one_var
== 1 ? 0 : 1),
20285 GEN_INT (one_var
== 2 ? 0 : 1),
20286 GEN_INT (one_var
== 3 ? 0 : 1)));
20287 if (target
!= new_target
)
20288 emit_move_insn (target
, new_target
);
20292 /* Otherwise convert the intermediate result to V4SFmode and
20293 use the SSE1 shuffle instructions. */
20294 if (mode
!= V4SFmode
)
20296 tmp
= gen_reg_rtx (V4SFmode
);
20297 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20302 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20304 GEN_INT (one_var
== 1 ? 0 : 1),
20305 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20306 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20308 if (mode
!= V4SFmode
)
20309 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20310 else if (tmp
!= target
)
20311 emit_move_insn (target
, tmp
);
20313 else if (target
!= new_target
)
20314 emit_move_insn (target
, new_target
);
20319 vsimode
= V4SImode
;
20325 vsimode
= V2SImode
;
20331 /* Zero extend the variable element to SImode and recurse. */
20332 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20334 x
= gen_reg_rtx (vsimode
);
20335 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20337 gcc_unreachable ();
20339 emit_move_insn (target
, gen_lowpart (mode
, x
));
20347 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20348 consisting of the values in VALS. It is known that all elements
20349 except ONE_VAR are constants. Return true if successful. */
20352 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20353 rtx target
, rtx vals
, int one_var
)
20355 rtx var
= XVECEXP (vals
, 0, one_var
);
20356 enum machine_mode wmode
;
20359 const_vec
= copy_rtx (vals
);
20360 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20361 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20369 /* For the two element vectors, it's just as easy to use
20370 the general case. */
20386 /* There's no way to set one QImode entry easily. Combine
20387 the variable value with its adjacent constant value, and
20388 promote to an HImode set. */
20389 x
= XVECEXP (vals
, 0, one_var
^ 1);
20392 var
= convert_modes (HImode
, QImode
, var
, true);
20393 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20394 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20395 x
= GEN_INT (INTVAL (x
) & 0xff);
20399 var
= convert_modes (HImode
, QImode
, var
, true);
20400 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20402 if (x
!= const0_rtx
)
20403 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20404 1, OPTAB_LIB_WIDEN
);
20406 x
= gen_reg_rtx (wmode
);
20407 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20408 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20410 emit_move_insn (target
, gen_lowpart (mode
, x
));
20417 emit_move_insn (target
, const_vec
);
20418 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20422 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20423 all values variable, and none identical. */
20426 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20427 rtx target
, rtx vals
)
20429 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20430 rtx op0
= NULL
, op1
= NULL
;
20431 bool use_vec_concat
= false;
20437 if (!mmx_ok
&& !TARGET_SSE
)
20443 /* For the two element vectors, we always implement VEC_CONCAT. */
20444 op0
= XVECEXP (vals
, 0, 0);
20445 op1
= XVECEXP (vals
, 0, 1);
20446 use_vec_concat
= true;
20450 half_mode
= V2SFmode
;
20453 half_mode
= V2SImode
;
20459 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20460 Recurse to load the two halves. */
20462 op0
= gen_reg_rtx (half_mode
);
20463 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20464 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20466 op1
= gen_reg_rtx (half_mode
);
20467 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20468 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20470 use_vec_concat
= true;
20481 gcc_unreachable ();
20484 if (use_vec_concat
)
20486 if (!register_operand (op0
, half_mode
))
20487 op0
= force_reg (half_mode
, op0
);
20488 if (!register_operand (op1
, half_mode
))
20489 op1
= force_reg (half_mode
, op1
);
20491 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20492 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20496 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20497 enum machine_mode inner_mode
;
20498 rtx words
[4], shift
;
20500 inner_mode
= GET_MODE_INNER (mode
);
20501 n_elts
= GET_MODE_NUNITS (mode
);
20502 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20503 n_elt_per_word
= n_elts
/ n_words
;
20504 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20506 for (i
= 0; i
< n_words
; ++i
)
20508 rtx word
= NULL_RTX
;
20510 for (j
= 0; j
< n_elt_per_word
; ++j
)
20512 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20513 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20519 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20520 word
, 1, OPTAB_LIB_WIDEN
);
20521 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20522 word
, 1, OPTAB_LIB_WIDEN
);
20530 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20531 else if (n_words
== 2)
20533 rtx tmp
= gen_reg_rtx (mode
);
20534 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20535 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20536 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20537 emit_move_insn (target
, tmp
);
20539 else if (n_words
== 4)
20541 rtx tmp
= gen_reg_rtx (V4SImode
);
20542 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20543 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20544 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20547 gcc_unreachable ();
20551 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20552 instructions unless MMX_OK is true. */
20555 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20557 enum machine_mode mode
= GET_MODE (target
);
20558 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20559 int n_elts
= GET_MODE_NUNITS (mode
);
20560 int n_var
= 0, one_var
= -1;
20561 bool all_same
= true, all_const_zero
= true;
20565 for (i
= 0; i
< n_elts
; ++i
)
20567 x
= XVECEXP (vals
, 0, i
);
20568 if (!CONSTANT_P (x
))
20569 n_var
++, one_var
= i
;
20570 else if (x
!= CONST0_RTX (inner_mode
))
20571 all_const_zero
= false;
20572 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20576 /* Constants are best loaded from the constant pool. */
20579 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20583 /* If all values are identical, broadcast the value. */
20585 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20586 XVECEXP (vals
, 0, 0)))
20589 /* Values where only one field is non-constant are best loaded from
20590 the pool and overwritten via move later. */
20594 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20595 XVECEXP (vals
, 0, one_var
),
20599 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20603 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20607 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20609 enum machine_mode mode
= GET_MODE (target
);
20610 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20611 bool use_vec_merge
= false;
20620 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20621 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20623 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20625 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20626 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20636 /* For the two element vectors, we implement a VEC_CONCAT with
20637 the extraction of the other element. */
20639 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20640 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20643 op0
= val
, op1
= tmp
;
20645 op0
= tmp
, op1
= val
;
20647 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20648 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20656 use_vec_merge
= true;
20660 /* tmp = target = A B C D */
20661 tmp
= copy_to_reg (target
);
20662 /* target = A A B B */
20663 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20664 /* target = X A B B */
20665 ix86_expand_vector_set (false, target
, val
, 0);
20666 /* target = A X C D */
20667 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20668 GEN_INT (1), GEN_INT (0),
20669 GEN_INT (2+4), GEN_INT (3+4)));
20673 /* tmp = target = A B C D */
20674 tmp
= copy_to_reg (target
);
20675 /* tmp = X B C D */
20676 ix86_expand_vector_set (false, tmp
, val
, 0);
20677 /* target = A B X D */
20678 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20679 GEN_INT (0), GEN_INT (1),
20680 GEN_INT (0+4), GEN_INT (3+4)));
20684 /* tmp = target = A B C D */
20685 tmp
= copy_to_reg (target
);
20686 /* tmp = X B C D */
20687 ix86_expand_vector_set (false, tmp
, val
, 0);
20688 /* target = A B X D */
20689 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20690 GEN_INT (0), GEN_INT (1),
20691 GEN_INT (2+4), GEN_INT (0+4)));
20695 gcc_unreachable ();
20700 /* Element 0 handled by vec_merge below. */
20703 use_vec_merge
= true;
20709 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20710 store into element 0, then shuffle them back. */
20714 order
[0] = GEN_INT (elt
);
20715 order
[1] = const1_rtx
;
20716 order
[2] = const2_rtx
;
20717 order
[3] = GEN_INT (3);
20718 order
[elt
] = const0_rtx
;
20720 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20721 order
[1], order
[2], order
[3]));
20723 ix86_expand_vector_set (false, target
, val
, 0);
20725 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20726 order
[1], order
[2], order
[3]));
20730 /* For SSE1, we have to reuse the V4SF code. */
20731 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20732 gen_lowpart (SFmode
, val
), elt
);
20737 use_vec_merge
= TARGET_SSE2
;
20740 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20751 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20752 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20753 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20757 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20759 emit_move_insn (mem
, target
);
20761 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20762 emit_move_insn (tmp
, val
);
20764 emit_move_insn (target
, mem
);
20769 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20771 enum machine_mode mode
= GET_MODE (vec
);
20772 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20773 bool use_vec_extr
= false;
20786 use_vec_extr
= true;
20798 tmp
= gen_reg_rtx (mode
);
20799 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20800 GEN_INT (elt
), GEN_INT (elt
),
20801 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20805 tmp
= gen_reg_rtx (mode
);
20806 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20810 gcc_unreachable ();
20813 use_vec_extr
= true;
20828 tmp
= gen_reg_rtx (mode
);
20829 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20830 GEN_INT (elt
), GEN_INT (elt
),
20831 GEN_INT (elt
), GEN_INT (elt
)));
20835 tmp
= gen_reg_rtx (mode
);
20836 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20840 gcc_unreachable ();
20843 use_vec_extr
= true;
20848 /* For SSE1, we have to reuse the V4SF code. */
20849 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20850 gen_lowpart (V4SFmode
, vec
), elt
);
20856 use_vec_extr
= TARGET_SSE2
;
20859 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20864 /* ??? Could extract the appropriate HImode element and shift. */
20871 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20872 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20874 /* Let the rtl optimizers know about the zero extension performed. */
20875 if (inner_mode
== HImode
)
20877 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20878 target
= gen_lowpart (SImode
, target
);
20881 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20885 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20887 emit_move_insn (mem
, vec
);
20889 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20890 emit_move_insn (target
, tmp
);
20894 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20895 pattern to reduce; DEST is the destination; IN is the input vector. */
20898 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20900 rtx tmp1
, tmp2
, tmp3
;
20902 tmp1
= gen_reg_rtx (V4SFmode
);
20903 tmp2
= gen_reg_rtx (V4SFmode
);
20904 tmp3
= gen_reg_rtx (V4SFmode
);
20906 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20907 emit_insn (fn (tmp2
, tmp1
, in
));
20909 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20910 GEN_INT (1), GEN_INT (1),
20911 GEN_INT (1+4), GEN_INT (1+4)));
20912 emit_insn (fn (dest
, tmp2
, tmp3
));
20915 /* Target hook for scalar_mode_supported_p. */
20917 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20919 if (DECIMAL_FLOAT_MODE_P (mode
))
20922 return default_scalar_mode_supported_p (mode
);
20925 /* Implements target hook vector_mode_supported_p. */
20927 ix86_vector_mode_supported_p (enum machine_mode mode
)
20929 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20931 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20933 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20935 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20940 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20942 We do this in the new i386 backend to maintain source compatibility
20943 with the old cc0-based compiler. */
20946 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20947 tree inputs ATTRIBUTE_UNUSED
,
20950 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20952 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20957 /* Return true if this goes in small data/bss. */
20960 ix86_in_large_data_p (tree exp
)
20962 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20965 /* Functions are never large data. */
20966 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20969 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20971 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20972 if (strcmp (section
, ".ldata") == 0
20973 || strcmp (section
, ".lbss") == 0)
20979 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20981 /* If this is an incomplete type with size 0, then we can't put it
20982 in data because it might be too big when completed. */
20983 if (!size
|| size
> ix86_section_threshold
)
20990 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20992 default_encode_section_info (decl
, rtl
, first
);
20994 if (TREE_CODE (decl
) == VAR_DECL
20995 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20996 && ix86_in_large_data_p (decl
))
20997 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
21000 /* Worker function for REVERSE_CONDITION. */
21003 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
21005 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
21006 ? reverse_condition (code
)
21007 : reverse_condition_maybe_unordered (code
));
21010 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21014 output_387_reg_move (rtx insn
, rtx
*operands
)
21016 if (REG_P (operands
[1])
21017 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
21019 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
21020 return output_387_ffreep (operands
, 0);
21021 return "fstp\t%y0";
21023 if (STACK_TOP_P (operands
[0]))
21024 return "fld%z1\t%y1";
21028 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21029 FP status register is set. */
21032 ix86_emit_fp_unordered_jump (rtx label
)
21034 rtx reg
= gen_reg_rtx (HImode
);
21037 emit_insn (gen_x86_fnstsw_1 (reg
));
21039 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
21041 emit_insn (gen_x86_sahf_1 (reg
));
21043 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
21044 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
21048 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
21050 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21051 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
21054 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
21055 gen_rtx_LABEL_REF (VOIDmode
, label
),
21057 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
21059 emit_jump_insn (temp
);
21060 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21063 /* Output code to perform a log1p XFmode calculation. */
21065 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
21067 rtx label1
= gen_label_rtx ();
21068 rtx label2
= gen_label_rtx ();
21070 rtx tmp
= gen_reg_rtx (XFmode
);
21071 rtx tmp2
= gen_reg_rtx (XFmode
);
21073 emit_insn (gen_absxf2 (tmp
, op1
));
21074 emit_insn (gen_cmpxf (tmp
,
21075 CONST_DOUBLE_FROM_REAL_VALUE (
21076 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
21078 emit_jump_insn (gen_bge (label1
));
21080 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21081 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
21082 emit_jump (label2
);
21084 emit_label (label1
);
21085 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
21086 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
21087 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21088 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
21090 emit_label (label2
);
21093 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21096 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
21099 /* With Binutils 2.15, the "@unwind" marker must be specified on
21100 every occurrence of the ".eh_frame" section, not just the first
21103 && strcmp (name
, ".eh_frame") == 0)
21105 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
21106 flags
& SECTION_WRITE
? "aw" : "a");
21109 default_elf_asm_named_section (name
, flags
, decl
);
21112 /* Return the mangling of TYPE if it is an extended fundamental type. */
21114 static const char *
21115 ix86_mangle_fundamental_type (tree type
)
21117 switch (TYPE_MODE (type
))
21120 /* __float128 is "g". */
21123 /* "long double" or __float80 is "e". */
21130 /* For 32-bit code we can save PIC register setup by using
21131 __stack_chk_fail_local hidden function instead of calling
21132 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21133 register, so it is better to call __stack_chk_fail directly. */
21136 ix86_stack_protect_fail (void)
21138 return TARGET_64BIT
21139 ? default_external_stack_protect_fail ()
21140 : default_hidden_stack_protect_fail ();
21143 /* Select a format to encode pointers in exception handling data. CODE
21144 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21145 true if the symbol may be affected by dynamic relocations.
21147 ??? All x86 object file formats are capable of representing this.
21148 After all, the relocation needed is the same as for the call insn.
21149 Whether or not a particular assembler allows us to enter such, I
21150 guess we'll have to see. */
21152 asm_preferred_eh_data_format (int code
, int global
)
21156 int type
= DW_EH_PE_sdata8
;
21158 || ix86_cmodel
== CM_SMALL_PIC
21159 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21160 type
= DW_EH_PE_sdata4
;
21161 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21163 if (ix86_cmodel
== CM_SMALL
21164 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21165 return DW_EH_PE_udata4
;
21166 return DW_EH_PE_absptr
;
21169 /* Expand copysign from SIGN to the positive value ABS_VALUE
21170 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21173 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
21175 enum machine_mode mode
= GET_MODE (sign
);
21176 rtx sgn
= gen_reg_rtx (mode
);
21177 if (mask
== NULL_RTX
)
21179 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
21180 if (!VECTOR_MODE_P (mode
))
21182 /* We need to generate a scalar mode mask in this case. */
21183 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21184 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21185 mask
= gen_reg_rtx (mode
);
21186 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21190 mask
= gen_rtx_NOT (mode
, mask
);
21191 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
21192 gen_rtx_AND (mode
, mask
, sign
)));
21193 emit_insn (gen_rtx_SET (VOIDmode
, result
,
21194 gen_rtx_IOR (mode
, abs_value
, sgn
)));
21197 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21198 mask for masking out the sign-bit is stored in *SMASK, if that is
21201 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
21203 enum machine_mode mode
= GET_MODE (op0
);
21206 xa
= gen_reg_rtx (mode
);
21207 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
21208 if (!VECTOR_MODE_P (mode
))
21210 /* We need to generate a scalar mode mask in this case. */
21211 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21212 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21213 mask
= gen_reg_rtx (mode
);
21214 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21216 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21217 gen_rtx_AND (mode
, op0
, mask
)));
21225 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21226 swapping the operands if SWAP_OPERANDS is true. The expanded
21227 code is a forward jump to a newly created label in case the
21228 comparison is true. The generated label rtx is returned. */
21230 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21231 bool swap_operands
)
21242 label
= gen_label_rtx ();
21243 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21244 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21245 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21246 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21247 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21248 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21249 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21250 JUMP_LABEL (tmp
) = label
;
21255 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21256 using comparison code CODE. Operands are swapped for the comparison if
21257 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21259 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21260 bool swap_operands
)
21262 enum machine_mode mode
= GET_MODE (op0
);
21263 rtx mask
= gen_reg_rtx (mode
);
21272 if (mode
== DFmode
)
21273 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21274 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21276 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21277 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21282 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21283 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21285 ix86_gen_TWO52 (enum machine_mode mode
)
21287 REAL_VALUE_TYPE TWO52r
;
21290 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21291 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21292 TWO52
= force_reg (mode
, TWO52
);
21297 /* Expand SSE sequence for computing lround from OP1 storing
21300 ix86_expand_lround (rtx op0
, rtx op1
)
21302 /* C code for the stuff we're doing below:
21303 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21306 enum machine_mode mode
= GET_MODE (op1
);
21307 const struct real_format
*fmt
;
21308 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21311 /* load nextafter (0.5, 0.0) */
21312 fmt
= REAL_MODE_FORMAT (mode
);
21313 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21314 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21316 /* adj = copysign (0.5, op1) */
21317 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21318 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21320 /* adj = op1 + adj */
21321 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21323 /* op0 = (imode)adj */
21324 expand_fix (op0
, adj
, 0);
21327 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21330 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21332 /* C code for the stuff we're doing below (for do_floor):
21334 xi -= (double)xi > op1 ? 1 : 0;
21337 enum machine_mode fmode
= GET_MODE (op1
);
21338 enum machine_mode imode
= GET_MODE (op0
);
21339 rtx ireg
, freg
, label
, tmp
;
21341 /* reg = (long)op1 */
21342 ireg
= gen_reg_rtx (imode
);
21343 expand_fix (ireg
, op1
, 0);
21345 /* freg = (double)reg */
21346 freg
= gen_reg_rtx (fmode
);
21347 expand_float (freg
, ireg
, 0);
21349 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21350 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21351 freg
, op1
, !do_floor
);
21352 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21353 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21354 emit_move_insn (ireg
, tmp
);
21356 emit_label (label
);
21357 LABEL_NUSES (label
) = 1;
21359 emit_move_insn (op0
, ireg
);
21362 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21363 result in OPERAND0. */
21365 ix86_expand_rint (rtx operand0
, rtx operand1
)
21367 /* C code for the stuff we're doing below:
21368 xa = fabs (operand1);
21369 if (!isless (xa, 2**52))
21371 xa = xa + 2**52 - 2**52;
21372 return copysign (xa, operand1);
21374 enum machine_mode mode
= GET_MODE (operand0
);
21375 rtx res
, xa
, label
, TWO52
, mask
;
21377 res
= gen_reg_rtx (mode
);
21378 emit_move_insn (res
, operand1
);
21380 /* xa = abs (operand1) */
21381 xa
= ix86_expand_sse_fabs (res
, &mask
);
21383 /* if (!isless (xa, TWO52)) goto label; */
21384 TWO52
= ix86_gen_TWO52 (mode
);
21385 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21387 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21388 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21390 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21392 emit_label (label
);
21393 LABEL_NUSES (label
) = 1;
21395 emit_move_insn (operand0
, res
);
21398 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21401 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21403 /* C code for the stuff we expand below.
21404 double xa = fabs (x), x2;
21405 if (!isless (xa, TWO52))
21407 xa = xa + TWO52 - TWO52;
21408 x2 = copysign (xa, x);
21417 enum machine_mode mode
= GET_MODE (operand0
);
21418 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21420 TWO52
= ix86_gen_TWO52 (mode
);
21422 /* Temporary for holding the result, initialized to the input
21423 operand to ease control flow. */
21424 res
= gen_reg_rtx (mode
);
21425 emit_move_insn (res
, operand1
);
21427 /* xa = abs (operand1) */
21428 xa
= ix86_expand_sse_fabs (res
, &mask
);
21430 /* if (!isless (xa, TWO52)) goto label; */
21431 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21433 /* xa = xa + TWO52 - TWO52; */
21434 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21435 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21437 /* xa = copysign (xa, operand1) */
21438 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21440 /* generate 1.0 or -1.0 */
21441 one
= force_reg (mode
,
21442 const_double_from_real_value (do_floor
21443 ? dconst1
: dconstm1
, mode
));
21445 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21446 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21447 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21448 gen_rtx_AND (mode
, one
, tmp
)));
21449 /* We always need to subtract here to preserve signed zero. */
21450 tmp
= expand_simple_binop (mode
, MINUS
,
21451 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21452 emit_move_insn (res
, tmp
);
21454 emit_label (label
);
21455 LABEL_NUSES (label
) = 1;
21457 emit_move_insn (operand0
, res
);
21460 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21463 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21465 /* C code for the stuff we expand below.
21466 double xa = fabs (x), x2;
21467 if (!isless (xa, TWO52))
21469 x2 = (double)(long)x;
21476 if (HONOR_SIGNED_ZEROS (mode))
21477 return copysign (x2, x);
21480 enum machine_mode mode
= GET_MODE (operand0
);
21481 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21483 TWO52
= ix86_gen_TWO52 (mode
);
21485 /* Temporary for holding the result, initialized to the input
21486 operand to ease control flow. */
21487 res
= gen_reg_rtx (mode
);
21488 emit_move_insn (res
, operand1
);
21490 /* xa = abs (operand1) */
21491 xa
= ix86_expand_sse_fabs (res
, &mask
);
21493 /* if (!isless (xa, TWO52)) goto label; */
21494 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21496 /* xa = (double)(long)x */
21497 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21498 expand_fix (xi
, res
, 0);
21499 expand_float (xa
, xi
, 0);
21502 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21504 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21505 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21506 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21507 gen_rtx_AND (mode
, one
, tmp
)));
21508 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21509 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21510 emit_move_insn (res
, tmp
);
21512 if (HONOR_SIGNED_ZEROS (mode
))
21513 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21515 emit_label (label
);
21516 LABEL_NUSES (label
) = 1;
21518 emit_move_insn (operand0
, res
);
21521 /* Expand SSE sequence for computing round from OPERAND1 storing
21522 into OPERAND0. Sequence that works without relying on DImode truncation
21523 via cvttsd2siq that is only available on 64bit targets. */
21525 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21527 /* C code for the stuff we expand below.
21528 double xa = fabs (x), xa2, x2;
21529 if (!isless (xa, TWO52))
21531 Using the absolute value and copying back sign makes
21532 -0.0 -> -0.0 correct.
21533 xa2 = xa + TWO52 - TWO52;
21538 else if (dxa > 0.5)
21540 x2 = copysign (xa2, x);
21543 enum machine_mode mode
= GET_MODE (operand0
);
21544 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21546 TWO52
= ix86_gen_TWO52 (mode
);
21548 /* Temporary for holding the result, initialized to the input
21549 operand to ease control flow. */
21550 res
= gen_reg_rtx (mode
);
21551 emit_move_insn (res
, operand1
);
21553 /* xa = abs (operand1) */
21554 xa
= ix86_expand_sse_fabs (res
, &mask
);
21556 /* if (!isless (xa, TWO52)) goto label; */
21557 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21559 /* xa2 = xa + TWO52 - TWO52; */
21560 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21561 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21563 /* dxa = xa2 - xa; */
21564 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21566 /* generate 0.5, 1.0 and -0.5 */
21567 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21568 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21569 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21573 tmp
= gen_reg_rtx (mode
);
21574 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21575 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21576 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21577 gen_rtx_AND (mode
, one
, tmp
)));
21578 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21579 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21580 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21581 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21582 gen_rtx_AND (mode
, one
, tmp
)));
21583 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21585 /* res = copysign (xa2, operand1) */
21586 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21588 emit_label (label
);
21589 LABEL_NUSES (label
) = 1;
21591 emit_move_insn (operand0
, res
);
21594 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21597 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21599 /* C code for SSE variant we expand below.
21600 double xa = fabs (x), x2;
21601 if (!isless (xa, TWO52))
21603 x2 = (double)(long)x;
21604 if (HONOR_SIGNED_ZEROS (mode))
21605 return copysign (x2, x);
21608 enum machine_mode mode
= GET_MODE (operand0
);
21609 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21611 TWO52
= ix86_gen_TWO52 (mode
);
21613 /* Temporary for holding the result, initialized to the input
21614 operand to ease control flow. */
21615 res
= gen_reg_rtx (mode
);
21616 emit_move_insn (res
, operand1
);
21618 /* xa = abs (operand1) */
21619 xa
= ix86_expand_sse_fabs (res
, &mask
);
21621 /* if (!isless (xa, TWO52)) goto label; */
21622 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21624 /* x = (double)(long)x */
21625 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21626 expand_fix (xi
, res
, 0);
21627 expand_float (res
, xi
, 0);
21629 if (HONOR_SIGNED_ZEROS (mode
))
21630 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21632 emit_label (label
);
21633 LABEL_NUSES (label
) = 1;
21635 emit_move_insn (operand0
, res
);
21638 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21641 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21643 enum machine_mode mode
= GET_MODE (operand0
);
21644 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21646 /* C code for SSE variant we expand below.
21647 double xa = fabs (x), x2;
21648 if (!isless (xa, TWO52))
21650 xa2 = xa + TWO52 - TWO52;
21654 x2 = copysign (xa2, x);
21658 TWO52
= ix86_gen_TWO52 (mode
);
21660 /* Temporary for holding the result, initialized to the input
21661 operand to ease control flow. */
21662 res
= gen_reg_rtx (mode
);
21663 emit_move_insn (res
, operand1
);
21665 /* xa = abs (operand1) */
21666 xa
= ix86_expand_sse_fabs (res
, &smask
);
21668 /* if (!isless (xa, TWO52)) goto label; */
21669 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21671 /* res = xa + TWO52 - TWO52; */
21672 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21673 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21674 emit_move_insn (res
, tmp
);
21677 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21679 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21680 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21681 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21682 gen_rtx_AND (mode
, mask
, one
)));
21683 tmp
= expand_simple_binop (mode
, MINUS
,
21684 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21685 emit_move_insn (res
, tmp
);
21687 /* res = copysign (res, operand1) */
21688 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21690 emit_label (label
);
21691 LABEL_NUSES (label
) = 1;
21693 emit_move_insn (operand0
, res
);
21696 /* Expand SSE sequence for computing round from OPERAND1 storing
21699 ix86_expand_round (rtx operand0
, rtx operand1
)
21701 /* C code for the stuff we're doing below:
21702 double xa = fabs (x);
21703 if (!isless (xa, TWO52))
21705 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21706 return copysign (xa, x);
21708 enum machine_mode mode
= GET_MODE (operand0
);
21709 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21710 const struct real_format
*fmt
;
21711 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21713 /* Temporary for holding the result, initialized to the input
21714 operand to ease control flow. */
21715 res
= gen_reg_rtx (mode
);
21716 emit_move_insn (res
, operand1
);
21718 TWO52
= ix86_gen_TWO52 (mode
);
21719 xa
= ix86_expand_sse_fabs (res
, &mask
);
21720 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21722 /* load nextafter (0.5, 0.0) */
21723 fmt
= REAL_MODE_FORMAT (mode
);
21724 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21725 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21727 /* xa = xa + 0.5 */
21728 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21729 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21731 /* xa = (double)(int64_t)xa */
21732 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21733 expand_fix (xi
, xa
, 0);
21734 expand_float (xa
, xi
, 0);
21736 /* res = copysign (xa, operand1) */
21737 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21739 emit_label (label
);
21740 LABEL_NUSES (label
) = 1;
21742 emit_move_insn (operand0
, res
);
21745 #include "gt-i386.h"