/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Stack-probe limit; -1 means "no limit" when the target configuration
   does not override it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4
   ("other").  The final ": 4)" arm had been dropped in extraction, leaving
   the conditional chain and parentheses unterminated.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a cost of N encoding bytes on the same
   scale as COSTS_N_INSNS.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Instruction cost table for the Intel Core 2 family (struct processor_costs).
   Field order follows the struct: arithmetic, mul/div per mode, movsx/movzx,
   reg move/load/store costs for integer, x87, MMX and SSE register files,
   prefetch parameters, x87 FP op costs, and stringop (memcpy/memset)
   algorithm tables.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode.
   (Comment fixed: this slot is the fp-store field — compare the
   parallel entries in generic64_cost/generic32_cost below; it was
   previously mislabeled "cost of loading integer registers".)  */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy algorithm table: {max byte count, algorithm} pairs; -1 = no limit.  */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
/* memset algorithm table, same format as above.  */
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
/* Cost table for the -mtune=generic 64-bit model (common subset of Nocona
   and K8 behavior, per the comment above).  Same field layout as the other
   processor_costs initializers in this file.  */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Stringop tables: the first element of each pair is a DUMMY_STRINGOP_ALGS
   placeholder — presumably the 32-bit slot, unused since this table is for
   64-bit tuning only (generic32_cost below shows the mirror image); confirm
   against the stringop_algs selection code.  */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Cost table for the -mtune=generic 32-bit model (common subset of Athlon,
   PPro, Pentium4, Nocona and K8, per the comment above).  Same field layout
   as the other processor_costs initializers in this file.  */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Stringop tables: here the DUMMY_STRINGOP_ALGS placeholder occupies the
   second slot of each pair (mirror of generic64_cost above) — presumably
   the unused 64-bit slot; confirm against the stringop selection code.  */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table currently in effect.  Initialized to pentium_cost;
   presumably repointed to the table matching -mtune during option
   processing — the selection code is not in this chunk, confirm there.  */
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
982 /* Processor feature/optimization bitmasks. */
/* Each m_* macro is a one-bit mask keyed by the corresponding PROCESSOR_*
   enumerator; composite masks (m_K6_GEODE, m_ATHLON_K8, m_GENERIC,
   m_ATHLON_K8_AMDFAM10) OR related CPUs together.  Note m_K6_GEODE is
   defined textually before m_K6 — harmless, since macros expand at their
   point of use, not at definition.  */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_GEODE (1<<PROCESSOR_GEODE)
988 #define m_K6_GEODE (m_K6 | m_GEODE)
989 #define m_K6 (1<<PROCESSOR_K6)
990 #define m_ATHLON (1<<PROCESSOR_ATHLON)
991 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
992 #define m_K8 (1<<PROCESSOR_K8)
993 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
994 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
995 #define m_NOCONA (1<<PROCESSOR_NOCONA)
996 #define m_CORE2 (1<<PROCESSOR_CORE2)
997 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
998 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
999 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1000 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1002 /* Generic instruction choice should be common subset of supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
1006 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
1007 generic because it is not working well with PPro base chips. */
1008 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1010 const int x86_push_memory
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1011 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1012 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
1013 /* Enable to zero extend integer registers to avoid partial dependencies */
1014 const int x86_movx
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1015 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */;
1016 const int x86_double_with_add
= ~m_386
;
1017 const int x86_use_bit_test
= m_386
;
1018 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
1019 | m_K6
| m_CORE2
| m_GENERIC
;
1020 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1022 const int x86_3dnow_a
= m_ATHLON_K8_AMDFAM10
;
1023 const int x86_deep_branch
= m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
1024 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1025 /* Branch hints were put in P4 based on simulation result. But
1026 after P4 was made, no performance benefit was observed with
1027 branch hints. It also increases the code size. As a result,
1028 icc never generates branch hints. */
1029 const int x86_branch_hints
= 0;
1030 const int x86_use_sahf
= m_PPRO
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_GENERIC32
;
1031 /*m_GENERIC | m_ATHLON_K8 ? */
1032 /* We probably ought to watch for partial register stalls on Generic32
1033 compilation setting as well. However in current implementation the
1034 partial register stalls are not eliminated very well - they can
1035 be introduced via subregs synthesized by combine and can happen
1036 in caller/callee saving sequences.
1037 Because this option pays back little on PPro based chips and is in conflict
1038 with partial reg. dependencies used by Athlon/P4 based chips, it is better
1039 to leave it off for generic32 for now. */
1040 const int x86_partial_reg_stall
= m_PPRO
;
1041 const int x86_partial_flag_reg_stall
= m_CORE2
| m_GENERIC
;
1042 const int x86_use_himode_fiop
= m_386
| m_486
| m_K6_GEODE
;
1043 const int x86_use_simode_fiop
= ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
1044 | m_CORE2
| m_GENERIC
);
1045 const int x86_use_mov0
= m_K6
;
1046 const int x86_use_cltd
= ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
);
1047 const int x86_read_modify_write
= ~m_PENT
;
1048 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
1049 const int x86_split_long_moves
= m_PPRO
;
1050 const int x86_promote_QImode
= m_K6_GEODE
| m_PENT
| m_386
| m_486
1051 | m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
;
1053 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
1054 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
1055 const int x86_qimode_math
= ~(0);
1056 const int x86_promote_qi_regs
= 0;
1057 /* On PPro this flag is meant to avoid partial register stalls. Just like
1058 the x86_partial_reg_stall this option might be considered for Generic32
1059 if our scheme for avoiding partial stalls was more effective. */
1060 const int x86_himode_math
= ~(m_PPRO
);
1061 const int x86_promote_hi_regs
= m_PPRO
;
1062 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1063 const int x86_sub_esp_4
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1064 | m_CORE2
| m_GENERIC
;
1065 const int x86_sub_esp_8
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1066 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1067 const int x86_add_esp_4
= m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
1068 | m_CORE2
| m_GENERIC
;
1069 const int x86_add_esp_8
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1070 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1071 /* Enable if integer moves are preferred for DFmode copies */
1072 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1073 | m_PPRO
| m_CORE2
| m_GENERIC
| m_GEODE
);
1074 const int x86_partial_reg_dependency
= m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1075 | m_CORE2
| m_GENERIC
;
1076 const int x86_memory_mismatch_stall
= m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1077 | m_CORE2
| m_GENERIC
;
1078 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1079 for outgoing arguments will be computed and placed into the variable
1080 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1081 for each call; instead, the function prologue should increase the stack frame
1082 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1084 const int x86_accumulate_outgoing_args
= m_ATHLON_K8_AMDFAM10
| m_PENT4
1085 | m_NOCONA
| m_PPRO
| m_CORE2
1087 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
1088 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
1089 const int x86_shift1
= ~m_486
;
1090 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
1091 | m_ATHLON_K8_AMDFAM10
| m_PENT4
1092 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1093 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
1094 that treat 128bit SSE registers as single units versus K8 based chips that
1095 divide SSE registers into two 64bit halves.
1096 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
1097 to allow register renaming on 128bit SSE units, but usually results in one
1098 extra microop on 64bit SSE units. Experimental results shows that disabling
1099 this option on P4 brings over 20% SPECfp regression, while enabling it on
1100 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
1102 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1103 | m_GENERIC
| m_AMDFAM10
;
1104 /* Set for machines where the type and dependencies are resolved on SSE
1105 register parts instead of whole registers, so we may maintain just
1106 lower part of scalar values in proper format leaving the upper part
1108 const int x86_sse_split_regs
= m_ATHLON_K8
;
1109 /* Code generation for scalar reg-reg moves of single and double precision data:
1110 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1114 if (x86_sse_partial_reg_dependency == true)
1119 Code generation for scalar loads of double precision data:
1120 if (x86_sse_split_regs == true)
1121 movlpd mem, reg (gas syntax)
1125 Code generation for unaligned packed loads of single precision data
1126 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1127 if (x86_sse_unaligned_move_optimal)
1130 if (x86_sse_partial_reg_dependency == true)
1142 Code generation for unaligned packed loads of double precision data
1143 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1144 if (x86_sse_unaligned_move_optimal)
1147 if (x86_sse_split_regs == true)
1158 const int x86_sse_unaligned_move_optimal
= m_AMDFAM10
;
1159 const int x86_sse_typeless_stores
= m_ATHLON_K8_AMDFAM10
;
1160 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
1161 const int x86_use_ffreep
= m_ATHLON_K8_AMDFAM10
;
1162 const int x86_use_incdec
= ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
);
1164 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1165 integer data in xmm registers. Which results in pretty abysmal code. */
1166 const int x86_inter_unit_moves
= 0 /* ~(m_ATHLON_K8) */;
1168 const int x86_ext_80387_constants
= m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
1169 | m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
1170 /* Some CPU cores are not able to predict more than 4 branch instructions in
1171 the 16 byte window. */
1172 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1173 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1174 const int x86_schedule
= m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
1175 | m_CORE2
| m_GENERIC
;
1176 const int x86_use_bt
= m_ATHLON_K8_AMDFAM10
;
1177 /* Compare and exchange was added for 80486. */
1178 const int x86_cmpxchg
= ~m_386
;
1179 /* Compare and exchange 8 bytes was added for pentium. */
1180 const int x86_cmpxchg8b
= ~(m_386
| m_486
);
1181 /* Exchange and add was added for 80486. */
1182 const int x86_xadd
= ~m_386
;
1183 /* Byteswap was added for 80486. */
1184 const int x86_bswap
= ~m_386
;
1185 const int x86_pad_returns
= m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
;
/* Stringop (memcpy/memset expansion) algorithm forced by the user;
   no_stringop presumably means "no override — choose from the per-CPU
   stringop cost tables above".  NOTE(review): option-handling code is not
   in this chunk; confirm which -m option sets this.  */
1187 static enum stringop_alg stringop_alg
= no_stringop
;
1189 /* In case the average insn count for single function invocation is
1190 lower than this constant, emit fast (but longer) prologue and
1192 #define FAST_PROLOGUE_INSN_COUNT 20
1194 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1195 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1196 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1197 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1199 /* Array of the smallest class containing reg number REGNO, indexed by
1200 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1202 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1204 /* ax, dx, cx, bx */
1205 AREG
, DREG
, CREG
, BREG
,
1206 /* si, di, bp, sp */
1207 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1209 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1210 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1213 /* flags, fpsr, fpcr, frame */
1214 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1215 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1217 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1219 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1220 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1221 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1225 /* The "default" register map used in 32bit mode. */
1227 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1229 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1230 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1231 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1232 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1233 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1234 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1235 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC hard register numbers of the six integer argument registers of the
   x86-64 ABI, in argument-passing order: RDI, RSI, RDX, RCX, R8, R9
   (per the inline comments on each entry).  */
1238 static int const x86_64_int_parameter_registers
[6] =
1240 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1241 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* GCC hard register numbers used for integer return values in the x86-64
   ABI.  (Comment fixed: GCC regno 1 is dx — see the DWARF numbering note
   below, "2 for %edx (gcc regno = 1)", and the parameter-register table
   above; it was previously mislabeled RDI.)  */
1244 static int const x86_64_int_return_registers
[4] =
1246 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1249 /* The "default" register map used in 64bit mode. */
1250 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1252 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1253 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1255 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1256 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1257 8,9,10,11,12,13,14,15, /* extended integer registers */
1258 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1261 /* Define the register numbers to be used in Dwarf debugging information.
1262 The SVR4 reference port C compiler uses the following register numbers
1263 in its Dwarf output code:
1264 0 for %eax (gcc regno = 0)
1265 1 for %ecx (gcc regno = 2)
1266 2 for %edx (gcc regno = 1)
1267 3 for %ebx (gcc regno = 3)
1268 4 for %esp (gcc regno = 7)
1269 5 for %ebp (gcc regno = 6)
1270 6 for %esi (gcc regno = 4)
1271 7 for %edi (gcc regno = 5)
1272 The following three DWARF register numbers are never generated by
1273 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1274 believes these numbers have these meanings.
1275 8 for %eip (no gcc equivalent)
1276 9 for %eflags (gcc regno = 17)
1277 10 for %trapno (no gcc equivalent)
1278 It is not at all clear how we should number the FP stack registers
1279 for the x86 architecture. If the version of SDB on x86/svr4 were
1280 a bit less brain dead with respect to floating-point then we would
1281 have a precedent to follow with respect to DWARF register numbers
1282 for x86 FP registers, but the SDB on x86/svr4 is so completely
1283 broken with respect to FP registers that it is hardly worth thinking
1284 of it as something to strive for compatibility with.
1285 The version of x86/svr4 SDB I have at the moment does (partially)
1286 seem to believe that DWARF register number 11 is associated with
1287 the x86 register %st(0), but that's about all. Higher DWARF
1288 register numbers don't seem to be associated with anything in
1289 particular, and even for DWARF regno 11, SDB only seems to under-
1290 stand that it should say that a variable lives in %st(0) (when
1291 asked via an `=' command) if we said it was in DWARF regno 11,
1292 but SDB still prints garbage when asked for the value of the
1293 variable in question (via a `/' command).
1294 (Also note that the labels SDB prints for various FP stack regs
1295 when doing an `x' command are all wrong.)
1296 Note that these problems generally don't affect the native SVR4
1297 C compiler because it doesn't allow the use of -O with -g and
1298 because when it is *not* optimizing, it allocates a memory
1299 location for each floating-point variable, and the memory
1300 location is what gets described in the DWARF AT_location
1301 attribute for the variable in question.
1302 Regardless of the severe mental illness of the x86/svr4 SDB, we
1303 do something sensible here and we use the following DWARF
1304 register numbers. Note that these are all stack-top-relative
1306 11 for %st(0) (gcc regno = 8)
1307 12 for %st(1) (gcc regno = 9)
1308 13 for %st(2) (gcc regno = 10)
1309 14 for %st(3) (gcc regno = 11)
1310 15 for %st(4) (gcc regno = 12)
1311 16 for %st(5) (gcc regno = 13)
1312 17 for %st(6) (gcc regno = 14)
1313 18 for %st(7) (gcc regno = 15)
1315 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1317 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1318 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1319 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1320 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1321 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1322 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1323 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1326 /* Test and compare insns in i386.md store the information needed to
1327 generate branch and scc insns here. */
1329 rtx ix86_compare_op0
= NULL_RTX
;
1330 rtx ix86_compare_op1
= NULL_RTX
;
1331 rtx ix86_compare_emitted
= NULL_RTX
;
1333 /* Size of the register save area. */
1334 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1336 /* Define the structure for the machine field in struct function. */
1338 struct stack_local_entry
GTY(())
1340 unsigned short mode
;
1343 struct stack_local_entry
*next
;
1346 /* Structure describing stack frame layout.
1347 Stack grows downward:
1353 saved frame pointer if frame_pointer_needed
1354 <- HARD_FRAME_POINTER
1359 [va_arg registers] (
1360 > to_allocate <- FRAME_POINTER
1370 HOST_WIDE_INT frame
;
1372 int outgoing_arguments_size
;
1375 HOST_WIDE_INT to_allocate
;
1376 /* The offsets relative to ARG_POINTER. */
1377 HOST_WIDE_INT frame_pointer_offset
;
1378 HOST_WIDE_INT hard_frame_pointer_offset
;
1379 HOST_WIDE_INT stack_pointer_offset
;
1381 /* When save_regs_using_mov is set, emit prologue using
1382 move instead of push instructions. */
1383 bool save_regs_using_mov
;
1386 /* Code model option. */
1387 enum cmodel ix86_cmodel
;
1389 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1391 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1393 /* Which unit we are generating floating point math for. */
1394 enum fpmath_unit ix86_fpmath
;
1396 /* Which cpu are we scheduling for. */
1397 enum processor_type ix86_tune
;
1398 /* Which instruction set architecture to use. */
1399 enum processor_type ix86_arch
;
1401 /* true if sse prefetch instruction is not NOOP. */
1402 int x86_prefetch_sse
;
1404 /* true if cmpxchg16b is supported. */
1407 /* ix86_regparm_string as a number */
1408 static int ix86_regparm
;
1410 /* -mstackrealign option */
1411 extern int ix86_force_align_arg_pointer
;
1412 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1414 /* Preferred alignment for stack boundary in bits. */
1415 unsigned int ix86_preferred_stack_boundary
;
1417 /* Values 1-5: see jump.c */
1418 int ix86_branch_cost
;
1420 /* Variables which are this size or smaller are put in the data/bss
1421 or ldata/lbss sections. */
1423 int ix86_section_threshold
= 65536;
1425 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1426 char internal_label_prefix
[16];
1427 int internal_label_prefix_len
;
1429 static bool ix86_handle_option (size_t, const char *, int);
1430 static void output_pic_addr_const (FILE *, rtx
, int);
1431 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1433 static const char *get_some_local_dynamic_name (void);
1434 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1435 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1436 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1438 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1439 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1441 static rtx
get_thread_pointer (int);
1442 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1443 static void get_pc_thunk_name (char [32], unsigned int);
1444 static rtx
gen_push (rtx
);
1445 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1446 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1447 static struct machine_function
* ix86_init_machine_status (void);
1448 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1449 static int ix86_nsaved_regs (void);
1450 static void ix86_emit_save_regs (void);
1451 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1452 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1453 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1454 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1455 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1456 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1457 static int ix86_issue_rate (void);
1458 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1459 static int ia32_multipass_dfa_lookahead (void);
1460 static void ix86_init_mmx_sse_builtins (void);
1461 static rtx
x86_this_parameter (tree
);
1462 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1463 HOST_WIDE_INT
, tree
);
1464 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1465 static void x86_file_start (void);
1466 static void ix86_reorg (void);
1467 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1468 static tree
ix86_build_builtin_va_list (void);
1469 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1471 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1472 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1473 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1475 static int ix86_address_cost (rtx
);
1476 static bool ix86_cannot_force_const_mem (rtx
);
1477 static rtx
ix86_delegitimize_address (rtx
);
1479 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1481 struct builtin_description
;
1482 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1484 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1486 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1487 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1488 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1489 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1490 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1491 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1492 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1493 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1494 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1495 static int ix86_fp_comparison_cost (enum rtx_code code
);
1496 static unsigned int ix86_select_alt_pic_regnum (void);
1497 static int ix86_save_reg (unsigned int, int);
1498 static void ix86_compute_frame_layout (struct ix86_frame
*);
1499 static int ix86_comp_type_attributes (tree
, tree
);
1500 static int ix86_function_regparm (tree
, tree
);
1501 const struct attribute_spec ix86_attribute_table
[];
1502 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1503 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1504 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1505 static bool contains_128bit_aligned_vector_p (tree
);
1506 static rtx
ix86_struct_value_rtx (tree
, int);
1507 static bool ix86_ms_bitfield_layout_p (tree
);
1508 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1509 static int extended_reg_mentioned_1 (rtx
*, void *);
1510 static bool ix86_rtx_costs (rtx
, int, int, int *);
1511 static int min_insn_size (rtx
);
1512 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1513 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1514 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1516 static void ix86_init_builtins (void);
1517 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1518 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1519 static const char *ix86_mangle_fundamental_type (tree
);
1520 static tree
ix86_stack_protect_fail (void);
1521 static rtx
ix86_internal_arg_pointer (void);
1522 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1524 /* This function is only used on Solaris. */
1525 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1528 /* Register class used for passing given 64bit part of the argument.
1529 These represent classes as documented by the PS ABI, with the exception
1530 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1531 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1533 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1534 whenever possible (upper half does contain padding).
1536 enum x86_64_reg_class
1539 X86_64_INTEGER_CLASS
,
1540 X86_64_INTEGERSI_CLASS
,
1547 X86_64_COMPLEX_X87_CLASS
,
1550 static const char * const x86_64_reg_class_name
[] = {
1551 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1552 "sseup", "x87", "x87up", "cplx87", "no"
1555 #define MAX_CLASSES 4
1557 /* Table of constants used by fldpi, fldln2, etc.... */
1558 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1559 static bool ext_80387_constants_init
= 0;
1560 static void init_ext_80387_constants (void);
1561 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1562 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1563 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1564 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1565 unsigned HOST_WIDE_INT align
)
1568 /* Initialize the GCC target structure. */
1569 #undef TARGET_ATTRIBUTE_TABLE
1570 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1571 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1572 # undef TARGET_MERGE_DECL_ATTRIBUTES
1573 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1576 #undef TARGET_COMP_TYPE_ATTRIBUTES
1577 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1579 #undef TARGET_INIT_BUILTINS
1580 #define TARGET_INIT_BUILTINS ix86_init_builtins
1581 #undef TARGET_EXPAND_BUILTIN
1582 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1583 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1584 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1586 #undef TARGET_ASM_FUNCTION_EPILOGUE
1587 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1589 #undef TARGET_ENCODE_SECTION_INFO
1590 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1591 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1593 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1596 #undef TARGET_ASM_OPEN_PAREN
1597 #define TARGET_ASM_OPEN_PAREN ""
1598 #undef TARGET_ASM_CLOSE_PAREN
1599 #define TARGET_ASM_CLOSE_PAREN ""
1601 #undef TARGET_ASM_ALIGNED_HI_OP
1602 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1603 #undef TARGET_ASM_ALIGNED_SI_OP
1604 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1606 #undef TARGET_ASM_ALIGNED_DI_OP
1607 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1610 #undef TARGET_ASM_UNALIGNED_HI_OP
1611 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1612 #undef TARGET_ASM_UNALIGNED_SI_OP
1613 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1614 #undef TARGET_ASM_UNALIGNED_DI_OP
1615 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1617 #undef TARGET_SCHED_ADJUST_COST
1618 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1619 #undef TARGET_SCHED_ISSUE_RATE
1620 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1621 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1622 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1623 ia32_multipass_dfa_lookahead
1625 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1626 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1629 #undef TARGET_HAVE_TLS
1630 #define TARGET_HAVE_TLS true
1632 #undef TARGET_CANNOT_FORCE_CONST_MEM
1633 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1634 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1635 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1637 #undef TARGET_DELEGITIMIZE_ADDRESS
1638 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1640 #undef TARGET_MS_BITFIELD_LAYOUT_P
1641 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1644 #undef TARGET_BINDS_LOCAL_P
1645 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1648 #undef TARGET_ASM_OUTPUT_MI_THUNK
1649 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1650 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1651 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1653 #undef TARGET_ASM_FILE_START
1654 #define TARGET_ASM_FILE_START x86_file_start
1656 #undef TARGET_DEFAULT_TARGET_FLAGS
1657 #define TARGET_DEFAULT_TARGET_FLAGS \
1659 | TARGET_64BIT_DEFAULT \
1660 | TARGET_SUBTARGET_DEFAULT \
1661 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1663 #undef TARGET_HANDLE_OPTION
1664 #define TARGET_HANDLE_OPTION ix86_handle_option
1666 #undef TARGET_RTX_COSTS
1667 #define TARGET_RTX_COSTS ix86_rtx_costs
1668 #undef TARGET_ADDRESS_COST
1669 #define TARGET_ADDRESS_COST ix86_address_cost
1671 #undef TARGET_FIXED_CONDITION_CODE_REGS
1672 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1673 #undef TARGET_CC_MODES_COMPATIBLE
1674 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1676 #undef TARGET_MACHINE_DEPENDENT_REORG
1677 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1679 #undef TARGET_BUILD_BUILTIN_VA_LIST
1680 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1682 #undef TARGET_MD_ASM_CLOBBERS
1683 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1685 #undef TARGET_PROMOTE_PROTOTYPES
1686 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1687 #undef TARGET_STRUCT_VALUE_RTX
1688 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1689 #undef TARGET_SETUP_INCOMING_VARARGS
1690 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1691 #undef TARGET_MUST_PASS_IN_STACK
1692 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1693 #undef TARGET_PASS_BY_REFERENCE
1694 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1695 #undef TARGET_INTERNAL_ARG_POINTER
1696 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1697 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1698 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1700 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1701 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1703 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1704 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1706 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1707 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1714 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1715 #undef TARGET_INSERT_ATTRIBUTES
1716 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1719 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1720 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1722 #undef TARGET_STACK_PROTECT_FAIL
1723 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1725 #undef TARGET_FUNCTION_VALUE
1726 #define TARGET_FUNCTION_VALUE ix86_function_value
1728 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1737 /* Implement TARGET_HANDLE_OPTION. */
1740 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1747 target_flags
&= ~MASK_3DNOW_A
;
1748 target_flags_explicit
|= MASK_3DNOW_A
;
1755 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1756 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1763 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1764 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1771 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1772 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1779 target_flags
&= ~MASK_SSE4A
;
1780 target_flags_explicit
|= MASK_SSE4A
;
1789 /* Sometimes certain combinations of command options do not make
1790 sense on a particular target machine. You can define a macro
1791 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1792 defined, is executed once just after all the command options have
1795 Don't use this macro to turn on various extra optimizations for
1796 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1799 override_options (void)
1802 int ix86_tune_defaulted
= 0;
1804 /* Comes from final.c -- no real reason to change it. */
1805 #define MAX_CODE_ALIGN 16
1809 const struct processor_costs
*cost
; /* Processor costs */
1810 const int target_enable
; /* Target flags to enable. */
1811 const int target_disable
; /* Target flags to disable. */
1812 const int align_loop
; /* Default alignments. */
1813 const int align_loop_max_skip
;
1814 const int align_jump
;
1815 const int align_jump_max_skip
;
1816 const int align_func
;
1818 const processor_target_table
[PROCESSOR_max
] =
1820 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1821 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1822 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1823 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1824 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1825 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1826 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1827 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1828 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1829 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1830 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1831 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1832 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1833 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1836 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1839 const char *const name
; /* processor name or nickname. */
1840 const enum processor_type processor
;
1841 const enum pta_flags
1847 PTA_PREFETCH_SSE
= 16,
1858 const processor_alias_table
[] =
1860 {"i386", PROCESSOR_I386
, 0},
1861 {"i486", PROCESSOR_I486
, 0},
1862 {"i586", PROCESSOR_PENTIUM
, 0},
1863 {"pentium", PROCESSOR_PENTIUM
, 0},
1864 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1865 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1866 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1867 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1868 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1869 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1870 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1871 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1872 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1873 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1874 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1875 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1876 | PTA_MMX
| PTA_PREFETCH_SSE
},
1877 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1878 | PTA_MMX
| PTA_PREFETCH_SSE
},
1879 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1880 | PTA_MMX
| PTA_PREFETCH_SSE
},
1881 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1882 | PTA_MMX
| PTA_PREFETCH_SSE
| PTA_CX16
},
1883 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1884 | PTA_64BIT
| PTA_MMX
1885 | PTA_PREFETCH_SSE
| PTA_CX16
},
1886 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1888 {"k6", PROCESSOR_K6
, PTA_MMX
},
1889 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1890 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1891 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1893 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1894 | PTA_3DNOW
| PTA_3DNOW_A
},
1895 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1896 | PTA_3DNOW_A
| PTA_SSE
},
1897 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1898 | PTA_3DNOW_A
| PTA_SSE
},
1899 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1900 | PTA_3DNOW_A
| PTA_SSE
},
1901 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1902 | PTA_SSE
| PTA_SSE2
},
1903 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1904 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1905 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1906 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1907 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1908 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1909 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1910 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1911 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1912 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1913 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1914 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1915 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1916 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1919 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1921 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1922 SUBTARGET_OVERRIDE_OPTIONS
;
1925 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1926 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1929 /* -fPIC is the default for x86_64. */
1930 if (TARGET_MACHO
&& TARGET_64BIT
)
1933 /* Set the default values for switches whose default depends on TARGET_64BIT
1934 in case they weren't overwritten by command line options. */
1937 /* Mach-O doesn't support omitting the frame pointer for now. */
1938 if (flag_omit_frame_pointer
== 2)
1939 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1940 if (flag_asynchronous_unwind_tables
== 2)
1941 flag_asynchronous_unwind_tables
= 1;
1942 if (flag_pcc_struct_return
== 2)
1943 flag_pcc_struct_return
= 0;
1947 if (flag_omit_frame_pointer
== 2)
1948 flag_omit_frame_pointer
= 0;
1949 if (flag_asynchronous_unwind_tables
== 2)
1950 flag_asynchronous_unwind_tables
= 0;
1951 if (flag_pcc_struct_return
== 2)
1952 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1955 /* Need to check -mtune=generic first. */
1956 if (ix86_tune_string
)
1958 if (!strcmp (ix86_tune_string
, "generic")
1959 || !strcmp (ix86_tune_string
, "i686")
1960 /* As special support for cross compilers we read -mtune=native
1961 as -mtune=generic. With native compilers we won't see the
1962 -mtune=native, as it was changed by the driver. */
1963 || !strcmp (ix86_tune_string
, "native"))
1966 ix86_tune_string
= "generic64";
1968 ix86_tune_string
= "generic32";
1970 else if (!strncmp (ix86_tune_string
, "generic", 7))
1971 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1975 if (ix86_arch_string
)
1976 ix86_tune_string
= ix86_arch_string
;
1977 if (!ix86_tune_string
)
1979 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1980 ix86_tune_defaulted
= 1;
1983 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1984 need to use a sensible tune option. */
1985 if (!strcmp (ix86_tune_string
, "generic")
1986 || !strcmp (ix86_tune_string
, "x86-64")
1987 || !strcmp (ix86_tune_string
, "i686"))
1990 ix86_tune_string
= "generic64";
1992 ix86_tune_string
= "generic32";
1995 if (ix86_stringop_string
)
1997 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1998 stringop_alg
= rep_prefix_1_byte
;
1999 else if (!strcmp (ix86_stringop_string
, "libcall"))
2000 stringop_alg
= libcall
;
2001 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2002 stringop_alg
= rep_prefix_4_byte
;
2003 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2004 stringop_alg
= rep_prefix_8_byte
;
2005 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2006 stringop_alg
= loop_1_byte
;
2007 else if (!strcmp (ix86_stringop_string
, "loop"))
2008 stringop_alg
= loop
;
2009 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2010 stringop_alg
= unrolled_loop
;
2012 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2014 if (!strcmp (ix86_tune_string
, "x86-64"))
2015 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2016 "-mtune=generic instead as appropriate.");
2018 if (!ix86_arch_string
)
2019 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2020 if (!strcmp (ix86_arch_string
, "generic"))
2021 error ("generic CPU can be used only for -mtune= switch");
2022 if (!strncmp (ix86_arch_string
, "generic", 7))
2023 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2025 if (ix86_cmodel_string
!= 0)
2027 if (!strcmp (ix86_cmodel_string
, "small"))
2028 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2029 else if (!strcmp (ix86_cmodel_string
, "medium"))
2030 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2032 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
2033 else if (!strcmp (ix86_cmodel_string
, "32"))
2034 ix86_cmodel
= CM_32
;
2035 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2036 ix86_cmodel
= CM_KERNEL
;
2037 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
2038 ix86_cmodel
= CM_LARGE
;
2040 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2044 ix86_cmodel
= CM_32
;
2046 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2048 if (ix86_asm_string
!= 0)
2051 && !strcmp (ix86_asm_string
, "intel"))
2052 ix86_asm_dialect
= ASM_INTEL
;
2053 else if (!strcmp (ix86_asm_string
, "att"))
2054 ix86_asm_dialect
= ASM_ATT
;
2056 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2058 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2059 error ("code model %qs not supported in the %s bit mode",
2060 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2061 if (ix86_cmodel
== CM_LARGE
)
2062 sorry ("code model %<large%> not supported yet");
2063 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2064 sorry ("%i-bit mode not compiled in",
2065 (target_flags
& MASK_64BIT
) ? 64 : 32);
2067 for (i
= 0; i
< pta_size
; i
++)
2068 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2070 ix86_arch
= processor_alias_table
[i
].processor
;
2071 /* Default cpu tuning to the architecture. */
2072 ix86_tune
= ix86_arch
;
2073 if (processor_alias_table
[i
].flags
& PTA_MMX
2074 && !(target_flags_explicit
& MASK_MMX
))
2075 target_flags
|= MASK_MMX
;
2076 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2077 && !(target_flags_explicit
& MASK_3DNOW
))
2078 target_flags
|= MASK_3DNOW
;
2079 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2080 && !(target_flags_explicit
& MASK_3DNOW_A
))
2081 target_flags
|= MASK_3DNOW_A
;
2082 if (processor_alias_table
[i
].flags
& PTA_SSE
2083 && !(target_flags_explicit
& MASK_SSE
))
2084 target_flags
|= MASK_SSE
;
2085 if (processor_alias_table
[i
].flags
& PTA_SSE2
2086 && !(target_flags_explicit
& MASK_SSE2
))
2087 target_flags
|= MASK_SSE2
;
2088 if (processor_alias_table
[i
].flags
& PTA_SSE3
2089 && !(target_flags_explicit
& MASK_SSE3
))
2090 target_flags
|= MASK_SSE3
;
2091 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2092 && !(target_flags_explicit
& MASK_SSSE3
))
2093 target_flags
|= MASK_SSSE3
;
2094 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2095 x86_prefetch_sse
= true;
2096 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2097 x86_cmpxchg16b
= true;
2098 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2099 && !(target_flags_explicit
& MASK_POPCNT
))
2100 target_flags
|= MASK_POPCNT
;
2101 if (processor_alias_table
[i
].flags
& PTA_ABM
2102 && !(target_flags_explicit
& MASK_ABM
))
2103 target_flags
|= MASK_ABM
;
2104 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2105 && !(target_flags_explicit
& MASK_SSE4A
))
2106 target_flags
|= MASK_SSE4A
;
2107 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2108 error ("CPU you selected does not support x86-64 "
2114 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2116 for (i
= 0; i
< pta_size
; i
++)
2117 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2119 ix86_tune
= processor_alias_table
[i
].processor
;
2120 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2122 if (ix86_tune_defaulted
)
2124 ix86_tune_string
= "x86-64";
2125 for (i
= 0; i
< pta_size
; i
++)
2126 if (! strcmp (ix86_tune_string
,
2127 processor_alias_table
[i
].name
))
2129 ix86_tune
= processor_alias_table
[i
].processor
;
2132 error ("CPU you selected does not support x86-64 "
2135 /* Intel CPUs have always interpreted SSE prefetch instructions as
2136 NOPs; so, we can enable SSE prefetch instructions even when
2137 -mtune (rather than -march) points us to a processor that has them.
2138 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2139 higher processors. */
2140 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2141 x86_prefetch_sse
= true;
2145 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2148 ix86_cost
= &size_cost
;
2150 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2151 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2152 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2154 /* Arrange to set up i386_stack_locals for all functions. */
2155 init_machine_status
= ix86_init_machine_status
;
2157 /* Validate -mregparm= value. */
2158 if (ix86_regparm_string
)
2160 i
= atoi (ix86_regparm_string
);
2161 if (i
< 0 || i
> REGPARM_MAX
)
2162 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2168 ix86_regparm
= REGPARM_MAX
;
2170 /* If the user has provided any of the -malign-* options,
2171 warn and use that value only if -falign-* is not set.
2172 Remove this code in GCC 3.2 or later. */
2173 if (ix86_align_loops_string
)
2175 warning (0, "-malign-loops is obsolete, use -falign-loops");
2176 if (align_loops
== 0)
2178 i
= atoi (ix86_align_loops_string
);
2179 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2180 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2182 align_loops
= 1 << i
;
2186 if (ix86_align_jumps_string
)
2188 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2189 if (align_jumps
== 0)
2191 i
= atoi (ix86_align_jumps_string
);
2192 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2193 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2195 align_jumps
= 1 << i
;
2199 if (ix86_align_funcs_string
)
2201 warning (0, "-malign-functions is obsolete, use -falign-functions");
2202 if (align_functions
== 0)
2204 i
= atoi (ix86_align_funcs_string
);
2205 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2206 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2208 align_functions
= 1 << i
;
2212 /* Default align_* from the processor table. */
2213 if (align_loops
== 0)
2215 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2216 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2218 if (align_jumps
== 0)
2220 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2221 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2223 if (align_functions
== 0)
2225 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2228 /* Validate -mbranch-cost= value, or provide default. */
2229 ix86_branch_cost
= ix86_cost
->branch_cost
;
2230 if (ix86_branch_cost_string
)
2232 i
= atoi (ix86_branch_cost_string
);
2234 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2236 ix86_branch_cost
= i
;
2238 if (ix86_section_threshold_string
)
2240 i
= atoi (ix86_section_threshold_string
);
2242 error ("-mlarge-data-threshold=%d is negative", i
);
2244 ix86_section_threshold
= i
;
2247 if (ix86_tls_dialect_string
)
2249 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2250 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2251 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2252 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2253 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2254 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2256 error ("bad value (%s) for -mtls-dialect= switch",
2257 ix86_tls_dialect_string
);
2260 /* Keep nonleaf frame pointers. */
2261 if (flag_omit_frame_pointer
)
2262 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2263 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2264 flag_omit_frame_pointer
= 1;
2266 /* If we're doing fast math, we don't care about comparison order
2267 wrt NaNs. This lets us use a shorter comparison sequence. */
2268 if (flag_finite_math_only
)
2269 target_flags
&= ~MASK_IEEE_FP
;
2271 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2272 since the insns won't need emulation. */
2273 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
2274 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2276 /* Likewise, if the target doesn't have a 387, or we've specified
2277 software floating point, don't use 387 inline intrinsics. */
2279 target_flags
|= MASK_NO_FANCY_MATH_387
;
2281 /* Turn on SSE3 builtins for -mssse3. */
2283 target_flags
|= MASK_SSE3
;
2285 /* Turn on SSE3 builtins for -msse4a. */
2287 target_flags
|= MASK_SSE3
;
2289 /* Turn on SSE2 builtins for -msse3. */
2291 target_flags
|= MASK_SSE2
;
2293 /* Turn on SSE builtins for -msse2. */
2295 target_flags
|= MASK_SSE
;
2297 /* Turn on MMX builtins for -msse. */
2300 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2301 x86_prefetch_sse
= true;
2304 /* Turn on MMX builtins for 3Dnow. */
2306 target_flags
|= MASK_MMX
;
2308 /* Turn on POPCNT builtins for -mabm. */
2310 target_flags
|= MASK_POPCNT
;
2314 if (TARGET_ALIGN_DOUBLE
)
2315 error ("-malign-double makes no sense in the 64bit mode");
2317 error ("-mrtd calling convention not supported in the 64bit mode");
2319 /* Enable by default the SSE and MMX builtins. Do allow the user to
2320 explicitly disable any of these. In particular, disabling SSE and
2321 MMX for kernel code is extremely useful. */
2323 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2324 & ~target_flags_explicit
);
2328 /* i386 ABI does not specify red zone. It still makes sense to use it
2329 when programmer takes care to stack from being destroyed. */
2330 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2331 target_flags
|= MASK_NO_RED_ZONE
;
2334 /* Validate -mpreferred-stack-boundary= value, or provide default.
2335 The default of 128 bits is for Pentium III's SSE __m128. We can't
2336 change it because of optimize_size. Otherwise, we can't mix object
2337 files compiled with -Os and -On. */
2338 ix86_preferred_stack_boundary
= 128;
2339 if (ix86_preferred_stack_boundary_string
)
2341 i
= atoi (ix86_preferred_stack_boundary_string
);
2342 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2343 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2344 TARGET_64BIT
? 4 : 2);
2346 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2349 /* Accept -msseregparm only if at least SSE support is enabled. */
2350 if (TARGET_SSEREGPARM
2352 error ("-msseregparm used without SSE enabled");
2354 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2356 if (ix86_fpmath_string
!= 0)
2358 if (! strcmp (ix86_fpmath_string
, "387"))
2359 ix86_fpmath
= FPMATH_387
;
2360 else if (! strcmp (ix86_fpmath_string
, "sse"))
2364 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2365 ix86_fpmath
= FPMATH_387
;
2368 ix86_fpmath
= FPMATH_SSE
;
2370 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2371 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2375 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2376 ix86_fpmath
= FPMATH_387
;
2378 else if (!TARGET_80387
)
2380 warning (0, "387 instruction set disabled, using SSE arithmetics");
2381 ix86_fpmath
= FPMATH_SSE
;
2384 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2387 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2390 /* If the i387 is disabled, then do not return values in it. */
2392 target_flags
&= ~MASK_FLOAT_RETURNS
;
2394 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2395 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2397 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2399 /* ??? Unwind info is not correct around the CFG unless either a frame
2400 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2401 unwind info generation to be aware of the CFG and propagating states
2403 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2404 || flag_exceptions
|| flag_non_call_exceptions
)
2405 && flag_omit_frame_pointer
2406 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2408 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2409 warning (0, "unwind tables currently require either a frame pointer "
2410 "or -maccumulate-outgoing-args for correctness");
2411 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2414 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2417 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2418 p
= strchr (internal_label_prefix
, 'X');
2419 internal_label_prefix_len
= p
- internal_label_prefix
;
2423 /* When scheduling description is not available, disable scheduler pass
2424 so it won't slow down the compilation and make x87 code slower. */
2425 if (!TARGET_SCHEDULE
)
2426 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2428 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2429 set_param_value ("simultaneous-prefetches",
2430 ix86_cost
->simultaneous_prefetches
);
2431 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2432 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2435 /* switch to the appropriate section for output of DECL.
2436 DECL is either a `VAR_DECL' node or a constant of some sort.
2437 RELOC indicates whether forming the initial value of DECL requires
2438 link-time relocations. */
2441 x86_64_elf_select_section (tree decl
, int reloc
,
2442 unsigned HOST_WIDE_INT align
)
2444 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2445 && ix86_in_large_data_p (decl
))
2447 const char *sname
= NULL
;
2448 unsigned int flags
= SECTION_WRITE
;
2449 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2454 case SECCAT_DATA_REL
:
2455 sname
= ".ldata.rel";
2457 case SECCAT_DATA_REL_LOCAL
:
2458 sname
= ".ldata.rel.local";
2460 case SECCAT_DATA_REL_RO
:
2461 sname
= ".ldata.rel.ro";
2463 case SECCAT_DATA_REL_RO_LOCAL
:
2464 sname
= ".ldata.rel.ro.local";
2468 flags
|= SECTION_BSS
;
2471 case SECCAT_RODATA_MERGE_STR
:
2472 case SECCAT_RODATA_MERGE_STR_INIT
:
2473 case SECCAT_RODATA_MERGE_CONST
:
2477 case SECCAT_SRODATA
:
2484 /* We don't split these for medium model. Place them into
2485 default sections and hope for best. */
2490 /* We might get called with string constants, but get_named_section
2491 doesn't like them as they are not DECLs. Also, we need to set
2492 flags in that case. */
2494 return get_section (sname
, flags
, NULL
);
2495 return get_named_section (decl
, sname
, reloc
);
2498 return default_elf_select_section (decl
, reloc
, align
);
2501 /* Build up a unique section name, expressed as a
2502 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2503 RELOC indicates whether the initial value of EXP requires
2504 link-time relocations. */
2507 x86_64_elf_unique_section (tree decl
, int reloc
)
2509 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2510 && ix86_in_large_data_p (decl
))
2512 const char *prefix
= NULL
;
2513 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2514 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2516 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2519 case SECCAT_DATA_REL
:
2520 case SECCAT_DATA_REL_LOCAL
:
2521 case SECCAT_DATA_REL_RO
:
2522 case SECCAT_DATA_REL_RO_LOCAL
:
2523 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2526 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2529 case SECCAT_RODATA_MERGE_STR
:
2530 case SECCAT_RODATA_MERGE_STR_INIT
:
2531 case SECCAT_RODATA_MERGE_CONST
:
2532 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2534 case SECCAT_SRODATA
:
2541 /* We don't split these for medium model. Place them into
2542 default sections and hope for best. */
2550 plen
= strlen (prefix
);
2552 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2553 name
= targetm
.strip_name_encoding (name
);
2554 nlen
= strlen (name
);
2556 string
= alloca (nlen
+ plen
+ 1);
2557 memcpy (string
, prefix
, plen
);
2558 memcpy (string
+ plen
, name
, nlen
+ 1);
2560 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2564 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2588 /* Utility function for targets to use in implementing
2589 ASM_OUTPUT_ALIGNED_BSS. */
2592 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2593 const char *name
, unsigned HOST_WIDE_INT size
,
2596 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2597 && size
> (unsigned int)ix86_section_threshold
)
2598 switch_to_section (get_named_section (decl
, ".lbss", 0));
2600 switch_to_section (bss_section
);
2601 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2602 #ifdef ASM_DECLARE_OBJECT_NAME
2603 last_assemble_variable_decl
= decl
;
2604 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2606 /* Standard thing is just output label for the object. */
2607 ASM_OUTPUT_LABEL (file
, name
);
2608 #endif /* ASM_DECLARE_OBJECT_NAME */
2609 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2613 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2615 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2616 make the problem with not enough registers even worse. */
2617 #ifdef INSN_SCHEDULING
2619 flag_schedule_insns
= 0;
2623 /* The Darwin libraries never set errno, so we might as well
2624 avoid calling them when that's the only reason we would. */
2625 flag_errno_math
= 0;
2627 /* The default values of these switches depend on the TARGET_64BIT
2628 that is not known at this moment. Mark these values with 2 and
2629 let user the to override these. In case there is no command line option
2630 specifying them, we will set the defaults in override_options. */
2632 flag_omit_frame_pointer
= 2;
2633 flag_pcc_struct_return
= 2;
2634 flag_asynchronous_unwind_tables
= 2;
2635 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2636 SUBTARGET_OPTIMIZATION_OPTIONS
;
2640 /* Table of valid machine attributes. */
2641 const struct attribute_spec ix86_attribute_table
[] =
2643 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2644 /* Stdcall attribute says callee is responsible for popping arguments
2645 if they are not variable. */
2646 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2647 /* Fastcall attribute says callee is responsible for popping arguments
2648 if they are not variable. */
2649 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2650 /* Cdecl attribute says the callee is a normal C declaration */
2651 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2652 /* Regparm attribute specifies how many integer arguments are to be
2653 passed in registers. */
2654 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2655 /* Sseregparm attribute says we are using x86_64 calling conventions
2656 for FP arguments. */
2657 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2658 /* force_align_arg_pointer says this function realigns the stack at entry. */
2659 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2660 false, true, true, ix86_handle_cconv_attribute
},
2661 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2662 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2663 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2664 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2666 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2667 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2668 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2669 SUBTARGET_ATTRIBUTE_TABLE
,
2671 { NULL
, 0, 0, false, false, false, NULL
}
2674 /* Decide whether we can make a sibling call to a function. DECL is the
2675 declaration of the function being targeted by the call and EXP is the
2676 CALL_EXPR representing the call. */
2679 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2684 /* If we are generating position-independent code, we cannot sibcall
2685 optimize any indirect call, or a direct call to a global function,
2686 as the PLT requires %ebx be live. */
2687 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2694 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2695 if (POINTER_TYPE_P (func
))
2696 func
= TREE_TYPE (func
);
2699 /* Check that the return value locations are the same. Like
2700 if we are returning floats on the 80387 register stack, we cannot
2701 make a sibcall from a function that doesn't return a float to a
2702 function that does or, conversely, from a function that does return
2703 a float to a function that doesn't; the necessary stack adjustment
2704 would not be executed. This is also the place we notice
2705 differences in the return value ABI. Note that it is ok for one
2706 of the functions to have void return type as long as the return
2707 value of the other is passed in a register. */
2708 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2709 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2711 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2713 if (!rtx_equal_p (a
, b
))
2716 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2718 else if (!rtx_equal_p (a
, b
))
2721 /* If this call is indirect, we'll need to be able to use a call-clobbered
2722 register for the address of the target function. Make sure that all
2723 such registers are not used for passing parameters. */
2724 if (!decl
&& !TARGET_64BIT
)
2728 /* We're looking at the CALL_EXPR, we need the type of the function. */
2729 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2730 type
= TREE_TYPE (type
); /* pointer type */
2731 type
= TREE_TYPE (type
); /* function type */
2733 if (ix86_function_regparm (type
, NULL
) >= 3)
2735 /* ??? Need to count the actual number of registers to be used,
2736 not the possible number of registers. Fix later. */
2741 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2742 /* Dllimport'd functions are also called indirectly. */
2743 if (decl
&& DECL_DLLIMPORT_P (decl
)
2744 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2748 /* If we forced aligned the stack, then sibcalling would unalign the
2749 stack, which may break the called function. */
2750 if (cfun
->machine
->force_align_arg_pointer
)
2753 /* Otherwise okay. That also includes certain types of indirect calls. */
2757 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2758 calling convention attributes;
2759 arguments as in struct attribute_spec.handler. */
2762 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2764 int flags ATTRIBUTE_UNUSED
,
2767 if (TREE_CODE (*node
) != FUNCTION_TYPE
2768 && TREE_CODE (*node
) != METHOD_TYPE
2769 && TREE_CODE (*node
) != FIELD_DECL
2770 && TREE_CODE (*node
) != TYPE_DECL
)
2772 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2773 IDENTIFIER_POINTER (name
));
2774 *no_add_attrs
= true;
2778 /* Can combine regparm with all attributes but fastcall. */
2779 if (is_attribute_p ("regparm", name
))
2783 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2785 error ("fastcall and regparm attributes are not compatible");
2788 cst
= TREE_VALUE (args
);
2789 if (TREE_CODE (cst
) != INTEGER_CST
)
2791 warning (OPT_Wattributes
,
2792 "%qs attribute requires an integer constant argument",
2793 IDENTIFIER_POINTER (name
));
2794 *no_add_attrs
= true;
2796 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2798 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2799 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2800 *no_add_attrs
= true;
2804 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2805 TYPE_ATTRIBUTES (*node
))
2806 && compare_tree_int (cst
, REGPARM_MAX
-1))
2808 error ("%s functions limited to %d register parameters",
2809 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2817 warning (OPT_Wattributes
, "%qs attribute ignored",
2818 IDENTIFIER_POINTER (name
));
2819 *no_add_attrs
= true;
2823 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2824 if (is_attribute_p ("fastcall", name
))
2826 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2828 error ("fastcall and cdecl attributes are not compatible");
2830 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2832 error ("fastcall and stdcall attributes are not compatible");
2834 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2836 error ("fastcall and regparm attributes are not compatible");
2840 /* Can combine stdcall with fastcall (redundant), regparm and
2842 else if (is_attribute_p ("stdcall", name
))
2844 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2846 error ("stdcall and cdecl attributes are not compatible");
2848 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2850 error ("stdcall and fastcall attributes are not compatible");
2854 /* Can combine cdecl with regparm and sseregparm. */
2855 else if (is_attribute_p ("cdecl", name
))
2857 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2859 error ("stdcall and cdecl attributes are not compatible");
2861 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2863 error ("fastcall and cdecl attributes are not compatible");
2867 /* Can combine sseregparm with all attributes. */
2872 /* Return 0 if the attributes for two types are incompatible, 1 if they
2873 are compatible, and 2 if they are nearly compatible (which causes a
2874 warning to be generated). */
2877 ix86_comp_type_attributes (tree type1
, tree type2
)
2879 /* Check for mismatch of non-default calling convention. */
2880 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2882 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2885 /* Check for mismatched fastcall/regparm types. */
2886 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2887 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2888 || (ix86_function_regparm (type1
, NULL
)
2889 != ix86_function_regparm (type2
, NULL
)))
2892 /* Check for mismatched sseregparm types. */
2893 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2894 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2897 /* Check for mismatched return types (cdecl vs stdcall). */
2898 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2899 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2905 /* Return the regparm value for a function with the indicated TYPE and DECL.
2906 DECL may be NULL when calling function indirectly
2907 or considering a libcall. */
2910 ix86_function_regparm (tree type
, tree decl
)
2913 int regparm
= ix86_regparm
;
2914 bool user_convention
= false;
2918 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2921 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2922 user_convention
= true;
2925 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2928 user_convention
= true;
2931 /* Use register calling convention for local functions when possible. */
2932 if (!TARGET_64BIT
&& !user_convention
&& decl
2933 && flag_unit_at_a_time
&& !profile_flag
)
2935 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2938 int local_regparm
, globals
= 0, regno
;
2940 /* Make sure no regparm register is taken by a global register
2942 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2943 if (global_regs
[local_regparm
])
2945 /* We can't use regparm(3) for nested functions as these use
2946 static chain pointer in third argument. */
2947 if (local_regparm
== 3
2948 && decl_function_context (decl
)
2949 && !DECL_NO_STATIC_CHAIN (decl
))
2951 /* If the function realigns its stackpointer, the
2952 prologue will clobber %ecx. If we've already
2953 generated code for the callee, the callee
2954 DECL_STRUCT_FUNCTION is gone, so we fall back to
2955 scanning the attributes for the self-realigning
2957 if ((DECL_STRUCT_FUNCTION (decl
)
2958 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2959 || (!DECL_STRUCT_FUNCTION (decl
)
2960 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2961 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2963 /* Each global register variable increases register preassure,
2964 so the more global reg vars there are, the smaller regparm
2965 optimization use, unless requested by the user explicitly. */
2966 for (regno
= 0; regno
< 6; regno
++)
2967 if (global_regs
[regno
])
2970 = globals
< local_regparm
? local_regparm
- globals
: 0;
2972 if (local_regparm
> regparm
)
2973 regparm
= local_regparm
;
2980 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2981 DFmode (2) arguments in SSE registers for a function with the
2982 indicated TYPE and DECL. DECL may be NULL when calling function
2983 indirectly or considering a libcall. Otherwise return 0. */
2986 ix86_function_sseregparm (tree type
, tree decl
)
2988 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2989 by the sseregparm attribute. */
2990 if (TARGET_SSEREGPARM
2992 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2997 error ("Calling %qD with attribute sseregparm without "
2998 "SSE/SSE2 enabled", decl
);
3000 error ("Calling %qT with attribute sseregparm without "
3001 "SSE/SSE2 enabled", type
);
3008 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3009 (and DFmode for SSE2) arguments in SSE registers,
3010 even for 32-bit targets. */
3011 if (!TARGET_64BIT
&& decl
3012 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3014 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3016 return TARGET_SSE2
? 2 : 1;
3022 /* Return true if EAX is live at the start of the function. Used by
3023 ix86_expand_prologue to determine if we need special help before
3024 calling allocate_stack_worker. */
3027 ix86_eax_live_at_start_p (void)
3029 /* Cheat. Don't bother working forward from ix86_function_regparm
3030 to the function type to whether an actual argument is located in
3031 eax. Instead just look at cfg info, which is still close enough
3032 to correct at this point. This gives false positives for broken
3033 functions that might use uninitialized data that happens to be
3034 allocated in eax, but who cares? */
3035 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3038 /* Value is the number of bytes of arguments automatically
3039 popped when returning from a subroutine call.
3040 FUNDECL is the declaration node of the function (as a tree),
3041 FUNTYPE is the data type of the function (as a tree),
3042 or for a library call it is an identifier node for the subroutine name.
3043 SIZE is the number of bytes of arguments passed on the stack.
3045 On the 80386, the RTD insn may be used to pop them if the number
3046 of args is fixed, but if the number is variable then the caller
3047 must pop them all. RTD can't be used for library calls now
3048 because the library is compiled with the Unix compiler.
3049 Use of RTD is a selectable option, since it is incompatible with
3050 standard Unix calling sequences. If the option is not selected,
3051 the caller must always pop the args.
3053 The attribute stdcall is equivalent to RTD on a per module basis. */
3056 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3058 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3060 /* Cdecl functions override -mrtd, and never pop the stack. */
3061 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3063 /* Stdcall and fastcall functions will pop the stack if not
3065 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3066 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3070 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3071 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3072 == void_type_node
)))
3076 /* Lose any fake structure return argument if it is passed on the stack. */
3077 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3079 && !KEEP_AGGREGATE_RETURN_POINTER
)
3081 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3084 return GET_MODE_SIZE (Pmode
);
3090 /* Argument support functions. */
3092 /* Return true when register may be used to pass function parameters. */
3094 ix86_function_arg_regno_p (int regno
)
3100 return (regno
< REGPARM_MAX
3101 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3103 return (regno
< REGPARM_MAX
3104 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3105 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3106 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3107 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3112 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3117 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3118 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3121 /* RAX is used as hidden argument to va_arg functions. */
3124 for (i
= 0; i
< REGPARM_MAX
; i
++)
3125 if (regno
== x86_64_int_parameter_registers
[i
])
3130 /* Return if we do not know how to pass TYPE solely in registers. */
3133 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3135 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3138 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3139 The layout_type routine is crafty and tries to trick us into passing
3140 currently unsupported vector types on the stack by using TImode. */
3141 return (!TARGET_64BIT
&& mode
== TImode
3142 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3145 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3146 for a call to a function whose data type is FNTYPE.
3147 For a library call, FNTYPE is 0. */
3150 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3151 tree fntype
, /* tree ptr for function decl */
3152 rtx libname
, /* SYMBOL_REF of library name or 0 */
3155 static CUMULATIVE_ARGS zero_cum
;
3156 tree param
, next_param
;
3158 if (TARGET_DEBUG_ARG
)
3160 fprintf (stderr
, "\ninit_cumulative_args (");
3162 fprintf (stderr
, "fntype code = %s, ret code = %s",
3163 tree_code_name
[(int) TREE_CODE (fntype
)],
3164 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3166 fprintf (stderr
, "no fntype");
3169 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3174 /* Set up the number of registers to use for passing arguments. */
3175 cum
->nregs
= ix86_regparm
;
3177 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3179 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3180 cum
->warn_sse
= true;
3181 cum
->warn_mmx
= true;
3182 cum
->maybe_vaarg
= false;
3184 /* Use ecx and edx registers if function has fastcall attribute,
3185 else look for regparm information. */
3186 if (fntype
&& !TARGET_64BIT
)
3188 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3194 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3197 /* Set up the number of SSE registers used for passing SFmode
3198 and DFmode arguments. Warn for mismatching ABI. */
3199 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3201 /* Determine if this function has variable arguments. This is
3202 indicated by the last argument being 'void_type_mode' if there
3203 are no variable arguments. If there are variable arguments, then
3204 we won't pass anything in registers in 32-bit mode. */
3206 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3208 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3209 param
!= 0; param
= next_param
)
3211 next_param
= TREE_CHAIN (param
);
3212 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3222 cum
->float_in_sse
= 0;
3224 cum
->maybe_vaarg
= true;
3228 if ((!fntype
&& !libname
)
3229 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3230 cum
->maybe_vaarg
= true;
3232 if (TARGET_DEBUG_ARG
)
3233 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3238 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3239 But in the case of vector types, it is some vector mode.
3241 When we have only some of our vector isa extensions enabled, then there
3242 are some modes for which vector_mode_supported_p is false. For these
3243 modes, the generic vector support in gcc will choose some non-vector mode
3244 in order to implement the type. By computing the natural mode, we'll
3245 select the proper ABI location for the operand and not depend on whatever
3246 the middle-end decides to do with these vector types. */
3248 static enum machine_mode
3249 type_natural_mode (tree type
)
3251 enum machine_mode mode
= TYPE_MODE (type
);
3253 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3255 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3256 if ((size
== 8 || size
== 16)
3257 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3258 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3260 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3262 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3263 mode
= MIN_MODE_VECTOR_FLOAT
;
3265 mode
= MIN_MODE_VECTOR_INT
;
3267 /* Get the mode which has this inner mode and number of units. */
3268 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3269 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3270 && GET_MODE_INNER (mode
) == innermode
)
3280 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3281 this may not agree with the mode that the type system has chosen for the
3282 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3283 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3286 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3291 if (orig_mode
!= BLKmode
)
3292 tmp
= gen_rtx_REG (orig_mode
, regno
);
3295 tmp
= gen_rtx_REG (mode
, regno
);
3296 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3297 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3303 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3304 of this code is to classify each 8bytes of incoming argument by the register
3305 class and assign registers accordingly. */
3307 /* Return the union class of CLASS1 and CLASS2.
3308 See the x86-64 PS ABI for details. */
3310 static enum x86_64_reg_class
3311 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3313 /* Rule #1: If both classes are equal, this is the resulting class. */
3314 if (class1
== class2
)
3317 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3319 if (class1
== X86_64_NO_CLASS
)
3321 if (class2
== X86_64_NO_CLASS
)
3324 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3325 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3326 return X86_64_MEMORY_CLASS
;
3328 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3329 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3330 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3331 return X86_64_INTEGERSI_CLASS
;
3332 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3333 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3334 return X86_64_INTEGER_CLASS
;
3336 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3338 if (class1
== X86_64_X87_CLASS
3339 || class1
== X86_64_X87UP_CLASS
3340 || class1
== X86_64_COMPLEX_X87_CLASS
3341 || class2
== X86_64_X87_CLASS
3342 || class2
== X86_64_X87UP_CLASS
3343 || class2
== X86_64_COMPLEX_X87_CLASS
)
3344 return X86_64_MEMORY_CLASS
;
3346 /* Rule #6: Otherwise class SSE is used. */
3347 return X86_64_SSE_CLASS
;
3350 /* Classify the argument of type TYPE and mode MODE.
3351 CLASSES will be filled by the register class used to pass each word
3352 of the operand. The number of words is returned. In case the parameter
3353 should be passed in memory, 0 is returned. As a special case for zero
3354 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3356 BIT_OFFSET is used internally for handling records and specifies offset
3357 of the offset in bits modulo 256 to avoid overflow cases.
3359 See the x86-64 PS ABI for details.
3363 classify_argument (enum machine_mode mode
, tree type
,
3364 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3366 HOST_WIDE_INT bytes
=
3367 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3368 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3370 /* Variable sized entities are always passed/returned in memory. */
3374 if (mode
!= VOIDmode
3375 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3378 if (type
&& AGGREGATE_TYPE_P (type
))
3382 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3384 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3388 for (i
= 0; i
< words
; i
++)
3389 classes
[i
] = X86_64_NO_CLASS
;
3391 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3392 signalize memory class, so handle it as special case. */
3395 classes
[0] = X86_64_NO_CLASS
;
3399 /* Classify each field of record and merge classes. */
3400 switch (TREE_CODE (type
))
3403 /* And now merge the fields of structure. */
3404 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3406 if (TREE_CODE (field
) == FIELD_DECL
)
3410 if (TREE_TYPE (field
) == error_mark_node
)
3413 /* Bitfields are always classified as integer. Handle them
3414 early, since later code would consider them to be
3415 misaligned integers. */
3416 if (DECL_BIT_FIELD (field
))
3418 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3419 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3420 + tree_low_cst (DECL_SIZE (field
), 0)
3423 merge_classes (X86_64_INTEGER_CLASS
,
3428 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3429 TREE_TYPE (field
), subclasses
,
3430 (int_bit_position (field
)
3431 + bit_offset
) % 256);
3434 for (i
= 0; i
< num
; i
++)
3437 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3439 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3447 /* Arrays are handled as small records. */
3450 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3451 TREE_TYPE (type
), subclasses
, bit_offset
);
3455 /* The partial classes are now full classes. */
3456 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3457 subclasses
[0] = X86_64_SSE_CLASS
;
3458 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3459 subclasses
[0] = X86_64_INTEGER_CLASS
;
3461 for (i
= 0; i
< words
; i
++)
3462 classes
[i
] = subclasses
[i
% num
];
3467 case QUAL_UNION_TYPE
:
3468 /* Unions are similar to RECORD_TYPE but offset is always 0.
3470 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3472 if (TREE_CODE (field
) == FIELD_DECL
)
3476 if (TREE_TYPE (field
) == error_mark_node
)
3479 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3480 TREE_TYPE (field
), subclasses
,
3484 for (i
= 0; i
< num
; i
++)
3485 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3494 /* Final merger cleanup. */
3495 for (i
= 0; i
< words
; i
++)
3497 /* If one class is MEMORY, everything should be passed in
3499 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3502 /* The X86_64_SSEUP_CLASS should be always preceded by
3503 X86_64_SSE_CLASS. */
3504 if (classes
[i
] == X86_64_SSEUP_CLASS
3505 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3506 classes
[i
] = X86_64_SSE_CLASS
;
3508 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3509 if (classes
[i
] == X86_64_X87UP_CLASS
3510 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3511 classes
[i
] = X86_64_SSE_CLASS
;
3516 /* Compute alignment needed. We align all types to natural boundaries with
3517 exception of XFmode that is aligned to 64bits. */
3518 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3520 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3523 mode_alignment
= 128;
3524 else if (mode
== XCmode
)
3525 mode_alignment
= 256;
3526 if (COMPLEX_MODE_P (mode
))
3527 mode_alignment
/= 2;
3528 /* Misaligned fields are always returned in memory. */
3529 if (bit_offset
% mode_alignment
)
3533 /* for V1xx modes, just use the base mode */
3534 if (VECTOR_MODE_P (mode
)
3535 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3536 mode
= GET_MODE_INNER (mode
);
3538 /* Classification of atomic types. */
3543 classes
[0] = X86_64_SSE_CLASS
;
3546 classes
[0] = X86_64_SSE_CLASS
;
3547 classes
[1] = X86_64_SSEUP_CLASS
;
3556 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3557 classes
[0] = X86_64_INTEGERSI_CLASS
;
3559 classes
[0] = X86_64_INTEGER_CLASS
;
3563 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3568 if (!(bit_offset
% 64))
3569 classes
[0] = X86_64_SSESF_CLASS
;
3571 classes
[0] = X86_64_SSE_CLASS
;
3574 classes
[0] = X86_64_SSEDF_CLASS
;
3577 classes
[0] = X86_64_X87_CLASS
;
3578 classes
[1] = X86_64_X87UP_CLASS
;
3581 classes
[0] = X86_64_SSE_CLASS
;
3582 classes
[1] = X86_64_SSEUP_CLASS
;
3585 classes
[0] = X86_64_SSE_CLASS
;
3588 classes
[0] = X86_64_SSEDF_CLASS
;
3589 classes
[1] = X86_64_SSEDF_CLASS
;
3592 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3595 /* This modes is larger than 16 bytes. */
3603 classes
[0] = X86_64_SSE_CLASS
;
3604 classes
[1] = X86_64_SSEUP_CLASS
;
3610 classes
[0] = X86_64_SSE_CLASS
;
3616 gcc_assert (VECTOR_MODE_P (mode
));
3621 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3623 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3624 classes
[0] = X86_64_INTEGERSI_CLASS
;
3626 classes
[0] = X86_64_INTEGER_CLASS
;
3627 classes
[1] = X86_64_INTEGER_CLASS
;
3628 return 1 + (bytes
> 8);
3632 /* Examine the argument and return set number of register required in each
3633 class. Return 0 iff parameter should be passed in memory. */
3635 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3636 int *int_nregs
, int *sse_nregs
)
3638 enum x86_64_reg_class
class[MAX_CLASSES
];
3639 int n
= classify_argument (mode
, type
, class, 0);
3645 for (n
--; n
>= 0; n
--)
3648 case X86_64_INTEGER_CLASS
:
3649 case X86_64_INTEGERSI_CLASS
:
3652 case X86_64_SSE_CLASS
:
3653 case X86_64_SSESF_CLASS
:
3654 case X86_64_SSEDF_CLASS
:
3657 case X86_64_NO_CLASS
:
3658 case X86_64_SSEUP_CLASS
:
3660 case X86_64_X87_CLASS
:
3661 case X86_64_X87UP_CLASS
:
3665 case X86_64_COMPLEX_X87_CLASS
:
3666 return in_return
? 2 : 0;
3667 case X86_64_MEMORY_CLASS
:
3673 /* Construct container for the argument used by GCC interface. See
3674 FUNCTION_ARG for the detailed description. */
3677 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3678 tree type
, int in_return
, int nintregs
, int nsseregs
,
3679 const int *intreg
, int sse_regno
)
3681 /* The following variables hold the static issued_error state. */
3682 static bool issued_sse_arg_error
;
3683 static bool issued_sse_ret_error
;
3684 static bool issued_x87_ret_error
;
3686 enum machine_mode tmpmode
;
3688 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3689 enum x86_64_reg_class
class[MAX_CLASSES
];
3693 int needed_sseregs
, needed_intregs
;
3694 rtx exp
[MAX_CLASSES
];
3697 n
= classify_argument (mode
, type
, class, 0);
3698 if (TARGET_DEBUG_ARG
)
3701 fprintf (stderr
, "Memory class\n");
3704 fprintf (stderr
, "Classes:");
3705 for (i
= 0; i
< n
; i
++)
3707 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3709 fprintf (stderr
, "\n");
3714 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3717 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3720 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3721 some less clueful developer tries to use floating-point anyway. */
3722 if (needed_sseregs
&& !TARGET_SSE
)
3726 if (!issued_sse_ret_error
)
3728 error ("SSE register return with SSE disabled");
3729 issued_sse_ret_error
= true;
3732 else if (!issued_sse_arg_error
)
3734 error ("SSE register argument with SSE disabled");
3735 issued_sse_arg_error
= true;
3740 /* Likewise, error if the ABI requires us to return values in the
3741 x87 registers and the user specified -mno-80387. */
3742 if (!TARGET_80387
&& in_return
)
3743 for (i
= 0; i
< n
; i
++)
3744 if (class[i
] == X86_64_X87_CLASS
3745 || class[i
] == X86_64_X87UP_CLASS
3746 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3748 if (!issued_x87_ret_error
)
3750 error ("x87 register return with x87 disabled");
3751 issued_x87_ret_error
= true;
3756 /* First construct simple cases. Avoid SCmode, since we want to use
3757 single register to pass this type. */
3758 if (n
== 1 && mode
!= SCmode
)
3761 case X86_64_INTEGER_CLASS
:
3762 case X86_64_INTEGERSI_CLASS
:
3763 return gen_rtx_REG (mode
, intreg
[0]);
3764 case X86_64_SSE_CLASS
:
3765 case X86_64_SSESF_CLASS
:
3766 case X86_64_SSEDF_CLASS
:
3767 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3768 case X86_64_X87_CLASS
:
3769 case X86_64_COMPLEX_X87_CLASS
:
3770 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3771 case X86_64_NO_CLASS
:
3772 /* Zero sized array, struct or class. */
3777 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3779 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3781 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3782 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3783 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3784 && class[1] == X86_64_INTEGER_CLASS
3785 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3786 && intreg
[0] + 1 == intreg
[1])
3787 return gen_rtx_REG (mode
, intreg
[0]);
3789 /* Otherwise figure out the entries of the PARALLEL. */
3790 for (i
= 0; i
< n
; i
++)
3794 case X86_64_NO_CLASS
:
3796 case X86_64_INTEGER_CLASS
:
3797 case X86_64_INTEGERSI_CLASS
:
3798 /* Merge TImodes on aligned occasions here too. */
3799 if (i
* 8 + 8 > bytes
)
3800 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3801 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3805 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3806 if (tmpmode
== BLKmode
)
3808 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3809 gen_rtx_REG (tmpmode
, *intreg
),
3813 case X86_64_SSESF_CLASS
:
3814 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3815 gen_rtx_REG (SFmode
,
3816 SSE_REGNO (sse_regno
)),
3820 case X86_64_SSEDF_CLASS
:
3821 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3822 gen_rtx_REG (DFmode
,
3823 SSE_REGNO (sse_regno
)),
3827 case X86_64_SSE_CLASS
:
3828 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3832 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3833 gen_rtx_REG (tmpmode
,
3834 SSE_REGNO (sse_regno
)),
3836 if (tmpmode
== TImode
)
3845 /* Empty aligned struct, union or class. */
3849 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3850 for (i
= 0; i
< nexps
; i
++)
3851 XVECEXP (ret
, 0, i
) = exp
[i
];
3855 /* Update the data in CUM to advance over an argument
3856 of mode MODE and data type TYPE.
3857 (TYPE is null for libcalls where that information may not be available.) */
3860 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3861 tree type
, int named
)
3864 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3865 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3868 mode
= type_natural_mode (type
);
3870 if (TARGET_DEBUG_ARG
)
3871 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3872 "mode=%s, named=%d)\n\n",
3873 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3874 GET_MODE_NAME (mode
), named
);
3878 int int_nregs
, sse_nregs
;
3879 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3880 cum
->words
+= words
;
3881 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3883 cum
->nregs
-= int_nregs
;
3884 cum
->sse_nregs
-= sse_nregs
;
3885 cum
->regno
+= int_nregs
;
3886 cum
->sse_regno
+= sse_nregs
;
3889 cum
->words
+= words
;
3907 cum
->words
+= words
;
3908 cum
->nregs
-= words
;
3909 cum
->regno
+= words
;
3911 if (cum
->nregs
<= 0)
3919 if (cum
->float_in_sse
< 2)
3922 if (cum
->float_in_sse
< 1)
3933 if (!type
|| !AGGREGATE_TYPE_P (type
))
3935 cum
->sse_words
+= words
;
3936 cum
->sse_nregs
-= 1;
3937 cum
->sse_regno
+= 1;
3938 if (cum
->sse_nregs
<= 0)
3950 if (!type
|| !AGGREGATE_TYPE_P (type
))
3952 cum
->mmx_words
+= words
;
3953 cum
->mmx_nregs
-= 1;
3954 cum
->mmx_regno
+= 1;
3955 if (cum
->mmx_nregs
<= 0)
3966 /* Define where to put the arguments to a function.
3967 Value is zero to push the argument on the stack,
3968 or a hard register in which to store the argument.
3970 MODE is the argument's machine mode.
3971 TYPE is the data type of the argument (as a tree).
3972 This is null for libcalls where that information may
3974 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3975 the preceding args and about the function being called.
3976 NAMED is nonzero if this argument is a named parameter
3977 (otherwise it is an extra parameter matching an ellipsis). */
3980 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3981 tree type
, int named
)
3983 enum machine_mode mode
= orig_mode
;
3986 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3987 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3988 static bool warnedsse
, warnedmmx
;
3990 /* To simplify the code below, represent vector types with a vector mode
3991 even if MMX/SSE are not active. */
3992 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3993 mode
= type_natural_mode (type
);
3995 /* Handle a hidden AL argument containing number of registers for varargs
3996 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3998 if (mode
== VOIDmode
)
4001 return GEN_INT (cum
->maybe_vaarg
4002 ? (cum
->sse_nregs
< 0
4010 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4012 &x86_64_int_parameter_registers
[cum
->regno
],
4017 /* For now, pass fp/complex values on the stack. */
4029 if (words
<= cum
->nregs
)
4031 int regno
= cum
->regno
;
4033 /* Fastcall allocates the first two DWORD (SImode) or
4034 smaller arguments to ECX and EDX. */
4037 if (mode
== BLKmode
|| mode
== DImode
)
4040 /* ECX not EAX is the first allocated register. */
4044 ret
= gen_rtx_REG (mode
, regno
);
4048 if (cum
->float_in_sse
< 2)
4051 if (cum
->float_in_sse
< 1)
4061 if (!type
|| !AGGREGATE_TYPE_P (type
))
4063 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4066 warning (0, "SSE vector argument without SSE enabled "
4070 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4071 cum
->sse_regno
+ FIRST_SSE_REG
);
4078 if (!type
|| !AGGREGATE_TYPE_P (type
))
4080 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4083 warning (0, "MMX vector argument without MMX enabled "
4087 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4088 cum
->mmx_regno
+ FIRST_MMX_REG
);
4093 if (TARGET_DEBUG_ARG
)
4096 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4097 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4100 print_simple_rtl (stderr
, ret
);
4102 fprintf (stderr
, ", stack");
4104 fprintf (stderr
, " )\n");
4110 /* A C expression that indicates when an argument must be passed by
4111 reference. If nonzero for an argument, a copy of that argument is
4112 made in memory and a pointer to the argument is passed instead of
4113 the argument itself. The pointer is passed in whatever way is
4114 appropriate for passing a pointer to that type. */
4117 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4118 enum machine_mode mode ATTRIBUTE_UNUSED
,
4119 tree type
, bool named ATTRIBUTE_UNUSED
)
4124 if (type
&& int_size_in_bytes (type
) == -1)
4126 if (TARGET_DEBUG_ARG
)
4127 fprintf (stderr
, "function_arg_pass_by_reference\n");
4134 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4135 ABI. Only called if TARGET_SSE. */
4137 contains_128bit_aligned_vector_p (tree type
)
4139 enum machine_mode mode
= TYPE_MODE (type
);
4140 if (SSE_REG_MODE_P (mode
)
4141 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4143 if (TYPE_ALIGN (type
) < 128)
4146 if (AGGREGATE_TYPE_P (type
))
4148 /* Walk the aggregates recursively. */
4149 switch (TREE_CODE (type
))
4153 case QUAL_UNION_TYPE
:
4157 /* Walk all the structure fields. */
4158 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4160 if (TREE_CODE (field
) == FIELD_DECL
4161 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4168 /* Just for use if some languages passes arrays by value. */
4169 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4180 /* Gives the alignment boundary, in bits, of an argument with the
4181 specified mode and type. */
4184 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4188 align
= TYPE_ALIGN (type
);
4190 align
= GET_MODE_ALIGNMENT (mode
);
4191 if (align
< PARM_BOUNDARY
)
4192 align
= PARM_BOUNDARY
;
4195 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4196 make an exception for SSE modes since these require 128bit
4199 The handling here differs from field_alignment. ICC aligns MMX
4200 arguments to 4 byte boundaries, while structure fields are aligned
4201 to 8 byte boundaries. */
4203 align
= PARM_BOUNDARY
;
4206 if (!SSE_REG_MODE_P (mode
))
4207 align
= PARM_BOUNDARY
;
4211 if (!contains_128bit_aligned_vector_p (type
))
4212 align
= PARM_BOUNDARY
;
4220 /* Return true if N is a possible register number of function value. */
4222 ix86_function_value_regno_p (int regno
)
4228 return ((regno
) == 0
4229 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4230 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4232 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4233 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4234 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4239 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4240 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4244 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4251 /* Define how to find the value returned by a function.
4252 VALTYPE is the data type of the value (as a tree).
4253 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4254 otherwise, FUNC is 0. */
4256 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4257 bool outgoing ATTRIBUTE_UNUSED
)
4259 enum machine_mode natmode
= type_natural_mode (valtype
);
4263 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4264 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4265 x86_64_int_return_registers
, 0);
4266 /* For zero sized structures, construct_container return NULL, but we
4267 need to keep rest of compiler happy by returning meaningful value. */
4269 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4274 tree fn
= NULL_TREE
, fntype
;
4276 && DECL_P (fntype_or_decl
))
4277 fn
= fntype_or_decl
;
4278 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4279 return gen_rtx_REG (TYPE_MODE (valtype
),
4280 ix86_value_regno (natmode
, fn
, fntype
));
4284 /* Return true iff type is returned in memory. */
4286 ix86_return_in_memory (tree type
)
4288 int needed_intregs
, needed_sseregs
, size
;
4289 enum machine_mode mode
= type_natural_mode (type
);
4292 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4294 if (mode
== BLKmode
)
4297 size
= int_size_in_bytes (type
);
4299 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4302 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4304 /* User-created vectors small enough to fit in EAX. */
4308 /* MMX/3dNow values are returned in MM0,
4309 except when it doesn't exits. */
4311 return (TARGET_MMX
? 0 : 1);
4313 /* SSE values are returned in XMM0, except when it doesn't exist. */
4315 return (TARGET_SSE
? 0 : 1);
4329 /* When returning SSE vector types, we have a choice of either
4330 (1) being abi incompatible with a -march switch, or
4331 (2) generating an error.
4332 Given no good solution, I think the safest thing is one warning.
4333 The user won't be able to use -Werror, but....
4335 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4336 called in response to actually generating a caller or callee that
4337 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4338 via aggregate_value_p for general type probing from tree-ssa. */
4341 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4343 static bool warnedsse
, warnedmmx
;
4347 /* Look at the return type of the function, not the function type. */
4348 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4350 if (!TARGET_SSE
&& !warnedsse
)
4353 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4356 warning (0, "SSE vector return without SSE enabled "
4361 if (!TARGET_MMX
&& !warnedmmx
)
4363 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4366 warning (0, "MMX vector return without MMX enabled "
4375 /* Define how to find the value returned by a library function
4376 assuming the value has mode MODE. */
4378 ix86_libcall_value (enum machine_mode mode
)
4392 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4395 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4399 return gen_rtx_REG (mode
, 0);
4403 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4406 /* Given a mode, return the register to use for a return value. */
4409 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4411 gcc_assert (!TARGET_64BIT
);
4413 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4414 we normally prevent this case when mmx is not available. However
4415 some ABIs may require the result to be returned like DImode. */
4416 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4417 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4419 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4420 we prevent this case when sse is not available. However some ABIs
4421 may require the result to be returned like integer TImode. */
4422 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4423 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4425 /* Decimal floating point values can go in %eax, unlike other float modes. */
4426 if (DECIMAL_FLOAT_MODE_P (mode
))
4429 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4430 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4433 /* Floating point return values in %st(0), except for local functions when
4434 SSE math is enabled or for functions with sseregparm attribute. */
4435 if ((func
|| fntype
)
4436 && (mode
== SFmode
|| mode
== DFmode
))
4438 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4439 if ((sse_level
>= 1 && mode
== SFmode
)
4440 || (sse_level
== 2 && mode
== DFmode
))
4441 return FIRST_SSE_REG
;
4444 return FIRST_FLOAT_REG
;
4447 /* Create the va_list data type. */
4450 ix86_build_builtin_va_list (void)
4452 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4454 /* For i386 we use plain pointer to argument area. */
4456 return build_pointer_type (char_type_node
);
4458 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4459 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4461 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4462 unsigned_type_node
);
4463 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4464 unsigned_type_node
);
4465 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4467 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4470 va_list_gpr_counter_field
= f_gpr
;
4471 va_list_fpr_counter_field
= f_fpr
;
4473 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4474 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4475 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4476 DECL_FIELD_CONTEXT (f_sav
) = record
;
4478 TREE_CHAIN (record
) = type_decl
;
4479 TYPE_NAME (record
) = type_decl
;
4480 TYPE_FIELDS (record
) = f_gpr
;
4481 TREE_CHAIN (f_gpr
) = f_fpr
;
4482 TREE_CHAIN (f_fpr
) = f_ovf
;
4483 TREE_CHAIN (f_ovf
) = f_sav
;
4485 layout_type (record
);
4487 /* The correct type is an array type of one element. */
4488 return build_array_type (record
, build_index_type (size_zero_node
));
4491 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4494 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4495 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4498 CUMULATIVE_ARGS next_cum
;
4499 rtx save_area
= NULL_RTX
, mem
;
4512 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4515 /* Indicate to allocate space on the stack for varargs save area. */
4516 ix86_save_varrargs_registers
= 1;
4518 cfun
->stack_alignment_needed
= 128;
4520 fntype
= TREE_TYPE (current_function_decl
);
4521 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4522 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4523 != void_type_node
));
4525 /* For varargs, we do not want to skip the dummy va_dcl argument.
4526 For stdargs, we do want to skip the last named argument. */
4529 function_arg_advance (&next_cum
, mode
, type
, 1);
4532 save_area
= frame_pointer_rtx
;
4534 set
= get_varargs_alias_set ();
4536 for (i
= next_cum
.regno
;
4538 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4541 mem
= gen_rtx_MEM (Pmode
,
4542 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4543 MEM_NOTRAP_P (mem
) = 1;
4544 set_mem_alias_set (mem
, set
);
4545 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4546 x86_64_int_parameter_registers
[i
]));
4549 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4551 /* Now emit code to save SSE registers. The AX parameter contains number
4552 of SSE parameter registers used to call this function. We use
4553 sse_prologue_save insn template that produces computed jump across
4554 SSE saves. We need some preparation work to get this working. */
4556 label
= gen_label_rtx ();
4557 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4559 /* Compute address to jump to :
4560 label - 5*eax + nnamed_sse_arguments*5 */
4561 tmp_reg
= gen_reg_rtx (Pmode
);
4562 nsse_reg
= gen_reg_rtx (Pmode
);
4563 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4564 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4565 gen_rtx_MULT (Pmode
, nsse_reg
,
4567 if (next_cum
.sse_regno
)
4570 gen_rtx_CONST (DImode
,
4571 gen_rtx_PLUS (DImode
,
4573 GEN_INT (next_cum
.sse_regno
* 4))));
4575 emit_move_insn (nsse_reg
, label_ref
);
4576 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4578 /* Compute address of memory block we save into. We always use pointer
4579 pointing 127 bytes after first byte to store - this is needed to keep
4580 instruction size limited by 4 bytes. */
4581 tmp_reg
= gen_reg_rtx (Pmode
);
4582 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4583 plus_constant (save_area
,
4584 8 * REGPARM_MAX
+ 127)));
4585 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4586 MEM_NOTRAP_P (mem
) = 1;
4587 set_mem_alias_set (mem
, set
);
4588 set_mem_align (mem
, BITS_PER_WORD
);
4590 /* And finally do the dirty job! */
4591 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4592 GEN_INT (next_cum
.sse_regno
), label
));
4597 /* Implement va_start. */
4600 ix86_va_start (tree valist
, rtx nextarg
)
4602 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4603 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4604 tree gpr
, fpr
, ovf
, sav
, t
;
4607 /* Only 64bit target needs something special. */
4610 std_expand_builtin_va_start (valist
, nextarg
);
4614 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4615 f_fpr
= TREE_CHAIN (f_gpr
);
4616 f_ovf
= TREE_CHAIN (f_fpr
);
4617 f_sav
= TREE_CHAIN (f_ovf
);
4619 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4620 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4621 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4622 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4623 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4625 /* Count number of gp and fp argument registers used. */
4626 words
= current_function_args_info
.words
;
4627 n_gpr
= current_function_args_info
.regno
;
4628 n_fpr
= current_function_args_info
.sse_regno
;
4630 if (TARGET_DEBUG_ARG
)
4631 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4632 (int) words
, (int) n_gpr
, (int) n_fpr
);
4634 if (cfun
->va_list_gpr_size
)
4636 type
= TREE_TYPE (gpr
);
4637 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4638 build_int_cst (type
, n_gpr
* 8));
4639 TREE_SIDE_EFFECTS (t
) = 1;
4640 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4643 if (cfun
->va_list_fpr_size
)
4645 type
= TREE_TYPE (fpr
);
4646 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4647 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4648 TREE_SIDE_EFFECTS (t
) = 1;
4649 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4652 /* Find the overflow area. */
4653 type
= TREE_TYPE (ovf
);
4654 t
= make_tree (type
, virtual_incoming_args_rtx
);
4656 t
= build2 (PLUS_EXPR
, type
, t
,
4657 build_int_cst (type
, words
* UNITS_PER_WORD
));
4658 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4659 TREE_SIDE_EFFECTS (t
) = 1;
4660 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4662 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4664 /* Find the register save area.
4665 Prologue of the function save it right above stack frame. */
4666 type
= TREE_TYPE (sav
);
4667 t
= make_tree (type
, frame_pointer_rtx
);
4668 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4669 TREE_SIDE_EFFECTS (t
) = 1;
4670 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4674 /* Implement va_arg. */
4677 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4679 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4680 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4681 tree gpr
, fpr
, ovf
, sav
, t
;
4683 tree lab_false
, lab_over
= NULL_TREE
;
4688 enum machine_mode nat_mode
;
4690 /* Only 64bit target needs something special. */
4692 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4694 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4695 f_fpr
= TREE_CHAIN (f_gpr
);
4696 f_ovf
= TREE_CHAIN (f_fpr
);
4697 f_sav
= TREE_CHAIN (f_ovf
);
4699 valist
= build_va_arg_indirect_ref (valist
);
4700 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4701 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4702 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4703 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4705 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4707 type
= build_pointer_type (type
);
4708 size
= int_size_in_bytes (type
);
4709 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4711 nat_mode
= type_natural_mode (type
);
4712 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4713 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4715 /* Pull the value out of the saved registers. */
4717 addr
= create_tmp_var (ptr_type_node
, "addr");
4718 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4722 int needed_intregs
, needed_sseregs
;
4724 tree int_addr
, sse_addr
;
4726 lab_false
= create_artificial_label ();
4727 lab_over
= create_artificial_label ();
4729 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4731 need_temp
= (!REG_P (container
)
4732 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4733 || TYPE_ALIGN (type
) > 128));
4735 /* In case we are passing structure, verify that it is consecutive block
4736 on the register save area. If not we need to do moves. */
4737 if (!need_temp
&& !REG_P (container
))
4739 /* Verify that all registers are strictly consecutive */
4740 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4744 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4746 rtx slot
= XVECEXP (container
, 0, i
);
4747 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4748 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4756 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4758 rtx slot
= XVECEXP (container
, 0, i
);
4759 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4760 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4772 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4773 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4774 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4775 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4778 /* First ensure that we fit completely in registers. */
4781 t
= build_int_cst (TREE_TYPE (gpr
),
4782 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4783 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4784 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4785 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4786 gimplify_and_add (t
, pre_p
);
4790 t
= build_int_cst (TREE_TYPE (fpr
),
4791 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4793 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4794 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4795 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4796 gimplify_and_add (t
, pre_p
);
4799 /* Compute index to start of area used for integer regs. */
4802 /* int_addr = gpr + sav; */
4803 t
= fold_convert (ptr_type_node
, gpr
);
4804 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4805 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4806 gimplify_and_add (t
, pre_p
);
4810 /* sse_addr = fpr + sav; */
4811 t
= fold_convert (ptr_type_node
, fpr
);
4812 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4813 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4814 gimplify_and_add (t
, pre_p
);
4819 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4822 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4823 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4824 gimplify_and_add (t
, pre_p
);
4826 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4828 rtx slot
= XVECEXP (container
, 0, i
);
4829 rtx reg
= XEXP (slot
, 0);
4830 enum machine_mode mode
= GET_MODE (reg
);
4831 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4832 tree addr_type
= build_pointer_type (piece_type
);
4835 tree dest_addr
, dest
;
4837 if (SSE_REGNO_P (REGNO (reg
)))
4839 src_addr
= sse_addr
;
4840 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4844 src_addr
= int_addr
;
4845 src_offset
= REGNO (reg
) * 8;
4847 src_addr
= fold_convert (addr_type
, src_addr
);
4848 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4849 size_int (src_offset
)));
4850 src
= build_va_arg_indirect_ref (src_addr
);
4852 dest_addr
= fold_convert (addr_type
, addr
);
4853 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4854 size_int (INTVAL (XEXP (slot
, 1)))));
4855 dest
= build_va_arg_indirect_ref (dest_addr
);
4857 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4858 gimplify_and_add (t
, pre_p
);
4864 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4865 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4866 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4867 gimplify_and_add (t
, pre_p
);
4871 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4872 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4873 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4874 gimplify_and_add (t
, pre_p
);
4877 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4878 gimplify_and_add (t
, pre_p
);
4880 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4881 append_to_statement_list (t
, pre_p
);
4884 /* ... otherwise out of the overflow area. */
4886 /* Care for on-stack alignment if needed. */
4887 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4888 || integer_zerop (TYPE_SIZE (type
)))
4892 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4893 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4894 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4895 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4896 build_int_cst (TREE_TYPE (t
), -align
));
4898 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4900 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4901 gimplify_and_add (t2
, pre_p
);
4903 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4904 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4905 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4906 gimplify_and_add (t
, pre_p
);
4910 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4911 append_to_statement_list (t
, pre_p
);
4914 ptrtype
= build_pointer_type (type
);
4915 addr
= fold_convert (ptrtype
, addr
);
4918 addr
= build_va_arg_indirect_ref (addr
);
4919 return build_va_arg_indirect_ref (addr
);
4922 /* Return nonzero if OPNUM's MEM should be matched
4923 in movabs* patterns. */
4926 ix86_check_movabs (rtx insn
, int opnum
)
4930 set
= PATTERN (insn
);
4931 if (GET_CODE (set
) == PARALLEL
)
4932 set
= XVECEXP (set
, 0, 0);
4933 gcc_assert (GET_CODE (set
) == SET
);
4934 mem
= XEXP (set
, opnum
);
4935 while (GET_CODE (mem
) == SUBREG
)
4936 mem
= SUBREG_REG (mem
);
4937 gcc_assert (MEM_P (mem
));
4938 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4941 /* Initialize the table of extra 80387 mathematical constants. */
4944 init_ext_80387_constants (void)
4946 static const char * cst
[5] =
4948 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4949 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4950 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4951 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4952 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4956 for (i
= 0; i
< 5; i
++)
4958 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4959 /* Ensure each constant is rounded to XFmode precision. */
4960 real_convert (&ext_80387_constants_table
[i
],
4961 XFmode
, &ext_80387_constants_table
[i
]);
4964 ext_80387_constants_init
= 1;
4967 /* Return true if the constant is something that can be loaded with
4968 a special instruction. */
4971 standard_80387_constant_p (rtx x
)
4975 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4978 if (x
== CONST0_RTX (GET_MODE (x
)))
4980 if (x
== CONST1_RTX (GET_MODE (x
)))
4983 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4985 /* For XFmode constants, try to find a special 80387 instruction when
4986 optimizing for size or on those CPUs that benefit from them. */
4987 if (GET_MODE (x
) == XFmode
4988 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4992 if (! ext_80387_constants_init
)
4993 init_ext_80387_constants ();
4995 for (i
= 0; i
< 5; i
++)
4996 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5000 /* Load of the constant -0.0 or -1.0 will be split as
5001 fldz;fchs or fld1;fchs sequence. */
5002 if (real_isnegzero (&r
))
5004 if (real_identical (&r
, &dconstm1
))
5010 /* Return the opcode of the special instruction to be used to load
5014 standard_80387_constant_opcode (rtx x
)
5016 switch (standard_80387_constant_p (x
))
5040 /* Return the CONST_DOUBLE representing the 80387 constant that is
5041 loaded by the specified special instruction. The argument IDX
5042 matches the return value from standard_80387_constant_p. */
5045 standard_80387_constant_rtx (int idx
)
5049 if (! ext_80387_constants_init
)
5050 init_ext_80387_constants ();
5066 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5070 /* Return 1 if mode is a valid mode for sse. */
5072 standard_sse_mode_p (enum machine_mode mode
)
5089 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5092 standard_sse_constant_p (rtx x
)
5094 enum machine_mode mode
= GET_MODE (x
);
5096 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5098 if (vector_all_ones_operand (x
, mode
)
5099 && standard_sse_mode_p (mode
))
5100 return TARGET_SSE2
? 2 : -1;
5105 /* Return the opcode of the special instruction to be used to load
5109 standard_sse_constant_opcode (rtx insn
, rtx x
)
5111 switch (standard_sse_constant_p (x
))
5114 if (get_attr_mode (insn
) == MODE_V4SF
)
5115 return "xorps\t%0, %0";
5116 else if (get_attr_mode (insn
) == MODE_V2DF
)
5117 return "xorpd\t%0, %0";
5119 return "pxor\t%0, %0";
5121 return "pcmpeqd\t%0, %0";
5126 /* Returns 1 if OP contains a symbol reference */
5129 symbolic_reference_mentioned_p (rtx op
)
5134 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5137 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5138 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5144 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5145 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5149 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5156 /* Return 1 if it is appropriate to emit `ret' instructions in the
5157 body of a function. Do this only if the epilogue is simple, needing a
5158 couple of insns. Prior to reloading, we can't tell how many registers
5159 must be saved, so return 0 then. Return 0 if there is no frame
5160 marker to de-allocate. */
5163 ix86_can_use_return_insn_p (void)
5165 struct ix86_frame frame
;
5167 if (! reload_completed
|| frame_pointer_needed
)
5170 /* Don't allow more than 32 pop, since that's all we can do
5171 with one instruction. */
5172 if (current_function_pops_args
5173 && current_function_args_size
>= 32768)
5176 ix86_compute_frame_layout (&frame
);
5177 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5180 /* Value should be nonzero if functions must have frame pointers.
5181 Zero means the frame pointer need not be set up (and parms may
5182 be accessed via the stack pointer) in functions that seem suitable. */
5185 ix86_frame_pointer_required (void)
5187 /* If we accessed previous frames, then the generated code expects
5188 to be able to access the saved ebp value in our frame. */
5189 if (cfun
->machine
->accesses_prev_frame
)
5192 /* Several x86 os'es need a frame pointer for other reasons,
5193 usually pertaining to setjmp. */
5194 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5197 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5198 the frame pointer by default. Turn it back on now if we've not
5199 got a leaf function. */
5200 if (TARGET_OMIT_LEAF_FRAME_POINTER
5201 && (!current_function_is_leaf
5202 || ix86_current_function_calls_tls_descriptor
))
5205 if (current_function_profile
)
5211 /* Record that the current function accesses previous call frames. */
5214 ix86_setup_frame_addresses (void)
5216 cfun
->machine
->accesses_prev_frame
= 1;
5219 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5220 # define USE_HIDDEN_LINKONCE 1
5222 # define USE_HIDDEN_LINKONCE 0
5225 static int pic_labels_used
;
5227 /* Fills in the label name that should be used for a pc thunk for
5228 the given register. */
5231 get_pc_thunk_name (char name
[32], unsigned int regno
)
5233 gcc_assert (!TARGET_64BIT
);
5235 if (USE_HIDDEN_LINKONCE
)
5236 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5238 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5242 /* This function generates code for -fpic that loads %ebx with
5243 the return address of the caller and then returns. */
5246 ix86_file_end (void)
5251 for (regno
= 0; regno
< 8; ++regno
)
5255 if (! ((pic_labels_used
>> regno
) & 1))
5258 get_pc_thunk_name (name
, regno
);
5263 switch_to_section (darwin_sections
[text_coal_section
]);
5264 fputs ("\t.weak_definition\t", asm_out_file
);
5265 assemble_name (asm_out_file
, name
);
5266 fputs ("\n\t.private_extern\t", asm_out_file
);
5267 assemble_name (asm_out_file
, name
);
5268 fputs ("\n", asm_out_file
);
5269 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5273 if (USE_HIDDEN_LINKONCE
)
5277 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5279 TREE_PUBLIC (decl
) = 1;
5280 TREE_STATIC (decl
) = 1;
5281 DECL_ONE_ONLY (decl
) = 1;
5283 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5284 switch_to_section (get_named_section (decl
, NULL
, 0));
5286 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5287 fputs ("\t.hidden\t", asm_out_file
);
5288 assemble_name (asm_out_file
, name
);
5289 fputc ('\n', asm_out_file
);
5290 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5294 switch_to_section (text_section
);
5295 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5298 xops
[0] = gen_rtx_REG (SImode
, regno
);
5299 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5300 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5301 output_asm_insn ("ret", xops
);
5304 if (NEED_INDICATE_EXEC_STACK
)
5305 file_end_indicate_exec_stack ();
5308 /* Emit code for the SET_GOT patterns. */
5311 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5316 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5318 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5320 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5323 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5325 output_asm_insn ("call\t%a2", xops
);
5328 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5329 is what will be referenced by the Mach-O PIC subsystem. */
5331 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5334 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5335 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5338 output_asm_insn ("pop{l}\t%0", xops
);
5343 get_pc_thunk_name (name
, REGNO (dest
));
5344 pic_labels_used
|= 1 << REGNO (dest
);
5346 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5347 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5348 output_asm_insn ("call\t%X2", xops
);
5349 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5350 is what will be referenced by the Mach-O PIC subsystem. */
5353 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5355 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5356 CODE_LABEL_NUMBER (label
));
5363 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5364 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5366 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5371 /* Generate an "push" pattern for input ARG. */
5376 return gen_rtx_SET (VOIDmode
,
5378 gen_rtx_PRE_DEC (Pmode
,
5379 stack_pointer_rtx
)),
5383 /* Return >= 0 if there is an unused call-clobbered register available
5384 for the entire function. */
5387 ix86_select_alt_pic_regnum (void)
5389 if (current_function_is_leaf
&& !current_function_profile
5390 && !ix86_current_function_calls_tls_descriptor
)
5393 for (i
= 2; i
>= 0; --i
)
5394 if (!regs_ever_live
[i
])
5398 return INVALID_REGNUM
;
5401 /* Return 1 if we need to save REGNO. */
5403 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5405 if (pic_offset_table_rtx
5406 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5407 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5408 || current_function_profile
5409 || current_function_calls_eh_return
5410 || current_function_uses_const_pool
))
5412 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5417 if (current_function_calls_eh_return
&& maybe_eh_return
)
5422 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5423 if (test
== INVALID_REGNUM
)
5430 if (cfun
->machine
->force_align_arg_pointer
5431 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5434 return (regs_ever_live
[regno
]
5435 && !call_used_regs
[regno
]
5436 && !fixed_regs
[regno
]
5437 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5440 /* Return number of registers to be saved on the stack. */
5443 ix86_nsaved_regs (void)
5448 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5449 if (ix86_save_reg (regno
, true))
5454 /* Return the offset between two registers, one to be eliminated, and the other
5455 its replacement, at the start of a routine. */
5458 ix86_initial_elimination_offset (int from
, int to
)
5460 struct ix86_frame frame
;
5461 ix86_compute_frame_layout (&frame
);
5463 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5464 return frame
.hard_frame_pointer_offset
;
5465 else if (from
== FRAME_POINTER_REGNUM
5466 && to
== HARD_FRAME_POINTER_REGNUM
)
5467 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5470 gcc_assert (to
== STACK_POINTER_REGNUM
);
5472 if (from
== ARG_POINTER_REGNUM
)
5473 return frame
.stack_pointer_offset
;
5475 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5476 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5480 /* Fill structure ix86_frame about frame of currently computed function. */
5483 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5485 HOST_WIDE_INT total_size
;
5486 unsigned int stack_alignment_needed
;
5487 HOST_WIDE_INT offset
;
5488 unsigned int preferred_alignment
;
5489 HOST_WIDE_INT size
= get_frame_size ();
5491 frame
->nregs
= ix86_nsaved_regs ();
5494 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5495 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5497 /* During reload iteration the amount of registers saved can change.
5498 Recompute the value as needed. Do not recompute when amount of registers
5499 didn't change as reload does multiple calls to the function and does not
5500 expect the decision to change within single iteration. */
5502 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5504 int count
= frame
->nregs
;
5506 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5507 /* The fast prologue uses move instead of push to save registers. This
5508 is significantly longer, but also executes faster as modern hardware
5509 can execute the moves in parallel, but can't do that for push/pop.
5511 Be careful about choosing what prologue to emit: When function takes
5512 many instructions to execute we may use slow version as well as in
5513 case function is known to be outside hot spot (this is known with
5514 feedback only). Weight the size of function by number of registers
5515 to save as it is cheap to use one or two push instructions but very
5516 slow to use many of them. */
5518 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5519 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5520 || (flag_branch_probabilities
5521 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5522 cfun
->machine
->use_fast_prologue_epilogue
= false;
5524 cfun
->machine
->use_fast_prologue_epilogue
5525 = !expensive_function_p (count
);
5527 if (TARGET_PROLOGUE_USING_MOVE
5528 && cfun
->machine
->use_fast_prologue_epilogue
)
5529 frame
->save_regs_using_mov
= true;
5531 frame
->save_regs_using_mov
= false;
5534 /* Skip return address and saved base pointer. */
5535 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5537 frame
->hard_frame_pointer_offset
= offset
;
5539 /* Do some sanity checking of stack_alignment_needed and
5540 preferred_alignment, since i386 port is the only using those features
5541 that may break easily. */
5543 gcc_assert (!size
|| stack_alignment_needed
);
5544 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5545 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5546 gcc_assert (stack_alignment_needed
5547 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5549 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5550 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5552 /* Register save area */
5553 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5556 if (ix86_save_varrargs_registers
)
5558 offset
+= X86_64_VARARGS_SIZE
;
5559 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5562 frame
->va_arg_size
= 0;
5564 /* Align start of frame for local function. */
5565 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5566 & -stack_alignment_needed
) - offset
;
5568 offset
+= frame
->padding1
;
5570 /* Frame pointer points here. */
5571 frame
->frame_pointer_offset
= offset
;
5575 /* Add outgoing arguments area. Can be skipped if we eliminated
5576 all the function calls as dead code.
5577 Skipping is however impossible when function calls alloca. Alloca
5578 expander assumes that last current_function_outgoing_args_size
5579 of stack frame are unused. */
5580 if (ACCUMULATE_OUTGOING_ARGS
5581 && (!current_function_is_leaf
|| current_function_calls_alloca
5582 || ix86_current_function_calls_tls_descriptor
))
5584 offset
+= current_function_outgoing_args_size
;
5585 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5588 frame
->outgoing_arguments_size
= 0;
5590 /* Align stack boundary. Only needed if we're calling another function
5592 if (!current_function_is_leaf
|| current_function_calls_alloca
5593 || ix86_current_function_calls_tls_descriptor
)
5594 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5595 & -preferred_alignment
) - offset
;
5597 frame
->padding2
= 0;
5599 offset
+= frame
->padding2
;
5601 /* We've reached end of stack frame. */
5602 frame
->stack_pointer_offset
= offset
;
5604 /* Size prologue needs to allocate. */
5605 frame
->to_allocate
=
5606 (size
+ frame
->padding1
+ frame
->padding2
5607 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5609 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5610 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5611 frame
->save_regs_using_mov
= false;
5613 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5614 && current_function_is_leaf
5615 && !ix86_current_function_calls_tls_descriptor
)
5617 frame
->red_zone_size
= frame
->to_allocate
;
5618 if (frame
->save_regs_using_mov
)
5619 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5620 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5621 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5624 frame
->red_zone_size
= 0;
5625 frame
->to_allocate
-= frame
->red_zone_size
;
5626 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5628 fprintf (stderr
, "\n");
5629 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5630 fprintf (stderr
, "size: %ld\n", (long)size
);
5631 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5632 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5633 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5634 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5635 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5636 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5637 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5638 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5639 (long)frame
->hard_frame_pointer_offset
);
5640 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5641 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5642 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5643 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5647 /* Emit code to save registers in the prologue. */
5650 ix86_emit_save_regs (void)
5655 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5656 if (ix86_save_reg (regno
, true))
5658 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5659 RTX_FRAME_RELATED_P (insn
) = 1;
5663 /* Emit code to save registers using MOV insns. First register
5664 is restored from POINTER + OFFSET. */
5666 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5671 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5672 if (ix86_save_reg (regno
, true))
5674 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5676 gen_rtx_REG (Pmode
, regno
));
5677 RTX_FRAME_RELATED_P (insn
) = 1;
5678 offset
+= UNITS_PER_WORD
;
5682 /* Expand prologue or epilogue stack adjustment.
5683 The pattern exist to put a dependency on all ebp-based memory accesses.
5684 STYLE should be negative if instructions should be marked as frame related,
5685 zero if %r11 register is live and cannot be freely used and positive
5689 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5694 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5695 else if (x86_64_immediate_operand (offset
, DImode
))
5696 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5700 /* r11 is used by indirect sibcall return as well, set before the
5701 epilogue and used after the epilogue. ATM indirect sibcall
5702 shouldn't be used together with huge frame sizes in one
5703 function because of the frame_size check in sibcall.c. */
5705 r11
= gen_rtx_REG (DImode
, R11_REG
);
5706 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5708 RTX_FRAME_RELATED_P (insn
) = 1;
5709 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5713 RTX_FRAME_RELATED_P (insn
) = 1;
5716 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5719 ix86_internal_arg_pointer (void)
5721 bool has_force_align_arg_pointer
=
5722 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5723 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5724 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5725 && DECL_NAME (current_function_decl
)
5726 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5727 && DECL_FILE_SCOPE_P (current_function_decl
))
5728 || ix86_force_align_arg_pointer
5729 || has_force_align_arg_pointer
)
5731 /* Nested functions can't realign the stack due to a register
5733 if (DECL_CONTEXT (current_function_decl
)
5734 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5736 if (ix86_force_align_arg_pointer
)
5737 warning (0, "-mstackrealign ignored for nested functions");
5738 if (has_force_align_arg_pointer
)
5739 error ("%s not supported for nested functions",
5740 ix86_force_align_arg_pointer_string
);
5741 return virtual_incoming_args_rtx
;
5743 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5744 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5747 return virtual_incoming_args_rtx
;
5750 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5751 This is called from dwarf2out.c to emit call frame instructions
5752 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5754 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5756 rtx unspec
= SET_SRC (pattern
);
5757 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5761 case UNSPEC_REG_SAVE
:
5762 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5763 SET_DEST (pattern
));
5765 case UNSPEC_DEF_CFA
:
5766 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5767 INTVAL (XVECEXP (unspec
, 0, 0)));
5774 /* Expand the prologue into a bunch of separate insns. */
5777 ix86_expand_prologue (void)
5781 struct ix86_frame frame
;
5782 HOST_WIDE_INT allocate
;
5784 ix86_compute_frame_layout (&frame
);
5786 if (cfun
->machine
->force_align_arg_pointer
)
5790 /* Grab the argument pointer. */
5791 x
= plus_constant (stack_pointer_rtx
, 4);
5792 y
= cfun
->machine
->force_align_arg_pointer
;
5793 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5794 RTX_FRAME_RELATED_P (insn
) = 1;
5796 /* The unwind info consists of two parts: install the fafp as the cfa,
5797 and record the fafp as the "save register" of the stack pointer.
5798 The later is there in order that the unwinder can see where it
5799 should restore the stack pointer across the and insn. */
5800 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5801 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5802 RTX_FRAME_RELATED_P (x
) = 1;
5803 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5805 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5806 RTX_FRAME_RELATED_P (y
) = 1;
5807 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5808 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5809 REG_NOTES (insn
) = x
;
5811 /* Align the stack. */
5812 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5815 /* And here we cheat like madmen with the unwind info. We force the
5816 cfa register back to sp+4, which is exactly what it was at the
5817 start of the function. Re-pushing the return address results in
5818 the return at the same spot relative to the cfa, and thus is
5819 correct wrt the unwind info. */
5820 x
= cfun
->machine
->force_align_arg_pointer
;
5821 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5822 insn
= emit_insn (gen_push (x
));
5823 RTX_FRAME_RELATED_P (insn
) = 1;
5826 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5827 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5828 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5829 REG_NOTES (insn
) = x
;
5832 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5833 slower on all targets. Also sdb doesn't like it. */
5835 if (frame_pointer_needed
)
5837 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5838 RTX_FRAME_RELATED_P (insn
) = 1;
5840 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5841 RTX_FRAME_RELATED_P (insn
) = 1;
5844 allocate
= frame
.to_allocate
;
5846 if (!frame
.save_regs_using_mov
)
5847 ix86_emit_save_regs ();
5849 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5851 /* When using red zone we may start register saving before allocating
5852 the stack frame saving one cycle of the prologue. */
5853 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5854 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5855 : stack_pointer_rtx
,
5856 -frame
.nregs
* UNITS_PER_WORD
);
5860 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5861 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5862 GEN_INT (-allocate
), -1);
5865 /* Only valid for Win32. */
5866 rtx eax
= gen_rtx_REG (SImode
, 0);
5867 bool eax_live
= ix86_eax_live_at_start_p ();
5870 gcc_assert (!TARGET_64BIT
);
5874 emit_insn (gen_push (eax
));
5878 emit_move_insn (eax
, GEN_INT (allocate
));
5880 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5881 RTX_FRAME_RELATED_P (insn
) = 1;
5882 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5883 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5884 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5885 t
, REG_NOTES (insn
));
5889 if (frame_pointer_needed
)
5890 t
= plus_constant (hard_frame_pointer_rtx
,
5893 - frame
.nregs
* UNITS_PER_WORD
);
5895 t
= plus_constant (stack_pointer_rtx
, allocate
);
5896 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5900 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5902 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5903 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5905 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5906 -frame
.nregs
* UNITS_PER_WORD
);
5909 pic_reg_used
= false;
5910 if (pic_offset_table_rtx
5911 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5912 || current_function_profile
))
5914 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5916 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5917 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5919 pic_reg_used
= true;
5925 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5927 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5929 /* Even with accurate pre-reload life analysis, we can wind up
5930 deleting all references to the pic register after reload.
5931 Consider if cross-jumping unifies two sides of a branch
5932 controlled by a comparison vs the only read from a global.
5933 In which case, allow the set_got to be deleted, though we're
5934 too late to do anything about the ebx save in the prologue. */
5935 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5938 /* Prevent function calls from be scheduled before the call to mcount.
5939 In the pic_reg_used case, make sure that the got load isn't deleted. */
5940 if (current_function_profile
)
5941 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5944 /* Emit code to restore saved registers using MOV insns. First register
5945 is restored from POINTER + OFFSET. */
5947 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5948 int maybe_eh_return
)
5951 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5953 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5954 if (ix86_save_reg (regno
, maybe_eh_return
))
5956 /* Ensure that adjust_address won't be forced to produce pointer
5957 out of range allowed by x86-64 instruction set. */
5958 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5962 r11
= gen_rtx_REG (DImode
, R11_REG
);
5963 emit_move_insn (r11
, GEN_INT (offset
));
5964 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5965 base_address
= gen_rtx_MEM (Pmode
, r11
);
5968 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5969 adjust_address (base_address
, Pmode
, offset
));
5970 offset
+= UNITS_PER_WORD
;
5974 /* Restore function stack, frame, and registers. */
5977 ix86_expand_epilogue (int style
)
5980 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5981 struct ix86_frame frame
;
5982 HOST_WIDE_INT offset
;
5984 ix86_compute_frame_layout (&frame
);
5986 /* Calculate start of saved registers relative to ebp. Special care
5987 must be taken for the normal return case of a function using
5988 eh_return: the eax and edx registers are marked as saved, but not
5989 restored along this path. */
5990 offset
= frame
.nregs
;
5991 if (current_function_calls_eh_return
&& style
!= 2)
5993 offset
*= -UNITS_PER_WORD
;
5995 /* If we're only restoring one register and sp is not valid then
5996 using a move instruction to restore the register since it's
5997 less work than reloading sp and popping the register.
5999 The default code result in stack adjustment using add/lea instruction,
6000 while this code results in LEAVE instruction (or discrete equivalent),
6001 so it is profitable in some other cases as well. Especially when there
6002 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6003 and there is exactly one register to pop. This heuristic may need some
6004 tuning in future. */
6005 if ((!sp_valid
&& frame
.nregs
<= 1)
6006 || (TARGET_EPILOGUE_USING_MOVE
6007 && cfun
->machine
->use_fast_prologue_epilogue
6008 && (frame
.nregs
> 1 || frame
.to_allocate
))
6009 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6010 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6011 && cfun
->machine
->use_fast_prologue_epilogue
6012 && frame
.nregs
== 1)
6013 || current_function_calls_eh_return
)
6015 /* Restore registers. We can use ebp or esp to address the memory
6016 locations. If both are available, default to ebp, since offsets
6017 are known to be small. Only exception is esp pointing directly to the
6018 end of block of saved registers, where we may simplify addressing
6021 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6022 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6023 frame
.to_allocate
, style
== 2);
6025 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6026 offset
, style
== 2);
6028 /* eh_return epilogues need %ecx added to the stack pointer. */
6031 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6033 if (frame_pointer_needed
)
6035 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6036 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6037 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6039 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6040 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6042 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6047 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6048 tmp
= plus_constant (tmp
, (frame
.to_allocate
6049 + frame
.nregs
* UNITS_PER_WORD
));
6050 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6053 else if (!frame_pointer_needed
)
6054 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6055 GEN_INT (frame
.to_allocate
6056 + frame
.nregs
* UNITS_PER_WORD
),
6058 /* If not an i386, mov & pop is faster than "leave". */
6059 else if (TARGET_USE_LEAVE
|| optimize_size
6060 || !cfun
->machine
->use_fast_prologue_epilogue
)
6061 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6064 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6065 hard_frame_pointer_rtx
,
6068 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6070 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6075 /* First step is to deallocate the stack frame so that we can
6076 pop the registers. */
6079 gcc_assert (frame_pointer_needed
);
6080 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6081 hard_frame_pointer_rtx
,
6082 GEN_INT (offset
), style
);
6084 else if (frame
.to_allocate
)
6085 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6086 GEN_INT (frame
.to_allocate
), style
);
6088 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6089 if (ix86_save_reg (regno
, false))
6092 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6094 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6096 if (frame_pointer_needed
)
6098 /* Leave results in shorter dependency chains on CPUs that are
6099 able to grok it fast. */
6100 if (TARGET_USE_LEAVE
)
6101 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6102 else if (TARGET_64BIT
)
6103 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6105 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6109 if (cfun
->machine
->force_align_arg_pointer
)
6111 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6112 cfun
->machine
->force_align_arg_pointer
,
6116 /* Sibcall epilogues don't want a return instruction. */
6120 if (current_function_pops_args
&& current_function_args_size
)
6122 rtx popc
= GEN_INT (current_function_pops_args
);
6124 /* i386 can only pop 64K bytes. If asked to pop more, pop
6125 return address, do explicit add, and jump indirectly to the
6128 if (current_function_pops_args
>= 65536)
6130 rtx ecx
= gen_rtx_REG (SImode
, 2);
6132 /* There is no "pascal" calling convention in 64bit ABI. */
6133 gcc_assert (!TARGET_64BIT
);
6135 emit_insn (gen_popsi1 (ecx
));
6136 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6137 emit_jump_insn (gen_return_indirect_internal (ecx
));
6140 emit_jump_insn (gen_return_pop_internal (popc
));
6143 emit_jump_insn (gen_return_internal ());
6146 /* Reset from the function's potential modifications. */
6149 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6150 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6152 if (pic_offset_table_rtx
)
6153 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6155 /* Mach-O doesn't support labels at the end of objects, so if
6156 it looks like we might want one, insert a NOP. */
6158 rtx insn
= get_last_insn ();
6161 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6162 insn
= PREV_INSN (insn
);
6166 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6167 fputs ("\tnop\n", file
);
6173 /* Extract the parts of an RTL expression that is a valid memory address
6174 for an instruction. Return 0 if the structure of the address is
6175 grossly off. Return -1 if the address contains ASHIFT, so it is not
6176 strictly valid, but still used for computing length of lea instruction. */
6179 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6181 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6182 rtx base_reg
, index_reg
;
6183 HOST_WIDE_INT scale
= 1;
6184 rtx scale_rtx
= NULL_RTX
;
6186 enum ix86_address_seg seg
= SEG_DEFAULT
;
6188 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6190 else if (GET_CODE (addr
) == PLUS
)
6200 addends
[n
++] = XEXP (op
, 1);
6203 while (GET_CODE (op
) == PLUS
);
6208 for (i
= n
; i
>= 0; --i
)
6211 switch (GET_CODE (op
))
6216 index
= XEXP (op
, 0);
6217 scale_rtx
= XEXP (op
, 1);
6221 if (XINT (op
, 1) == UNSPEC_TP
6222 && TARGET_TLS_DIRECT_SEG_REFS
6223 && seg
== SEG_DEFAULT
)
6224 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6253 else if (GET_CODE (addr
) == MULT
)
6255 index
= XEXP (addr
, 0); /* index*scale */
6256 scale_rtx
= XEXP (addr
, 1);
6258 else if (GET_CODE (addr
) == ASHIFT
)
6262 /* We're called for lea too, which implements ashift on occasion. */
6263 index
= XEXP (addr
, 0);
6264 tmp
= XEXP (addr
, 1);
6265 if (!CONST_INT_P (tmp
))
6267 scale
= INTVAL (tmp
);
6268 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6274 disp
= addr
; /* displacement */
6276 /* Extract the integral value of scale. */
6279 if (!CONST_INT_P (scale_rtx
))
6281 scale
= INTVAL (scale_rtx
);
6284 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6285 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6287 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6288 if (base_reg
&& index_reg
&& scale
== 1
6289 && (index_reg
== arg_pointer_rtx
6290 || index_reg
== frame_pointer_rtx
6291 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6294 tmp
= base
, base
= index
, index
= tmp
;
6295 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6298 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6299 if ((base_reg
== hard_frame_pointer_rtx
6300 || base_reg
== frame_pointer_rtx
6301 || base_reg
== arg_pointer_rtx
) && !disp
)
6304 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6305 Avoid this by transforming to [%esi+0]. */
6306 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6307 && base_reg
&& !index_reg
&& !disp
6309 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6312 /* Special case: encode reg+reg instead of reg*2. */
6313 if (!base
&& index
&& scale
&& scale
== 2)
6314 base
= index
, base_reg
= index_reg
, scale
= 1;
6316 /* Special case: scaling cannot be encoded without base or displacement. */
6317 if (!base
&& !disp
&& index
&& scale
!= 1)
6329 /* Return cost of the memory address x.
6330 For i386, it is better to use a complex address than let gcc copy
6331 the address into a reg and make a new pseudo. But not if the address
6332 requires to two regs - that would mean more pseudos with longer
6335 ix86_address_cost (rtx x
)
6337 struct ix86_address parts
;
6339 int ok
= ix86_decompose_address (x
, &parts
);
6343 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6344 parts
.base
= SUBREG_REG (parts
.base
);
6345 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6346 parts
.index
= SUBREG_REG (parts
.index
);
6348 /* More complex memory references are better. */
6349 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6351 if (parts
.seg
!= SEG_DEFAULT
)
6354 /* Attempt to minimize number of registers in the address. */
6356 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6358 && (!REG_P (parts
.index
)
6359 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6363 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6365 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6366 && parts
.base
!= parts
.index
)
6369 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6370 since it's predecode logic can't detect the length of instructions
6371 and it degenerates to vector decoded. Increase cost of such
6372 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6373 to split such addresses or even refuse such addresses at all.
6375 Following addressing modes are affected:
6380 The first and last case may be avoidable by explicitly coding the zero in
6381 memory address, but I don't have AMD-K6 machine handy to check this
6385 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6386 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6387 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6393 /* If X is a machine specific address (i.e. a symbol or label being
6394 referenced as a displacement from the GOT implemented using an
6395 UNSPEC), then return the base term. Otherwise return X. */
6398 ix86_find_base_term (rtx x
)
6404 if (GET_CODE (x
) != CONST
)
6407 if (GET_CODE (term
) == PLUS
6408 && (CONST_INT_P (XEXP (term
, 1))
6409 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6410 term
= XEXP (term
, 0);
6411 if (GET_CODE (term
) != UNSPEC
6412 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6415 term
= XVECEXP (term
, 0, 0);
6417 if (GET_CODE (term
) != SYMBOL_REF
6418 && GET_CODE (term
) != LABEL_REF
)
6424 term
= ix86_delegitimize_address (x
);
6426 if (GET_CODE (term
) != SYMBOL_REF
6427 && GET_CODE (term
) != LABEL_REF
)
6433 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6434 this is used for to form addresses to local data when -fPIC is in
6438 darwin_local_data_pic (rtx disp
)
6440 if (GET_CODE (disp
) == MINUS
)
6442 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6443 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6444 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6446 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6447 if (! strcmp (sym_name
, "<pic base>"))
6455 /* Determine if a given RTX is a valid constant. We already know this
6456 satisfies CONSTANT_P. */
6459 legitimate_constant_p (rtx x
)
6461 switch (GET_CODE (x
))
6466 if (GET_CODE (x
) == PLUS
)
6468 if (!CONST_INT_P (XEXP (x
, 1)))
6473 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6476 /* Only some unspecs are valid as "constants". */
6477 if (GET_CODE (x
) == UNSPEC
)
6478 switch (XINT (x
, 1))
6481 return TARGET_64BIT
;
6484 x
= XVECEXP (x
, 0, 0);
6485 return (GET_CODE (x
) == SYMBOL_REF
6486 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6488 x
= XVECEXP (x
, 0, 0);
6489 return (GET_CODE (x
) == SYMBOL_REF
6490 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6495 /* We must have drilled down to a symbol. */
6496 if (GET_CODE (x
) == LABEL_REF
)
6498 if (GET_CODE (x
) != SYMBOL_REF
)
6503 /* TLS symbols are never valid. */
6504 if (SYMBOL_REF_TLS_MODEL (x
))
6509 if (GET_MODE (x
) == TImode
6510 && x
!= CONST0_RTX (TImode
)
6516 if (x
== CONST0_RTX (GET_MODE (x
)))
6524 /* Otherwise we handle everything else in the move patterns. */
6528 /* Determine if it's legal to put X into the constant pool. This
6529 is not possible for the address of thread-local symbols, which
6530 is checked above. */
6533 ix86_cannot_force_const_mem (rtx x
)
6535 /* We can always put integral constants and vectors in memory. */
6536 switch (GET_CODE (x
))
6546 return !legitimate_constant_p (x
);
6549 /* Determine if a given RTX is a valid constant address. */
6552 constant_address_p (rtx x
)
6554 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6557 /* Nonzero if the constant value X is a legitimate general operand
6558 when generating PIC code. It is given that flag_pic is on and
6559 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6562 legitimate_pic_operand_p (rtx x
)
6566 switch (GET_CODE (x
))
6569 inner
= XEXP (x
, 0);
6570 if (GET_CODE (inner
) == PLUS
6571 && CONST_INT_P (XEXP (inner
, 1)))
6572 inner
= XEXP (inner
, 0);
6574 /* Only some unspecs are valid as "constants". */
6575 if (GET_CODE (inner
) == UNSPEC
)
6576 switch (XINT (inner
, 1))
6579 return TARGET_64BIT
;
6581 x
= XVECEXP (inner
, 0, 0);
6582 return (GET_CODE (x
) == SYMBOL_REF
6583 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6591 return legitimate_pic_address_disp_p (x
);
6598 /* Determine if a given CONST RTX is a valid memory displacement
6602 legitimate_pic_address_disp_p (rtx disp
)
6606 /* In 64bit mode we can allow direct addresses of symbols and labels
6607 when they are not dynamic symbols. */
6610 rtx op0
= disp
, op1
;
6612 switch (GET_CODE (disp
))
6618 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6620 op0
= XEXP (XEXP (disp
, 0), 0);
6621 op1
= XEXP (XEXP (disp
, 0), 1);
6622 if (!CONST_INT_P (op1
)
6623 || INTVAL (op1
) >= 16*1024*1024
6624 || INTVAL (op1
) < -16*1024*1024)
6626 if (GET_CODE (op0
) == LABEL_REF
)
6628 if (GET_CODE (op0
) != SYMBOL_REF
)
6633 /* TLS references should always be enclosed in UNSPEC. */
6634 if (SYMBOL_REF_TLS_MODEL (op0
))
6636 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6644 if (GET_CODE (disp
) != CONST
)
6646 disp
= XEXP (disp
, 0);
6650 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6651 of GOT tables. We should not need these anyway. */
6652 if (GET_CODE (disp
) != UNSPEC
6653 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6654 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6657 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6658 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6664 if (GET_CODE (disp
) == PLUS
)
6666 if (!CONST_INT_P (XEXP (disp
, 1)))
6668 disp
= XEXP (disp
, 0);
6672 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6675 if (GET_CODE (disp
) != UNSPEC
)
6678 switch (XINT (disp
, 1))
6683 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6685 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6686 While ABI specify also 32bit relocation but we don't produce it in
6687 small PIC model at all. */
6688 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6689 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6691 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6693 case UNSPEC_GOTTPOFF
:
6694 case UNSPEC_GOTNTPOFF
:
6695 case UNSPEC_INDNTPOFF
:
6698 disp
= XVECEXP (disp
, 0, 0);
6699 return (GET_CODE (disp
) == SYMBOL_REF
6700 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6702 disp
= XVECEXP (disp
, 0, 0);
6703 return (GET_CODE (disp
) == SYMBOL_REF
6704 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6706 disp
= XVECEXP (disp
, 0, 0);
6707 return (GET_CODE (disp
) == SYMBOL_REF
6708 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6714 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6715 memory address for an instruction. The MODE argument is the machine mode
6716 for the MEM expression that wants to use this address.
6718 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6719 convert common non-canonical forms to canonical form so that they will
6723 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6725 struct ix86_address parts
;
6726 rtx base
, index
, disp
;
6727 HOST_WIDE_INT scale
;
6728 const char *reason
= NULL
;
6729 rtx reason_rtx
= NULL_RTX
;
6731 if (TARGET_DEBUG_ADDR
)
6734 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6735 GET_MODE_NAME (mode
), strict
);
6739 if (ix86_decompose_address (addr
, &parts
) <= 0)
6741 reason
= "decomposition failed";
6746 index
= parts
.index
;
6748 scale
= parts
.scale
;
6750 /* Validate base register.
6752 Don't allow SUBREG's that span more than a word here. It can lead to spill
6753 failures when the base is one word out of a two word structure, which is
6754 represented internally as a DImode int. */
6763 else if (GET_CODE (base
) == SUBREG
6764 && REG_P (SUBREG_REG (base
))
6765 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6767 reg
= SUBREG_REG (base
);
6770 reason
= "base is not a register";
6774 if (GET_MODE (base
) != Pmode
)
6776 reason
= "base is not in Pmode";
6780 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6781 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6783 reason
= "base is not valid";
6788 /* Validate index register.
6790 Don't allow SUBREG's that span more than a word here -- same as above. */
6799 else if (GET_CODE (index
) == SUBREG
6800 && REG_P (SUBREG_REG (index
))
6801 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6803 reg
= SUBREG_REG (index
);
6806 reason
= "index is not a register";
6810 if (GET_MODE (index
) != Pmode
)
6812 reason
= "index is not in Pmode";
6816 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6817 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6819 reason
= "index is not valid";
6824 /* Validate scale factor. */
6827 reason_rtx
= GEN_INT (scale
);
6830 reason
= "scale without index";
6834 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6836 reason
= "scale is not a valid multiplier";
6841 /* Validate displacement. */
6846 if (GET_CODE (disp
) == CONST
6847 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6848 switch (XINT (XEXP (disp
, 0), 1))
6850 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6851 used. While ABI specify also 32bit relocations, we don't produce
6852 them at all and use IP relative instead. */
6855 gcc_assert (flag_pic
);
6857 goto is_legitimate_pic
;
6858 reason
= "64bit address unspec";
6861 case UNSPEC_GOTPCREL
:
6862 gcc_assert (flag_pic
);
6863 goto is_legitimate_pic
;
6865 case UNSPEC_GOTTPOFF
:
6866 case UNSPEC_GOTNTPOFF
:
6867 case UNSPEC_INDNTPOFF
:
6873 reason
= "invalid address unspec";
6877 else if (SYMBOLIC_CONST (disp
)
6881 && MACHOPIC_INDIRECT
6882 && !machopic_operand_p (disp
)
6888 if (TARGET_64BIT
&& (index
|| base
))
6890 /* foo@dtpoff(%rX) is ok. */
6891 if (GET_CODE (disp
) != CONST
6892 || GET_CODE (XEXP (disp
, 0)) != PLUS
6893 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6894 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6895 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6896 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6898 reason
= "non-constant pic memory reference";
6902 else if (! legitimate_pic_address_disp_p (disp
))
6904 reason
= "displacement is an invalid pic construct";
6908 /* This code used to verify that a symbolic pic displacement
6909 includes the pic_offset_table_rtx register.
6911 While this is good idea, unfortunately these constructs may
6912 be created by "adds using lea" optimization for incorrect
6921 This code is nonsensical, but results in addressing
6922 GOT table with pic_offset_table_rtx base. We can't
6923 just refuse it easily, since it gets matched by
6924 "addsi3" pattern, that later gets split to lea in the
6925 case output register differs from input. While this
6926 can be handled by separate addsi pattern for this case
6927 that never results in lea, this seems to be easier and
6928 correct fix for crash to disable this test. */
6930 else if (GET_CODE (disp
) != LABEL_REF
6931 && !CONST_INT_P (disp
)
6932 && (GET_CODE (disp
) != CONST
6933 || !legitimate_constant_p (disp
))
6934 && (GET_CODE (disp
) != SYMBOL_REF
6935 || !legitimate_constant_p (disp
)))
6937 reason
= "displacement is not constant";
6940 else if (TARGET_64BIT
6941 && !x86_64_immediate_operand (disp
, VOIDmode
))
6943 reason
= "displacement is out of range";
6948 /* Everything looks valid. */
6949 if (TARGET_DEBUG_ADDR
)
6950 fprintf (stderr
, "Success.\n");
6954 if (TARGET_DEBUG_ADDR
)
6956 fprintf (stderr
, "Error: %s\n", reason
);
6957 debug_rtx (reason_rtx
);
6962 /* Return a unique alias set for the GOT. */
6964 static HOST_WIDE_INT
6965 ix86_GOT_alias_set (void)
6967 static HOST_WIDE_INT set
= -1;
6969 set
= new_alias_set ();
6973 /* Return a legitimate reference for ORIG (an address) using the
6974 register REG. If REG is 0, a new pseudo is generated.
6976 There are two types of references that must be handled:
6978 1. Global data references must load the address from the GOT, via
6979 the PIC reg. An insn is emitted to do this load, and the reg is
6982 2. Static data references, constant pool addresses, and code labels
6983 compute the address as an offset from the GOT, whose base is in
6984 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6985 differentiate them from global data objects. The returned
6986 address is the PIC reg + an unspec constant.
6988 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6989 reg also appears in the address. */
6992 legitimize_pic_address (rtx orig
, rtx reg
)
6999 if (TARGET_MACHO
&& !TARGET_64BIT
)
7002 reg
= gen_reg_rtx (Pmode
);
7003 /* Use the generic Mach-O PIC machinery. */
7004 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7008 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7010 else if (TARGET_64BIT
7011 && ix86_cmodel
!= CM_SMALL_PIC
7012 && local_symbolic_operand (addr
, Pmode
))
7015 /* This symbol may be referenced via a displacement from the PIC
7016 base address (@GOTOFF). */
7018 if (reload_in_progress
)
7019 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7020 if (GET_CODE (addr
) == CONST
)
7021 addr
= XEXP (addr
, 0);
7022 if (GET_CODE (addr
) == PLUS
)
7024 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7025 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7028 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7029 new = gen_rtx_CONST (Pmode
, new);
7031 tmpreg
= gen_reg_rtx (Pmode
);
7034 emit_move_insn (tmpreg
, new);
7038 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7039 tmpreg
, 1, OPTAB_DIRECT
);
7042 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7044 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
7046 /* This symbol may be referenced via a displacement from the PIC
7047 base address (@GOTOFF). */
7049 if (reload_in_progress
)
7050 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7051 if (GET_CODE (addr
) == CONST
)
7052 addr
= XEXP (addr
, 0);
7053 if (GET_CODE (addr
) == PLUS
)
7055 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7056 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7059 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7060 new = gen_rtx_CONST (Pmode
, new);
7061 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7065 emit_move_insn (reg
, new);
7069 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7073 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7074 new = gen_rtx_CONST (Pmode
, new);
7075 new = gen_const_mem (Pmode
, new);
7076 set_mem_alias_set (new, ix86_GOT_alias_set ());
7079 reg
= gen_reg_rtx (Pmode
);
7080 /* Use directly gen_movsi, otherwise the address is loaded
7081 into register for CSE. We don't want to CSE this addresses,
7082 instead we CSE addresses from the GOT table, so skip this. */
7083 emit_insn (gen_movsi (reg
, new));
7088 /* This symbol must be referenced via a load from the
7089 Global Offset Table (@GOT). */
7091 if (reload_in_progress
)
7092 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7093 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7094 new = gen_rtx_CONST (Pmode
, new);
7095 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7096 new = gen_const_mem (Pmode
, new);
7097 set_mem_alias_set (new, ix86_GOT_alias_set ());
7100 reg
= gen_reg_rtx (Pmode
);
7101 emit_move_insn (reg
, new);
7107 if (CONST_INT_P (addr
)
7108 && !x86_64_immediate_operand (addr
, VOIDmode
))
7112 emit_move_insn (reg
, addr
);
7116 new = force_reg (Pmode
, addr
);
7118 else if (GET_CODE (addr
) == CONST
)
7120 addr
= XEXP (addr
, 0);
7122 /* We must match stuff we generate before. Assume the only
7123 unspecs that can get here are ours. Not that we could do
7124 anything with them anyway.... */
7125 if (GET_CODE (addr
) == UNSPEC
7126 || (GET_CODE (addr
) == PLUS
7127 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7129 gcc_assert (GET_CODE (addr
) == PLUS
);
7131 if (GET_CODE (addr
) == PLUS
)
7133 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7135 /* Check first to see if this is a constant offset from a @GOTOFF
7136 symbol reference. */
7137 if (local_symbolic_operand (op0
, Pmode
)
7138 && CONST_INT_P (op1
))
7142 if (reload_in_progress
)
7143 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7144 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7146 new = gen_rtx_PLUS (Pmode
, new, op1
);
7147 new = gen_rtx_CONST (Pmode
, new);
7148 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7152 emit_move_insn (reg
, new);
7158 if (INTVAL (op1
) < -16*1024*1024
7159 || INTVAL (op1
) >= 16*1024*1024)
7161 if (!x86_64_immediate_operand (op1
, Pmode
))
7162 op1
= force_reg (Pmode
, op1
);
7163 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7169 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7170 new = legitimize_pic_address (XEXP (addr
, 1),
7171 base
== reg
? NULL_RTX
: reg
);
7173 if (CONST_INT_P (new))
7174 new = plus_constant (base
, INTVAL (new));
7177 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7179 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7180 new = XEXP (new, 1);
7182 new = gen_rtx_PLUS (Pmode
, base
, new);
7190 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7193 get_thread_pointer (int to_reg
)
7197 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7201 reg
= gen_reg_rtx (Pmode
);
7202 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7203 insn
= emit_insn (insn
);
7208 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7209 false if we expect this to be used for a memory address and true if
7210 we expect to load the address into a register. */
7213 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7215 rtx dest
, base
, off
, pic
, tp
;
7220 case TLS_MODEL_GLOBAL_DYNAMIC
:
7221 dest
= gen_reg_rtx (Pmode
);
7222 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7224 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7226 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7229 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7230 insns
= get_insns ();
7233 emit_libcall_block (insns
, dest
, rax
, x
);
7235 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7236 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7238 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7240 if (TARGET_GNU2_TLS
)
7242 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7244 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7248 case TLS_MODEL_LOCAL_DYNAMIC
:
7249 base
= gen_reg_rtx (Pmode
);
7250 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7252 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7254 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7257 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7258 insns
= get_insns ();
7261 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7262 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7263 emit_libcall_block (insns
, base
, rax
, note
);
7265 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7266 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7268 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7270 if (TARGET_GNU2_TLS
)
7272 rtx x
= ix86_tls_module_base ();
7274 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7275 gen_rtx_MINUS (Pmode
, x
, tp
));
7278 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7279 off
= gen_rtx_CONST (Pmode
, off
);
7281 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7283 if (TARGET_GNU2_TLS
)
7285 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7287 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7292 case TLS_MODEL_INITIAL_EXEC
:
7296 type
= UNSPEC_GOTNTPOFF
;
7300 if (reload_in_progress
)
7301 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7302 pic
= pic_offset_table_rtx
;
7303 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7305 else if (!TARGET_ANY_GNU_TLS
)
7307 pic
= gen_reg_rtx (Pmode
);
7308 emit_insn (gen_set_got (pic
));
7309 type
= UNSPEC_GOTTPOFF
;
7314 type
= UNSPEC_INDNTPOFF
;
7317 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7318 off
= gen_rtx_CONST (Pmode
, off
);
7320 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7321 off
= gen_const_mem (Pmode
, off
);
7322 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7324 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7326 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7327 off
= force_reg (Pmode
, off
);
7328 return gen_rtx_PLUS (Pmode
, base
, off
);
7332 base
= get_thread_pointer (true);
7333 dest
= gen_reg_rtx (Pmode
);
7334 emit_insn (gen_subsi3 (dest
, base
, off
));
7338 case TLS_MODEL_LOCAL_EXEC
:
7339 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7340 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7341 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7342 off
= gen_rtx_CONST (Pmode
, off
);
7344 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7346 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7347 return gen_rtx_PLUS (Pmode
, base
, off
);
7351 base
= get_thread_pointer (true);
7352 dest
= gen_reg_rtx (Pmode
);
7353 emit_insn (gen_subsi3 (dest
, base
, off
));
7364 /* Try machine-dependent ways of modifying an illegitimate address
7365 to be legitimate. If we find one, return the new, valid address.
7366 This macro is used in only one place: `memory_address' in explow.c.
7368 OLDX is the address as it was before break_out_memory_refs was called.
7369 In some cases it is useful to look at this to decide what needs to be done.
7371 MODE and WIN are passed so that this macro can use
7372 GO_IF_LEGITIMATE_ADDRESS.
7374 It is always safe for this macro to do nothing. It exists to recognize
7375 opportunities to optimize the output.
7377 For the 80386, we handle X+REG by loading X into a register R and
7378 using R+REG. R will go in a general reg and indexing will be used.
7379 However, if REG is a broken-out memory address or multiplication,
7380 nothing needs to be done because REG can certainly go in a general reg.
7382 When -fpic is used, special handling is needed for symbolic references.
7383 See comments by legitimize_pic_address in i386.c for details. */
7386 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7391 if (TARGET_DEBUG_ADDR
)
7393 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7394 GET_MODE_NAME (mode
));
7398 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7400 return legitimize_tls_address (x
, log
, false);
7401 if (GET_CODE (x
) == CONST
7402 && GET_CODE (XEXP (x
, 0)) == PLUS
7403 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7404 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7406 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7407 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7410 if (flag_pic
&& SYMBOLIC_CONST (x
))
7411 return legitimize_pic_address (x
, 0);
7413 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7414 if (GET_CODE (x
) == ASHIFT
7415 && CONST_INT_P (XEXP (x
, 1))
7416 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7419 log
= INTVAL (XEXP (x
, 1));
7420 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7421 GEN_INT (1 << log
));
7424 if (GET_CODE (x
) == PLUS
)
7426 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7428 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7429 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7430 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7433 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7434 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7435 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7436 GEN_INT (1 << log
));
7439 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7440 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7441 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7444 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7445 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7446 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7447 GEN_INT (1 << log
));
7450 /* Put multiply first if it isn't already. */
7451 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7453 rtx tmp
= XEXP (x
, 0);
7454 XEXP (x
, 0) = XEXP (x
, 1);
7459 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7460 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7461 created by virtual register instantiation, register elimination, and
7462 similar optimizations. */
7463 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7466 x
= gen_rtx_PLUS (Pmode
,
7467 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7468 XEXP (XEXP (x
, 1), 0)),
7469 XEXP (XEXP (x
, 1), 1));
7473 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7474 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7475 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7476 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7477 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7478 && CONSTANT_P (XEXP (x
, 1)))
7481 rtx other
= NULL_RTX
;
7483 if (CONST_INT_P (XEXP (x
, 1)))
7485 constant
= XEXP (x
, 1);
7486 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7488 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7490 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7491 other
= XEXP (x
, 1);
7499 x
= gen_rtx_PLUS (Pmode
,
7500 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7501 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7502 plus_constant (other
, INTVAL (constant
)));
7506 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7509 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7512 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7515 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7518 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7522 && REG_P (XEXP (x
, 1))
7523 && REG_P (XEXP (x
, 0)))
7526 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7529 x
= legitimize_pic_address (x
, 0);
7532 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7535 if (REG_P (XEXP (x
, 0)))
7537 rtx temp
= gen_reg_rtx (Pmode
);
7538 rtx val
= force_operand (XEXP (x
, 1), temp
);
7540 emit_move_insn (temp
, val
);
7546 else if (REG_P (XEXP (x
, 1)))
7548 rtx temp
= gen_reg_rtx (Pmode
);
7549 rtx val
= force_operand (XEXP (x
, 0), temp
);
7551 emit_move_insn (temp
, val
);
7561 /* Print an integer constant expression in assembler syntax. Addition
7562 and subtraction are the only arithmetic that may appear in these
7563 expressions. FILE is the stdio stream to write to, X is the rtx, and
7564 CODE is the operand print code from the output string. */
7567 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7571 switch (GET_CODE (x
))
7574 gcc_assert (flag_pic
);
7579 output_addr_const (file
, x
);
7580 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7581 fputs ("@PLT", file
);
7588 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7589 assemble_name (asm_out_file
, buf
);
7593 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7597 /* This used to output parentheses around the expression,
7598 but that does not work on the 386 (either ATT or BSD assembler). */
7599 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7603 if (GET_MODE (x
) == VOIDmode
)
7605 /* We can use %d if the number is <32 bits and positive. */
7606 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7607 fprintf (file
, "0x%lx%08lx",
7608 (unsigned long) CONST_DOUBLE_HIGH (x
),
7609 (unsigned long) CONST_DOUBLE_LOW (x
));
7611 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7614 /* We can't handle floating point constants;
7615 PRINT_OPERAND must handle them. */
7616 output_operand_lossage ("floating constant misused");
7620 /* Some assemblers need integer constants to appear first. */
7621 if (CONST_INT_P (XEXP (x
, 0)))
7623 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7625 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7629 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7630 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7632 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7638 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7639 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7641 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7643 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7647 gcc_assert (XVECLEN (x
, 0) == 1);
7648 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7649 switch (XINT (x
, 1))
7652 fputs ("@GOT", file
);
7655 fputs ("@GOTOFF", file
);
7657 case UNSPEC_GOTPCREL
:
7658 fputs ("@GOTPCREL(%rip)", file
);
7660 case UNSPEC_GOTTPOFF
:
7661 /* FIXME: This might be @TPOFF in Sun ld too. */
7662 fputs ("@GOTTPOFF", file
);
7665 fputs ("@TPOFF", file
);
7669 fputs ("@TPOFF", file
);
7671 fputs ("@NTPOFF", file
);
7674 fputs ("@DTPOFF", file
);
7676 case UNSPEC_GOTNTPOFF
:
7678 fputs ("@GOTTPOFF(%rip)", file
);
7680 fputs ("@GOTNTPOFF", file
);
7682 case UNSPEC_INDNTPOFF
:
7683 fputs ("@INDNTPOFF", file
);
7686 output_operand_lossage ("invalid UNSPEC as operand");
7692 output_operand_lossage ("invalid expression as operand");
7696 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7697 We need to emit DTP-relative relocations. */
7700 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7702 fputs (ASM_LONG
, file
);
7703 output_addr_const (file
, x
);
7704 fputs ("@DTPOFF", file
);
7710 fputs (", 0", file
);
7717 /* In the name of slightly smaller debug output, and to cater to
7718 general assembler lossage, recognize PIC+GOTOFF and turn it back
7719 into a direct symbol reference.
7721 On Darwin, this is necessary to avoid a crash, because Darwin
7722 has a different PIC label for each routine but the DWARF debugging
7723 information is not associated with any particular routine, so it's
7724 necessary to remove references to the PIC label from RTL stored by
7725 the DWARF output code. */
7728 ix86_delegitimize_address (rtx orig_x
)
7731 /* reg_addend is NULL or a multiple of some register. */
7732 rtx reg_addend
= NULL_RTX
;
7733 /* const_addend is NULL or a const_int. */
7734 rtx const_addend
= NULL_RTX
;
7735 /* This is the result, or NULL. */
7736 rtx result
= NULL_RTX
;
7743 if (GET_CODE (x
) != CONST
7744 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7745 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7748 return XVECEXP (XEXP (x
, 0), 0, 0);
7751 if (GET_CODE (x
) != PLUS
7752 || GET_CODE (XEXP (x
, 1)) != CONST
)
7755 if (REG_P (XEXP (x
, 0))
7756 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7757 /* %ebx + GOT/GOTOFF */
7759 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7761 /* %ebx + %reg * scale + GOT/GOTOFF */
7762 reg_addend
= XEXP (x
, 0);
7763 if (REG_P (XEXP (reg_addend
, 0))
7764 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7765 reg_addend
= XEXP (reg_addend
, 1);
7766 else if (REG_P (XEXP (reg_addend
, 1))
7767 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7768 reg_addend
= XEXP (reg_addend
, 0);
7771 if (!REG_P (reg_addend
)
7772 && GET_CODE (reg_addend
) != MULT
7773 && GET_CODE (reg_addend
) != ASHIFT
)
7779 x
= XEXP (XEXP (x
, 1), 0);
7780 if (GET_CODE (x
) == PLUS
7781 && CONST_INT_P (XEXP (x
, 1)))
7783 const_addend
= XEXP (x
, 1);
7787 if (GET_CODE (x
) == UNSPEC
7788 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7789 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7790 result
= XVECEXP (x
, 0, 0);
7792 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7794 result
= XEXP (x
, 0);
7800 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7802 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7807 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7812 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7814 enum rtx_code second_code
, bypass_code
;
7815 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7816 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7817 code
= ix86_fp_compare_code_to_integer (code
);
7821 code
= reverse_condition (code
);
7832 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7836 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7837 Those same assemblers have the same but opposite lossage on cmov. */
7838 gcc_assert (mode
== CCmode
);
7839 suffix
= fp
? "nbe" : "a";
7859 gcc_assert (mode
== CCmode
);
7881 gcc_assert (mode
== CCmode
);
7882 suffix
= fp
? "nb" : "ae";
7885 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7889 gcc_assert (mode
== CCmode
);
7893 suffix
= fp
? "u" : "p";
7896 suffix
= fp
? "nu" : "np";
7901 fputs (suffix
, file
);
7904 /* Print the name of register X to FILE based on its machine mode and number.
7905 If CODE is 'w', pretend the mode is HImode.
7906 If CODE is 'b', pretend the mode is QImode.
7907 If CODE is 'k', pretend the mode is SImode.
7908 If CODE is 'q', pretend the mode is DImode.
7909 If CODE is 'h', pretend the reg is the 'high' byte register.
7910 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7913 print_reg (rtx x
, int code
, FILE *file
)
7915 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7916 && REGNO (x
) != FRAME_POINTER_REGNUM
7917 && REGNO (x
) != FLAGS_REG
7918 && REGNO (x
) != FPSR_REG
7919 && REGNO (x
) != FPCR_REG
);
7921 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7924 if (code
== 'w' || MMX_REG_P (x
))
7926 else if (code
== 'b')
7928 else if (code
== 'k')
7930 else if (code
== 'q')
7932 else if (code
== 'y')
7934 else if (code
== 'h')
7937 code
= GET_MODE_SIZE (GET_MODE (x
));
7939 /* Irritatingly, AMD extended registers use different naming convention
7940 from the normal registers. */
7941 if (REX_INT_REG_P (x
))
7943 gcc_assert (TARGET_64BIT
);
7947 error ("extended registers have no high halves");
7950 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7953 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7956 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7959 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7962 error ("unsupported operand size for extended register");
7970 if (STACK_TOP_P (x
))
7972 fputs ("st(0)", file
);
7979 if (! ANY_FP_REG_P (x
))
7980 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7985 fputs (hi_reg_name
[REGNO (x
)], file
);
7988 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7990 fputs (qi_reg_name
[REGNO (x
)], file
);
7993 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7995 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8002 /* Locate some local-dynamic symbol still in use by this function
8003 so that we can print its name in some tls_local_dynamic_base
8007 get_some_local_dynamic_name (void)
8011 if (cfun
->machine
->some_ld_name
)
8012 return cfun
->machine
->some_ld_name
;
8014 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8016 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8017 return cfun
->machine
->some_ld_name
;
8023 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8027 if (GET_CODE (x
) == SYMBOL_REF
8028 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8030 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8038 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8039 C -- print opcode suffix for set/cmov insn.
8040 c -- like C, but print reversed condition
8041 F,f -- likewise, but for floating-point.
8042 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8044 R -- print the prefix for register names.
8045 z -- print the opcode suffix for the size of the current operand.
8046 * -- print a star (in certain assembler syntax)
8047 A -- print an absolute memory reference.
8048 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8049 s -- print a shift double count, followed by the assemblers argument
8051 b -- print the QImode name of the register for the indicated operand.
8052 %b0 would print %al if operands[0] is reg 0.
8053 w -- likewise, print the HImode name of the register.
8054 k -- likewise, print the SImode name of the register.
8055 q -- likewise, print the DImode name of the register.
8056 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8057 y -- print "st(0)" instead of "st" as a register.
8058 D -- print condition for SSE cmp instruction.
8059 P -- if PIC, print an @PLT suffix.
8060 X -- don't print any sort of PIC '@' suffix for a symbol.
8061 & -- print some in-use local-dynamic symbol name.
8062 H -- print a memory address offset by 8; used for sse high-parts
8066 print_operand (FILE *file
, rtx x
, int code
)
8073 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8078 assemble_name (file
, get_some_local_dynamic_name ());
8082 switch (ASSEMBLER_DIALECT
)
8089 /* Intel syntax. For absolute addresses, registers should not
8090 be surrounded by braces. */
8094 PRINT_OPERAND (file
, x
, 0);
8104 PRINT_OPERAND (file
, x
, 0);
8109 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8114 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8119 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8124 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8129 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8134 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8139 /* 387 opcodes don't get size suffixes if the operands are
8141 if (STACK_REG_P (x
))
8144 /* Likewise if using Intel opcodes. */
8145 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8148 /* This is the size of op from size of operand. */
8149 switch (GET_MODE_SIZE (GET_MODE (x
)))
8156 #ifdef HAVE_GAS_FILDS_FISTS
8162 if (GET_MODE (x
) == SFmode
)
8177 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8179 #ifdef GAS_MNEMONICS
8205 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8207 PRINT_OPERAND (file
, x
, 0);
8213 /* Little bit of braindamage here. The SSE compare instructions
8214 does use completely different names for the comparisons that the
8215 fp conditional moves. */
8216 switch (GET_CODE (x
))
8231 fputs ("unord", file
);
8235 fputs ("neq", file
);
8239 fputs ("nlt", file
);
8243 fputs ("nle", file
);
8246 fputs ("ord", file
);
8253 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8254 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8256 switch (GET_MODE (x
))
8258 case HImode
: putc ('w', file
); break;
8260 case SFmode
: putc ('l', file
); break;
8262 case DFmode
: putc ('q', file
); break;
8263 default: gcc_unreachable ();
8270 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8273 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8274 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8277 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8280 /* Like above, but reverse condition */
8282 /* Check to see if argument to %c is really a constant
8283 and not a condition code which needs to be reversed. */
8284 if (!COMPARISON_P (x
))
8286 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8289 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8292 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8293 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8296 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8300 /* It doesn't actually matter what mode we use here, as we're
8301 only going to use this for printing. */
8302 x
= adjust_address_nv (x
, DImode
, 8);
8309 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8312 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8315 int pred_val
= INTVAL (XEXP (x
, 0));
8317 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8318 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8320 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8321 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8323 /* Emit hints only in the case default branch prediction
8324 heuristics would fail. */
8325 if (taken
!= cputaken
)
8327 /* We use 3e (DS) prefix for taken branches and
8328 2e (CS) prefix for not taken branches. */
8330 fputs ("ds ; ", file
);
8332 fputs ("cs ; ", file
);
8339 output_operand_lossage ("invalid operand code '%c'", code
);
8344 print_reg (x
, code
, file
);
8348 /* No `byte ptr' prefix for call instructions. */
8349 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8352 switch (GET_MODE_SIZE (GET_MODE (x
)))
8354 case 1: size
= "BYTE"; break;
8355 case 2: size
= "WORD"; break;
8356 case 4: size
= "DWORD"; break;
8357 case 8: size
= "QWORD"; break;
8358 case 12: size
= "XWORD"; break;
8359 case 16: size
= "XMMWORD"; break;
8364 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8367 else if (code
== 'w')
8369 else if (code
== 'k')
8373 fputs (" PTR ", file
);
8377 /* Avoid (%rip) for call operands. */
8378 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8379 && !CONST_INT_P (x
))
8380 output_addr_const (file
, x
);
8381 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8382 output_operand_lossage ("invalid constraints for operand");
8387 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8392 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8393 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8395 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8397 fprintf (file
, "0x%08lx", l
);
8400 /* These float cases don't actually occur as immediate operands. */
8401 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8405 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8406 fprintf (file
, "%s", dstr
);
8409 else if (GET_CODE (x
) == CONST_DOUBLE
8410 && GET_MODE (x
) == XFmode
)
8414 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8415 fprintf (file
, "%s", dstr
);
8420 /* We have patterns that allow zero sets of memory, for instance.
8421 In 64-bit mode, we should probably support all 8-byte vectors,
8422 since we can in fact encode that into an immediate. */
8423 if (GET_CODE (x
) == CONST_VECTOR
)
8425 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8431 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8433 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8436 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8437 || GET_CODE (x
) == LABEL_REF
)
8439 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8442 fputs ("OFFSET FLAT:", file
);
8445 if (CONST_INT_P (x
))
8446 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8448 output_pic_addr_const (file
, x
, code
);
8450 output_addr_const (file
, x
);
8454 /* Print a memory operand whose address is ADDR. */
8457 print_operand_address (FILE *file
, rtx addr
)
8459 struct ix86_address parts
;
8460 rtx base
, index
, disp
;
8462 int ok
= ix86_decompose_address (addr
, &parts
);
8467 index
= parts
.index
;
8469 scale
= parts
.scale
;
8477 if (USER_LABEL_PREFIX
[0] == 0)
8479 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8485 if (!base
&& !index
)
8487 /* Displacement only requires special attention. */
8489 if (CONST_INT_P (disp
))
8491 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8493 if (USER_LABEL_PREFIX
[0] == 0)
8495 fputs ("ds:", file
);
8497 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8500 output_pic_addr_const (file
, disp
, 0);
8502 output_addr_const (file
, disp
);
8504 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8507 if (GET_CODE (disp
) == CONST
8508 && GET_CODE (XEXP (disp
, 0)) == PLUS
8509 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8510 disp
= XEXP (XEXP (disp
, 0), 0);
8511 if (GET_CODE (disp
) == LABEL_REF
8512 || (GET_CODE (disp
) == SYMBOL_REF
8513 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8514 fputs ("(%rip)", file
);
8519 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8524 output_pic_addr_const (file
, disp
, 0);
8525 else if (GET_CODE (disp
) == LABEL_REF
)
8526 output_asm_label (disp
);
8528 output_addr_const (file
, disp
);
8533 print_reg (base
, 0, file
);
8537 print_reg (index
, 0, file
);
8539 fprintf (file
, ",%d", scale
);
8545 rtx offset
= NULL_RTX
;
8549 /* Pull out the offset of a symbol; print any symbol itself. */
8550 if (GET_CODE (disp
) == CONST
8551 && GET_CODE (XEXP (disp
, 0)) == PLUS
8552 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8554 offset
= XEXP (XEXP (disp
, 0), 1);
8555 disp
= gen_rtx_CONST (VOIDmode
,
8556 XEXP (XEXP (disp
, 0), 0));
8560 output_pic_addr_const (file
, disp
, 0);
8561 else if (GET_CODE (disp
) == LABEL_REF
)
8562 output_asm_label (disp
);
8563 else if (CONST_INT_P (disp
))
8566 output_addr_const (file
, disp
);
8572 print_reg (base
, 0, file
);
8575 if (INTVAL (offset
) >= 0)
8577 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8581 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8588 print_reg (index
, 0, file
);
8590 fprintf (file
, "*%d", scale
);
8598 output_addr_const_extra (FILE *file
, rtx x
)
8602 if (GET_CODE (x
) != UNSPEC
)
8605 op
= XVECEXP (x
, 0, 0);
8606 switch (XINT (x
, 1))
8608 case UNSPEC_GOTTPOFF
:
8609 output_addr_const (file
, op
);
8610 /* FIXME: This might be @TPOFF in Sun ld. */
8611 fputs ("@GOTTPOFF", file
);
8614 output_addr_const (file
, op
);
8615 fputs ("@TPOFF", file
);
8618 output_addr_const (file
, op
);
8620 fputs ("@TPOFF", file
);
8622 fputs ("@NTPOFF", file
);
8625 output_addr_const (file
, op
);
8626 fputs ("@DTPOFF", file
);
8628 case UNSPEC_GOTNTPOFF
:
8629 output_addr_const (file
, op
);
8631 fputs ("@GOTTPOFF(%rip)", file
);
8633 fputs ("@GOTNTPOFF", file
);
8635 case UNSPEC_INDNTPOFF
:
8636 output_addr_const (file
, op
);
8637 fputs ("@INDNTPOFF", file
);
8647 /* Split one or more DImode RTL references into pairs of SImode
8648 references. The RTL can be REG, offsettable MEM, integer constant, or
8649 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8650 split and "num" is its length. lo_half and hi_half are output arrays
8651 that parallel "operands". */
8654 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8658 rtx op
= operands
[num
];
8660 /* simplify_subreg refuse to split volatile memory addresses,
8661 but we still have to handle it. */
8664 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8665 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8669 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8670 GET_MODE (op
) == VOIDmode
8671 ? DImode
: GET_MODE (op
), 0);
8672 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8673 GET_MODE (op
) == VOIDmode
8674 ? DImode
: GET_MODE (op
), 4);
8678 /* Split one or more TImode RTL references into pairs of DImode
8679 references. The RTL can be REG, offsettable MEM, integer constant, or
8680 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8681 split and "num" is its length. lo_half and hi_half are output arrays
8682 that parallel "operands". */
8685 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8689 rtx op
= operands
[num
];
8691 /* simplify_subreg refuse to split volatile memory addresses, but we
8692 still have to handle it. */
8695 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8696 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8700 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8701 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8706 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8707 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8708 is the expression of the binary operation. The output may either be
8709 emitted here, or returned to the caller, like all output_* functions.
8711 There is no guarantee that the operands are the same mode, as they
8712 might be within FLOAT or FLOAT_EXTEND expressions. */
8714 #ifndef SYSV386_COMPAT
8715 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8716 wants to fix the assemblers because that causes incompatibility
8717 with gcc. No-one wants to fix gcc because that causes
8718 incompatibility with assemblers... You can use the option of
8719 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8720 #define SYSV386_COMPAT 1
8724 output_387_binary_op (rtx insn
, rtx
*operands
)
8726 static char buf
[30];
8729 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8731 #ifdef ENABLE_CHECKING
8732 /* Even if we do not want to check the inputs, this documents input
8733 constraints. Which helps in understanding the following code. */
8734 if (STACK_REG_P (operands
[0])
8735 && ((REG_P (operands
[1])
8736 && REGNO (operands
[0]) == REGNO (operands
[1])
8737 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8738 || (REG_P (operands
[2])
8739 && REGNO (operands
[0]) == REGNO (operands
[2])
8740 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8741 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8744 gcc_assert (is_sse
);
8747 switch (GET_CODE (operands
[3]))
8750 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8751 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8759 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8760 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8768 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8769 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8777 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8778 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8792 if (GET_MODE (operands
[0]) == SFmode
)
8793 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8795 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8800 switch (GET_CODE (operands
[3]))
8804 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8806 rtx temp
= operands
[2];
8807 operands
[2] = operands
[1];
8811 /* know operands[0] == operands[1]. */
8813 if (MEM_P (operands
[2]))
8819 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8821 if (STACK_TOP_P (operands
[0]))
8822 /* How is it that we are storing to a dead operand[2]?
8823 Well, presumably operands[1] is dead too. We can't
8824 store the result to st(0) as st(0) gets popped on this
8825 instruction. Instead store to operands[2] (which I
8826 think has to be st(1)). st(1) will be popped later.
8827 gcc <= 2.8.1 didn't have this check and generated
8828 assembly code that the Unixware assembler rejected. */
8829 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8831 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8835 if (STACK_TOP_P (operands
[0]))
8836 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8838 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8843 if (MEM_P (operands
[1]))
8849 if (MEM_P (operands
[2]))
8855 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8858 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8859 derived assemblers, confusingly reverse the direction of
8860 the operation for fsub{r} and fdiv{r} when the
8861 destination register is not st(0). The Intel assembler
8862 doesn't have this brain damage. Read !SYSV386_COMPAT to
8863 figure out what the hardware really does. */
8864 if (STACK_TOP_P (operands
[0]))
8865 p
= "{p\t%0, %2|rp\t%2, %0}";
8867 p
= "{rp\t%2, %0|p\t%0, %2}";
8869 if (STACK_TOP_P (operands
[0]))
8870 /* As above for fmul/fadd, we can't store to st(0). */
8871 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8873 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8878 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8881 if (STACK_TOP_P (operands
[0]))
8882 p
= "{rp\t%0, %1|p\t%1, %0}";
8884 p
= "{p\t%1, %0|rp\t%0, %1}";
8886 if (STACK_TOP_P (operands
[0]))
8887 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8889 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8894 if (STACK_TOP_P (operands
[0]))
8896 if (STACK_TOP_P (operands
[1]))
8897 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8899 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8902 else if (STACK_TOP_P (operands
[1]))
8905 p
= "{\t%1, %0|r\t%0, %1}";
8907 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8913 p
= "{r\t%2, %0|\t%0, %2}";
8915 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8928 /* Return needed mode for entity in optimize_mode_switching pass. */
8931 ix86_mode_needed (int entity
, rtx insn
)
8933 enum attr_i387_cw mode
;
8935 /* The mode UNINITIALIZED is used to store control word after a
8936 function call or ASM pattern. The mode ANY specify that function
8937 has no requirements on the control word and make no changes in the
8938 bits we are interested in. */
8941 || (NONJUMP_INSN_P (insn
)
8942 && (asm_noperands (PATTERN (insn
)) >= 0
8943 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8944 return I387_CW_UNINITIALIZED
;
8946 if (recog_memoized (insn
) < 0)
8949 mode
= get_attr_i387_cw (insn
);
8954 if (mode
== I387_CW_TRUNC
)
8959 if (mode
== I387_CW_FLOOR
)
8964 if (mode
== I387_CW_CEIL
)
8969 if (mode
== I387_CW_MASK_PM
)
8980 /* Output code to initialize control word copies used by trunc?f?i and
8981 rounding patterns. CURRENT_MODE is set to current control word,
8982 while NEW_MODE is set to new control word. */
8985 emit_i387_cw_initialization (int mode
)
8987 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8992 rtx reg
= gen_reg_rtx (HImode
);
8994 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8995 emit_move_insn (reg
, copy_rtx (stored_mode
));
8997 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9002 /* round toward zero (truncate) */
9003 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9004 slot
= SLOT_CW_TRUNC
;
9008 /* round down toward -oo */
9009 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9010 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9011 slot
= SLOT_CW_FLOOR
;
9015 /* round up toward +oo */
9016 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9017 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9018 slot
= SLOT_CW_CEIL
;
9021 case I387_CW_MASK_PM
:
9022 /* mask precision exception for nearbyint() */
9023 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9024 slot
= SLOT_CW_MASK_PM
;
9036 /* round toward zero (truncate) */
9037 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9038 slot
= SLOT_CW_TRUNC
;
9042 /* round down toward -oo */
9043 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9044 slot
= SLOT_CW_FLOOR
;
9048 /* round up toward +oo */
9049 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9050 slot
= SLOT_CW_CEIL
;
9053 case I387_CW_MASK_PM
:
9054 /* mask precision exception for nearbyint() */
9055 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9056 slot
= SLOT_CW_MASK_PM
;
9064 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9066 new_mode
= assign_386_stack_local (HImode
, slot
);
9067 emit_move_insn (new_mode
, reg
);
9070 /* Output code for INSN to convert a float to a signed int. OPERANDS
9071 are the insn operands. The output may be [HSD]Imode and the input
9072 operand may be [SDX]Fmode. */
9075 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9077 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9078 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9079 int round_mode
= get_attr_i387_cw (insn
);
9081 /* Jump through a hoop or two for DImode, since the hardware has no
9082 non-popping instruction. We used to do this a different way, but
9083 that was somewhat fragile and broke with post-reload splitters. */
9084 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9085 output_asm_insn ("fld\t%y1", operands
);
9087 gcc_assert (STACK_TOP_P (operands
[1]));
9088 gcc_assert (MEM_P (operands
[0]));
9091 output_asm_insn ("fisttp%z0\t%0", operands
);
9094 if (round_mode
!= I387_CW_ANY
)
9095 output_asm_insn ("fldcw\t%3", operands
);
9096 if (stack_top_dies
|| dimode_p
)
9097 output_asm_insn ("fistp%z0\t%0", operands
);
9099 output_asm_insn ("fist%z0\t%0", operands
);
9100 if (round_mode
!= I387_CW_ANY
)
9101 output_asm_insn ("fldcw\t%2", operands
);
9107 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9108 have the values zero or one, indicates the ffreep insn's operand
9109 from the OPERANDS array. */
9112 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9114 if (TARGET_USE_FFREEP
)
9115 #if HAVE_AS_IX86_FFREEP
9116 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9119 static char retval
[] = ".word\t0xc_df";
9120 int regno
= REGNO (operands
[opno
]);
9122 gcc_assert (FP_REGNO_P (regno
));
9124 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9129 return opno
? "fstp\t%y1" : "fstp\t%y0";
9133 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9134 should be used. UNORDERED_P is true when fucom should be used. */
9137 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9140 rtx cmp_op0
, cmp_op1
;
9141 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9145 cmp_op0
= operands
[0];
9146 cmp_op1
= operands
[1];
9150 cmp_op0
= operands
[1];
9151 cmp_op1
= operands
[2];
9156 if (GET_MODE (operands
[0]) == SFmode
)
9158 return "ucomiss\t{%1, %0|%0, %1}";
9160 return "comiss\t{%1, %0|%0, %1}";
9163 return "ucomisd\t{%1, %0|%0, %1}";
9165 return "comisd\t{%1, %0|%0, %1}";
9168 gcc_assert (STACK_TOP_P (cmp_op0
));
9170 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9172 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9176 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9177 return output_387_ffreep (operands
, 1);
9180 return "ftst\n\tfnstsw\t%0";
9183 if (STACK_REG_P (cmp_op1
)
9185 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9186 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9188 /* If both the top of the 387 stack dies, and the other operand
9189 is also a stack register that dies, then this must be a
9190 `fcompp' float compare */
9194 /* There is no double popping fcomi variant. Fortunately,
9195 eflags is immune from the fstp's cc clobbering. */
9197 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9199 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9200 return output_387_ffreep (operands
, 0);
9205 return "fucompp\n\tfnstsw\t%0";
9207 return "fcompp\n\tfnstsw\t%0";
9212 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9214 static const char * const alt
[16] =
9216 "fcom%z2\t%y2\n\tfnstsw\t%0",
9217 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9218 "fucom%z2\t%y2\n\tfnstsw\t%0",
9219 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9221 "ficom%z2\t%y2\n\tfnstsw\t%0",
9222 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9226 "fcomi\t{%y1, %0|%0, %y1}",
9227 "fcomip\t{%y1, %0|%0, %y1}",
9228 "fucomi\t{%y1, %0|%0, %y1}",
9229 "fucomip\t{%y1, %0|%0, %y1}",
9240 mask
= eflags_p
<< 3;
9241 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9242 mask
|= unordered_p
<< 1;
9243 mask
|= stack_top_dies
;
9245 gcc_assert (mask
< 16);
9254 ix86_output_addr_vec_elt (FILE *file
, int value
)
9256 const char *directive
= ASM_LONG
;
9260 directive
= ASM_QUAD
;
9262 gcc_assert (!TARGET_64BIT
);
9265 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9269 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9272 fprintf (file
, "%s%s%d-%s%d\n",
9273 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9274 else if (HAVE_AS_GOTOFF_IN_DATA
)
9275 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9277 else if (TARGET_MACHO
)
9279 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9280 machopic_output_function_base_name (file
);
9281 fprintf(file
, "\n");
9285 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9286 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9289 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9293 ix86_expand_clear (rtx dest
)
9297 /* We play register width games, which are only valid after reload. */
9298 gcc_assert (reload_completed
);
9300 /* Avoid HImode and its attendant prefix byte. */
9301 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9302 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9304 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9306 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9307 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9309 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9310 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9316 /* X is an unchanging MEM. If it is a constant pool reference, return
9317 the constant pool rtx, else NULL. */
9320 maybe_get_pool_constant (rtx x
)
9322 x
= ix86_delegitimize_address (XEXP (x
, 0));
9324 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9325 return get_pool_constant (x
);
9331 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9333 int strict
= (reload_in_progress
|| reload_completed
);
9335 enum tls_model model
;
9340 if (GET_CODE (op1
) == SYMBOL_REF
)
9342 model
= SYMBOL_REF_TLS_MODEL (op1
);
9345 op1
= legitimize_tls_address (op1
, model
, true);
9346 op1
= force_operand (op1
, op0
);
9351 else if (GET_CODE (op1
) == CONST
9352 && GET_CODE (XEXP (op1
, 0)) == PLUS
9353 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9355 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9358 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9359 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9360 op1
= force_operand (op1
, NULL
);
9361 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9362 op0
, 1, OPTAB_DIRECT
);
9368 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9370 if (TARGET_MACHO
&& !TARGET_64BIT
)
9375 rtx temp
= ((reload_in_progress
9376 || ((op0
&& REG_P (op0
))
9378 ? op0
: gen_reg_rtx (Pmode
));
9379 op1
= machopic_indirect_data_reference (op1
, temp
);
9380 op1
= machopic_legitimize_pic_address (op1
, mode
,
9381 temp
== op1
? 0 : temp
);
9383 else if (MACHOPIC_INDIRECT
)
9384 op1
= machopic_indirect_data_reference (op1
, 0);
9392 op1
= force_reg (Pmode
, op1
);
9394 op1
= legitimize_address (op1
, op1
, Pmode
);
9400 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9401 || !push_operand (op0
, mode
))
9403 op1
= force_reg (mode
, op1
);
9405 if (push_operand (op0
, mode
)
9406 && ! general_no_elim_operand (op1
, mode
))
9407 op1
= copy_to_mode_reg (mode
, op1
);
9409 /* Force large constants in 64bit compilation into register
9410 to get them CSEed. */
9411 if (TARGET_64BIT
&& mode
== DImode
9412 && immediate_operand (op1
, mode
)
9413 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9414 && !register_operand (op0
, mode
)
9415 && optimize
&& !reload_completed
&& !reload_in_progress
)
9416 op1
= copy_to_mode_reg (mode
, op1
);
9418 if (FLOAT_MODE_P (mode
))
9420 /* If we are loading a floating point constant to a register,
9421 force the value to memory now, since we'll get better code
9422 out the back end. */
9426 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9428 op1
= validize_mem (force_const_mem (mode
, op1
));
9429 if (!register_operand (op0
, mode
))
9431 rtx temp
= gen_reg_rtx (mode
);
9432 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9433 emit_move_insn (op0
, temp
);
9440 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9444 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9446 rtx op0
= operands
[0], op1
= operands
[1];
9448 /* Force constants other than zero into memory. We do not know how
9449 the instructions used to build constants modify the upper 64 bits
9450 of the register, once we have that information we may be able
9451 to handle some of them more efficiently. */
9452 if ((reload_in_progress
| reload_completed
) == 0
9453 && register_operand (op0
, mode
)
9455 && standard_sse_constant_p (op1
) <= 0)
9456 op1
= validize_mem (force_const_mem (mode
, op1
));
9458 /* Make operand1 a register if it isn't already. */
9460 && !register_operand (op0
, mode
)
9461 && !register_operand (op1
, mode
))
9463 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9467 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9470 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9471 straight to ix86_expand_vector_move. */
9474 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9483 /* If we're optimizing for size, movups is the smallest. */
9486 op0
= gen_lowpart (V4SFmode
, op0
);
9487 op1
= gen_lowpart (V4SFmode
, op1
);
9488 emit_insn (gen_sse_movups (op0
, op1
));
9492 /* ??? If we have typed data, then it would appear that using
9493 movdqu is the only way to get unaligned data loaded with
9495 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9497 op0
= gen_lowpart (V16QImode
, op0
);
9498 op1
= gen_lowpart (V16QImode
, op1
);
9499 emit_insn (gen_sse2_movdqu (op0
, op1
));
9503 if (TARGET_SSE2
&& mode
== V2DFmode
)
9507 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9509 op0
= gen_lowpart (V2DFmode
, op0
);
9510 op1
= gen_lowpart (V2DFmode
, op1
);
9511 emit_insn (gen_sse2_movupd (op0
, op1
));
9515 /* When SSE registers are split into halves, we can avoid
9516 writing to the top half twice. */
9517 if (TARGET_SSE_SPLIT_REGS
)
9519 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9524 /* ??? Not sure about the best option for the Intel chips.
9525 The following would seem to satisfy; the register is
9526 entirely cleared, breaking the dependency chain. We
9527 then store to the upper half, with a dependency depth
9528 of one. A rumor has it that Intel recommends two movsd
9529 followed by an unpacklpd, but this is unconfirmed. And
9530 given that the dependency depth of the unpacklpd would
9531 still be one, I'm not sure why this would be better. */
9532 zero
= CONST0_RTX (V2DFmode
);
9535 m
= adjust_address (op1
, DFmode
, 0);
9536 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9537 m
= adjust_address (op1
, DFmode
, 8);
9538 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9542 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9544 op0
= gen_lowpart (V4SFmode
, op0
);
9545 op1
= gen_lowpart (V4SFmode
, op1
);
9546 emit_insn (gen_sse_movups (op0
, op1
));
9550 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9551 emit_move_insn (op0
, CONST0_RTX (mode
));
9553 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9555 if (mode
!= V4SFmode
)
9556 op0
= gen_lowpart (V4SFmode
, op0
);
9557 m
= adjust_address (op1
, V2SFmode
, 0);
9558 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9559 m
= adjust_address (op1
, V2SFmode
, 8);
9560 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9563 else if (MEM_P (op0
))
9565 /* If we're optimizing for size, movups is the smallest. */
9568 op0
= gen_lowpart (V4SFmode
, op0
);
9569 op1
= gen_lowpart (V4SFmode
, op1
);
9570 emit_insn (gen_sse_movups (op0
, op1
));
9574 /* ??? Similar to above, only less clear because of quote
9575 typeless stores unquote. */
9576 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9577 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9579 op0
= gen_lowpart (V16QImode
, op0
);
9580 op1
= gen_lowpart (V16QImode
, op1
);
9581 emit_insn (gen_sse2_movdqu (op0
, op1
));
9585 if (TARGET_SSE2
&& mode
== V2DFmode
)
9587 m
= adjust_address (op0
, DFmode
, 0);
9588 emit_insn (gen_sse2_storelpd (m
, op1
));
9589 m
= adjust_address (op0
, DFmode
, 8);
9590 emit_insn (gen_sse2_storehpd (m
, op1
));
9594 if (mode
!= V4SFmode
)
9595 op1
= gen_lowpart (V4SFmode
, op1
);
9596 m
= adjust_address (op0
, V2SFmode
, 0);
9597 emit_insn (gen_sse_storelps (m
, op1
));
9598 m
= adjust_address (op0
, V2SFmode
, 8);
9599 emit_insn (gen_sse_storehps (m
, op1
));
9606 /* Expand a push in MODE. This is some mode for which we do not support
9607 proper push instructions, at least from the registers that we expect
9608 the value to live in. */
9611 ix86_expand_push (enum machine_mode mode
, rtx x
)
9615 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9616 GEN_INT (-GET_MODE_SIZE (mode
)),
9617 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9618 if (tmp
!= stack_pointer_rtx
)
9619 emit_move_insn (stack_pointer_rtx
, tmp
);
9621 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9622 emit_move_insn (tmp
, x
);
9625 /* Helper function of ix86_fixup_binary_operands to canonicalize
9626 operand order. Returns true if the operands should be swapped. */
9629 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9632 rtx dst
= operands
[0];
9633 rtx src1
= operands
[1];
9634 rtx src2
= operands
[2];
9636 /* If the operation is not commutative, we can't do anything. */
9637 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9640 /* Highest priority is that src1 should match dst. */
9641 if (rtx_equal_p (dst
, src1
))
9643 if (rtx_equal_p (dst
, src2
))
9646 /* Next highest priority is that immediate constants come second. */
9647 if (immediate_operand (src2
, mode
))
9649 if (immediate_operand (src1
, mode
))
9652 /* Lowest priority is that memory references should come second. */
9662 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9663 destination to use for the operation. If different from the true
9664 destination in operands[0], a copy operation will be required. */
9667 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9670 rtx dst
= operands
[0];
9671 rtx src1
= operands
[1];
9672 rtx src2
= operands
[2];
9674 /* Canonicalize operand order. */
9675 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9682 /* Both source operands cannot be in memory. */
9683 if (MEM_P (src1
) && MEM_P (src2
))
9685 /* Optimization: Only read from memory once. */
9686 if (rtx_equal_p (src1
, src2
))
9688 src2
= force_reg (mode
, src2
);
9692 src2
= force_reg (mode
, src2
);
9695 /* If the destination is memory, and we do not have matching source
9696 operands, do things in registers. */
9697 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9698 dst
= gen_reg_rtx (mode
);
9700 /* Source 1 cannot be a constant. */
9701 if (CONSTANT_P (src1
))
9702 src1
= force_reg (mode
, src1
);
9704 /* Source 1 cannot be a non-matching memory. */
9705 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9706 src1
= force_reg (mode
, src1
);
9713 /* Similarly, but assume that the destination has already been
9717 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9718 enum machine_mode mode
, rtx operands
[])
9720 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9721 gcc_assert (dst
== operands
[0]);
9724 /* Attempt to expand a binary operator. Make the expansion closer to the
9725 actual machine, then just general_operand, which will allow 3 separate
9726 memory references (one output, two input) in a single insn. */
9729 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9732 rtx src1
, src2
, dst
, op
, clob
;
9734 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9738 /* Emit the instruction. */
9740 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9741 if (reload_in_progress
)
9743 /* Reload doesn't know about the flags register, and doesn't know that
9744 it doesn't want to clobber it. We can only do this with PLUS. */
9745 gcc_assert (code
== PLUS
);
9750 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9751 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9754 /* Fix up the destination if needed. */
9755 if (dst
!= operands
[0])
9756 emit_move_insn (operands
[0], dst
);
9759 /* Return TRUE or FALSE depending on whether the binary operator meets the
9760 appropriate constraints. */
9763 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9766 rtx dst
= operands
[0];
9767 rtx src1
= operands
[1];
9768 rtx src2
= operands
[2];
9770 /* Both source operands cannot be in memory. */
9771 if (MEM_P (src1
) && MEM_P (src2
))
9774 /* Canonicalize operand order for commutative operators. */
9775 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9782 /* If the destination is memory, we must have a matching source operand. */
9783 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9786 /* Source 1 cannot be a constant. */
9787 if (CONSTANT_P (src1
))
9790 /* Source 1 cannot be a non-matching memory. */
9791 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9797 /* Attempt to expand a unary operator. Make the expansion closer to the
9798 actual machine, then just general_operand, which will allow 2 separate
9799 memory references (one output, one input) in a single insn. */
9802 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9805 int matching_memory
;
9806 rtx src
, dst
, op
, clob
;
9811 /* If the destination is memory, and we do not have matching source
9812 operands, do things in registers. */
9813 matching_memory
= 0;
9816 if (rtx_equal_p (dst
, src
))
9817 matching_memory
= 1;
9819 dst
= gen_reg_rtx (mode
);
9822 /* When source operand is memory, destination must match. */
9823 if (MEM_P (src
) && !matching_memory
)
9824 src
= force_reg (mode
, src
);
9826 /* Emit the instruction. */
9828 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9829 if (reload_in_progress
|| code
== NOT
)
9831 /* Reload doesn't know about the flags register, and doesn't know that
9832 it doesn't want to clobber it. */
9833 gcc_assert (code
== NOT
);
9838 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9839 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9842 /* Fix up the destination if needed. */
9843 if (dst
!= operands
[0])
9844 emit_move_insn (operands
[0], dst
);
9847 /* Return TRUE or FALSE depending on whether the unary operator meets the
9848 appropriate constraints. */
9851 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9852 enum machine_mode mode ATTRIBUTE_UNUSED
,
9853 rtx operands
[2] ATTRIBUTE_UNUSED
)
9855 /* If one of operands is memory, source and destination must match. */
9856 if ((MEM_P (operands
[0])
9857 || MEM_P (operands
[1]))
9858 && ! rtx_equal_p (operands
[0], operands
[1]))
9863 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9864 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9865 true, then replicate the mask for all elements of the vector register.
9866 If INVERT is true, then create a mask excluding the sign bit. */
9869 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9871 enum machine_mode vec_mode
;
9872 HOST_WIDE_INT hi
, lo
;
9877 /* Find the sign bit, sign extended to 2*HWI. */
9879 lo
= 0x80000000, hi
= lo
< 0;
9880 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9881 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9883 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9888 /* Force this value into the low part of a fp vector constant. */
9889 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9890 mask
= gen_lowpart (mode
, mask
);
9895 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9897 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9898 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9899 vec_mode
= V4SFmode
;
9904 v
= gen_rtvec (2, mask
, mask
);
9906 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9907 vec_mode
= V2DFmode
;
9910 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
9913 /* Generate code for floating point ABS or NEG. */
9916 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9919 rtx mask
, set
, use
, clob
, dst
, src
;
9920 bool matching_memory
;
9921 bool use_sse
= false;
9922 bool vector_mode
= VECTOR_MODE_P (mode
);
9923 enum machine_mode elt_mode
= mode
;
9927 elt_mode
= GET_MODE_INNER (mode
);
9930 else if (TARGET_SSE_MATH
)
9931 use_sse
= SSE_FLOAT_MODE_P (mode
);
9933 /* NEG and ABS performed with SSE use bitwise mask operations.
9934 Create the appropriate mask now. */
9936 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9943 /* If the destination is memory, and we don't have matching source
9944 operands or we're using the x87, do things in registers. */
9945 matching_memory
= false;
9948 if (use_sse
&& rtx_equal_p (dst
, src
))
9949 matching_memory
= true;
9951 dst
= gen_reg_rtx (mode
);
9953 if (MEM_P (src
) && !matching_memory
)
9954 src
= force_reg (mode
, src
);
9958 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9959 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9964 set
= gen_rtx_fmt_e (code
, mode
, src
);
9965 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9968 use
= gen_rtx_USE (VOIDmode
, mask
);
9969 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9970 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9971 gen_rtvec (3, set
, use
, clob
)));
9977 if (dst
!= operands
[0])
9978 emit_move_insn (operands
[0], dst
);
9981 /* Expand a copysign operation. Special case operand 0 being a constant. */
9984 ix86_expand_copysign (rtx operands
[])
9986 enum machine_mode mode
, vmode
;
9987 rtx dest
, op0
, op1
, mask
, nmask
;
9993 mode
= GET_MODE (dest
);
9994 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9996 if (GET_CODE (op0
) == CONST_DOUBLE
)
10000 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10001 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10003 if (op0
== CONST0_RTX (mode
))
10004 op0
= CONST0_RTX (vmode
);
10007 if (mode
== SFmode
)
10008 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10009 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10011 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10012 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10015 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10017 if (mode
== SFmode
)
10018 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10020 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10024 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10025 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10027 if (mode
== SFmode
)
10028 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10030 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10034 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10035 be a constant, and so has already been expanded into a vector constant. */
10038 ix86_split_copysign_const (rtx operands
[])
10040 enum machine_mode mode
, vmode
;
10041 rtx dest
, op0
, op1
, mask
, x
;
10043 dest
= operands
[0];
10046 mask
= operands
[3];
10048 mode
= GET_MODE (dest
);
10049 vmode
= GET_MODE (mask
);
10051 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10052 x
= gen_rtx_AND (vmode
, dest
, mask
);
10053 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10055 if (op0
!= CONST0_RTX (vmode
))
10057 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10058 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10062 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10063 so we have to do two masks. */
10066 ix86_split_copysign_var (rtx operands
[])
10068 enum machine_mode mode
, vmode
;
10069 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10071 dest
= operands
[0];
10072 scratch
= operands
[1];
10075 nmask
= operands
[4];
10076 mask
= operands
[5];
10078 mode
= GET_MODE (dest
);
10079 vmode
= GET_MODE (mask
);
10081 if (rtx_equal_p (op0
, op1
))
10083 /* Shouldn't happen often (it's useless, obviously), but when it does
10084 we'd generate incorrect code if we continue below. */
10085 emit_move_insn (dest
, op0
);
10089 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10091 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10093 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10094 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10097 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10098 x
= gen_rtx_NOT (vmode
, dest
);
10099 x
= gen_rtx_AND (vmode
, x
, op0
);
10100 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10104 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10106 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10108 else /* alternative 2,4 */
10110 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10111 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10112 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10114 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10116 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10118 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10119 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10121 else /* alternative 3,4 */
10123 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10125 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10126 x
= gen_rtx_AND (vmode
, dest
, op0
);
10128 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10131 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10132 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10135 /* Return TRUE or FALSE depending on whether the first SET in INSN
10136 has source and destination with matching CC modes, and that the
10137 CC mode is at least as constrained as REQ_MODE. */
10140 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10143 enum machine_mode set_mode
;
10145 set
= PATTERN (insn
);
10146 if (GET_CODE (set
) == PARALLEL
)
10147 set
= XVECEXP (set
, 0, 0);
10148 gcc_assert (GET_CODE (set
) == SET
);
10149 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10151 set_mode
= GET_MODE (SET_DEST (set
));
10155 if (req_mode
!= CCNOmode
10156 && (req_mode
!= CCmode
10157 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10161 if (req_mode
== CCGCmode
)
10165 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10169 if (req_mode
== CCZmode
)
10176 gcc_unreachable ();
10179 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10182 /* Generate insn patterns to do an integer compare of OPERANDS. */
10185 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10187 enum machine_mode cmpmode
;
10190 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10191 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10193 /* This is very simple, but making the interface the same as in the
10194 FP case makes the rest of the code easier. */
10195 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10196 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10198 /* Return the test that should be put into the flags user, i.e.
10199 the bcc, scc, or cmov instruction. */
10200 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10203 /* Figure out whether to use ordered or unordered fp comparisons.
10204 Return the appropriate mode to use. */
10207 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10209 /* ??? In order to make all comparisons reversible, we do all comparisons
10210 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10211 all forms trapping and nontrapping comparisons, we can make inequality
10212 comparisons trapping again, since it results in better code when using
10213 FCOM based compares. */
10214 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10218 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10220 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10221 return ix86_fp_compare_mode (code
);
10224 /* Only zero flag is needed. */
10225 case EQ
: /* ZF=0 */
10226 case NE
: /* ZF!=0 */
10228 /* Codes needing carry flag. */
10229 case GEU
: /* CF=0 */
10230 case GTU
: /* CF=0 & ZF=0 */
10231 case LTU
: /* CF=1 */
10232 case LEU
: /* CF=1 | ZF=1 */
10234 /* Codes possibly doable only with sign flag when
10235 comparing against zero. */
10236 case GE
: /* SF=OF or SF=0 */
10237 case LT
: /* SF<>OF or SF=1 */
10238 if (op1
== const0_rtx
)
10241 /* For other cases Carry flag is not required. */
10243 /* Codes doable only with sign flag when comparing
10244 against zero, but we miss jump instruction for it
10245 so we need to use relational tests against overflow
10246 that thus needs to be zero. */
10247 case GT
: /* ZF=0 & SF=OF */
10248 case LE
: /* ZF=1 | SF<>OF */
10249 if (op1
== const0_rtx
)
10253 /* strcmp pattern do (use flags) and combine may ask us for proper
10258 gcc_unreachable ();
10262 /* Return the fixed registers used for condition codes. */
10265 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10272 /* If two condition code modes are compatible, return a condition code
10273 mode which is compatible with both. Otherwise, return
10276 static enum machine_mode
10277 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10282 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10285 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10286 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10292 gcc_unreachable ();
10314 /* These are only compatible with themselves, which we already
10320 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10323 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10325 enum rtx_code swapped_code
= swap_condition (code
);
10326 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10327 || (ix86_fp_comparison_cost (swapped_code
)
10328 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10331 /* Swap, force into registers, or otherwise massage the two operands
10332 to a fp comparison. The operands are updated in place; the new
10333 comparison code is returned. */
10335 static enum rtx_code
10336 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10338 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10339 rtx op0
= *pop0
, op1
= *pop1
;
10340 enum machine_mode op_mode
= GET_MODE (op0
);
10341 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10343 /* All of the unordered compare instructions only work on registers.
10344 The same is true of the fcomi compare instructions. The XFmode
10345 compare instructions require registers except when comparing
10346 against zero or when converting operand 1 from fixed point to
10350 && (fpcmp_mode
== CCFPUmode
10351 || (op_mode
== XFmode
10352 && ! (standard_80387_constant_p (op0
) == 1
10353 || standard_80387_constant_p (op1
) == 1)
10354 && GET_CODE (op1
) != FLOAT
)
10355 || ix86_use_fcomi_compare (code
)))
10357 op0
= force_reg (op_mode
, op0
);
10358 op1
= force_reg (op_mode
, op1
);
10362 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10363 things around if they appear profitable, otherwise force op0
10364 into a register. */
10366 if (standard_80387_constant_p (op0
) == 0
10368 && ! (standard_80387_constant_p (op1
) == 0
10372 tmp
= op0
, op0
= op1
, op1
= tmp
;
10373 code
= swap_condition (code
);
10377 op0
= force_reg (op_mode
, op0
);
10379 if (CONSTANT_P (op1
))
10381 int tmp
= standard_80387_constant_p (op1
);
10383 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10387 op1
= force_reg (op_mode
, op1
);
10390 op1
= force_reg (op_mode
, op1
);
10394 /* Try to rearrange the comparison to make it cheaper. */
10395 if (ix86_fp_comparison_cost (code
)
10396 > ix86_fp_comparison_cost (swap_condition (code
))
10397 && (REG_P (op1
) || !no_new_pseudos
))
10400 tmp
= op0
, op0
= op1
, op1
= tmp
;
10401 code
= swap_condition (code
);
10403 op0
= force_reg (op_mode
, op0
);
10411 /* Convert comparison codes we use to represent FP comparison to integer
10412 code that will result in proper branch. Return UNKNOWN if no such code
10416 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10445 /* Split comparison code CODE into comparisons we can do using branch
10446 instructions. BYPASS_CODE is comparison code for branch that will
10447 branch around FIRST_CODE and SECOND_CODE. If some of branches
10448 is not required, set value to UNKNOWN.
10449 We never require more than two branches. */
10452 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10453 enum rtx_code
*first_code
,
10454 enum rtx_code
*second_code
)
10456 *first_code
= code
;
10457 *bypass_code
= UNKNOWN
;
10458 *second_code
= UNKNOWN
;
10460 /* The fcomi comparison sets flags as follows:
10470 case GT
: /* GTU - CF=0 & ZF=0 */
10471 case GE
: /* GEU - CF=0 */
10472 case ORDERED
: /* PF=0 */
10473 case UNORDERED
: /* PF=1 */
10474 case UNEQ
: /* EQ - ZF=1 */
10475 case UNLT
: /* LTU - CF=1 */
10476 case UNLE
: /* LEU - CF=1 | ZF=1 */
10477 case LTGT
: /* EQ - ZF=0 */
10479 case LT
: /* LTU - CF=1 - fails on unordered */
10480 *first_code
= UNLT
;
10481 *bypass_code
= UNORDERED
;
10483 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10484 *first_code
= UNLE
;
10485 *bypass_code
= UNORDERED
;
10487 case EQ
: /* EQ - ZF=1 - fails on unordered */
10488 *first_code
= UNEQ
;
10489 *bypass_code
= UNORDERED
;
10491 case NE
: /* NE - ZF=0 - fails on unordered */
10492 *first_code
= LTGT
;
10493 *second_code
= UNORDERED
;
10495 case UNGE
: /* GEU - CF=0 - fails on unordered */
10497 *second_code
= UNORDERED
;
10499 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10501 *second_code
= UNORDERED
;
10504 gcc_unreachable ();
10506 if (!TARGET_IEEE_FP
)
10508 *second_code
= UNKNOWN
;
10509 *bypass_code
= UNKNOWN
;
10513 /* Return cost of comparison done fcom + arithmetics operations on AX.
10514 All following functions do use number of instructions as a cost metrics.
10515 In future this should be tweaked to compute bytes for optimize_size and
10516 take into account performance of various instructions on various CPUs. */
10518 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10520 if (!TARGET_IEEE_FP
)
10522 /* The cost of code output by ix86_expand_fp_compare. */
10546 gcc_unreachable ();
10550 /* Return cost of comparison done using fcomi operation.
10551 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10553 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10555 enum rtx_code bypass_code
, first_code
, second_code
;
10556 /* Return arbitrarily high cost when instruction is not supported - this
10557 prevents gcc from using it. */
10560 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10561 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10564 /* Return cost of comparison done using sahf operation.
10565 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10567 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10569 enum rtx_code bypass_code
, first_code
, second_code
;
10570 /* Return arbitrarily high cost when instruction is not preferred - this
10571 avoids gcc from using it. */
10572 if (!TARGET_USE_SAHF
&& !optimize_size
)
10574 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10575 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10578 /* Compute cost of the comparison done using any method.
10579 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10581 ix86_fp_comparison_cost (enum rtx_code code
)
10583 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10586 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10587 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10589 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10590 if (min
> sahf_cost
)
10592 if (min
> fcomi_cost
)
10597 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10600 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10601 rtx
*second_test
, rtx
*bypass_test
)
10603 enum machine_mode fpcmp_mode
, intcmp_mode
;
10605 int cost
= ix86_fp_comparison_cost (code
);
10606 enum rtx_code bypass_code
, first_code
, second_code
;
10608 fpcmp_mode
= ix86_fp_compare_mode (code
);
10609 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10612 *second_test
= NULL_RTX
;
10614 *bypass_test
= NULL_RTX
;
10616 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10618 /* Do fcomi/sahf based test when profitable. */
10619 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10620 && (second_code
== UNKNOWN
|| second_test
)
10621 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10625 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10626 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10632 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10633 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10635 scratch
= gen_reg_rtx (HImode
);
10636 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10637 emit_insn (gen_x86_sahf_1 (scratch
));
10640 /* The FP codes work out to act like unsigned. */
10641 intcmp_mode
= fpcmp_mode
;
10643 if (bypass_code
!= UNKNOWN
)
10644 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10645 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10647 if (second_code
!= UNKNOWN
)
10648 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10649 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10654 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10655 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10656 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10658 scratch
= gen_reg_rtx (HImode
);
10659 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10661 /* In the unordered case, we have to check C2 for NaN's, which
10662 doesn't happen to work out to anything nice combination-wise.
10663 So do some bit twiddling on the value we've got in AH to come
10664 up with an appropriate set of condition codes. */
10666 intcmp_mode
= CCNOmode
;
10671 if (code
== GT
|| !TARGET_IEEE_FP
)
10673 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10678 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10679 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10680 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10681 intcmp_mode
= CCmode
;
10687 if (code
== LT
&& TARGET_IEEE_FP
)
10689 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10690 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10691 intcmp_mode
= CCmode
;
10696 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10702 if (code
== GE
|| !TARGET_IEEE_FP
)
10704 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10709 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10710 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10717 if (code
== LE
&& TARGET_IEEE_FP
)
10719 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10720 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10721 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10722 intcmp_mode
= CCmode
;
10727 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10733 if (code
== EQ
&& TARGET_IEEE_FP
)
10735 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10736 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10737 intcmp_mode
= CCmode
;
10742 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10749 if (code
== NE
&& TARGET_IEEE_FP
)
10751 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10752 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10758 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10764 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10768 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10773 gcc_unreachable ();
10777 /* Return the test that should be put into the flags user, i.e.
10778 the bcc, scc, or cmov instruction. */
10779 return gen_rtx_fmt_ee (code
, VOIDmode
,
10780 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
/* Build the comparison rtx for CODE over the global ix86_compare_op0/op1
   (or over an already-emitted flags result in ix86_compare_emitted), for
   use by a bcc/scc/cmov consumer.  SECOND_TEST/BYPASS_TEST receive any
   extra tests required by the FP path.
   NOTE(review): this region is extraction-damaged -- logical lines are
   split across fragments and some original lines are missing (gaps in
   the embedded numbering).  Do not treat this text as compilable.  */
10785 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10788 op0
= ix86_compare_op0
;
10789 op1
= ix86_compare_op1
;
/* Clear caller-provided test outputs (presumably guarded by null checks
   on the missing lines -- TODO confirm against upstream i386.c).  */
10792 *second_test
= NULL_RTX
;
10794 *bypass_test
= NULL_RTX
;
/* A compare insn was already emitted: wrap its flags result directly
   and consume (clear) ix86_compare_emitted.  */
10796 if (ix86_compare_emitted
)
10798 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10799 ix86_compare_emitted
= NULL_RTX
;
/* Scalar FP compares go through the FP path; everything else integer.  */
10801 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10802 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10803 second_test
, bypass_test
);
10805 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10810 /* Return true if the CODE will result in nontrivial jump sequence. */
/* NOTE(review): extraction gaps -- the return type, braces and some
   original lines are missing from this fragment.  */
10812 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10814 enum rtx_code bypass_code
, first_code
, second_code
;
/* The jump is nontrivial when the FP comparison decomposition needs a
   bypass test or a second test in addition to the first.  */
10817 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10818 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
/* Emit RTL for a conditional branch to LABEL based on comparison CODE of
   the global ix86_compare_op0/op1.  Dispatches on the operand mode:
   already-emitted compares and word-size integer modes use a simple
   IF_THEN_ELSE jump; FP modes may be split via ix86_split_fp_branch or
   wrapped in a compound insn; DImode (TImode on 64-bit) compares are
   split into word-sized compare+branch sequences.
   NOTE(review): extraction-damaged -- many original lines (braces,
   switch labels, else-arms) are missing between fragments.  */
10822 ix86_expand_branch (enum rtx_code code
, rtx label
)
10826 /* If we have emitted a compare insn, go straight to simple.
10827 ix86_expand_compare won't emit anything if ix86_compare_emitted
10829 if (ix86_compare_emitted
)
10832 switch (GET_MODE (ix86_compare_op0
))
/* Simple case: build (set pc (if_then_else cond (label_ref) pc)).  */
10838 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10839 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10840 gen_rtx_LABEL_REF (VOIDmode
, label
),
10842 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
/* Floating-point compare path.  */
10851 enum rtx_code bypass_code
, first_code
, second_code
;
10853 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10854 &ix86_compare_op1
);
10856 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10858 /* Check whether we will use the natural sequence with one jump. If
10859 so, we can expand jump early. Otherwise delay expansion by
10860 creating compound insn to not confuse optimizers. */
10861 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10864 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10865 gen_rtx_LABEL_REF (VOIDmode
, label
),
10866 pc_rtx
, NULL_RTX
, NULL_RTX
);
/* Otherwise wrap the whole branch in a PARALLEL with clobbers of the
   FP flag registers (17, 18) and a scratch, to be split later.  */
10870 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10871 ix86_compare_op0
, ix86_compare_op1
);
10872 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10873 gen_rtx_LABEL_REF (VOIDmode
, label
),
10875 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10877 use_fcomi
= ix86_use_fcomi_compare (code
);
10878 vec
= rtvec_alloc (3 + !use_fcomi
);
10879 RTVEC_ELT (vec
, 0) = tmp
;
10881 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10883 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10886 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10888 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10897 /* Expand DImode branch into multiple compare+branch. */
10899 rtx lo
[2], hi
[2], label2
;
10900 enum rtx_code code1
, code2
, code3
;
10901 enum machine_mode submode
;
/* Canonicalize: constant operand goes second.  */
10903 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10905 tmp
= ix86_compare_op0
;
10906 ix86_compare_op0
= ix86_compare_op1
;
10907 ix86_compare_op1
= tmp
;
10908 code
= swap_condition (code
);
/* Split each double-word operand into low/high word halves.  */
10910 if (GET_MODE (ix86_compare_op0
) == DImode
)
10912 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10913 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10918 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10919 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10923 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10924 avoid two branches. This costs one extra insn, so disable when
10925 optimizing for size. */
10927 if ((code
== EQ
|| code
== NE
)
10929 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10934 if (hi
[1] != const0_rtx
)
10935 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10936 NULL_RTX
, 0, OPTAB_WIDEN
);
10939 if (lo
[1] != const0_rtx
)
10940 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10941 NULL_RTX
, 0, OPTAB_WIDEN
);
10943 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10944 NULL_RTX
, 0, OPTAB_WIDEN
);
/* Recurse: compare the OR-ed XOR result against zero.  */
10946 ix86_compare_op0
= tmp
;
10947 ix86_compare_op1
= const0_rtx
;
10948 ix86_expand_branch (code
, label
);
10952 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10953 op1 is a constant and the low word is zero, then we can just
10954 examine the high word. */
10956 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
10959 case LT
: case LTU
: case GE
: case GEU
:
10960 ix86_compare_op0
= hi
[0];
10961 ix86_compare_op1
= hi
[1];
10962 ix86_expand_branch (code
, label
);
10968 /* Otherwise, we need two or three jumps. */
10970 label2
= gen_label_rtx ();
10973 code2
= swap_condition (code
);
10974 code3
= unsigned_condition (code
);
10978 case LT
: case GT
: case LTU
: case GTU
:
10981 case LE
: code1
= LT
; code2
= GT
; break;
10982 case GE
: code1
= GT
; code2
= LT
; break;
10983 case LEU
: code1
= LTU
; code2
= GTU
; break;
10984 case GEU
: code1
= GTU
; code2
= LTU
; break;
10986 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10987 case NE
: code2
= UNKNOWN
; break;
10990 gcc_unreachable ();
10995 * if (hi(a) < hi(b)) goto true;
10996 * if (hi(a) > hi(b)) goto false;
10997 * if (lo(a) < lo(b)) goto true;
11001 ix86_compare_op0
= hi
[0];
11002 ix86_compare_op1
= hi
[1];
11004 if (code1
!= UNKNOWN
)
11005 ix86_expand_branch (code1
, label
);
11006 if (code2
!= UNKNOWN
)
11007 ix86_expand_branch (code2
, label2
);
/* Finally decide on the low words (unsigned compare).  */
11009 ix86_compare_op0
= lo
[0];
11010 ix86_compare_op1
= lo
[1];
11011 ix86_expand_branch (code3
, label
);
11013 if (code2
!= UNKNOWN
)
11014 emit_label (label2
);
11019 gcc_unreachable ();
11023 /* Split branch based on floating point condition. */
/* Emits the FP compare for CODE over OP1/OP2, then one to three
   conditional jumps (main, optional BYPASS for unordered inputs,
   optional SECOND test), attaching REG_BR_PROB notes when a split
   probability is known.  TARGET1/TARGET2 are the taken/not-taken
   targets; PUSHED, if set, is an operand temporarily spilled to memory.
   NOTE(review): extraction-damaged -- several original lines (jump insn
   argument lists, note attachment) are missing between fragments.  */
11025 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11026 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11028 rtx second
, bypass
;
11029 rtx label
= NULL_RTX
;
11031 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
/* Canonicalize so TARGET2 (fallthrough) is pc_rtx, reversing CODE.  */
11034 if (target2
!= pc_rtx
)
11037 code
= reverse_condition_maybe_unordered (code
);
11042 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11043 tmp
, &second
, &bypass
);
11045 /* Remove pushed operand from stack. */
11047 ix86_free_from_memory (GET_MODE (pushed
));
11049 if (split_branch_probability
>= 0)
11051 /* Distribute the probabilities across the jumps.
11052 Assume the BYPASS and SECOND to be always test
11054 probability
= split_branch_probability
;
11056 /* Value of 1 is low enough to make no need for probability
11057 to be updated. Later we may run some experiments and see
11058 if unordered values are more frequent in practice. */
11060 bypass_probability
= 1;
11062 second_probability
= 1;
/* Bypass jump: skip the main test for unordered operands.  */
11064 if (bypass
!= NULL_RTX
)
11066 label
= gen_label_rtx ();
11067 i
= emit_jump_insn (gen_rtx_SET
11069 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11071 gen_rtx_LABEL_REF (VOIDmode
,
11074 if (bypass_probability
>= 0)
11076 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11077 GEN_INT (bypass_probability
),
/* Main conditional jump.  */
11080 i
= emit_jump_insn (gen_rtx_SET
11082 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11083 condition
, target1
, target2
)));
11084 if (probability
>= 0)
11086 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11087 GEN_INT (probability
),
/* Secondary test jump, when the FP condition needed two tests.  */
11089 if (second
!= NULL_RTX
)
11091 i
= emit_jump_insn (gen_rtx_SET
11093 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11095 if (second_probability
>= 0)
11097 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11098 GEN_INT (second_probability
),
11101 if (label
!= NULL_RTX
)
11102 emit_label (label
);
/* Expand a setcc of comparison CODE into QImode DEST.  Returns 1 on
   success, 0 (FAIL) for double-word integer compares, which take a
   different path.  When the FP compare needs a second/bypass test, the
   two QImode flags results are combined with AND/IOR.
   NOTE(review): extraction-damaged fragment; original lines missing
   between the embedded line numbers.  */
11106 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11108 rtx ret
, tmp
, tmpreg
, equiv
;
11109 rtx second_test
, bypass_test
;
/* Double-word compares are handled elsewhere.  */
11111 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11112 return 0; /* FAIL */
11114 gcc_assert (GET_MODE (dest
) == QImode
);
11116 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11117 PUT_MODE (ret
, QImode
);
11122 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
/* Combine the extra test (bypass => AND of reversed test,
   second => IOR) into the primary setcc result.  */
11123 if (bypass_test
|| second_test
)
11125 rtx test
= second_test
;
11127 rtx tmp2
= gen_reg_rtx (QImode
);
11130 gcc_assert (!second_test
);
11131 test
= bypass_test
;
11133 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11135 PUT_MODE (test
, QImode
);
11136 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11139 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11141 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11144 /* Attach a REG_EQUAL note describing the comparison result. */
11145 if (ix86_compare_op0
&& ix86_compare_op1
)
11147 equiv
= simplify_gen_relational (code
, QImode
,
11148 GET_MODE (ix86_compare_op0
),
11149 ix86_compare_op0
, ix86_compare_op1
);
11150 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11153 return 1; /* DONE */
11156 /* Expand comparison setting or clearing carry flag. Return true when
11157 successful and set pop for the operation. */
/* NOTE(review): extraction-damaged fragment -- braces, returns and some
   statements are missing between the embedded line numbers.  The logic
   rewrites CODE/OP1 so the compare resolves to LTU or GEU (carry).  */
11159 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11161 enum machine_mode mode
=
11162 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11164 /* Do not handle DImode compares that go through special path. Also we can't
11165 deal with FP compares yet. This is possible to add. */
11166 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11168 if (FLOAT_MODE_P (mode
))
11170 rtx second_test
= NULL
, bypass_test
= NULL
;
11171 rtx compare_op
, compare_seq
;
11173 /* Shortcut: following common codes never translate into carry flag compares. */
11174 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11175 || code
== ORDERED
|| code
== UNORDERED
)
11178 /* These comparisons require zero flag; swap operands so they won't. */
11179 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11180 && !TARGET_IEEE_FP
)
11185 code
= swap_condition (code
);
11188 /* Try to expand the comparison and verify that we end up with carry flag
11189 based comparison. This is fails to be true only when we decide to expand
11190 comparison using arithmetic that is not too common scenario. */
11192 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11193 &second_test
, &bypass_test
);
11194 compare_seq
= get_insns ();
/* Extra tests mean the result is not a plain carry compare: fail.  */
11197 if (second_test
|| bypass_test
)
11199 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11200 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11201 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11203 code
= GET_CODE (compare_op
);
11204 if (code
!= LTU
&& code
!= GEU
)
11206 emit_insn (compare_seq
);
11210 if (!INTEGRAL_MODE_P (mode
))
11218 /* Convert a==0 into (unsigned)a<1. */
11221 if (op1
!= const0_rtx
)
11224 code
= (code
== EQ
? LTU
: GEU
);
11227 /* Convert a>b into b<a or a>=b-1. */
11230 if (CONST_INT_P (op1
))
11232 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11233 /* Bail out on overflow. We still can swap operands but that
11234 would force loading of the constant into register. */
11235 if (op1
== const0_rtx
11236 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11238 code
= (code
== GTU
? GEU
: LTU
);
11245 code
= (code
== GTU
? LTU
: GEU
);
11249 /* Convert a>=0 into (unsigned)a<0x80000000. */
11252 if (mode
== DImode
|| op1
!= const0_rtx
)
11254 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11255 code
= (code
== LT
? GEU
: LTU
);
11259 if (mode
== DImode
|| op1
!= constm1_rtx
)
11261 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11262 code
= (code
== LE
? GEU
: LTU
);
11268 /* Swapping operands may cause constant to appear as first operand. */
11269 if (!nonimmediate_operand (op0
, VOIDmode
))
11271 if (no_new_pseudos
)
11273 op0
= force_reg (mode
, op0
);
/* Hand the rewritten operands to the generic compare expander and
   return the LTU/GEU comparison rtx through *POP.  */
11275 ix86_compare_op0
= op0
;
11276 ix86_compare_op1
= op1
;
11277 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11278 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
/* Expand an integer conditional move operands[0] = operands[1] ?
   operands[2] : operands[3].  Returns 1 (DONE) when RTL was emitted,
   0 (FAIL) when the pattern should fall back.  Tries, in order:
   sbb/setcc-based constant arithmetic for two constant arms, lea-based
   sequences, branch-free setcc+and+add sequences, recursion with one
   arm replaced by a constant, and finally a real cmov.
   NOTE(review): extraction-damaged fragment -- large runs of original
   lines are missing (see gaps in the embedded numbering); comments here
   describe only what the visible fragments show.  */
11283 ix86_expand_int_movcc (rtx operands
[])
11285 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11286 rtx compare_seq
, compare_op
;
11287 rtx second_test
, bypass_test
;
11288 enum machine_mode mode
= GET_MODE (operands
[0]);
11289 bool sign_bit_compare_p
= false;;
11292 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11293 compare_seq
= get_insns ();
11296 compare_code
= GET_CODE (compare_op
);
/* x < 0 / x >= 0 (or x > -1 / x <= -1) are sign-bit tests.  */
11298 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11299 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11300 sign_bit_compare_p
= true;
11302 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11303 HImode insns, we'd be swallowed in word prefix ops. */
11305 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11306 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11307 && CONST_INT_P (operands
[2])
11308 && CONST_INT_P (operands
[3]))
11310 rtx out
= operands
[0];
11311 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11312 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11313 HOST_WIDE_INT diff
;
11316 /* Sign bit compares are better done using shifts than we do by using
11318 if (sign_bit_compare_p
11319 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11320 ix86_compare_op1
, &compare_op
))
11322 /* Detect overlap between destination and compare sources. */
11325 if (!sign_bit_compare_p
)
11327 bool fpcmp
= false;
11329 compare_code
= GET_CODE (compare_op
);
11331 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11332 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11335 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11338 /* To simplify rest of code, restrict to the GEU case. */
11339 if (compare_code
== LTU
)
11341 HOST_WIDE_INT tmp
= ct
;
11344 compare_code
= reverse_condition (compare_code
);
11345 code
= reverse_condition (code
);
11350 PUT_CODE (compare_op
,
11351 reverse_condition_maybe_unordered
11352 (GET_CODE (compare_op
)));
11354 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11358 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11359 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11360 tmp
= gen_reg_rtx (mode
);
/* Materialize 0/-1 from the carry flag (sbb idiom).  */
11362 if (mode
== DImode
)
11363 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11365 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11369 if (code
== GT
|| code
== GE
)
11370 code
= reverse_condition (code
);
11373 HOST_WIDE_INT tmp
= ct
;
11378 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11379 ix86_compare_op1
, VOIDmode
, 0, -1);
/* Derive the result from the 0/-1 mask with add/or/not/and.  */
11392 tmp
= expand_simple_binop (mode
, PLUS
,
11394 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11405 tmp
= expand_simple_binop (mode
, IOR
,
11407 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11409 else if (diff
== -1 && ct
)
11419 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11421 tmp
= expand_simple_binop (mode
, PLUS
,
11422 copy_rtx (tmp
), GEN_INT (cf
),
11423 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11431 * andl cf - ct, dest
11441 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11444 tmp
= expand_simple_binop (mode
, AND
,
11446 gen_int_mode (cf
- ct
, mode
),
11447 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11449 tmp
= expand_simple_binop (mode
, PLUS
,
11450 copy_rtx (tmp
), GEN_INT (ct
),
11451 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11454 if (!rtx_equal_p (tmp
, out
))
11455 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11457 return 1; /* DONE */
/* Normalize so the constants are in a convenient order, reversing
   the comparison to match.  */
11463 tmp
= ct
, ct
= cf
, cf
= tmp
;
11465 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11467 /* We may be reversing unordered compare to normal compare, that
11468 is not valid in general (we may convert non-trapping condition
11469 to trapping one), however on i386 we currently emit all
11470 comparisons unordered. */
11471 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11472 code
= reverse_condition_maybe_unordered (code
);
11476 compare_code
= reverse_condition (compare_code
);
11477 code
= reverse_condition (code
);
11481 compare_code
= UNKNOWN
;
11482 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11483 && CONST_INT_P (ix86_compare_op1
))
11485 if (ix86_compare_op1
== const0_rtx
11486 && (code
== LT
|| code
== GE
))
11487 compare_code
= code
;
11488 else if (ix86_compare_op1
== constm1_rtx
)
11492 else if (code
== GT
)
11497 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11498 if (compare_code
!= UNKNOWN
11499 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11500 && (cf
== -1 || ct
== -1))
11502 /* If lea code below could be used, only optimize
11503 if it results in a 2 insn sequence. */
11505 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11506 || diff
== 3 || diff
== 5 || diff
== 9)
11507 || (compare_code
== LT
&& ct
== -1)
11508 || (compare_code
== GE
&& cf
== -1))
11511 * notl op1 (if necessary)
11519 code
= reverse_condition (code
);
11522 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11523 ix86_compare_op1
, VOIDmode
, 0, -1);
11525 out
= expand_simple_binop (mode
, IOR
,
11527 out
, 1, OPTAB_DIRECT
);
11528 if (out
!= operands
[0])
11529 emit_move_insn (operands
[0], out
);
11531 return 1; /* DONE */
/* lea path: scale/add the 0/1 setcc result when diff fits an
   address-mode multiplier (1,2,3,4,5,8,9).  */
11536 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11537 || diff
== 3 || diff
== 5 || diff
== 9)
11538 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11540 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11546 * lea cf(dest*(ct-cf)),dest
11550 * This also catches the degenerate setcc-only case.
11556 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11557 ix86_compare_op1
, VOIDmode
, 0, 1);
11560 /* On x86_64 the lea instruction operates on Pmode, so we need
11561 to get arithmetics done in proper mode to match. */
11563 tmp
= copy_rtx (out
);
11567 out1
= copy_rtx (out
);
11568 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11572 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11578 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11581 if (!rtx_equal_p (tmp
, out
))
11584 out
= force_operand (tmp
, copy_rtx (out
));
11586 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11588 if (!rtx_equal_p (out
, operands
[0]))
11589 emit_move_insn (operands
[0], copy_rtx (out
));
11591 return 1; /* DONE */
11595 * General case: Jumpful:
11596 * xorl dest,dest cmpl op1, op2
11597 * cmpl op1, op2 movl ct, dest
11598 * setcc dest jcc 1f
11599 * decl dest movl cf, dest
11600 * andl (cf-ct),dest 1:
11603 * Size 20. Size 14.
11605 * This is reasonably steep, but branch mispredict costs are
11606 * high on modern cpus, so consider failing only if optimizing
11610 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11611 && BRANCH_COST
>= 2)
11617 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11618 /* We may be reversing unordered compare to normal compare,
11619 that is not valid in general (we may convert non-trapping
11620 condition to trapping one), however on i386 we currently
11621 emit all comparisons unordered. */
11622 code
= reverse_condition_maybe_unordered (code
);
11625 code
= reverse_condition (code
);
11626 if (compare_code
!= UNKNOWN
)
11627 compare_code
= reverse_condition (compare_code
);
11631 if (compare_code
!= UNKNOWN
)
11633 /* notl op1 (if needed)
11638 For x < 0 (resp. x <= -1) there will be no notl,
11639 so if possible swap the constants to get rid of the
11641 True/false will be -1/0 while code below (store flag
11642 followed by decrement) is 0/-1, so the constants need
11643 to be exchanged once more. */
11645 if (compare_code
== GE
|| !cf
)
11647 code
= reverse_condition (code
);
11652 HOST_WIDE_INT tmp
= cf
;
11657 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11658 ix86_compare_op1
, VOIDmode
, 0, -1);
11662 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11663 ix86_compare_op1
, VOIDmode
, 0, 1);
/* (setcc - 1) & (cf - ct) + ct computes the select branch-free.  */
11665 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11666 copy_rtx (out
), 1, OPTAB_DIRECT
);
11669 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11670 gen_int_mode (cf
- ct
, mode
),
11671 copy_rtx (out
), 1, OPTAB_DIRECT
);
11673 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11674 copy_rtx (out
), 1, OPTAB_DIRECT
);
11675 if (!rtx_equal_p (out
, operands
[0]))
11676 emit_move_insn (operands
[0], copy_rtx (out
));
11678 return 1; /* DONE */
11682 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11684 /* Try a few things more with specific constants and a variable. */
11687 rtx var
, orig_out
, out
, tmp
;
11689 if (BRANCH_COST
<= 2)
11690 return 0; /* FAIL */
11692 /* If one of the two operands is an interesting constant, load a
11693 constant with the above and mask it in with a logical operation. */
11695 if (CONST_INT_P (operands
[2]))
11698 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11699 operands
[3] = constm1_rtx
, op
= and_optab
;
11700 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11701 operands
[3] = const0_rtx
, op
= ior_optab
;
11703 return 0; /* FAIL */
11705 else if (CONST_INT_P (operands
[3]))
11708 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11709 operands
[2] = constm1_rtx
, op
= and_optab
;
11710 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11711 operands
[2] = const0_rtx
, op
= ior_optab
;
11713 return 0; /* FAIL */
11716 return 0; /* FAIL */
11718 orig_out
= operands
[0];
11719 tmp
= gen_reg_rtx (mode
);
11722 /* Recurse to get the constant loaded. */
11723 if (ix86_expand_int_movcc (operands
) == 0)
11724 return 0; /* FAIL */
11726 /* Mask in the interesting variable. */
11727 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11729 if (!rtx_equal_p (out
, orig_out
))
11730 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11732 return 1; /* DONE */
11736 * For comparison with above,
/* cmov path: force both arms to acceptable operands, copying any arm
   that overlaps the destination into a fresh register.  */
11746 if (! nonimmediate_operand (operands
[2], mode
))
11747 operands
[2] = force_reg (mode
, operands
[2]);
11748 if (! nonimmediate_operand (operands
[3], mode
))
11749 operands
[3] = force_reg (mode
, operands
[3]);
11751 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11753 rtx tmp
= gen_reg_rtx (mode
);
11754 emit_move_insn (tmp
, operands
[3]);
11757 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11759 rtx tmp
= gen_reg_rtx (mode
);
11760 emit_move_insn (tmp
, operands
[2]);
11764 if (! register_operand (operands
[2], VOIDmode
)
11766 || ! register_operand (operands
[3], VOIDmode
)))
11767 operands
[2] = force_reg (mode
, operands
[2]);
11770 && ! register_operand (operands
[3], VOIDmode
))
11771 operands
[3] = force_reg (mode
, operands
[3]);
/* Emit the compare then up to three IF_THEN_ELSE sets (main compare,
   bypass test, second test).  */
11773 emit_insn (compare_seq
);
11774 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11775 gen_rtx_IF_THEN_ELSE (mode
,
11776 compare_op
, operands
[2],
11779 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11780 gen_rtx_IF_THEN_ELSE (mode
,
11782 copy_rtx (operands
[3]),
11783 copy_rtx (operands
[0]))));
11785 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11786 gen_rtx_IF_THEN_ELSE (mode
,
11788 copy_rtx (operands
[2]),
11789 copy_rtx (operands
[0]))));
11791 return 1; /* DONE */
11794 /* Swap, force into registers, or otherwise massage the two operands
11795 to an sse comparison with a mask result. Thus we differ a bit from
11796 ix86_prepare_fp_compare_args which expects to produce a flags result.
11798 The DEST operand exists to help determine whether to commute commutative
11799 operators. The POP0/POP1 operands are updated in place. The new
11800 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): extraction-damaged -- the switch statement and most of
   its case labels are missing from this fragment.  */
11802 static enum rtx_code
11803 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11804 rtx
*pop0
, rtx
*pop1
)
11812 /* We have no LTGT as an operator. We could implement it with
11813 NE & ORDERED, but this requires an extra temporary. It's
11814 not clear that it's worth it. */
11821 /* These are supported directly. */
11828 /* For commutative operators, try to canonicalize the destination
11829 operand to be first in the comparison - this helps reload to
11830 avoid extra moves. */
11831 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11839 /* These are not supported directly. Swap the comparison operands
11840 to transform into something that is supported. */
11844 code
= swap_condition (code
);
11848 gcc_unreachable ();
11854 /* Detect conditional moves that exactly match min/max operational
11855 semantics. Note that this is IEEE safe, as long as we don't
11856 interchange the operands.
11858 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11859 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): extraction-damaged fragment; braces, the is_min
   assignments and some returns are on missing lines.  */
11862 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
11863 rtx cmp_op1
, rtx if_true
, rtx if_false
)
11865 enum machine_mode mode
;
11871 else if (code
== UNGE
)
11874 if_true
= if_false
;
/* Match only when the compared values are exactly the selected values
   (in either order).  */
11880 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
11882 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
11887 mode
= GET_MODE (dest
);
11889 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11890 but MODE may be a vector mode and thus not appropriate. */
11891 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
/* Strict IEEE semantics: emit the operand-order-preserving
   UNSPEC_IEEE_MIN/MAX form.  */
11893 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
11896 if_true
= force_reg (mode
, if_true
);
11897 v
= gen_rtvec (2, if_true
, if_false
);
11898 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
/* Fast-math: a plain SMIN/SMAX is fine.  */
11902 code
= is_min
? SMIN
: SMAX
;
11903 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
11906 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
11910 /* Expand an sse vector comparison. Return the register with the result. */
/* NOTE(review): extraction-damaged fragment; some lines (return
   statement, part of the dest-overlap condition) are missing.  */
11913 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
11914 rtx op_true
, rtx op_false
)
11916 enum machine_mode mode
= GET_MODE (dest
);
/* Operand 0 must be a register; operand 1 register or memory.  */
11919 cmp_op0
= force_reg (mode
, cmp_op0
);
11920 if (!nonimmediate_operand (cmp_op1
, mode
))
11921 cmp_op1
= force_reg (mode
, cmp_op1
);
/* Use a fresh register when DEST overlaps an input of the following
   select, so the mask doesn't clobber a live value.  */
11924 || reg_overlap_mentioned_p (dest
, op_true
)
11925 || reg_overlap_mentioned_p (dest
, op_false
))
11926 dest
= gen_reg_rtx (mode
);
11928 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
11929 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11934 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11935 operations. This is used for both scalar and vector conditional moves. */
/* NOTE(review): extraction-damaged fragment; braces and some lines are
   missing between the embedded line numbers.  */
11938 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
11940 enum machine_mode mode
= GET_MODE (dest
);
/* Special case: false arm is all-zero -> dest = cmp & true.  */
11943 if (op_false
== CONST0_RTX (mode
))
11945 op_true
= force_reg (mode
, op_true
);
11946 x
= gen_rtx_AND (mode
, cmp
, op_true
);
11947 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* Special case: true arm is all-zero -> dest = ~cmp & false.  */
11949 else if (op_true
== CONST0_RTX (mode
))
11951 op_false
= force_reg (mode
, op_false
);
11952 x
= gen_rtx_NOT (mode
, cmp
);
11953 x
= gen_rtx_AND (mode
, x
, op_false
);
11954 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* General case: dest = (true & cmp) | (false & ~cmp).  */
11958 op_true
= force_reg (mode
, op_true
);
11959 op_false
= force_reg (mode
, op_false
);
11961 t2
= gen_reg_rtx (mode
);
11963 t3
= gen_reg_rtx (mode
);
11967 x
= gen_rtx_AND (mode
, op_true
, cmp
);
11968 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
11970 x
= gen_rtx_NOT (mode
, cmp
);
11971 x
= gen_rtx_AND (mode
, x
, op_false
);
11972 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
11974 x
= gen_rtx_IOR (mode
, t3
, t2
);
11975 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11979 /* Expand a floating-point conditional move. Return true if successful. */
/* Expands operands[0] = operands[1] ? operands[2] : operands[3] for FP
   modes.  SSE math uses mask-based compare/select (no cmove in SSE
   registers); otherwise falls back to x87 fcmov, routing non-fcmov-able
   comparisons through a QImode setcc first.
   NOTE(review): extraction-damaged -- the closing statement and several
   other lines are missing from this fragment.  */
11982 ix86_expand_fp_movcc (rtx operands
[])
11984 enum machine_mode mode
= GET_MODE (operands
[0]);
11985 enum rtx_code code
= GET_CODE (operands
[1]);
11986 rtx tmp
, compare_op
, second_test
, bypass_test
;
11988 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11990 enum machine_mode cmode
;
11992 /* Since we've no cmove for sse registers, don't force bad register
11993 allocation just to gain access to it. Deny movcc when the
11994 comparison mode doesn't match the move mode. */
11995 cmode
= GET_MODE (ix86_compare_op0
);
11996 if (cmode
== VOIDmode
)
11997 cmode
= GET_MODE (ix86_compare_op1
);
12001 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12003 &ix86_compare_op1
);
12004 if (code
== UNKNOWN
)
/* Try a direct min/max first, then a general mask + select.  */
12007 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12008 ix86_compare_op1
, operands
[2],
12012 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12013 ix86_compare_op1
, operands
[2], operands
[3]);
12014 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12018 /* The floating point conditional move instructions don't directly
12019 support conditions resulting from a signed integer comparison. */
12021 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12023 /* The floating point conditional move instructions don't directly
12024 support signed integer comparisons. */
12026 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
/* Convert to a setcc result, then compare that against zero.  */
12028 gcc_assert (!second_test
&& !bypass_test
);
12029 tmp
= gen_reg_rtx (QImode
);
12030 ix86_expand_setcc (code
, tmp
);
12032 ix86_compare_op0
= tmp
;
12033 ix86_compare_op1
= const0_rtx
;
12034 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
/* Copy an arm into a temporary when extra tests would read the
   destination after it has been overwritten.  */
12036 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12038 tmp
= gen_reg_rtx (mode
);
12039 emit_move_insn (tmp
, operands
[3]);
12042 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12044 tmp
= gen_reg_rtx (mode
);
12045 emit_move_insn (tmp
, operands
[2]);
12049 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12050 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12051 operands
[2], operands
[3])));
12053 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12054 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12055 operands
[3], operands
[0])));
12057 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12058 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12059 operands
[2], operands
12064 /* Expand a floating-point vector conditional move; a vcond operation
12065 rather than a movcc operation. */
/* NOTE(review): extraction-damaged fragment; braces/returns missing.  */
12068 ix86_expand_fp_vcond (rtx operands
[])
12070 enum rtx_code code
= GET_CODE (operands
[3]);
/* Massage operands[4]/[5] into an SSE mask-result comparison.  */
12073 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12074 &operands
[4], &operands
[5]);
12075 if (code
== UNKNOWN
)
/* Direct min/max when the select matches; otherwise mask + blend.  */
12078 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12079 operands
[5], operands
[1], operands
[2]))
12082 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12083 operands
[1], operands
[2]);
12084 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12088 /* Expand a signed integral vector conditional move. */
/* NOTE(review): extraction-damaged -- the canonicalization switch and
   several mode-specific branches are partially missing.  */
12091 ix86_expand_int_vcond (rtx operands
[])
12093 enum machine_mode mode
= GET_MODE (operands
[0]);
12094 enum rtx_code code
= GET_CODE (operands
[3]);
12095 bool negate
= false;
12098 cop0
= operands
[4];
12099 cop1
= operands
[5];
12101 /* Canonicalize the comparison to EQ, GT, GTU. */
12112 code
= reverse_condition (code
);
12118 code
= reverse_condition (code
);
12124 code
= swap_condition (code
);
12125 x
= cop0
, cop0
= cop1
, cop1
= x
;
12129 gcc_unreachable ();
12132 /* Unsigned parallel compare is not supported by the hardware. Play some
12133 tricks to turn this into a signed comparison against 0. */
12136 cop0
= force_reg (mode
, cop0
);
12144 /* Perform a parallel modulo subtraction. */
12145 t1
= gen_reg_rtx (mode
);
12146 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12148 /* Extract the original sign bit of op0. */
12149 mask
= GEN_INT (-0x80000000);
12150 mask
= gen_rtx_CONST_VECTOR (mode
,
12151 gen_rtvec (4, mask
, mask
, mask
, mask
));
12152 mask
= force_reg (mode
, mask
);
12153 t2
= gen_reg_rtx (mode
);
12154 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12156 /* XOR it back into the result of the subtraction. This results
12157 in the sign bit set iff we saw unsigned underflow. */
12158 x
= gen_reg_rtx (mode
);
12159 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12167 /* Perform a parallel unsigned saturating subtraction. */
12168 x
= gen_reg_rtx (mode
);
12169 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12170 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12177 gcc_unreachable ();
/* After the trick the (rewritten) operand compares against zero.  */
12181 cop1
= CONST0_RTX (mode
);
/* NEGATE swaps which arm the mask selects.  */
12184 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12185 operands
[1+negate
], operands
[2-negate
]);
12187 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12188 operands
[2-negate
]);
12192 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12193 true if we should do zero extension, else sign extension. HIGH_P is
12194 true if we want the N/2 high elements, else the low elements. */
12197 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12199 enum machine_mode imode
= GET_MODE (operands
[1]);
12200 rtx (*unpack
)(rtx
, rtx
, rtx
);
12207 unpack
= gen_vec_interleave_highv16qi
;
12209 unpack
= gen_vec_interleave_lowv16qi
;
12213 unpack
= gen_vec_interleave_highv8hi
;
12215 unpack
= gen_vec_interleave_lowv8hi
;
12219 unpack
= gen_vec_interleave_highv4si
;
12221 unpack
= gen_vec_interleave_lowv4si
;
12224 gcc_unreachable ();
12227 dest
= gen_lowpart (imode
, operands
[0]);
12230 se
= force_reg (imode
, CONST0_RTX (imode
));
12232 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12233 operands
[1], pc_rtx
, pc_rtx
);
12235 emit_insn (unpack (dest
, operands
[1], se
));
12238 /* Expand conditional increment or decrement using adb/sbb instructions.
12239 The default case using setcc followed by the conditional move can be
12240 done by generic code. */
12242 ix86_expand_int_addcc (rtx operands
[])
12244 enum rtx_code code
= GET_CODE (operands
[1]);
12246 rtx val
= const0_rtx
;
12247 bool fpcmp
= false;
12248 enum machine_mode mode
= GET_MODE (operands
[0]);
12250 if (operands
[3] != const1_rtx
12251 && operands
[3] != constm1_rtx
)
12253 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12254 ix86_compare_op1
, &compare_op
))
12256 code
= GET_CODE (compare_op
);
12258 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12259 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12262 code
= ix86_fp_compare_code_to_integer (code
);
12269 PUT_CODE (compare_op
,
12270 reverse_condition_maybe_unordered
12271 (GET_CODE (compare_op
)));
12273 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12275 PUT_MODE (compare_op
, mode
);
12277 /* Construct either adc or sbb insn. */
12278 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12280 switch (GET_MODE (operands
[0]))
12283 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12286 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12289 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12292 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12295 gcc_unreachable ();
12300 switch (GET_MODE (operands
[0]))
12303 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12306 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12309 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12312 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12315 gcc_unreachable ();
12318 return 1; /* DONE */
12322 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12323 works for floating pointer parameters and nonoffsetable memories.
12324 For pushes, it returns just stack offsets; the values will be saved
12325 in the right order. Maximally three parts are generated. */
12328 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12333 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12335 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12337 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12338 gcc_assert (size
>= 2 && size
<= 3);
12340 /* Optimize constant pool reference to immediates. This is used by fp
12341 moves, that force all constants to memory to allow combining. */
12342 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12344 rtx tmp
= maybe_get_pool_constant (operand
);
12349 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12351 /* The only non-offsetable memories we handle are pushes. */
12352 int ok
= push_operand (operand
, VOIDmode
);
12356 operand
= copy_rtx (operand
);
12357 PUT_MODE (operand
, Pmode
);
12358 parts
[0] = parts
[1] = parts
[2] = operand
;
12362 if (GET_CODE (operand
) == CONST_VECTOR
)
12364 enum machine_mode imode
= int_mode_for_mode (mode
);
12365 /* Caution: if we looked through a constant pool memory above,
12366 the operand may actually have a different mode now. That's
12367 ok, since we want to pun this all the way back to an integer. */
12368 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12369 gcc_assert (operand
!= NULL
);
12375 if (mode
== DImode
)
12376 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12379 if (REG_P (operand
))
12381 gcc_assert (reload_completed
);
12382 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12383 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12385 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12387 else if (offsettable_memref_p (operand
))
12389 operand
= adjust_address (operand
, SImode
, 0);
12390 parts
[0] = operand
;
12391 parts
[1] = adjust_address (operand
, SImode
, 4);
12393 parts
[2] = adjust_address (operand
, SImode
, 8);
12395 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12400 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12404 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12405 parts
[2] = gen_int_mode (l
[2], SImode
);
12408 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12411 gcc_unreachable ();
12413 parts
[1] = gen_int_mode (l
[1], SImode
);
12414 parts
[0] = gen_int_mode (l
[0], SImode
);
12417 gcc_unreachable ();
12422 if (mode
== TImode
)
12423 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12424 if (mode
== XFmode
|| mode
== TFmode
)
12426 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12427 if (REG_P (operand
))
12429 gcc_assert (reload_completed
);
12430 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12431 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12433 else if (offsettable_memref_p (operand
))
12435 operand
= adjust_address (operand
, DImode
, 0);
12436 parts
[0] = operand
;
12437 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12439 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12444 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12445 real_to_target (l
, &r
, mode
);
12447 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12448 if (HOST_BITS_PER_WIDE_INT
>= 64)
12451 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12452 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12455 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12457 if (upper_mode
== SImode
)
12458 parts
[1] = gen_int_mode (l
[2], SImode
);
12459 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12462 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12463 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12466 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12469 gcc_unreachable ();
12476 /* Emit insns to perform a move or push of DI, DF, and XF values.
12477 Return false when normal moves are needed; true when all required
12478 insns have been emitted. Operands 2-4 contain the input values
12479 int the correct order; operands 5-7 contain the output values. */
12482 ix86_split_long_move (rtx operands
[])
12487 int collisions
= 0;
12488 enum machine_mode mode
= GET_MODE (operands
[0]);
12490 /* The DFmode expanders may ask us to move double.
12491 For 64bit target this is single move. By hiding the fact
12492 here we simplify i386.md splitters. */
12493 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12495 /* Optimize constant pool reference to immediates. This is used by
12496 fp moves, that force all constants to memory to allow combining. */
12498 if (MEM_P (operands
[1])
12499 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12500 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12501 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12502 if (push_operand (operands
[0], VOIDmode
))
12504 operands
[0] = copy_rtx (operands
[0]);
12505 PUT_MODE (operands
[0], Pmode
);
12508 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12509 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12510 emit_move_insn (operands
[0], operands
[1]);
12514 /* The only non-offsettable memory we handle is push. */
12515 if (push_operand (operands
[0], VOIDmode
))
12518 gcc_assert (!MEM_P (operands
[0])
12519 || offsettable_memref_p (operands
[0]));
12521 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12522 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12524 /* When emitting push, take care for source operands on the stack. */
12525 if (push
&& MEM_P (operands
[1])
12526 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12529 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12530 XEXP (part
[1][2], 0));
12531 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12532 XEXP (part
[1][1], 0));
12535 /* We need to do copy in the right order in case an address register
12536 of the source overlaps the destination. */
12537 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12539 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12541 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12544 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12547 /* Collision in the middle part can be handled by reordering. */
12548 if (collisions
== 1 && nparts
== 3
12549 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12552 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12553 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12556 /* If there are more collisions, we can't handle it by reordering.
12557 Do an lea to the last part and use only one colliding move. */
12558 else if (collisions
> 1)
12564 base
= part
[0][nparts
- 1];
12566 /* Handle the case when the last part isn't valid for lea.
12567 Happens in 64-bit mode storing the 12-byte XFmode. */
12568 if (GET_MODE (base
) != Pmode
)
12569 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12571 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12572 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12573 part
[1][1] = replace_equiv_address (part
[1][1],
12574 plus_constant (base
, UNITS_PER_WORD
));
12576 part
[1][2] = replace_equiv_address (part
[1][2],
12577 plus_constant (base
, 8));
12587 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12588 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12589 emit_move_insn (part
[0][2], part
[1][2]);
12594 /* In 64bit mode we don't have 32bit push available. In case this is
12595 register, it is OK - we will just use larger counterpart. We also
12596 retype memory - these comes from attempt to avoid REX prefix on
12597 moving of second half of TFmode value. */
12598 if (GET_MODE (part
[1][1]) == SImode
)
12600 switch (GET_CODE (part
[1][1]))
12603 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12607 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12611 gcc_unreachable ();
12614 if (GET_MODE (part
[1][0]) == SImode
)
12615 part
[1][0] = part
[1][1];
12618 emit_move_insn (part
[0][1], part
[1][1]);
12619 emit_move_insn (part
[0][0], part
[1][0]);
12623 /* Choose correct order to not overwrite the source before it is copied. */
12624 if ((REG_P (part
[0][0])
12625 && REG_P (part
[1][1])
12626 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12628 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12630 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12634 operands
[2] = part
[0][2];
12635 operands
[3] = part
[0][1];
12636 operands
[4] = part
[0][0];
12637 operands
[5] = part
[1][2];
12638 operands
[6] = part
[1][1];
12639 operands
[7] = part
[1][0];
12643 operands
[2] = part
[0][1];
12644 operands
[3] = part
[0][0];
12645 operands
[5] = part
[1][1];
12646 operands
[6] = part
[1][0];
12653 operands
[2] = part
[0][0];
12654 operands
[3] = part
[0][1];
12655 operands
[4] = part
[0][2];
12656 operands
[5] = part
[1][0];
12657 operands
[6] = part
[1][1];
12658 operands
[7] = part
[1][2];
12662 operands
[2] = part
[0][0];
12663 operands
[3] = part
[0][1];
12664 operands
[5] = part
[1][0];
12665 operands
[6] = part
[1][1];
12669 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12672 if (CONST_INT_P (operands
[5])
12673 && operands
[5] != const0_rtx
12674 && REG_P (operands
[2]))
12676 if (CONST_INT_P (operands
[6])
12677 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12678 operands
[6] = operands
[2];
12681 && CONST_INT_P (operands
[7])
12682 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12683 operands
[7] = operands
[2];
12687 && CONST_INT_P (operands
[6])
12688 && operands
[6] != const0_rtx
12689 && REG_P (operands
[3])
12690 && CONST_INT_P (operands
[7])
12691 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12692 operands
[7] = operands
[3];
12695 emit_move_insn (operands
[2], operands
[5]);
12696 emit_move_insn (operands
[3], operands
[6]);
12698 emit_move_insn (operands
[4], operands
[7]);
12703 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12704 left shift by a constant, either using a single shift or
12705 a sequence of add instructions. */
12708 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12712 emit_insn ((mode
== DImode
12714 : gen_adddi3
) (operand
, operand
, operand
));
12716 else if (!optimize_size
12717 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12720 for (i
=0; i
<count
; i
++)
12722 emit_insn ((mode
== DImode
12724 : gen_adddi3
) (operand
, operand
, operand
));
12728 emit_insn ((mode
== DImode
12730 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12734 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12736 rtx low
[2], high
[2];
12738 const int single_width
= mode
== DImode
? 32 : 64;
12740 if (CONST_INT_P (operands
[2]))
12742 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12743 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12745 if (count
>= single_width
)
12747 emit_move_insn (high
[0], low
[1]);
12748 emit_move_insn (low
[0], const0_rtx
);
12750 if (count
> single_width
)
12751 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12755 if (!rtx_equal_p (operands
[0], operands
[1]))
12756 emit_move_insn (operands
[0], operands
[1]);
12757 emit_insn ((mode
== DImode
12759 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12760 ix86_expand_ashl_const (low
[0], count
, mode
);
12765 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12767 if (operands
[1] == const1_rtx
)
12769 /* Assuming we've chosen a QImode capable registers, then 1 << N
12770 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12771 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12773 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12775 ix86_expand_clear (low
[0]);
12776 ix86_expand_clear (high
[0]);
12777 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12779 d
= gen_lowpart (QImode
, low
[0]);
12780 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12781 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12782 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12784 d
= gen_lowpart (QImode
, high
[0]);
12785 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12786 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12787 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12790 /* Otherwise, we can get the same results by manually performing
12791 a bit extract operation on bit 5/6, and then performing the two
12792 shifts. The two methods of getting 0/1 into low/high are exactly
12793 the same size. Avoiding the shift in the bit extract case helps
12794 pentium4 a bit; no one else seems to care much either way. */
12799 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12800 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12802 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12803 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12805 emit_insn ((mode
== DImode
12807 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12808 emit_insn ((mode
== DImode
12810 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12811 emit_move_insn (low
[0], high
[0]);
12812 emit_insn ((mode
== DImode
12814 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12817 emit_insn ((mode
== DImode
12819 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12820 emit_insn ((mode
== DImode
12822 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12826 if (operands
[1] == constm1_rtx
)
12828 /* For -1 << N, we can avoid the shld instruction, because we
12829 know that we're shifting 0...31/63 ones into a -1. */
12830 emit_move_insn (low
[0], constm1_rtx
);
12832 emit_move_insn (high
[0], low
[0]);
12834 emit_move_insn (high
[0], constm1_rtx
);
12838 if (!rtx_equal_p (operands
[0], operands
[1]))
12839 emit_move_insn (operands
[0], operands
[1]);
12841 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12842 emit_insn ((mode
== DImode
12844 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12847 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12849 if (TARGET_CMOVE
&& scratch
)
12851 ix86_expand_clear (scratch
);
12852 emit_insn ((mode
== DImode
12853 ? gen_x86_shift_adj_1
12854 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12857 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12861 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12863 rtx low
[2], high
[2];
12865 const int single_width
= mode
== DImode
? 32 : 64;
12867 if (CONST_INT_P (operands
[2]))
12869 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12870 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12872 if (count
== single_width
* 2 - 1)
12874 emit_move_insn (high
[0], high
[1]);
12875 emit_insn ((mode
== DImode
12877 : gen_ashrdi3
) (high
[0], high
[0],
12878 GEN_INT (single_width
- 1)));
12879 emit_move_insn (low
[0], high
[0]);
12882 else if (count
>= single_width
)
12884 emit_move_insn (low
[0], high
[1]);
12885 emit_move_insn (high
[0], low
[0]);
12886 emit_insn ((mode
== DImode
12888 : gen_ashrdi3
) (high
[0], high
[0],
12889 GEN_INT (single_width
- 1)));
12890 if (count
> single_width
)
12891 emit_insn ((mode
== DImode
12893 : gen_ashrdi3
) (low
[0], low
[0],
12894 GEN_INT (count
- single_width
)));
12898 if (!rtx_equal_p (operands
[0], operands
[1]))
12899 emit_move_insn (operands
[0], operands
[1]);
12900 emit_insn ((mode
== DImode
12902 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12903 emit_insn ((mode
== DImode
12905 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12910 if (!rtx_equal_p (operands
[0], operands
[1]))
12911 emit_move_insn (operands
[0], operands
[1]);
12913 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12915 emit_insn ((mode
== DImode
12917 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12918 emit_insn ((mode
== DImode
12920 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
12922 if (TARGET_CMOVE
&& scratch
)
12924 emit_move_insn (scratch
, high
[0]);
12925 emit_insn ((mode
== DImode
12927 : gen_ashrdi3
) (scratch
, scratch
,
12928 GEN_INT (single_width
- 1)));
12929 emit_insn ((mode
== DImode
12930 ? gen_x86_shift_adj_1
12931 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12935 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12940 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12942 rtx low
[2], high
[2];
12944 const int single_width
= mode
== DImode
? 32 : 64;
12946 if (CONST_INT_P (operands
[2]))
12948 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12949 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12951 if (count
>= single_width
)
12953 emit_move_insn (low
[0], high
[1]);
12954 ix86_expand_clear (high
[0]);
12956 if (count
> single_width
)
12957 emit_insn ((mode
== DImode
12959 : gen_lshrdi3
) (low
[0], low
[0],
12960 GEN_INT (count
- single_width
)));
12964 if (!rtx_equal_p (operands
[0], operands
[1]))
12965 emit_move_insn (operands
[0], operands
[1]);
12966 emit_insn ((mode
== DImode
12968 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12969 emit_insn ((mode
== DImode
12971 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12976 if (!rtx_equal_p (operands
[0], operands
[1]))
12977 emit_move_insn (operands
[0], operands
[1]);
12979 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12981 emit_insn ((mode
== DImode
12983 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12984 emit_insn ((mode
== DImode
12986 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12988 /* Heh. By reversing the arguments, we can reuse this pattern. */
12989 if (TARGET_CMOVE
&& scratch
)
12991 ix86_expand_clear (scratch
);
12992 emit_insn ((mode
== DImode
12993 ? gen_x86_shift_adj_1
12994 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12998 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13002 /* Predict just emitted jump instruction to be taken with probability PROB. */
13004 predict_jump (int prob
)
13006 rtx insn
= get_last_insn ();
13007 gcc_assert (JUMP_P (insn
));
13009 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13014 /* Helper function for the string operations below. Dest VARIABLE whether
13015 it is aligned to VALUE bytes. If true, jump to the label. */
13017 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13019 rtx label
= gen_label_rtx ();
13020 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13021 if (GET_MODE (variable
) == DImode
)
13022 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13024 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13025 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13028 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13030 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13034 /* Adjust COUNTER by the VALUE. */
13036 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13038 if (GET_MODE (countreg
) == DImode
)
13039 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13041 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13044 /* Zero extend possibly SImode EXP to Pmode register. */
13046 ix86_zero_extend_to_Pmode (rtx exp
)
13049 if (GET_MODE (exp
) == VOIDmode
)
13050 return force_reg (Pmode
, exp
);
13051 if (GET_MODE (exp
) == Pmode
)
13052 return copy_to_mode_reg (Pmode
, exp
);
13053 r
= gen_reg_rtx (Pmode
);
13054 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13058 /* Divide COUNTREG by SCALE. */
13060 scale_counter (rtx countreg
, int scale
)
13063 rtx piece_size_mask
;
13067 if (CONST_INT_P (countreg
))
13068 return GEN_INT (INTVAL (countreg
) / scale
);
13069 gcc_assert (REG_P (countreg
));
13071 piece_size_mask
= GEN_INT (scale
- 1);
13072 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13073 GEN_INT (exact_log2 (scale
)),
13074 NULL
, 1, OPTAB_DIRECT
);
13078 /* When SRCPTR is non-NULL, output simple loop to move memory
13079 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13080 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13081 equivalent loop to set memory by VALUE (supposed to be in MODE).
13083 The size is rounded down to whole number of chunk size moved at once.
13084 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13088 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13089 rtx destptr
, rtx srcptr
, rtx value
,
13090 rtx count
, enum machine_mode mode
, int unroll
,
13093 rtx out_label
, top_label
, iter
, tmp
;
13094 enum machine_mode iter_mode
;
13095 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13096 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13102 iter_mode
= GET_MODE (count
);
13103 if (iter_mode
== VOIDmode
)
13104 iter_mode
= word_mode
;
13106 top_label
= gen_label_rtx ();
13107 out_label
= gen_label_rtx ();
13108 iter
= gen_reg_rtx (iter_mode
);
13110 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13111 NULL
, 1, OPTAB_DIRECT
);
13112 /* Those two should combine. */
13113 if (piece_size
== const1_rtx
)
13115 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13117 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13119 emit_move_insn (iter
, const0_rtx
);
13121 emit_label (top_label
);
13123 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13124 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13125 destmem
= change_address (destmem
, mode
, x_addr
);
13129 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13130 srcmem
= change_address (srcmem
, mode
, y_addr
);
13132 /* When unrolling for chips that reorder memory reads and writes,
13133 we can save registers by using single temporary.
13134 Also using 4 temporaries is overkill in 32bit mode. */
13135 if (!TARGET_64BIT
&& 0)
13137 for (i
= 0; i
< unroll
; i
++)
13142 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13144 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13146 emit_move_insn (destmem
, srcmem
);
13152 gcc_assert (unroll
<= 4);
13153 for (i
= 0; i
< unroll
; i
++)
13155 tmpreg
[i
] = gen_reg_rtx (mode
);
13159 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13161 emit_move_insn (tmpreg
[i
], srcmem
);
13163 for (i
= 0; i
< unroll
; i
++)
13168 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13170 emit_move_insn (destmem
, tmpreg
[i
]);
13175 for (i
= 0; i
< unroll
; i
++)
13179 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13180 emit_move_insn (destmem
, value
);
13183 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13184 true, OPTAB_LIB_WIDEN
);
13186 emit_move_insn (iter
, tmp
);
13188 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13190 if (expected_size
!= -1)
13192 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13193 if (expected_size
== 0)
13195 else if (expected_size
> REG_BR_PROB_BASE
)
13196 predict_jump (REG_BR_PROB_BASE
- 1);
13198 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13201 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13202 iter
= ix86_zero_extend_to_Pmode (iter
);
13203 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13204 true, OPTAB_LIB_WIDEN
);
13205 if (tmp
!= destptr
)
13206 emit_move_insn (destptr
, tmp
);
13209 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13210 true, OPTAB_LIB_WIDEN
);
13212 emit_move_insn (srcptr
, tmp
);
13214 emit_label (out_label
);
13217 /* Output "rep; mov" instruction.
13218 Arguments have same meaning as for previous function */
13220 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13221 rtx destptr
, rtx srcptr
,
13223 enum machine_mode mode
)
13229 /* If the size is known, it is shorter to use rep movs. */
13230 if (mode
== QImode
&& CONST_INT_P (count
)
13231 && !(INTVAL (count
) & 3))
13234 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13235 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13236 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13237 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13238 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13239 if (mode
!= QImode
)
13241 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13242 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13243 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13244 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13245 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13246 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13250 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13251 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13253 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13257 /* Output "rep; stos" instruction.
13258 Arguments have same meaning as for previous function */
13260 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13262 enum machine_mode mode
)
13267 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13268 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13269 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13270 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13271 if (mode
!= QImode
)
13273 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13274 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13275 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13278 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13279 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13283 emit_strmov (rtx destmem
, rtx srcmem
,
13284 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13286 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13287 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13288 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13291 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13293 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13294 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13297 if (CONST_INT_P (count
))
13299 HOST_WIDE_INT countval
= INTVAL (count
);
13302 if ((countval
& 0x16) && max_size
> 16)
13306 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13307 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13310 gcc_unreachable ();
13313 if ((countval
& 0x08) && max_size
> 8)
13316 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13319 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13320 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 4);
13324 if ((countval
& 0x04) && max_size
> 4)
13326 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13329 if ((countval
& 0x02) && max_size
> 2)
13331 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13334 if ((countval
& 0x01) && max_size
> 1)
13336 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13343 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13344 count
, 1, OPTAB_DIRECT
);
13345 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13346 count
, QImode
, 1, 4);
13350 /* When there are stringops, we can cheaply increase dest and src pointers.
13351 Otherwise we save code size by maintaining offset (zero is readily
13352 available from preceding rep operation) and using x86 addressing modes.
13354 if (TARGET_SINGLE_STRINGOP
)
13358 rtx label
= ix86_expand_aligntest (count
, 4, true);
13359 src
= change_address (srcmem
, SImode
, srcptr
);
13360 dest
= change_address (destmem
, SImode
, destptr
);
13361 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13362 emit_label (label
);
13363 LABEL_NUSES (label
) = 1;
13367 rtx label
= ix86_expand_aligntest (count
, 2, true);
13368 src
= change_address (srcmem
, HImode
, srcptr
);
13369 dest
= change_address (destmem
, HImode
, destptr
);
13370 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13371 emit_label (label
);
13372 LABEL_NUSES (label
) = 1;
13376 rtx label
= ix86_expand_aligntest (count
, 1, true);
13377 src
= change_address (srcmem
, QImode
, srcptr
);
13378 dest
= change_address (destmem
, QImode
, destptr
);
13379 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13380 emit_label (label
);
13381 LABEL_NUSES (label
) = 1;
13386 rtx offset
= force_reg (Pmode
, const0_rtx
);
13391 rtx label
= ix86_expand_aligntest (count
, 4, true);
13392 src
= change_address (srcmem
, SImode
, srcptr
);
13393 dest
= change_address (destmem
, SImode
, destptr
);
13394 emit_move_insn (dest
, src
);
13395 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13396 true, OPTAB_LIB_WIDEN
);
13398 emit_move_insn (offset
, tmp
);
13399 emit_label (label
);
13400 LABEL_NUSES (label
) = 1;
13404 rtx label
= ix86_expand_aligntest (count
, 2, true);
13405 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13406 src
= change_address (srcmem
, HImode
, tmp
);
13407 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13408 dest
= change_address (destmem
, HImode
, tmp
);
13409 emit_move_insn (dest
, src
);
13410 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13411 true, OPTAB_LIB_WIDEN
);
13413 emit_move_insn (offset
, tmp
);
13414 emit_label (label
);
13415 LABEL_NUSES (label
) = 1;
13419 rtx label
= ix86_expand_aligntest (count
, 1, true);
13420 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13421 src
= change_address (srcmem
, QImode
, tmp
);
13422 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13423 dest
= change_address (destmem
, QImode
, tmp
);
13424 emit_move_insn (dest
, src
);
13425 emit_label (label
);
13426 LABEL_NUSES (label
) = 1;
13431 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13433 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13434 rtx count
, int max_size
)
13437 expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13438 count
, 1, OPTAB_DIRECT
);
13439 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13440 gen_lowpart (QImode
, value
), count
, QImode
,
13444 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13446 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13450 if (CONST_INT_P (count
))
13452 HOST_WIDE_INT countval
= INTVAL (count
);
13455 if ((countval
& 0x16) && max_size
> 16)
13459 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13460 emit_insn (gen_strset (destptr
, dest
, value
));
13461 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13462 emit_insn (gen_strset (destptr
, dest
, value
));
13465 gcc_unreachable ();
13468 if ((countval
& 0x08) && max_size
> 8)
13472 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13473 emit_insn (gen_strset (destptr
, dest
, value
));
13477 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13478 emit_insn (gen_strset (destptr
, dest
, value
));
13479 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13480 emit_insn (gen_strset (destptr
, dest
, value
));
13484 if ((countval
& 0x04) && max_size
> 4)
13486 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13487 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13490 if ((countval
& 0x02) && max_size
> 2)
13492 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13493 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13496 if ((countval
& 0x01) && max_size
> 1)
13498 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13499 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13506 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13511 rtx label
= ix86_expand_aligntest (count
, 16, true);
13514 dest
= change_address (destmem
, DImode
, destptr
);
13515 emit_insn (gen_strset (destptr
, dest
, value
));
13516 emit_insn (gen_strset (destptr
, dest
, value
));
13520 dest
= change_address (destmem
, SImode
, destptr
);
13521 emit_insn (gen_strset (destptr
, dest
, value
));
13522 emit_insn (gen_strset (destptr
, dest
, value
));
13523 emit_insn (gen_strset (destptr
, dest
, value
));
13524 emit_insn (gen_strset (destptr
, dest
, value
));
13526 emit_label (label
);
13527 LABEL_NUSES (label
) = 1;
13531 rtx label
= ix86_expand_aligntest (count
, 8, true);
13534 dest
= change_address (destmem
, DImode
, destptr
);
13535 emit_insn (gen_strset (destptr
, dest
, value
));
13539 dest
= change_address (destmem
, SImode
, destptr
);
13540 emit_insn (gen_strset (destptr
, dest
, value
));
13541 emit_insn (gen_strset (destptr
, dest
, value
));
13543 emit_label (label
);
13544 LABEL_NUSES (label
) = 1;
13548 rtx label
= ix86_expand_aligntest (count
, 4, true);
13549 dest
= change_address (destmem
, SImode
, destptr
);
13550 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13551 emit_label (label
);
13552 LABEL_NUSES (label
) = 1;
13556 rtx label
= ix86_expand_aligntest (count
, 2, true);
13557 dest
= change_address (destmem
, HImode
, destptr
);
13558 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13559 emit_label (label
);
13560 LABEL_NUSES (label
) = 1;
13564 rtx label
= ix86_expand_aligntest (count
, 1, true);
13565 dest
= change_address (destmem
, QImode
, destptr
);
13566 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13567 emit_label (label
);
13568 LABEL_NUSES (label
) = 1;
13572 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13573 DESIRED_ALIGNMENT. */
13575 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13576 rtx destptr
, rtx srcptr
, rtx count
,
13577 int align
, int desired_alignment
)
13579 if (align
<= 1 && desired_alignment
> 1)
13581 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13582 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13583 destmem
= change_address (destmem
, QImode
, destptr
);
13584 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13585 ix86_adjust_counter (count
, 1);
13586 emit_label (label
);
13587 LABEL_NUSES (label
) = 1;
13589 if (align
<= 2 && desired_alignment
> 2)
13591 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13592 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13593 destmem
= change_address (destmem
, HImode
, destptr
);
13594 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13595 ix86_adjust_counter (count
, 2);
13596 emit_label (label
);
13597 LABEL_NUSES (label
) = 1;
13599 if (align
<= 4 && desired_alignment
> 4)
13601 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13602 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13603 destmem
= change_address (destmem
, SImode
, destptr
);
13604 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13605 ix86_adjust_counter (count
, 4);
13606 emit_label (label
);
13607 LABEL_NUSES (label
) = 1;
13609 gcc_assert (desired_alignment
<= 8);
13612 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13613 DESIRED_ALIGNMENT. */
13615 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13616 int align
, int desired_alignment
)
13618 if (align
<= 1 && desired_alignment
> 1)
13620 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13621 destmem
= change_address (destmem
, QImode
, destptr
);
13622 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13623 ix86_adjust_counter (count
, 1);
13624 emit_label (label
);
13625 LABEL_NUSES (label
) = 1;
13627 if (align
<= 2 && desired_alignment
> 2)
13629 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13630 destmem
= change_address (destmem
, HImode
, destptr
);
13631 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13632 ix86_adjust_counter (count
, 2);
13633 emit_label (label
);
13634 LABEL_NUSES (label
) = 1;
13636 if (align
<= 4 && desired_alignment
> 4)
13638 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13639 destmem
= change_address (destmem
, SImode
, destptr
);
13640 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13641 ix86_adjust_counter (count
, 4);
13642 emit_label (label
);
13643 LABEL_NUSES (label
) = 1;
13645 gcc_assert (desired_alignment
<= 8);
13648 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13649 static enum stringop_alg
13650 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
13651 int *dynamic_check
)
13653 const struct stringop_algs
* algs
;
13655 *dynamic_check
= -1;
13657 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
13659 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
13660 if (stringop_alg
!= no_stringop
)
13661 return stringop_alg
;
13662 /* rep; movq or rep; movl is the smallest variant. */
13663 else if (optimize_size
)
13665 if (!count
|| (count
& 3))
13666 return rep_prefix_1_byte
;
13668 return rep_prefix_4_byte
;
13670 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13672 else if (expected_size
!= -1 && expected_size
< 4)
13673 return loop_1_byte
;
13674 else if (expected_size
!= -1)
13677 enum stringop_alg alg
= libcall
;
13678 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13680 gcc_assert (algs
->size
[i
].max
);
13681 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
13683 if (algs
->size
[i
].alg
!= libcall
)
13684 alg
= algs
->size
[i
].alg
;
13685 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13686 last non-libcall inline algorithm. */
13687 if (TARGET_INLINE_ALL_STRINGOPS
)
13689 /* When the current size is best to be copied by a libcall,
13690 but we are still forced to inline, run the heuristic bellow
13691 that will pick code for medium sized blocks. */
13692 if (alg
!= libcall
)
13697 return algs
->size
[i
].alg
;
13700 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
13702 /* When asked to inline the call anyway, try to pick meaningful choice.
13703 We look for maximal size of block that is faster to copy by hand and
13704 take blocks of at most of that size guessing that average size will
13705 be roughly half of the block.
13707 If this turns out to be bad, we might simply specify the preferred
13708 choice in ix86_costs. */
13709 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13710 && algs
->unknown_size
== libcall
)
13713 enum stringop_alg alg
;
13716 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13717 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
13718 max
= algs
->size
[i
].max
;
13721 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
13722 gcc_assert (*dynamic_check
== -1);
13723 gcc_assert (alg
!= libcall
);
13724 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13725 *dynamic_check
= max
;
13728 return algs
->unknown_size
;
13731 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13732 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13734 decide_alignment (int align
,
13735 enum stringop_alg alg
,
13738 int desired_align
= 0;
13742 gcc_unreachable ();
13744 case unrolled_loop
:
13745 desired_align
= GET_MODE_SIZE (Pmode
);
13747 case rep_prefix_8_byte
:
13750 case rep_prefix_4_byte
:
13751 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13752 copying whole cacheline at once. */
13753 if (TARGET_PENTIUMPRO
)
13758 case rep_prefix_1_byte
:
13759 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13760 copying whole cacheline at once. */
13761 if (TARGET_PENTIUMPRO
)
13775 if (desired_align
< align
)
13776 desired_align
= align
;
13777 if (expected_size
!= -1 && expected_size
< 4)
13778 desired_align
= align
;
13779 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  The body of this helper
   was lost in the surrounding corruption; reconstructed: doubling RET until
   it exceeds VAL.  For VAL <= 0 the result is 1.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
13792 /* Expand string move (memcpy) operation. Use i386 string operations when
13793 profitable. expand_clrmem contains similar code. The code depends upon
13794 architecture, block size and alignment, but always has the same
13797 1) Prologue guard: Conditional that jumps up to epilogues for small
13798 blocks that can be handled by epilogue alone. This is faster but
13799 also needed for correctness, since prologue assume the block is larger
13800 than the desired alignment.
13802 Optional dynamic check for size and libcall for large
13803 blocks is emitted here too, with -minline-stringops-dynamically.
13805 2) Prologue: copy first few bytes in order to get destination aligned
13806 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
13807 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
13808 We emit either a jump tree on power of two sized blocks, or a byte loop.
13810 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
13811 with specified algorithm.
13813 4) Epilogue: code copying tail of the block that is too small to be
13814 handled by main body (or up to size guarded by prologue guard). */
13817 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
13818 rtx expected_align_exp
, rtx expected_size_exp
)
13824 rtx jump_around_label
= NULL
;
13825 HOST_WIDE_INT align
= 1;
13826 unsigned HOST_WIDE_INT count
= 0;
13827 HOST_WIDE_INT expected_size
= -1;
13828 int size_needed
= 0, epilogue_size_needed
;
13829 int desired_align
= 0;
13830 enum stringop_alg alg
;
13833 if (CONST_INT_P (align_exp
))
13834 align
= INTVAL (align_exp
);
13835 /* i386 can do misaligned access on reasonably increased cost. */
13836 if (CONST_INT_P (expected_align_exp
)
13837 && INTVAL (expected_align_exp
) > align
)
13838 align
= INTVAL (expected_align_exp
);
13839 if (CONST_INT_P (count_exp
))
13840 count
= expected_size
= INTVAL (count_exp
);
13841 if (CONST_INT_P (expected_size_exp
) && count
== 0)
13842 expected_size
= INTVAL (expected_size_exp
);
13844 /* Step 0: Decide on preferred algorithm, desired alignment and
13845 size of chunks to be copied by main loop. */
13847 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
13848 desired_align
= decide_alignment (align
, alg
, expected_size
);
13850 if (!TARGET_ALIGN_STRINGOPS
)
13851 align
= desired_align
;
13853 if (alg
== libcall
)
13855 gcc_assert (alg
!= no_stringop
);
13857 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
13858 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13859 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
13864 gcc_unreachable ();
13866 size_needed
= GET_MODE_SIZE (Pmode
);
13868 case unrolled_loop
:
13869 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
13871 case rep_prefix_8_byte
:
13874 case rep_prefix_4_byte
:
13877 case rep_prefix_1_byte
:
13883 epilogue_size_needed
= size_needed
;
13885 /* Step 1: Prologue guard. */
13887 /* Alignment code needs count to be in register. */
13888 if (CONST_INT_P (count_exp
) && desired_align
> align
)
13890 enum machine_mode mode
= SImode
;
13891 if (TARGET_64BIT
&& (count
& ~0xffffffff))
13893 count_exp
= force_reg (mode
, count_exp
);
13895 gcc_assert (desired_align
>= 1 && align
>= 1);
13897 /* Ensure that alignment prologue won't copy past end of block. */
13898 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
13901 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
13903 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
13904 Make sure it is power of 2. */
13905 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
13907 label
= gen_label_rtx ();
13908 emit_cmp_and_jump_insns (count_exp
,
13909 GEN_INT (epilogue_size_needed
),
13910 LTU
, 0, GET_MODE (count_exp
), 1, label
);
13911 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
13912 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
13914 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
13916 /* Emit code to decide on runtime whether library call or inline should be
13918 if (dynamic_check
!= -1)
13920 rtx hot_label
= gen_label_rtx ();
13921 jump_around_label
= gen_label_rtx ();
13922 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
13923 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
13924 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13925 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
13926 emit_jump (jump_around_label
);
13927 emit_label (hot_label
);
13930 /* Step 2: Alignment prologue. */
13932 if (desired_align
> align
)
13934 /* Except for the first move in epilogue, we no longer know
13935 constant offset in aliasing info. It don't seems to worth
13936 the pain to maintain it for the first move, so throw away
13938 src
= change_address (src
, BLKmode
, srcreg
);
13939 dst
= change_address (dst
, BLKmode
, destreg
);
13940 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
13943 if (label
&& size_needed
== 1)
13945 emit_label (label
);
13946 LABEL_NUSES (label
) = 1;
13950 /* Step 3: Main loop. */
13956 gcc_unreachable ();
13958 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13959 count_exp
, QImode
, 1, expected_size
);
13962 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13963 count_exp
, Pmode
, 1, expected_size
);
13965 case unrolled_loop
:
13966 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
13967 registers for 4 temporaries anyway. */
13968 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13969 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
13972 case rep_prefix_8_byte
:
13973 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13976 case rep_prefix_4_byte
:
13977 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13980 case rep_prefix_1_byte
:
13981 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13985 /* Adjust properly the offset of src and dest memory for aliasing. */
13986 if (CONST_INT_P (count_exp
))
13988 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
13989 (count
/ size_needed
) * size_needed
);
13990 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
13991 (count
/ size_needed
) * size_needed
);
13995 src
= change_address (src
, BLKmode
, srcreg
);
13996 dst
= change_address (dst
, BLKmode
, destreg
);
13999 /* Step 4: Epilogue to copy the remaining bytes. */
14003 /* When the main loop is done, COUNT_EXP might hold original count,
14004 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14005 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14006 bytes. Compensate if needed. */
14008 if (size_needed
< epilogue_size_needed
)
14011 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14012 GEN_INT (size_needed
- 1), count_exp
, 1,
14014 if (tmp
!= count_exp
)
14015 emit_move_insn (count_exp
, tmp
);
14017 emit_label (label
);
14018 LABEL_NUSES (label
) = 1;
14021 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14022 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14023 epilogue_size_needed
);
14024 if (jump_around_label
)
14025 emit_label (jump_around_label
);
14029 /* Helper function for memcpy. For QImode value 0xXY produce
14030 0xXYXYXYXY of wide specified by MODE. This is essentially
14031 a * 0x10101010, but we can do slightly better than
14032 synth_mult by unwinding the sequence by hand on CPUs with
14035 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14037 enum machine_mode valmode
= GET_MODE (val
);
14039 int nops
= mode
== DImode
? 3 : 2;
14041 gcc_assert (mode
== SImode
|| mode
== DImode
);
14042 if (val
== const0_rtx
)
14043 return copy_to_mode_reg (mode
, const0_rtx
);
14044 if (CONST_INT_P (val
))
14046 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14050 if (mode
== DImode
)
14051 v
|= (v
<< 16) << 16;
14052 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14055 if (valmode
== VOIDmode
)
14057 if (valmode
!= QImode
)
14058 val
= gen_lowpart (QImode
, val
);
14059 if (mode
== QImode
)
14061 if (!TARGET_PARTIAL_REG_STALL
)
14063 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14064 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14065 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14066 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14068 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14069 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14070 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14075 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14077 if (!TARGET_PARTIAL_REG_STALL
)
14078 if (mode
== SImode
)
14079 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14081 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14084 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14085 NULL
, 1, OPTAB_DIRECT
);
14087 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14089 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14090 NULL
, 1, OPTAB_DIRECT
);
14091 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14092 if (mode
== SImode
)
14094 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14095 NULL
, 1, OPTAB_DIRECT
);
14096 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14101 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14102 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14103 alignment from ALIGN to DESIRED_ALIGN. */
14105 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14110 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14111 promoted_val
= promote_duplicated_reg (DImode
, val
);
14112 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14113 promoted_val
= promote_duplicated_reg (SImode
, val
);
14114 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14115 promoted_val
= promote_duplicated_reg (HImode
, val
);
14117 promoted_val
= val
;
14119 return promoted_val
;
14122 /* Expand string clear operation (bzero). Use i386 string operations when
14123 profitable. See expand_movmem comment for explanation of individual
14124 steps performed. */
14126 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14127 rtx expected_align_exp
, rtx expected_size_exp
)
14132 rtx jump_around_label
= NULL
;
14133 HOST_WIDE_INT align
= 1;
14134 unsigned HOST_WIDE_INT count
= 0;
14135 HOST_WIDE_INT expected_size
= -1;
14136 int size_needed
= 0, epilogue_size_needed
;
14137 int desired_align
= 0;
14138 enum stringop_alg alg
;
14139 rtx promoted_val
= NULL
;
14140 bool force_loopy_epilogue
= false;
14143 if (CONST_INT_P (align_exp
))
14144 align
= INTVAL (align_exp
);
14145 /* i386 can do misaligned access on reasonably increased cost. */
14146 if (CONST_INT_P (expected_align_exp
)
14147 && INTVAL (expected_align_exp
) > align
)
14148 align
= INTVAL (expected_align_exp
);
14149 if (CONST_INT_P (count_exp
))
14150 count
= expected_size
= INTVAL (count_exp
);
14151 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14152 expected_size
= INTVAL (expected_size_exp
);
14154 /* Step 0: Decide on preferred algorithm, desired alignment and
14155 size of chunks to be copied by main loop. */
14157 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14158 desired_align
= decide_alignment (align
, alg
, expected_size
);
14160 if (!TARGET_ALIGN_STRINGOPS
)
14161 align
= desired_align
;
14163 if (alg
== libcall
)
14165 gcc_assert (alg
!= no_stringop
);
14167 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14168 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14173 gcc_unreachable ();
14175 size_needed
= GET_MODE_SIZE (Pmode
);
14177 case unrolled_loop
:
14178 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14180 case rep_prefix_8_byte
:
14183 case rep_prefix_4_byte
:
14186 case rep_prefix_1_byte
:
14191 epilogue_size_needed
= size_needed
;
14193 /* Step 1: Prologue guard. */
14195 /* Alignment code needs count to be in register. */
14196 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14198 enum machine_mode mode
= SImode
;
14199 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14201 count_exp
= force_reg (mode
, count_exp
);
14203 /* Do the cheap promotion to allow better CSE across the
14204 main loop and epilogue (ie one load of the big constant in the
14205 front of all code. */
14206 if (CONST_INT_P (val_exp
))
14207 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14208 desired_align
, align
);
14209 /* Ensure that alignment prologue won't copy past end of block. */
14210 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14213 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14215 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14216 Make sure it is power of 2. */
14217 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14219 /* To improve performance of small blocks, we jump around the VAL
14220 promoting mode. This mean that if the promoted VAL is not constant,
14221 we might not use it in the epilogue and have to use byte
14223 if (epilogue_size_needed
> 2 && !promoted_val
)
14224 force_loopy_epilogue
= true;
14225 label
= gen_label_rtx ();
14226 emit_cmp_and_jump_insns (count_exp
,
14227 GEN_INT (epilogue_size_needed
),
14228 LTU
, 0, GET_MODE (count_exp
), 1, label
);
14229 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14230 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14232 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14234 if (dynamic_check
!= -1)
14236 rtx hot_label
= gen_label_rtx ();
14237 jump_around_label
= gen_label_rtx ();
14238 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14239 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14240 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14241 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14242 emit_jump (jump_around_label
);
14243 emit_label (hot_label
);
14246 /* Step 2: Alignment prologue. */
14248 /* Do the expensive promotion once we branched off the small blocks. */
14250 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14251 desired_align
, align
);
14252 gcc_assert (desired_align
>= 1 && align
>= 1);
14254 if (desired_align
> align
)
14256 /* Except for the first move in epilogue, we no longer know
14257 constant offset in aliasing info. It don't seems to worth
14258 the pain to maintain it for the first move, so throw away
14260 dst
= change_address (dst
, BLKmode
, destreg
);
14261 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14264 if (label
&& size_needed
== 1)
14266 emit_label (label
);
14267 LABEL_NUSES (label
) = 1;
14271 /* Step 3: Main loop. */
14277 gcc_unreachable ();
14279 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14280 count_exp
, QImode
, 1, expected_size
);
14283 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14284 count_exp
, Pmode
, 1, expected_size
);
14286 case unrolled_loop
:
14287 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14288 count_exp
, Pmode
, 4, expected_size
);
14290 case rep_prefix_8_byte
:
14291 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14294 case rep_prefix_4_byte
:
14295 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14298 case rep_prefix_1_byte
:
14299 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14303 /* Adjust properly the offset of src and dest memory for aliasing. */
14304 if (CONST_INT_P (count_exp
))
14305 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14306 (count
/ size_needed
) * size_needed
);
14308 dst
= change_address (dst
, BLKmode
, destreg
);
14310 /* Step 4: Epilogue to copy the remaining bytes. */
14314 /* When the main loop is done, COUNT_EXP might hold original count,
14315 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14316 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14317 bytes. Compensate if needed. */
14319 if (size_needed
< desired_align
- align
)
14322 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14323 GEN_INT (size_needed
- 1), count_exp
, 1,
14325 size_needed
= desired_align
- align
+ 1;
14326 if (tmp
!= count_exp
)
14327 emit_move_insn (count_exp
, tmp
);
14329 emit_label (label
);
14330 LABEL_NUSES (label
) = 1;
14332 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14334 if (force_loopy_epilogue
)
14335 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14338 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14341 if (jump_around_label
)
14342 emit_label (jump_around_label
);
14346 /* Expand strlen. */
14348 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14350 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14352 /* The generic case of strlen expander is long. Avoid it's
14353 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14355 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14356 && !TARGET_INLINE_ALL_STRINGOPS
14358 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14361 addr
= force_reg (Pmode
, XEXP (src
, 0));
14362 scratch1
= gen_reg_rtx (Pmode
);
14364 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14367 /* Well it seems that some optimizer does not combine a call like
14368 foo(strlen(bar), strlen(bar));
14369 when the move and the subtraction is done here. It does calculate
14370 the length just once when these instructions are done inside of
14371 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14372 often used and I use one fewer register for the lifetime of
14373 output_strlen_unroll() this is better. */
14375 emit_move_insn (out
, addr
);
14377 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14379 /* strlensi_unroll_1 returns the address of the zero at the end of
14380 the string, like memchr(), so compute the length by subtracting
14381 the start address. */
14383 emit_insn (gen_subdi3 (out
, out
, addr
));
14385 emit_insn (gen_subsi3 (out
, out
, addr
));
14390 scratch2
= gen_reg_rtx (Pmode
);
14391 scratch3
= gen_reg_rtx (Pmode
);
14392 scratch4
= force_reg (Pmode
, constm1_rtx
);
14394 emit_move_insn (scratch3
, addr
);
14395 eoschar
= force_reg (QImode
, eoschar
);
14397 src
= replace_equiv_address_nv (src
, scratch3
);
14399 /* If .md starts supporting :P, this can be done in .md. */
14400 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14401 scratch4
), UNSPEC_SCAS
);
14402 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14405 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14406 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14410 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14411 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14417 /* Expand the appropriate insns for doing strlen if not just doing
14420 out = result, initialized with the start address
14421 align_rtx = alignment of the address.
14422 scratch = scratch register, initialized with the startaddress when
14423 not aligned, otherwise undefined
14425 This is just the body. It needs the initializations mentioned above and
14426 some address computing at the end. These things are done in i386.md. */
14429 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14433 rtx align_2_label
= NULL_RTX
;
14434 rtx align_3_label
= NULL_RTX
;
14435 rtx align_4_label
= gen_label_rtx ();
14436 rtx end_0_label
= gen_label_rtx ();
14438 rtx tmpreg
= gen_reg_rtx (SImode
);
14439 rtx scratch
= gen_reg_rtx (SImode
);
14443 if (CONST_INT_P (align_rtx
))
14444 align
= INTVAL (align_rtx
);
14446 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14448 /* Is there a known alignment and is it less than 4? */
14451 rtx scratch1
= gen_reg_rtx (Pmode
);
14452 emit_move_insn (scratch1
, out
);
14453 /* Is there a known alignment and is it not 2? */
14456 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14457 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14459 /* Leave just the 3 lower bits. */
14460 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14461 NULL_RTX
, 0, OPTAB_WIDEN
);
14463 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14464 Pmode
, 1, align_4_label
);
14465 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14466 Pmode
, 1, align_2_label
);
14467 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14468 Pmode
, 1, align_3_label
);
14472 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14473 check if is aligned to 4 - byte. */
14475 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14476 NULL_RTX
, 0, OPTAB_WIDEN
);
14478 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14479 Pmode
, 1, align_4_label
);
14482 mem
= change_address (src
, QImode
, out
);
14484 /* Now compare the bytes. */
14486 /* Compare the first n unaligned byte on a byte per byte basis. */
14487 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14488 QImode
, 1, end_0_label
);
14490 /* Increment the address. */
14492 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14494 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14496 /* Not needed with an alignment of 2 */
14499 emit_label (align_2_label
);
14501 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14505 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14507 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14509 emit_label (align_3_label
);
14512 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14516 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14518 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14521 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14522 align this loop. It gives only huge programs, but does not help to
14524 emit_label (align_4_label
);
14526 mem
= change_address (src
, SImode
, out
);
14527 emit_move_insn (scratch
, mem
);
14529 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14531 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14533 /* This formula yields a nonzero result iff one of the bytes is zero.
14534 This saves three branches inside loop and many cycles. */
14536 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14537 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14538 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14539 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14540 gen_int_mode (0x80808080, SImode
)));
14541 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14546 rtx reg
= gen_reg_rtx (SImode
);
14547 rtx reg2
= gen_reg_rtx (Pmode
);
14548 emit_move_insn (reg
, tmpreg
);
14549 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14551 /* If zero is not in the first two bytes, move two bytes forward. */
14552 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14553 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14554 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14555 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14556 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14559 /* Emit lea manually to avoid clobbering of flags. */
14560 emit_insn (gen_rtx_SET (SImode
, reg2
,
14561 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14563 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14564 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14565 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14566 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14573 rtx end_2_label
= gen_label_rtx ();
14574 /* Is zero in the first two bytes? */
14576 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14577 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14578 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14579 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14580 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14582 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14583 JUMP_LABEL (tmp
) = end_2_label
;
14585 /* Not in the first two. Move two bytes forward. */
14586 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14588 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14590 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14592 emit_label (end_2_label
);
14596 /* Avoid branch in fixing the byte. */
14597 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14598 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
14599 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14601 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14603 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14605 emit_label (end_0_label
);
14609 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14610 rtx callarg2 ATTRIBUTE_UNUSED
,
14611 rtx pop
, int sibcall
)
14613 rtx use
= NULL
, call
;
14615 if (pop
== const0_rtx
)
14617 gcc_assert (!TARGET_64BIT
|| !pop
);
14619 if (TARGET_MACHO
&& !TARGET_64BIT
)
14622 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14623 fnaddr
= machopic_indirect_call_target (fnaddr
);
14628 /* Static functions and indirect calls don't need the pic register. */
14629 if (! TARGET_64BIT
&& flag_pic
14630 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14631 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14632 use_reg (&use
, pic_offset_table_rtx
);
14635 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14637 rtx al
= gen_rtx_REG (QImode
, 0);
14638 emit_move_insn (al
, callarg2
);
14639 use_reg (&use
, al
);
14642 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14644 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14645 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14647 if (sibcall
&& TARGET_64BIT
14648 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
14651 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14652 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
14653 emit_move_insn (fnaddr
, addr
);
14654 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14657 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
14659 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
14662 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
14663 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
14664 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
14667 call
= emit_call_insn (call
);
14669 CALL_INSN_FUNCTION_USAGE (call
) = use
;
14673 /* Clear stack slot assignments remembered from previous functions.
14674 This is called from INIT_EXPANDERS once before RTL is emitted for each
14677 static struct machine_function
*
14678 ix86_init_machine_status (void)
14680 struct machine_function
*f
;
14682 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
14683 f
->use_fast_prologue_epilogue_nregs
= -1;
14684 f
->tls_descriptor_call_expanded_p
= 0;
14689 /* Return a MEM corresponding to a stack slot with mode MODE.
14690 Allocate a new slot if necessary.
14692 The RTL for a function can have several slots available: N is
14693 which slot to use. */
14696 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
14698 struct stack_local_entry
*s
;
14700 gcc_assert (n
< MAX_386_STACK_LOCALS
);
14702 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
14703 if (s
->mode
== mode
&& s
->n
== n
)
14704 return copy_rtx (s
->rtl
);
14706 s
= (struct stack_local_entry
*)
14707 ggc_alloc (sizeof (struct stack_local_entry
));
14710 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
14712 s
->next
= ix86_stack_locals
;
14713 ix86_stack_locals
= s
;
14717 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14719 static GTY(()) rtx ix86_tls_symbol
;
14721 ix86_tls_get_addr (void)
14724 if (!ix86_tls_symbol
)
14726 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14727 (TARGET_ANY_GNU_TLS
14729 ? "___tls_get_addr"
14730 : "__tls_get_addr");
14733 return ix86_tls_symbol
;
14736 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14738 static GTY(()) rtx ix86_tls_module_base_symbol
;
14740 ix86_tls_module_base (void)
14743 if (!ix86_tls_module_base_symbol
)
14745 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14746 "_TLS_MODULE_BASE_");
14747 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
14748 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
14751 return ix86_tls_module_base_symbol
;
14754 /* Calculate the length of the memory address in the instruction
14755 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14758 memory_address_length (rtx addr
)
14760 struct ix86_address parts
;
14761 rtx base
, index
, disp
;
14765 if (GET_CODE (addr
) == PRE_DEC
14766 || GET_CODE (addr
) == POST_INC
14767 || GET_CODE (addr
) == PRE_MODIFY
14768 || GET_CODE (addr
) == POST_MODIFY
)
14771 ok
= ix86_decompose_address (addr
, &parts
);
14774 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14775 parts
.base
= SUBREG_REG (parts
.base
);
14776 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14777 parts
.index
= SUBREG_REG (parts
.index
);
14780 index
= parts
.index
;
14785 - esp as the base always wants an index,
14786 - ebp as the base always wants a displacement. */
14788 /* Register Indirect. */
14789 if (base
&& !index
&& !disp
)
14791 /* esp (for its index) and ebp (for its displacement) need
14792 the two-byte modrm form. */
14793 if (addr
== stack_pointer_rtx
14794 || addr
== arg_pointer_rtx
14795 || addr
== frame_pointer_rtx
14796 || addr
== hard_frame_pointer_rtx
)
14800 /* Direct Addressing. */
14801 else if (disp
&& !base
&& !index
)
14806 /* Find the length of the displacement constant. */
14809 if (base
&& satisfies_constraint_K (disp
))
14814 /* ebp always wants a displacement. */
14815 else if (base
== hard_frame_pointer_rtx
)
14818 /* An index requires the two-byte modrm form.... */
14820 /* ...like esp, which always wants an index. */
14821 || base
== stack_pointer_rtx
14822 || base
== arg_pointer_rtx
14823 || base
== frame_pointer_rtx
)
14830 /* Compute default value for "length_immediate" attribute. When SHORTFORM
14831 is set, expect that insn have 8bit immediate alternative. */
14833 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
14837 extract_insn_cached (insn
);
14838 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14839 if (CONSTANT_P (recog_data
.operand
[i
]))
14842 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
14846 switch (get_attr_mode (insn
))
14857 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
14862 fatal_insn ("unknown insn mode", insn
);
14868 /* Compute default value for "length_address" attribute. */
14870 ix86_attr_length_address_default (rtx insn
)
14874 if (get_attr_type (insn
) == TYPE_LEA
)
14876 rtx set
= PATTERN (insn
);
14878 if (GET_CODE (set
) == PARALLEL
)
14879 set
= XVECEXP (set
, 0, 0);
14881 gcc_assert (GET_CODE (set
) == SET
);
14883 return memory_address_length (SET_SRC (set
));
14886 extract_insn_cached (insn
);
14887 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14888 if (MEM_P (recog_data
.operand
[i
]))
14890 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
14896 /* Return the maximum number of instructions a cpu can issue. */
14899 ix86_issue_rate (void)
14903 case PROCESSOR_PENTIUM
:
14907 case PROCESSOR_PENTIUMPRO
:
14908 case PROCESSOR_PENTIUM4
:
14909 case PROCESSOR_ATHLON
:
14911 case PROCESSOR_AMDFAM10
:
14912 case PROCESSOR_NOCONA
:
14913 case PROCESSOR_GENERIC32
:
14914 case PROCESSOR_GENERIC64
:
14917 case PROCESSOR_CORE2
:
14925 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14926 by DEP_INSN and nothing set by DEP_INSN. */
14929 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
14933 /* Simplify the test for uninteresting insns. */
14934 if (insn_type
!= TYPE_SETCC
14935 && insn_type
!= TYPE_ICMOV
14936 && insn_type
!= TYPE_FCMOV
14937 && insn_type
!= TYPE_IBR
)
14940 if ((set
= single_set (dep_insn
)) != 0)
14942 set
= SET_DEST (set
);
14945 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
14946 && XVECLEN (PATTERN (dep_insn
), 0) == 2
14947 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
14948 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
14950 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
14951 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
14956 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
14959 /* This test is true if the dependent insn reads the flags but
14960 not any other potentially set register. */
14961 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
14964 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
14970 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14971 address with operands set by DEP_INSN. */
14974 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
14978 if (insn_type
== TYPE_LEA
14981 addr
= PATTERN (insn
);
14983 if (GET_CODE (addr
) == PARALLEL
)
14984 addr
= XVECEXP (addr
, 0, 0);
14986 gcc_assert (GET_CODE (addr
) == SET
);
14988 addr
= SET_SRC (addr
);
14993 extract_insn_cached (insn
);
14994 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14995 if (MEM_P (recog_data
.operand
[i
]))
14997 addr
= XEXP (recog_data
.operand
[i
], 0);
15004 return modified_in_p (addr
, dep_insn
);
15008 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15010 enum attr_type insn_type
, dep_insn_type
;
15011 enum attr_memory memory
;
15013 int dep_insn_code_number
;
15015 /* Anti and output dependencies have zero cost on all CPUs. */
15016 if (REG_NOTE_KIND (link
) != 0)
15019 dep_insn_code_number
= recog_memoized (dep_insn
);
15021 /* If we can't recognize the insns, we can't really do anything. */
15022 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15025 insn_type
= get_attr_type (insn
);
15026 dep_insn_type
= get_attr_type (dep_insn
);
15030 case PROCESSOR_PENTIUM
:
15031 /* Address Generation Interlock adds a cycle of latency. */
15032 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15035 /* ??? Compares pair with jump/setcc. */
15036 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15039 /* Floating point stores require value to be ready one cycle earlier. */
15040 if (insn_type
== TYPE_FMOV
15041 && get_attr_memory (insn
) == MEMORY_STORE
15042 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15046 case PROCESSOR_PENTIUMPRO
:
15047 memory
= get_attr_memory (insn
);
15049 /* INT->FP conversion is expensive. */
15050 if (get_attr_fp_int_src (dep_insn
))
15053 /* There is one cycle extra latency between an FP op and a store. */
15054 if (insn_type
== TYPE_FMOV
15055 && (set
= single_set (dep_insn
)) != NULL_RTX
15056 && (set2
= single_set (insn
)) != NULL_RTX
15057 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15058 && MEM_P (SET_DEST (set2
)))
15061 /* Show ability of reorder buffer to hide latency of load by executing
15062 in parallel with previous instruction in case
15063 previous instruction is not needed to compute the address. */
15064 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15065 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15067 /* Claim moves to take one cycle, as core can issue one load
15068 at time and the next load can start cycle later. */
15069 if (dep_insn_type
== TYPE_IMOV
15070 || dep_insn_type
== TYPE_FMOV
)
15078 memory
= get_attr_memory (insn
);
15080 /* The esp dependency is resolved before the instruction is really
15082 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15083 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15086 /* INT->FP conversion is expensive. */
15087 if (get_attr_fp_int_src (dep_insn
))
15090 /* Show ability of reorder buffer to hide latency of load by executing
15091 in parallel with previous instruction in case
15092 previous instruction is not needed to compute the address. */
15093 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15094 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15096 /* Claim moves to take one cycle, as core can issue one load
15097 at time and the next load can start cycle later. */
15098 if (dep_insn_type
== TYPE_IMOV
15099 || dep_insn_type
== TYPE_FMOV
)
15108 case PROCESSOR_ATHLON
:
15110 case PROCESSOR_AMDFAM10
:
15111 case PROCESSOR_GENERIC32
:
15112 case PROCESSOR_GENERIC64
:
15113 memory
= get_attr_memory (insn
);
15115 /* Show ability of reorder buffer to hide latency of load by executing
15116 in parallel with previous instruction in case
15117 previous instruction is not needed to compute the address. */
15118 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15119 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15121 enum attr_unit unit
= get_attr_unit (insn
);
15124 /* Because of the difference between the length of integer and
15125 floating unit pipeline preparation stages, the memory operands
15126 for floating point are cheaper.
15128 ??? For Athlon it the difference is most probably 2. */
15129 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15132 loadcost
= TARGET_ATHLON
? 2 : 0;
15134 if (cost
>= loadcost
)
15147 /* How many alternative schedules to try. This should be as wide as the
15148 scheduling freedom in the DFA, but no wider. Making this value too
15149 large results extra work for the scheduler. */
15152 ia32_multipass_dfa_lookahead (void)
15154 if (ix86_tune
== PROCESSOR_PENTIUM
)
15157 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15158 || ix86_tune
== PROCESSOR_K6
)
15166 /* Compute the alignment given to a constant that is being placed in memory.
15167 EXP is the constant and ALIGN is the alignment that the object would
15169 The value of this function is used instead of that alignment to align
15173 ix86_constant_alignment (tree exp
, int align
)
15175 if (TREE_CODE (exp
) == REAL_CST
)
15177 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15179 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15182 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15183 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15184 return BITS_PER_WORD
;
15189 /* Compute the alignment for a static variable.
15190 TYPE is the data type, and ALIGN is the alignment that
15191 the object would ordinarily have. The value of this function is used
15192 instead of that alignment to align the object. */
15195 ix86_data_alignment (tree type
, int align
)
15197 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
15199 if (AGGREGATE_TYPE_P (type
)
15200 && TYPE_SIZE (type
)
15201 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15202 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15203 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15204 && align
< max_align
)
15207 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15208 to 16byte boundary. */
15211 if (AGGREGATE_TYPE_P (type
)
15212 && TYPE_SIZE (type
)
15213 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15214 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15215 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15219 if (TREE_CODE (type
) == ARRAY_TYPE
)
15221 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15223 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15226 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15229 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15231 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15234 else if ((TREE_CODE (type
) == RECORD_TYPE
15235 || TREE_CODE (type
) == UNION_TYPE
15236 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15237 && TYPE_FIELDS (type
))
15239 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15241 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15244 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15245 || TREE_CODE (type
) == INTEGER_TYPE
)
15247 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15249 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15256 /* Compute the alignment for a local variable.
15257 TYPE is the data type, and ALIGN is the alignment that
15258 the object would ordinarily have. The value of this macro is used
15259 instead of that alignment to align the object. */
15262 ix86_local_alignment (tree type
, int align
)
15264 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15265 to 16byte boundary. */
15268 if (AGGREGATE_TYPE_P (type
)
15269 && TYPE_SIZE (type
)
15270 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15271 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15272 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15275 if (TREE_CODE (type
) == ARRAY_TYPE
)
15277 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15279 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15282 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15284 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15286 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15289 else if ((TREE_CODE (type
) == RECORD_TYPE
15290 || TREE_CODE (type
) == UNION_TYPE
15291 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15292 && TYPE_FIELDS (type
))
15294 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15296 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15299 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15300 || TREE_CODE (type
) == INTEGER_TYPE
)
15303 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15305 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15311 /* Emit RTL insns to initialize the variable parts of a trampoline.
15312 FNADDR is an RTX for the address of the function's pure code.
15313 CXT is an RTX for the static chain value for the function. */
15315 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15319 /* Compute offset from the end of the jmp to the target function. */
15320 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15321 plus_constant (tramp
, 10),
15322 NULL_RTX
, 1, OPTAB_DIRECT
);
15323 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15324 gen_int_mode (0xb9, QImode
));
15325 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15326 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15327 gen_int_mode (0xe9, QImode
));
15328 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15333 /* Try to load address using shorter movl instead of movabs.
15334 We may want to support movq for kernel mode, but kernel does not use
15335 trampolines at the moment. */
15336 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15338 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15339 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15340 gen_int_mode (0xbb41, HImode
));
15341 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15342 gen_lowpart (SImode
, fnaddr
));
15347 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15348 gen_int_mode (0xbb49, HImode
));
15349 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15353 /* Load static chain using movabs to r10. */
15354 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15355 gen_int_mode (0xba49, HImode
));
15356 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15359 /* Jump to the r11 */
15360 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15361 gen_int_mode (0xff49, HImode
));
15362 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15363 gen_int_mode (0xe3, QImode
));
15365 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15368 #ifdef ENABLE_EXECUTE_STACK
15369 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15370 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15374 /* Codes for all the SSE/MMX builtins. */
15377 IX86_BUILTIN_ADDPS
,
15378 IX86_BUILTIN_ADDSS
,
15379 IX86_BUILTIN_DIVPS
,
15380 IX86_BUILTIN_DIVSS
,
15381 IX86_BUILTIN_MULPS
,
15382 IX86_BUILTIN_MULSS
,
15383 IX86_BUILTIN_SUBPS
,
15384 IX86_BUILTIN_SUBSS
,
15386 IX86_BUILTIN_CMPEQPS
,
15387 IX86_BUILTIN_CMPLTPS
,
15388 IX86_BUILTIN_CMPLEPS
,
15389 IX86_BUILTIN_CMPGTPS
,
15390 IX86_BUILTIN_CMPGEPS
,
15391 IX86_BUILTIN_CMPNEQPS
,
15392 IX86_BUILTIN_CMPNLTPS
,
15393 IX86_BUILTIN_CMPNLEPS
,
15394 IX86_BUILTIN_CMPNGTPS
,
15395 IX86_BUILTIN_CMPNGEPS
,
15396 IX86_BUILTIN_CMPORDPS
,
15397 IX86_BUILTIN_CMPUNORDPS
,
15398 IX86_BUILTIN_CMPEQSS
,
15399 IX86_BUILTIN_CMPLTSS
,
15400 IX86_BUILTIN_CMPLESS
,
15401 IX86_BUILTIN_CMPNEQSS
,
15402 IX86_BUILTIN_CMPNLTSS
,
15403 IX86_BUILTIN_CMPNLESS
,
15404 IX86_BUILTIN_CMPNGTSS
,
15405 IX86_BUILTIN_CMPNGESS
,
15406 IX86_BUILTIN_CMPORDSS
,
15407 IX86_BUILTIN_CMPUNORDSS
,
15409 IX86_BUILTIN_COMIEQSS
,
15410 IX86_BUILTIN_COMILTSS
,
15411 IX86_BUILTIN_COMILESS
,
15412 IX86_BUILTIN_COMIGTSS
,
15413 IX86_BUILTIN_COMIGESS
,
15414 IX86_BUILTIN_COMINEQSS
,
15415 IX86_BUILTIN_UCOMIEQSS
,
15416 IX86_BUILTIN_UCOMILTSS
,
15417 IX86_BUILTIN_UCOMILESS
,
15418 IX86_BUILTIN_UCOMIGTSS
,
15419 IX86_BUILTIN_UCOMIGESS
,
15420 IX86_BUILTIN_UCOMINEQSS
,
15422 IX86_BUILTIN_CVTPI2PS
,
15423 IX86_BUILTIN_CVTPS2PI
,
15424 IX86_BUILTIN_CVTSI2SS
,
15425 IX86_BUILTIN_CVTSI642SS
,
15426 IX86_BUILTIN_CVTSS2SI
,
15427 IX86_BUILTIN_CVTSS2SI64
,
15428 IX86_BUILTIN_CVTTPS2PI
,
15429 IX86_BUILTIN_CVTTSS2SI
,
15430 IX86_BUILTIN_CVTTSS2SI64
,
15432 IX86_BUILTIN_MAXPS
,
15433 IX86_BUILTIN_MAXSS
,
15434 IX86_BUILTIN_MINPS
,
15435 IX86_BUILTIN_MINSS
,
15437 IX86_BUILTIN_LOADUPS
,
15438 IX86_BUILTIN_STOREUPS
,
15439 IX86_BUILTIN_MOVSS
,
15441 IX86_BUILTIN_MOVHLPS
,
15442 IX86_BUILTIN_MOVLHPS
,
15443 IX86_BUILTIN_LOADHPS
,
15444 IX86_BUILTIN_LOADLPS
,
15445 IX86_BUILTIN_STOREHPS
,
15446 IX86_BUILTIN_STORELPS
,
15448 IX86_BUILTIN_MASKMOVQ
,
15449 IX86_BUILTIN_MOVMSKPS
,
15450 IX86_BUILTIN_PMOVMSKB
,
15452 IX86_BUILTIN_MOVNTPS
,
15453 IX86_BUILTIN_MOVNTQ
,
15455 IX86_BUILTIN_LOADDQU
,
15456 IX86_BUILTIN_STOREDQU
,
15458 IX86_BUILTIN_PACKSSWB
,
15459 IX86_BUILTIN_PACKSSDW
,
15460 IX86_BUILTIN_PACKUSWB
,
15462 IX86_BUILTIN_PADDB
,
15463 IX86_BUILTIN_PADDW
,
15464 IX86_BUILTIN_PADDD
,
15465 IX86_BUILTIN_PADDQ
,
15466 IX86_BUILTIN_PADDSB
,
15467 IX86_BUILTIN_PADDSW
,
15468 IX86_BUILTIN_PADDUSB
,
15469 IX86_BUILTIN_PADDUSW
,
15470 IX86_BUILTIN_PSUBB
,
15471 IX86_BUILTIN_PSUBW
,
15472 IX86_BUILTIN_PSUBD
,
15473 IX86_BUILTIN_PSUBQ
,
15474 IX86_BUILTIN_PSUBSB
,
15475 IX86_BUILTIN_PSUBSW
,
15476 IX86_BUILTIN_PSUBUSB
,
15477 IX86_BUILTIN_PSUBUSW
,
15480 IX86_BUILTIN_PANDN
,
15484 IX86_BUILTIN_PAVGB
,
15485 IX86_BUILTIN_PAVGW
,
15487 IX86_BUILTIN_PCMPEQB
,
15488 IX86_BUILTIN_PCMPEQW
,
15489 IX86_BUILTIN_PCMPEQD
,
15490 IX86_BUILTIN_PCMPGTB
,
15491 IX86_BUILTIN_PCMPGTW
,
15492 IX86_BUILTIN_PCMPGTD
,
15494 IX86_BUILTIN_PMADDWD
,
15496 IX86_BUILTIN_PMAXSW
,
15497 IX86_BUILTIN_PMAXUB
,
15498 IX86_BUILTIN_PMINSW
,
15499 IX86_BUILTIN_PMINUB
,
15501 IX86_BUILTIN_PMULHUW
,
15502 IX86_BUILTIN_PMULHW
,
15503 IX86_BUILTIN_PMULLW
,
15505 IX86_BUILTIN_PSADBW
,
15506 IX86_BUILTIN_PSHUFW
,
15508 IX86_BUILTIN_PSLLW
,
15509 IX86_BUILTIN_PSLLD
,
15510 IX86_BUILTIN_PSLLQ
,
15511 IX86_BUILTIN_PSRAW
,
15512 IX86_BUILTIN_PSRAD
,
15513 IX86_BUILTIN_PSRLW
,
15514 IX86_BUILTIN_PSRLD
,
15515 IX86_BUILTIN_PSRLQ
,
15516 IX86_BUILTIN_PSLLWI
,
15517 IX86_BUILTIN_PSLLDI
,
15518 IX86_BUILTIN_PSLLQI
,
15519 IX86_BUILTIN_PSRAWI
,
15520 IX86_BUILTIN_PSRADI
,
15521 IX86_BUILTIN_PSRLWI
,
15522 IX86_BUILTIN_PSRLDI
,
15523 IX86_BUILTIN_PSRLQI
,
15525 IX86_BUILTIN_PUNPCKHBW
,
15526 IX86_BUILTIN_PUNPCKHWD
,
15527 IX86_BUILTIN_PUNPCKHDQ
,
15528 IX86_BUILTIN_PUNPCKLBW
,
15529 IX86_BUILTIN_PUNPCKLWD
,
15530 IX86_BUILTIN_PUNPCKLDQ
,
15532 IX86_BUILTIN_SHUFPS
,
15534 IX86_BUILTIN_RCPPS
,
15535 IX86_BUILTIN_RCPSS
,
15536 IX86_BUILTIN_RSQRTPS
,
15537 IX86_BUILTIN_RSQRTSS
,
15538 IX86_BUILTIN_SQRTPS
,
15539 IX86_BUILTIN_SQRTSS
,
15541 IX86_BUILTIN_UNPCKHPS
,
15542 IX86_BUILTIN_UNPCKLPS
,
15544 IX86_BUILTIN_ANDPS
,
15545 IX86_BUILTIN_ANDNPS
,
15547 IX86_BUILTIN_XORPS
,
15550 IX86_BUILTIN_LDMXCSR
,
15551 IX86_BUILTIN_STMXCSR
,
15552 IX86_BUILTIN_SFENCE
,
15554 /* 3DNow! Original */
15555 IX86_BUILTIN_FEMMS
,
15556 IX86_BUILTIN_PAVGUSB
,
15557 IX86_BUILTIN_PF2ID
,
15558 IX86_BUILTIN_PFACC
,
15559 IX86_BUILTIN_PFADD
,
15560 IX86_BUILTIN_PFCMPEQ
,
15561 IX86_BUILTIN_PFCMPGE
,
15562 IX86_BUILTIN_PFCMPGT
,
15563 IX86_BUILTIN_PFMAX
,
15564 IX86_BUILTIN_PFMIN
,
15565 IX86_BUILTIN_PFMUL
,
15566 IX86_BUILTIN_PFRCP
,
15567 IX86_BUILTIN_PFRCPIT1
,
15568 IX86_BUILTIN_PFRCPIT2
,
15569 IX86_BUILTIN_PFRSQIT1
,
15570 IX86_BUILTIN_PFRSQRT
,
15571 IX86_BUILTIN_PFSUB
,
15572 IX86_BUILTIN_PFSUBR
,
15573 IX86_BUILTIN_PI2FD
,
15574 IX86_BUILTIN_PMULHRW
,
15576 /* 3DNow! Athlon Extensions */
15577 IX86_BUILTIN_PF2IW
,
15578 IX86_BUILTIN_PFNACC
,
15579 IX86_BUILTIN_PFPNACC
,
15580 IX86_BUILTIN_PI2FW
,
15581 IX86_BUILTIN_PSWAPDSI
,
15582 IX86_BUILTIN_PSWAPDSF
,
15585 IX86_BUILTIN_ADDPD
,
15586 IX86_BUILTIN_ADDSD
,
15587 IX86_BUILTIN_DIVPD
,
15588 IX86_BUILTIN_DIVSD
,
15589 IX86_BUILTIN_MULPD
,
15590 IX86_BUILTIN_MULSD
,
15591 IX86_BUILTIN_SUBPD
,
15592 IX86_BUILTIN_SUBSD
,
15594 IX86_BUILTIN_CMPEQPD
,
15595 IX86_BUILTIN_CMPLTPD
,
15596 IX86_BUILTIN_CMPLEPD
,
15597 IX86_BUILTIN_CMPGTPD
,
15598 IX86_BUILTIN_CMPGEPD
,
15599 IX86_BUILTIN_CMPNEQPD
,
15600 IX86_BUILTIN_CMPNLTPD
,
15601 IX86_BUILTIN_CMPNLEPD
,
15602 IX86_BUILTIN_CMPNGTPD
,
15603 IX86_BUILTIN_CMPNGEPD
,
15604 IX86_BUILTIN_CMPORDPD
,
15605 IX86_BUILTIN_CMPUNORDPD
,
15606 IX86_BUILTIN_CMPNEPD
,
15607 IX86_BUILTIN_CMPEQSD
,
15608 IX86_BUILTIN_CMPLTSD
,
15609 IX86_BUILTIN_CMPLESD
,
15610 IX86_BUILTIN_CMPNEQSD
,
15611 IX86_BUILTIN_CMPNLTSD
,
15612 IX86_BUILTIN_CMPNLESD
,
15613 IX86_BUILTIN_CMPORDSD
,
15614 IX86_BUILTIN_CMPUNORDSD
,
15615 IX86_BUILTIN_CMPNESD
,
15617 IX86_BUILTIN_COMIEQSD
,
15618 IX86_BUILTIN_COMILTSD
,
15619 IX86_BUILTIN_COMILESD
,
15620 IX86_BUILTIN_COMIGTSD
,
15621 IX86_BUILTIN_COMIGESD
,
15622 IX86_BUILTIN_COMINEQSD
,
15623 IX86_BUILTIN_UCOMIEQSD
,
15624 IX86_BUILTIN_UCOMILTSD
,
15625 IX86_BUILTIN_UCOMILESD
,
15626 IX86_BUILTIN_UCOMIGTSD
,
15627 IX86_BUILTIN_UCOMIGESD
,
15628 IX86_BUILTIN_UCOMINEQSD
,
15630 IX86_BUILTIN_MAXPD
,
15631 IX86_BUILTIN_MAXSD
,
15632 IX86_BUILTIN_MINPD
,
15633 IX86_BUILTIN_MINSD
,
15635 IX86_BUILTIN_ANDPD
,
15636 IX86_BUILTIN_ANDNPD
,
15638 IX86_BUILTIN_XORPD
,
15640 IX86_BUILTIN_SQRTPD
,
15641 IX86_BUILTIN_SQRTSD
,
15643 IX86_BUILTIN_UNPCKHPD
,
15644 IX86_BUILTIN_UNPCKLPD
,
15646 IX86_BUILTIN_SHUFPD
,
15648 IX86_BUILTIN_LOADUPD
,
15649 IX86_BUILTIN_STOREUPD
,
15650 IX86_BUILTIN_MOVSD
,
15652 IX86_BUILTIN_LOADHPD
,
15653 IX86_BUILTIN_LOADLPD
,
15655 IX86_BUILTIN_CVTDQ2PD
,
15656 IX86_BUILTIN_CVTDQ2PS
,
15658 IX86_BUILTIN_CVTPD2DQ
,
15659 IX86_BUILTIN_CVTPD2PI
,
15660 IX86_BUILTIN_CVTPD2PS
,
15661 IX86_BUILTIN_CVTTPD2DQ
,
15662 IX86_BUILTIN_CVTTPD2PI
,
15664 IX86_BUILTIN_CVTPI2PD
,
15665 IX86_BUILTIN_CVTSI2SD
,
15666 IX86_BUILTIN_CVTSI642SD
,
15668 IX86_BUILTIN_CVTSD2SI
,
15669 IX86_BUILTIN_CVTSD2SI64
,
15670 IX86_BUILTIN_CVTSD2SS
,
15671 IX86_BUILTIN_CVTSS2SD
,
15672 IX86_BUILTIN_CVTTSD2SI
,
15673 IX86_BUILTIN_CVTTSD2SI64
,
15675 IX86_BUILTIN_CVTPS2DQ
,
15676 IX86_BUILTIN_CVTPS2PD
,
15677 IX86_BUILTIN_CVTTPS2DQ
,
15679 IX86_BUILTIN_MOVNTI
,
15680 IX86_BUILTIN_MOVNTPD
,
15681 IX86_BUILTIN_MOVNTDQ
,
15684 IX86_BUILTIN_MASKMOVDQU
,
15685 IX86_BUILTIN_MOVMSKPD
,
15686 IX86_BUILTIN_PMOVMSKB128
,
15688 IX86_BUILTIN_PACKSSWB128
,
15689 IX86_BUILTIN_PACKSSDW128
,
15690 IX86_BUILTIN_PACKUSWB128
,
15692 IX86_BUILTIN_PADDB128
,
15693 IX86_BUILTIN_PADDW128
,
15694 IX86_BUILTIN_PADDD128
,
15695 IX86_BUILTIN_PADDQ128
,
15696 IX86_BUILTIN_PADDSB128
,
15697 IX86_BUILTIN_PADDSW128
,
15698 IX86_BUILTIN_PADDUSB128
,
15699 IX86_BUILTIN_PADDUSW128
,
15700 IX86_BUILTIN_PSUBB128
,
15701 IX86_BUILTIN_PSUBW128
,
15702 IX86_BUILTIN_PSUBD128
,
15703 IX86_BUILTIN_PSUBQ128
,
15704 IX86_BUILTIN_PSUBSB128
,
15705 IX86_BUILTIN_PSUBSW128
,
15706 IX86_BUILTIN_PSUBUSB128
,
15707 IX86_BUILTIN_PSUBUSW128
,
15709 IX86_BUILTIN_PAND128
,
15710 IX86_BUILTIN_PANDN128
,
15711 IX86_BUILTIN_POR128
,
15712 IX86_BUILTIN_PXOR128
,
15714 IX86_BUILTIN_PAVGB128
,
15715 IX86_BUILTIN_PAVGW128
,
15717 IX86_BUILTIN_PCMPEQB128
,
15718 IX86_BUILTIN_PCMPEQW128
,
15719 IX86_BUILTIN_PCMPEQD128
,
15720 IX86_BUILTIN_PCMPGTB128
,
15721 IX86_BUILTIN_PCMPGTW128
,
15722 IX86_BUILTIN_PCMPGTD128
,
15724 IX86_BUILTIN_PMADDWD128
,
15726 IX86_BUILTIN_PMAXSW128
,
15727 IX86_BUILTIN_PMAXUB128
,
15728 IX86_BUILTIN_PMINSW128
,
15729 IX86_BUILTIN_PMINUB128
,
15731 IX86_BUILTIN_PMULUDQ
,
15732 IX86_BUILTIN_PMULUDQ128
,
15733 IX86_BUILTIN_PMULHUW128
,
15734 IX86_BUILTIN_PMULHW128
,
15735 IX86_BUILTIN_PMULLW128
,
15737 IX86_BUILTIN_PSADBW128
,
15738 IX86_BUILTIN_PSHUFHW
,
15739 IX86_BUILTIN_PSHUFLW
,
15740 IX86_BUILTIN_PSHUFD
,
15742 IX86_BUILTIN_PSLLW128
,
15743 IX86_BUILTIN_PSLLD128
,
15744 IX86_BUILTIN_PSLLQ128
,
15745 IX86_BUILTIN_PSRAW128
,
15746 IX86_BUILTIN_PSRAD128
,
15747 IX86_BUILTIN_PSRLW128
,
15748 IX86_BUILTIN_PSRLD128
,
15749 IX86_BUILTIN_PSRLQ128
,
15750 IX86_BUILTIN_PSLLDQI128
,
15751 IX86_BUILTIN_PSLLWI128
,
15752 IX86_BUILTIN_PSLLDI128
,
15753 IX86_BUILTIN_PSLLQI128
,
15754 IX86_BUILTIN_PSRAWI128
,
15755 IX86_BUILTIN_PSRADI128
,
15756 IX86_BUILTIN_PSRLDQI128
,
15757 IX86_BUILTIN_PSRLWI128
,
15758 IX86_BUILTIN_PSRLDI128
,
15759 IX86_BUILTIN_PSRLQI128
,
15761 IX86_BUILTIN_PUNPCKHBW128
,
15762 IX86_BUILTIN_PUNPCKHWD128
,
15763 IX86_BUILTIN_PUNPCKHDQ128
,
15764 IX86_BUILTIN_PUNPCKHQDQ128
,
15765 IX86_BUILTIN_PUNPCKLBW128
,
15766 IX86_BUILTIN_PUNPCKLWD128
,
15767 IX86_BUILTIN_PUNPCKLDQ128
,
15768 IX86_BUILTIN_PUNPCKLQDQ128
,
15770 IX86_BUILTIN_CLFLUSH
,
15771 IX86_BUILTIN_MFENCE
,
15772 IX86_BUILTIN_LFENCE
,
15774 /* Prescott New Instructions. */
15775 IX86_BUILTIN_ADDSUBPS
,
15776 IX86_BUILTIN_HADDPS
,
15777 IX86_BUILTIN_HSUBPS
,
15778 IX86_BUILTIN_MOVSHDUP
,
15779 IX86_BUILTIN_MOVSLDUP
,
15780 IX86_BUILTIN_ADDSUBPD
,
15781 IX86_BUILTIN_HADDPD
,
15782 IX86_BUILTIN_HSUBPD
,
15783 IX86_BUILTIN_LDDQU
,
15785 IX86_BUILTIN_MONITOR
,
15786 IX86_BUILTIN_MWAIT
,
15789 IX86_BUILTIN_PHADDW
,
15790 IX86_BUILTIN_PHADDD
,
15791 IX86_BUILTIN_PHADDSW
,
15792 IX86_BUILTIN_PHSUBW
,
15793 IX86_BUILTIN_PHSUBD
,
15794 IX86_BUILTIN_PHSUBSW
,
15795 IX86_BUILTIN_PMADDUBSW
,
15796 IX86_BUILTIN_PMULHRSW
,
15797 IX86_BUILTIN_PSHUFB
,
15798 IX86_BUILTIN_PSIGNB
,
15799 IX86_BUILTIN_PSIGNW
,
15800 IX86_BUILTIN_PSIGND
,
15801 IX86_BUILTIN_PALIGNR
,
15802 IX86_BUILTIN_PABSB
,
15803 IX86_BUILTIN_PABSW
,
15804 IX86_BUILTIN_PABSD
,
15806 IX86_BUILTIN_PHADDW128
,
15807 IX86_BUILTIN_PHADDD128
,
15808 IX86_BUILTIN_PHADDSW128
,
15809 IX86_BUILTIN_PHSUBW128
,
15810 IX86_BUILTIN_PHSUBD128
,
15811 IX86_BUILTIN_PHSUBSW128
,
15812 IX86_BUILTIN_PMADDUBSW128
,
15813 IX86_BUILTIN_PMULHRSW128
,
15814 IX86_BUILTIN_PSHUFB128
,
15815 IX86_BUILTIN_PSIGNB128
,
15816 IX86_BUILTIN_PSIGNW128
,
15817 IX86_BUILTIN_PSIGND128
,
15818 IX86_BUILTIN_PALIGNR128
,
15819 IX86_BUILTIN_PABSB128
,
15820 IX86_BUILTIN_PABSW128
,
15821 IX86_BUILTIN_PABSD128
,
15823 /* AMDFAM10 - SSE4A New Instructions. */
15824 IX86_BUILTIN_MOVNTSD
,
15825 IX86_BUILTIN_MOVNTSS
,
15826 IX86_BUILTIN_EXTRQI
,
15827 IX86_BUILTIN_EXTRQ
,
15828 IX86_BUILTIN_INSERTQI
,
15829 IX86_BUILTIN_INSERTQ
,
15831 IX86_BUILTIN_VEC_INIT_V2SI
,
15832 IX86_BUILTIN_VEC_INIT_V4HI
,
15833 IX86_BUILTIN_VEC_INIT_V8QI
,
15834 IX86_BUILTIN_VEC_EXT_V2DF
,
15835 IX86_BUILTIN_VEC_EXT_V2DI
,
15836 IX86_BUILTIN_VEC_EXT_V4SF
,
15837 IX86_BUILTIN_VEC_EXT_V4SI
,
15838 IX86_BUILTIN_VEC_EXT_V8HI
,
15839 IX86_BUILTIN_VEC_EXT_V2SI
,
15840 IX86_BUILTIN_VEC_EXT_V4HI
,
15841 IX86_BUILTIN_VEC_SET_V8HI
,
15842 IX86_BUILTIN_VEC_SET_V4HI
,
/* Table for the ix86 builtin decls, indexed by ix86_builtins code.
   GTY(()) so the garbage collector scans the stored FUNCTION_DECLs;
   entries are filled in by def_builtin as builtins are registered.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
15850 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
15851 * if the target_flags include one of MASK. Stores the function decl
15852 * in the ix86_builtins array.
15853 * Returns the function decl or NULL_TREE, if the builtin was not added. */
15856 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
15858 tree decl
= NULL_TREE
;
15860 if (mask
& target_flags
15861 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
15863 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
15865 ix86_builtins
[(int) code
] = decl
;
15871 /* Like def_builtin, but also marks the function decl "const". */
15874 def_builtin_const (int mask
, const char *name
, tree type
,
15875 enum ix86_builtins code
)
15877 tree decl
= def_builtin (mask
, name
, type
, code
);
15879 TREE_READONLY (decl
) = 1;
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* Describes one ia32 builtin that expands directly to a named insn
   pattern; the bdesc_* tables below are arrays of these.  */
struct builtin_description
{
  const unsigned int mask;		/* target_flags bits required to enable it */
  const enum insn_code icode;		/* insn pattern used to expand the builtin */
  const char *const name;		/* user-visible name, or 0 when the decl is
					   registered elsewhere by explicit code */
  const enum ix86_builtins code;	/* this builtin's function code */
  const enum rtx_code comparison;	/* comparison RTX code, for compare builtins */
  const unsigned int flag;		/* BUILTIN_DESC_* bits */
};
/* Scalar comparisons that set EFLAGS: (u)comiss / (u)comisd builtins.
   NOTE(review): the eq/lt/le/neq entries deliberately use the
   unordered-aware RTX codes (UNEQ, UNLT, UNLE, LTGT) rather than
   EQ/LT/LE/NE — presumably matching how COMI sets flags for NaN
   operands; confirm against the expander before changing.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
15927 static const struct builtin_description bdesc_2arg
[] =
15930 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
15931 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
15932 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
15933 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
15934 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
15935 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
15936 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
15937 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
15939 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
15940 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
15941 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
15942 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
15943 BUILTIN_DESC_SWAP_OPERANDS
},
15944 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
15945 BUILTIN_DESC_SWAP_OPERANDS
},
15946 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
15947 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
15948 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
15949 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
15950 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
15951 BUILTIN_DESC_SWAP_OPERANDS
},
15952 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
15953 BUILTIN_DESC_SWAP_OPERANDS
},
15954 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
15955 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
15956 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
15957 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
15958 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
15959 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
15960 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
15961 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
15962 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
15963 BUILTIN_DESC_SWAP_OPERANDS
},
15964 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
15965 BUILTIN_DESC_SWAP_OPERANDS
},
15966 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
15968 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
15969 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
15970 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
15971 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
15973 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
15974 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
15975 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
15976 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
15978 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
15979 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
15980 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
15981 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
15982 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
15985 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
15986 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
15987 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
15988 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
15989 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
15990 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
15991 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
15992 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
15994 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
15995 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
15996 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
15997 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
15998 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
15999 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16000 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16001 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16003 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16004 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16005 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16007 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16008 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16009 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16010 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16012 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16013 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16015 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16016 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16017 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16018 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16019 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16020 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16022 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16023 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16024 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16025 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16027 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16028 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16029 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16030 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16031 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16032 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16035 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16036 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16037 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16039 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16040 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16041 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16043 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16044 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16045 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16046 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16047 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16048 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16050 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16051 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16052 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16053 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16054 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16055 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16057 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16058 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16059 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16060 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16062 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16063 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16066 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16067 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16068 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16069 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16070 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16071 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16072 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16073 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16075 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16076 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16077 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16078 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16079 BUILTIN_DESC_SWAP_OPERANDS
},
16080 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16081 BUILTIN_DESC_SWAP_OPERANDS
},
16082 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16083 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16084 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16085 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16086 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16087 BUILTIN_DESC_SWAP_OPERANDS
},
16088 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16089 BUILTIN_DESC_SWAP_OPERANDS
},
16090 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16091 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16092 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16093 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16094 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16095 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16096 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16097 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16098 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16100 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16101 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16102 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16103 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16105 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16106 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16107 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16108 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16110 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16111 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16112 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16115 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16116 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16117 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16118 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16119 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16120 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16121 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16122 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16124 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16125 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16126 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16127 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16128 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16129 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16130 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16131 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16133 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16134 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16136 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16137 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16138 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16139 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16141 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16142 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16144 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16145 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16146 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16147 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16148 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16149 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16151 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16152 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16153 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16154 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16156 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16157 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16158 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16159 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16160 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16161 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16162 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16163 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16165 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16166 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16167 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16169 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16170 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16172 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16173 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16175 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16176 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16177 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16179 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16180 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16181 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16183 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16184 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16186 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16188 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16189 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16190 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16191 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16194 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16195 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16196 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16197 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16198 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16199 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16202 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16203 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16204 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16205 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16206 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16207 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16208 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16209 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16210 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16211 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16212 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16213 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16214 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16215 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16216 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16217 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16218 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16219 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16220 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16221 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16222 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16223 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16224 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16225 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16228 static const struct builtin_description bdesc_1arg
[] =
16230 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16231 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16233 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16234 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16235 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16237 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16238 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16239 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16240 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16241 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16242 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16244 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16245 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16247 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16249 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16250 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16252 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16253 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16254 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16255 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16256 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16258 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16260 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16261 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16262 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16263 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16265 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16266 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16267 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16270 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16271 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16274 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16275 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16276 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16277 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16278 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16279 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16283 ix86_init_builtins (void)
16286 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
16293 ix86_init_mmx_sse_builtins (void)
16295 const struct builtin_description
* d
;
16298 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16299 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16300 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16301 tree V2DI_type_node
16302 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16303 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16304 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16305 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16306 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16307 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16308 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16310 tree pchar_type_node
= build_pointer_type (char_type_node
);
16311 tree pcchar_type_node
= build_pointer_type (
16312 build_type_variant (char_type_node
, 1, 0));
16313 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16314 tree pcfloat_type_node
= build_pointer_type (
16315 build_type_variant (float_type_node
, 1, 0));
16316 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16317 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16318 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16321 tree int_ftype_v4sf_v4sf
16322 = build_function_type_list (integer_type_node
,
16323 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16324 tree v4si_ftype_v4sf_v4sf
16325 = build_function_type_list (V4SI_type_node
,
16326 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16327 /* MMX/SSE/integer conversions. */
16328 tree int_ftype_v4sf
16329 = build_function_type_list (integer_type_node
,
16330 V4SF_type_node
, NULL_TREE
);
16331 tree int64_ftype_v4sf
16332 = build_function_type_list (long_long_integer_type_node
,
16333 V4SF_type_node
, NULL_TREE
);
16334 tree int_ftype_v8qi
16335 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16336 tree v4sf_ftype_v4sf_int
16337 = build_function_type_list (V4SF_type_node
,
16338 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16339 tree v4sf_ftype_v4sf_int64
16340 = build_function_type_list (V4SF_type_node
,
16341 V4SF_type_node
, long_long_integer_type_node
,
16343 tree v4sf_ftype_v4sf_v2si
16344 = build_function_type_list (V4SF_type_node
,
16345 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16347 /* Miscellaneous. */
16348 tree v8qi_ftype_v4hi_v4hi
16349 = build_function_type_list (V8QI_type_node
,
16350 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16351 tree v4hi_ftype_v2si_v2si
16352 = build_function_type_list (V4HI_type_node
,
16353 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16354 tree v4sf_ftype_v4sf_v4sf_int
16355 = build_function_type_list (V4SF_type_node
,
16356 V4SF_type_node
, V4SF_type_node
,
16357 integer_type_node
, NULL_TREE
);
16358 tree v2si_ftype_v4hi_v4hi
16359 = build_function_type_list (V2SI_type_node
,
16360 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16361 tree v4hi_ftype_v4hi_int
16362 = build_function_type_list (V4HI_type_node
,
16363 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16364 tree v4hi_ftype_v4hi_di
16365 = build_function_type_list (V4HI_type_node
,
16366 V4HI_type_node
, long_long_unsigned_type_node
,
16368 tree v2si_ftype_v2si_di
16369 = build_function_type_list (V2SI_type_node
,
16370 V2SI_type_node
, long_long_unsigned_type_node
,
16372 tree void_ftype_void
16373 = build_function_type (void_type_node
, void_list_node
);
16374 tree void_ftype_unsigned
16375 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16376 tree void_ftype_unsigned_unsigned
16377 = build_function_type_list (void_type_node
, unsigned_type_node
,
16378 unsigned_type_node
, NULL_TREE
);
16379 tree void_ftype_pcvoid_unsigned_unsigned
16380 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16381 unsigned_type_node
, unsigned_type_node
,
16383 tree unsigned_ftype_void
16384 = build_function_type (unsigned_type_node
, void_list_node
);
16385 tree v2si_ftype_v4sf
16386 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16387 /* Loads/stores. */
16388 tree void_ftype_v8qi_v8qi_pchar
16389 = build_function_type_list (void_type_node
,
16390 V8QI_type_node
, V8QI_type_node
,
16391 pchar_type_node
, NULL_TREE
);
16392 tree v4sf_ftype_pcfloat
16393 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16394 /* @@@ the type is bogus */
16395 tree v4sf_ftype_v4sf_pv2si
16396 = build_function_type_list (V4SF_type_node
,
16397 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16398 tree void_ftype_pv2si_v4sf
16399 = build_function_type_list (void_type_node
,
16400 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16401 tree void_ftype_pfloat_v4sf
16402 = build_function_type_list (void_type_node
,
16403 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16404 tree void_ftype_pdi_di
16405 = build_function_type_list (void_type_node
,
16406 pdi_type_node
, long_long_unsigned_type_node
,
16408 tree void_ftype_pv2di_v2di
16409 = build_function_type_list (void_type_node
,
16410 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16411 /* Normal vector unops. */
16412 tree v4sf_ftype_v4sf
16413 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16414 tree v16qi_ftype_v16qi
16415 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16416 tree v8hi_ftype_v8hi
16417 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16418 tree v4si_ftype_v4si
16419 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16420 tree v8qi_ftype_v8qi
16421 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16422 tree v4hi_ftype_v4hi
16423 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16425 /* Normal vector binops. */
16426 tree v4sf_ftype_v4sf_v4sf
16427 = build_function_type_list (V4SF_type_node
,
16428 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16429 tree v8qi_ftype_v8qi_v8qi
16430 = build_function_type_list (V8QI_type_node
,
16431 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16432 tree v4hi_ftype_v4hi_v4hi
16433 = build_function_type_list (V4HI_type_node
,
16434 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16435 tree v2si_ftype_v2si_v2si
16436 = build_function_type_list (V2SI_type_node
,
16437 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16438 tree di_ftype_di_di
16439 = build_function_type_list (long_long_unsigned_type_node
,
16440 long_long_unsigned_type_node
,
16441 long_long_unsigned_type_node
, NULL_TREE
);
16443 tree di_ftype_di_di_int
16444 = build_function_type_list (long_long_unsigned_type_node
,
16445 long_long_unsigned_type_node
,
16446 long_long_unsigned_type_node
,
16447 integer_type_node
, NULL_TREE
);
16449 tree v2si_ftype_v2sf
16450 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16451 tree v2sf_ftype_v2si
16452 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16453 tree v2si_ftype_v2si
16454 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16455 tree v2sf_ftype_v2sf
16456 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16457 tree v2sf_ftype_v2sf_v2sf
16458 = build_function_type_list (V2SF_type_node
,
16459 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16460 tree v2si_ftype_v2sf_v2sf
16461 = build_function_type_list (V2SI_type_node
,
16462 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16463 tree pint_type_node
= build_pointer_type (integer_type_node
);
16464 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16465 tree pcdouble_type_node
= build_pointer_type (
16466 build_type_variant (double_type_node
, 1, 0));
16467 tree int_ftype_v2df_v2df
16468 = build_function_type_list (integer_type_node
,
16469 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16471 tree void_ftype_pcvoid
16472 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16473 tree v4sf_ftype_v4si
16474 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16475 tree v4si_ftype_v4sf
16476 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16477 tree v2df_ftype_v4si
16478 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16479 tree v4si_ftype_v2df
16480 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16481 tree v2si_ftype_v2df
16482 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16483 tree v4sf_ftype_v2df
16484 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16485 tree v2df_ftype_v2si
16486 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16487 tree v2df_ftype_v4sf
16488 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16489 tree int_ftype_v2df
16490 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16491 tree int64_ftype_v2df
16492 = build_function_type_list (long_long_integer_type_node
,
16493 V2DF_type_node
, NULL_TREE
);
16494 tree v2df_ftype_v2df_int
16495 = build_function_type_list (V2DF_type_node
,
16496 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16497 tree v2df_ftype_v2df_int64
16498 = build_function_type_list (V2DF_type_node
,
16499 V2DF_type_node
, long_long_integer_type_node
,
16501 tree v4sf_ftype_v4sf_v2df
16502 = build_function_type_list (V4SF_type_node
,
16503 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16504 tree v2df_ftype_v2df_v4sf
16505 = build_function_type_list (V2DF_type_node
,
16506 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16507 tree v2df_ftype_v2df_v2df_int
16508 = build_function_type_list (V2DF_type_node
,
16509 V2DF_type_node
, V2DF_type_node
,
16512 tree v2df_ftype_v2df_pcdouble
16513 = build_function_type_list (V2DF_type_node
,
16514 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16515 tree void_ftype_pdouble_v2df
16516 = build_function_type_list (void_type_node
,
16517 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16518 tree void_ftype_pint_int
16519 = build_function_type_list (void_type_node
,
16520 pint_type_node
, integer_type_node
, NULL_TREE
);
16521 tree void_ftype_v16qi_v16qi_pchar
16522 = build_function_type_list (void_type_node
,
16523 V16QI_type_node
, V16QI_type_node
,
16524 pchar_type_node
, NULL_TREE
);
16525 tree v2df_ftype_pcdouble
16526 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16527 tree v2df_ftype_v2df_v2df
16528 = build_function_type_list (V2DF_type_node
,
16529 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16530 tree v16qi_ftype_v16qi_v16qi
16531 = build_function_type_list (V16QI_type_node
,
16532 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16533 tree v8hi_ftype_v8hi_v8hi
16534 = build_function_type_list (V8HI_type_node
,
16535 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16536 tree v4si_ftype_v4si_v4si
16537 = build_function_type_list (V4SI_type_node
,
16538 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16539 tree v2di_ftype_v2di_v2di
16540 = build_function_type_list (V2DI_type_node
,
16541 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16542 tree v2di_ftype_v2df_v2df
16543 = build_function_type_list (V2DI_type_node
,
16544 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16545 tree v2df_ftype_v2df
16546 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16547 tree v2di_ftype_v2di_int
16548 = build_function_type_list (V2DI_type_node
,
16549 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16550 tree v2di_ftype_v2di_v2di_int
16551 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16552 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16553 tree v4si_ftype_v4si_int
16554 = build_function_type_list (V4SI_type_node
,
16555 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16556 tree v8hi_ftype_v8hi_int
16557 = build_function_type_list (V8HI_type_node
,
16558 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16559 tree v8hi_ftype_v8hi_v2di
16560 = build_function_type_list (V8HI_type_node
,
16561 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
16562 tree v4si_ftype_v4si_v2di
16563 = build_function_type_list (V4SI_type_node
,
16564 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
16565 tree v4si_ftype_v8hi_v8hi
16566 = build_function_type_list (V4SI_type_node
,
16567 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16568 tree di_ftype_v8qi_v8qi
16569 = build_function_type_list (long_long_unsigned_type_node
,
16570 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16571 tree di_ftype_v2si_v2si
16572 = build_function_type_list (long_long_unsigned_type_node
,
16573 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16574 tree v2di_ftype_v16qi_v16qi
16575 = build_function_type_list (V2DI_type_node
,
16576 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16577 tree v2di_ftype_v4si_v4si
16578 = build_function_type_list (V2DI_type_node
,
16579 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16580 tree int_ftype_v16qi
16581 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16582 tree v16qi_ftype_pcchar
16583 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16584 tree void_ftype_pchar_v16qi
16585 = build_function_type_list (void_type_node
,
16586 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16588 tree v2di_ftype_v2di_unsigned_unsigned
16589 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16590 unsigned_type_node
, unsigned_type_node
,
16592 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16593 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
16594 unsigned_type_node
, unsigned_type_node
,
16596 tree v2di_ftype_v2di_v16qi
16597 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
16601 tree float128_type
;
16604 /* The __float80 type. */
16605 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16606 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16610 /* The __float80 type. */
16611 float80_type
= make_node (REAL_TYPE
);
16612 TYPE_PRECISION (float80_type
) = 80;
16613 layout_type (float80_type
);
16614 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16619 float128_type
= make_node (REAL_TYPE
);
16620 TYPE_PRECISION (float128_type
) = 128;
16621 layout_type (float128_type
);
16622 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16625 /* Add all builtins that are more or less simple operations on two
16627 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16629 /* Use one of the operands; the target can have a different mode for
16630 mask-generating compares. */
16631 enum machine_mode mode
;
16636 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16641 type
= v16qi_ftype_v16qi_v16qi
;
16644 type
= v8hi_ftype_v8hi_v8hi
;
16647 type
= v4si_ftype_v4si_v4si
;
16650 type
= v2di_ftype_v2di_v2di
;
16653 type
= v2df_ftype_v2df_v2df
;
16656 type
= v4sf_ftype_v4sf_v4sf
;
16659 type
= v8qi_ftype_v8qi_v8qi
;
16662 type
= v4hi_ftype_v4hi_v4hi
;
16665 type
= v2si_ftype_v2si_v2si
;
16668 type
= di_ftype_di_di
;
16672 gcc_unreachable ();
16675 /* Override for comparisons. */
16676 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16677 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
16678 type
= v4si_ftype_v4sf_v4sf
;
16680 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16681 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16682 type
= v2di_ftype_v2df_v2df
;
16684 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16687 /* Add all builtins that are more or less simple operations on 1 operand. */
16688 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16690 enum machine_mode mode
;
16695 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16700 type
= v16qi_ftype_v16qi
;
16703 type
= v8hi_ftype_v8hi
;
16706 type
= v4si_ftype_v4si
;
16709 type
= v2df_ftype_v2df
;
16712 type
= v4sf_ftype_v4sf
;
16715 type
= v8qi_ftype_v8qi
;
16718 type
= v4hi_ftype_v4hi
;
16721 type
= v2si_ftype_v2si
;
16728 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16731 /* Add the remaining MMX insns with somewhat more complicated types. */
16732 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
16733 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
16734 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
16735 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
16737 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
16738 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
16739 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
16741 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
16742 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
16744 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
16745 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
16747 /* comi/ucomi insns. */
16748 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16749 if (d
->mask
== MASK_SSE2
)
16750 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
16752 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
16754 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
16755 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
16756 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
16758 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
16759 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
16760 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
16761 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
16762 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
16763 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
16764 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
16765 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
16766 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
16767 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
16768 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
16770 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
16772 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
16773 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
16775 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
16776 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
16777 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
16778 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
16780 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
16781 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
16782 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
16783 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
16785 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
16787 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
16789 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
16790 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
16791 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
16792 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
16793 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
16794 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
16796 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
16798 /* Original 3DNow! */
16799 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
16800 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
16801 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
16802 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
16803 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
16804 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
16805 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
16806 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
16807 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
16808 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
16809 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
16810 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
16811 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
16812 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
16813 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
16814 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
16815 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
16816 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
16817 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
16818 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
16820 /* 3DNow! extension as used in the Athlon CPU. */
16821 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
16822 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
16823 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
16824 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
16825 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
16826 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
16829 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
16831 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
16832 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
16834 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
16835 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
16837 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
16838 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
16839 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
16840 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
16841 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
16843 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
16844 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
16845 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
16846 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
16848 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
16849 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
16851 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
16853 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
16854 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
16856 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
16857 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
16858 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
16859 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
16860 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
16862 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
16864 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
16865 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
16866 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
16867 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
16869 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
16870 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
16871 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
16873 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
16874 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
16875 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
16876 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
16878 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
16879 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
16880 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
16882 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
16883 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
16885 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
16886 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
16888 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
16889 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
16890 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
16892 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
16893 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
16894 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
16896 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
16897 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
16899 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
16900 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
16901 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
16902 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
16904 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
16905 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
16906 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
16907 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
16909 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
16910 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
16912 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
16914 /* Prescott New Instructions. */
16915 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
16916 void_ftype_pcvoid_unsigned_unsigned
,
16917 IX86_BUILTIN_MONITOR
);
16918 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
16919 void_ftype_unsigned_unsigned
,
16920 IX86_BUILTIN_MWAIT
);
16921 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
16923 IX86_BUILTIN_MOVSHDUP
);
16924 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
16926 IX86_BUILTIN_MOVSLDUP
);
16927 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
16928 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
16931 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
16932 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
16933 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
16934 IX86_BUILTIN_PALIGNR
);
16936 /* AMDFAM10 SSE4A New built-ins */
16937 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
16938 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
16939 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
16940 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
16941 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
16942 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
16943 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
16944 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
16945 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
16946 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
16947 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
16948 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
16950 /* Access to the vec_init patterns. */
16951 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
16952 integer_type_node
, NULL_TREE
);
16953 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
16954 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
16956 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
16957 short_integer_type_node
,
16958 short_integer_type_node
,
16959 short_integer_type_node
, NULL_TREE
);
16960 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
16961 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
16963 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
16964 char_type_node
, char_type_node
,
16965 char_type_node
, char_type_node
,
16966 char_type_node
, char_type_node
,
16967 char_type_node
, NULL_TREE
);
16968 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
16969 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
16971 /* Access to the vec_extract patterns. */
16972 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
16973 integer_type_node
, NULL_TREE
);
16974 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
16975 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
16977 ftype
= build_function_type_list (long_long_integer_type_node
,
16978 V2DI_type_node
, integer_type_node
,
16980 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
16981 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
16983 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
16984 integer_type_node
, NULL_TREE
);
16985 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
16986 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
16988 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
16989 integer_type_node
, NULL_TREE
);
16990 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
16991 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
16993 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
16994 integer_type_node
, NULL_TREE
);
16995 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
16996 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
16998 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
16999 integer_type_node
, NULL_TREE
);
17000 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17001 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17003 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17004 integer_type_node
, NULL_TREE
);
17005 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17006 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17008 /* Access to the vec_set patterns. */
17009 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17011 integer_type_node
, NULL_TREE
);
17012 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17013 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17015 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17017 integer_type_node
, NULL_TREE
);
17018 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17019 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17022 /* Errors in the source file can cause expand_expr to return const0_rtx
17023 where we expect a vector. To avoid crashing, use one of the vector
17024 clear instructions. */
17026 safe_vector_operand (rtx x
, enum machine_mode mode
)
17028 if (x
== const0_rtx
)
17029 x
= CONST0_RTX (mode
);
17033 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17036 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
17039 tree arg0
= TREE_VALUE (arglist
);
17040 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17041 rtx op0
= expand_normal (arg0
);
17042 rtx op1
= expand_normal (arg1
);
17043 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17044 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17045 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17047 if (VECTOR_MODE_P (mode0
))
17048 op0
= safe_vector_operand (op0
, mode0
);
17049 if (VECTOR_MODE_P (mode1
))
17050 op1
= safe_vector_operand (op1
, mode1
);
17052 if (optimize
|| !target
17053 || GET_MODE (target
) != tmode
17054 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17055 target
= gen_reg_rtx (tmode
);
17057 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17059 rtx x
= gen_reg_rtx (V4SImode
);
17060 emit_insn (gen_sse2_loadd (x
, op1
));
17061 op1
= gen_lowpart (TImode
, x
);
17064 /* The insn must want input operands in the same modes as the
17066 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17067 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17069 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17070 op0
= copy_to_mode_reg (mode0
, op0
);
17071 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17072 op1
= copy_to_mode_reg (mode1
, op1
);
17074 /* ??? Using ix86_fixup_binary_operands is problematic when
17075 we've got mismatched modes. Fake it. */
17081 if (tmode
== mode0
&& tmode
== mode1
)
17083 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17087 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17089 op0
= force_reg (mode0
, op0
);
17090 op1
= force_reg (mode1
, op1
);
17091 target
= gen_reg_rtx (tmode
);
17094 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17101 /* Subroutine of ix86_expand_builtin to take care of stores. */
17104 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
17107 tree arg0
= TREE_VALUE (arglist
);
17108 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17109 rtx op0
= expand_normal (arg0
);
17110 rtx op1
= expand_normal (arg1
);
17111 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17112 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17114 if (VECTOR_MODE_P (mode1
))
17115 op1
= safe_vector_operand (op1
, mode1
);
17117 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17118 op1
= copy_to_mode_reg (mode1
, op1
);
17120 pat
= GEN_FCN (icode
) (op0
, op1
);
17126 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17129 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
17130 rtx target
, int do_load
)
17133 tree arg0
= TREE_VALUE (arglist
);
17134 rtx op0
= expand_normal (arg0
);
17135 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17136 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17138 if (optimize
|| !target
17139 || GET_MODE (target
) != tmode
17140 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17141 target
= gen_reg_rtx (tmode
);
17143 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17146 if (VECTOR_MODE_P (mode0
))
17147 op0
= safe_vector_operand (op0
, mode0
);
17149 if ((optimize
&& !register_operand (op0
, mode0
))
17150 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17151 op0
= copy_to_mode_reg (mode0
, op0
);
17154 pat
= GEN_FCN (icode
) (target
, op0
);
17161 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17162 sqrtss, rsqrtss, rcpss. */
17165 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
17168 tree arg0
= TREE_VALUE (arglist
);
17169 rtx op1
, op0
= expand_normal (arg0
);
17170 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17171 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17173 if (optimize
|| !target
17174 || GET_MODE (target
) != tmode
17175 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17176 target
= gen_reg_rtx (tmode
);
17178 if (VECTOR_MODE_P (mode0
))
17179 op0
= safe_vector_operand (op0
, mode0
);
17181 if ((optimize
&& !register_operand (op0
, mode0
))
17182 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17183 op0
= copy_to_mode_reg (mode0
, op0
);
17186 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17187 op1
= copy_to_mode_reg (mode0
, op1
);
17189 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17196 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17199 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
17203 tree arg0
= TREE_VALUE (arglist
);
17204 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17205 rtx op0
= expand_normal (arg0
);
17206 rtx op1
= expand_normal (arg1
);
17208 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17209 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17210 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17211 enum rtx_code comparison
= d
->comparison
;
17213 if (VECTOR_MODE_P (mode0
))
17214 op0
= safe_vector_operand (op0
, mode0
);
17215 if (VECTOR_MODE_P (mode1
))
17216 op1
= safe_vector_operand (op1
, mode1
);
17218 /* Swap operands if we have a comparison that isn't available in
17220 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17222 rtx tmp
= gen_reg_rtx (mode1
);
17223 emit_move_insn (tmp
, op1
);
17228 if (optimize
|| !target
17229 || GET_MODE (target
) != tmode
17230 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17231 target
= gen_reg_rtx (tmode
);
17233 if ((optimize
&& !register_operand (op0
, mode0
))
17234 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17235 op0
= copy_to_mode_reg (mode0
, op0
);
17236 if ((optimize
&& !register_operand (op1
, mode1
))
17237 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17238 op1
= copy_to_mode_reg (mode1
, op1
);
17240 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17241 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17248 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17251 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
17255 tree arg0
= TREE_VALUE (arglist
);
17256 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17257 rtx op0
= expand_normal (arg0
);
17258 rtx op1
= expand_normal (arg1
);
17260 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17261 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17262 enum rtx_code comparison
= d
->comparison
;
17264 if (VECTOR_MODE_P (mode0
))
17265 op0
= safe_vector_operand (op0
, mode0
);
17266 if (VECTOR_MODE_P (mode1
))
17267 op1
= safe_vector_operand (op1
, mode1
);
17269 /* Swap operands if we have a comparison that isn't available in
17271 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17278 target
= gen_reg_rtx (SImode
);
17279 emit_move_insn (target
, const0_rtx
);
17280 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17282 if ((optimize
&& !register_operand (op0
, mode0
))
17283 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17284 op0
= copy_to_mode_reg (mode0
, op0
);
17285 if ((optimize
&& !register_operand (op1
, mode1
))
17286 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17287 op1
= copy_to_mode_reg (mode1
, op1
);
17289 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17290 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17294 emit_insn (gen_rtx_SET (VOIDmode
,
17295 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17296 gen_rtx_fmt_ee (comparison
, QImode
,
17300 return SUBREG_REG (target
);
17303 /* Return the integer constant in ARG. Constrain it to be in the range
17304 of the subparts of VEC_TYPE; issue an error if not. */
17307 get_element_number (tree vec_type
, tree arg
)
17309 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17311 if (!host_integerp (arg
, 1)
17312 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17314 error ("selector must be an integer constant in the range 0..%wi", max
);
17321 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17322 ix86_expand_vector_init. We DO have language-level syntax for this, in
17323 the form of (type){ init-list }. Except that since we can't place emms
17324 instructions from inside the compiler, we can't allow the use of MMX
17325 registers unless the user explicitly asks for it. So we do *not* define
17326 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17327 we have builtins invoked by mmintrin.h that gives us license to emit
17328 these sorts of instructions. */
17331 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
17333 enum machine_mode tmode
= TYPE_MODE (type
);
17334 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17335 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17336 rtvec v
= rtvec_alloc (n_elt
);
17338 gcc_assert (VECTOR_MODE_P (tmode
));
17340 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
17342 rtx x
= expand_normal (TREE_VALUE (arglist
));
17343 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17346 gcc_assert (arglist
== NULL
);
17348 if (!target
|| !register_operand (target
, tmode
))
17349 target
= gen_reg_rtx (tmode
);
17351 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17355 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17356 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17357 had a language-level syntax for referencing vector elements. */
17360 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
17362 enum machine_mode tmode
, mode0
;
17367 arg0
= TREE_VALUE (arglist
);
17368 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17370 op0
= expand_normal (arg0
);
17371 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17373 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17374 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17375 gcc_assert (VECTOR_MODE_P (mode0
));
17377 op0
= force_reg (mode0
, op0
);
17379 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17380 target
= gen_reg_rtx (tmode
);
17382 ix86_expand_vector_extract (true, target
, op0
, elt
);
17387 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17388 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17389 a language-level syntax for referencing vector elements. */
17392 ix86_expand_vec_set_builtin (tree arglist
)
17394 enum machine_mode tmode
, mode1
;
17395 tree arg0
, arg1
, arg2
;
17399 arg0
= TREE_VALUE (arglist
);
17400 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17401 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17403 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17404 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17405 gcc_assert (VECTOR_MODE_P (tmode
));
17407 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17408 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17409 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17411 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17412 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17414 op0
= force_reg (tmode
, op0
);
17415 op1
= force_reg (mode1
, op1
);
17417 ix86_expand_vector_set (true, op0
, op1
, elt
);
17422 /* Expand an expression EXP that calls a built-in function,
17423 with result going to TARGET if that's convenient
17424 (and in mode MODE if that's convenient).
17425 SUBTARGET may be used as the target for computing one of EXP's operands.
17426 IGNORE is nonzero if the value is to be ignored. */
17429 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17430 enum machine_mode mode ATTRIBUTE_UNUSED
,
17431 int ignore ATTRIBUTE_UNUSED
)
17433 const struct builtin_description
*d
;
17435 enum insn_code icode
;
17436 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
17437 tree arglist
= TREE_OPERAND (exp
, 1);
17438 tree arg0
, arg1
, arg2
, arg3
;
17439 rtx op0
, op1
, op2
, op3
, pat
;
17440 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17441 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17445 case IX86_BUILTIN_EMMS
:
17446 emit_insn (gen_mmx_emms ());
17449 case IX86_BUILTIN_SFENCE
:
17450 emit_insn (gen_sse_sfence ());
17453 case IX86_BUILTIN_MASKMOVQ
:
17454 case IX86_BUILTIN_MASKMOVDQU
:
17455 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17456 ? CODE_FOR_mmx_maskmovq
17457 : CODE_FOR_sse2_maskmovdqu
);
17458 /* Note the arg order is different from the operand order. */
17459 arg1
= TREE_VALUE (arglist
);
17460 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
17461 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17462 op0
= expand_normal (arg0
);
17463 op1
= expand_normal (arg1
);
17464 op2
= expand_normal (arg2
);
17465 mode0
= insn_data
[icode
].operand
[0].mode
;
17466 mode1
= insn_data
[icode
].operand
[1].mode
;
17467 mode2
= insn_data
[icode
].operand
[2].mode
;
17469 op0
= force_reg (Pmode
, op0
);
17470 op0
= gen_rtx_MEM (mode1
, op0
);
17472 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17473 op0
= copy_to_mode_reg (mode0
, op0
);
17474 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17475 op1
= copy_to_mode_reg (mode1
, op1
);
17476 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17477 op2
= copy_to_mode_reg (mode2
, op2
);
17478 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17484 case IX86_BUILTIN_SQRTSS
:
17485 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
17486 case IX86_BUILTIN_RSQRTSS
:
17487 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
17488 case IX86_BUILTIN_RCPSS
:
17489 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
17491 case IX86_BUILTIN_LOADUPS
:
17492 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
17494 case IX86_BUILTIN_STOREUPS
:
17495 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
17497 case IX86_BUILTIN_LOADHPS
:
17498 case IX86_BUILTIN_LOADLPS
:
17499 case IX86_BUILTIN_LOADHPD
:
17500 case IX86_BUILTIN_LOADLPD
:
17501 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17502 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17503 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17504 : CODE_FOR_sse2_loadlpd
);
17505 arg0
= TREE_VALUE (arglist
);
17506 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17507 op0
= expand_normal (arg0
);
17508 op1
= expand_normal (arg1
);
17509 tmode
= insn_data
[icode
].operand
[0].mode
;
17510 mode0
= insn_data
[icode
].operand
[1].mode
;
17511 mode1
= insn_data
[icode
].operand
[2].mode
;
17513 op0
= force_reg (mode0
, op0
);
17514 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17515 if (optimize
|| target
== 0
17516 || GET_MODE (target
) != tmode
17517 || !register_operand (target
, tmode
))
17518 target
= gen_reg_rtx (tmode
);
17519 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17525 case IX86_BUILTIN_STOREHPS
:
17526 case IX86_BUILTIN_STORELPS
:
17527 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17528 : CODE_FOR_sse_storelps
);
17529 arg0
= TREE_VALUE (arglist
);
17530 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17531 op0
= expand_normal (arg0
);
17532 op1
= expand_normal (arg1
);
17533 mode0
= insn_data
[icode
].operand
[0].mode
;
17534 mode1
= insn_data
[icode
].operand
[1].mode
;
17536 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17537 op1
= force_reg (mode1
, op1
);
17539 pat
= GEN_FCN (icode
) (op0
, op1
);
17545 case IX86_BUILTIN_MOVNTPS
:
17546 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
17547 case IX86_BUILTIN_MOVNTQ
:
17548 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
17550 case IX86_BUILTIN_LDMXCSR
:
17551 op0
= expand_normal (TREE_VALUE (arglist
));
17552 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17553 emit_move_insn (target
, op0
);
17554 emit_insn (gen_sse_ldmxcsr (target
));
17557 case IX86_BUILTIN_STMXCSR
:
17558 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17559 emit_insn (gen_sse_stmxcsr (target
));
17560 return copy_to_mode_reg (SImode
, target
);
17562 case IX86_BUILTIN_SHUFPS
:
17563 case IX86_BUILTIN_SHUFPD
:
17564 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17565 ? CODE_FOR_sse_shufps
17566 : CODE_FOR_sse2_shufpd
);
17567 arg0
= TREE_VALUE (arglist
);
17568 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17569 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17570 op0
= expand_normal (arg0
);
17571 op1
= expand_normal (arg1
);
17572 op2
= expand_normal (arg2
);
17573 tmode
= insn_data
[icode
].operand
[0].mode
;
17574 mode0
= insn_data
[icode
].operand
[1].mode
;
17575 mode1
= insn_data
[icode
].operand
[2].mode
;
17576 mode2
= insn_data
[icode
].operand
[3].mode
;
17578 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17579 op0
= copy_to_mode_reg (mode0
, op0
);
17580 if ((optimize
&& !register_operand (op1
, mode1
))
17581 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17582 op1
= copy_to_mode_reg (mode1
, op1
);
17583 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17585 /* @@@ better error message */
17586 error ("mask must be an immediate");
17587 return gen_reg_rtx (tmode
);
17589 if (optimize
|| target
== 0
17590 || GET_MODE (target
) != tmode
17591 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17592 target
= gen_reg_rtx (tmode
);
17593 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17599 case IX86_BUILTIN_PSHUFW
:
17600 case IX86_BUILTIN_PSHUFD
:
17601 case IX86_BUILTIN_PSHUFHW
:
17602 case IX86_BUILTIN_PSHUFLW
:
17603 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17604 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17605 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17606 : CODE_FOR_mmx_pshufw
);
17607 arg0
= TREE_VALUE (arglist
);
17608 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17609 op0
= expand_normal (arg0
);
17610 op1
= expand_normal (arg1
);
17611 tmode
= insn_data
[icode
].operand
[0].mode
;
17612 mode1
= insn_data
[icode
].operand
[1].mode
;
17613 mode2
= insn_data
[icode
].operand
[2].mode
;
17615 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17616 op0
= copy_to_mode_reg (mode1
, op0
);
17617 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17619 /* @@@ better error message */
17620 error ("mask must be an immediate");
17624 || GET_MODE (target
) != tmode
17625 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17626 target
= gen_reg_rtx (tmode
);
17627 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17633 case IX86_BUILTIN_PSLLDQI128
:
17634 case IX86_BUILTIN_PSRLDQI128
:
17635 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17636 : CODE_FOR_sse2_lshrti3
);
17637 arg0
= TREE_VALUE (arglist
);
17638 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17639 op0
= expand_normal (arg0
);
17640 op1
= expand_normal (arg1
);
17641 tmode
= insn_data
[icode
].operand
[0].mode
;
17642 mode1
= insn_data
[icode
].operand
[1].mode
;
17643 mode2
= insn_data
[icode
].operand
[2].mode
;
17645 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17647 op0
= copy_to_reg (op0
);
17648 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17650 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17652 error ("shift must be an immediate");
17655 target
= gen_reg_rtx (V2DImode
);
17656 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
17662 case IX86_BUILTIN_FEMMS
:
17663 emit_insn (gen_mmx_femms ());
17666 case IX86_BUILTIN_PAVGUSB
:
17667 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
17669 case IX86_BUILTIN_PF2ID
:
17670 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
17672 case IX86_BUILTIN_PFACC
:
17673 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
17675 case IX86_BUILTIN_PFADD
:
17676 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
17678 case IX86_BUILTIN_PFCMPEQ
:
17679 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
17681 case IX86_BUILTIN_PFCMPGE
:
17682 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
17684 case IX86_BUILTIN_PFCMPGT
:
17685 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
17687 case IX86_BUILTIN_PFMAX
:
17688 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
17690 case IX86_BUILTIN_PFMIN
:
17691 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
17693 case IX86_BUILTIN_PFMUL
:
17694 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
17696 case IX86_BUILTIN_PFRCP
:
17697 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
17699 case IX86_BUILTIN_PFRCPIT1
:
17700 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
17702 case IX86_BUILTIN_PFRCPIT2
:
17703 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
17705 case IX86_BUILTIN_PFRSQIT1
:
17706 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
17708 case IX86_BUILTIN_PFRSQRT
:
17709 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
17711 case IX86_BUILTIN_PFSUB
:
17712 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
17714 case IX86_BUILTIN_PFSUBR
:
17715 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
17717 case IX86_BUILTIN_PI2FD
:
17718 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
17720 case IX86_BUILTIN_PMULHRW
:
17721 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
17723 case IX86_BUILTIN_PF2IW
:
17724 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
17726 case IX86_BUILTIN_PFNACC
:
17727 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
17729 case IX86_BUILTIN_PFPNACC
:
17730 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
17732 case IX86_BUILTIN_PI2FW
:
17733 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
17735 case IX86_BUILTIN_PSWAPDSI
:
17736 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
17738 case IX86_BUILTIN_PSWAPDSF
:
17739 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
17741 case IX86_BUILTIN_SQRTSD
:
17742 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
17743 case IX86_BUILTIN_LOADUPD
:
17744 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
17745 case IX86_BUILTIN_STOREUPD
:
17746 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
17748 case IX86_BUILTIN_MFENCE
:
17749 emit_insn (gen_sse2_mfence ());
17751 case IX86_BUILTIN_LFENCE
:
17752 emit_insn (gen_sse2_lfence ());
17755 case IX86_BUILTIN_CLFLUSH
:
17756 arg0
= TREE_VALUE (arglist
);
17757 op0
= expand_normal (arg0
);
17758 icode
= CODE_FOR_sse2_clflush
;
17759 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
17760 op0
= copy_to_mode_reg (Pmode
, op0
);
17762 emit_insn (gen_sse2_clflush (op0
));
17765 case IX86_BUILTIN_MOVNTPD
:
17766 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
17767 case IX86_BUILTIN_MOVNTDQ
:
17768 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
17769 case IX86_BUILTIN_MOVNTI
:
17770 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
17772 case IX86_BUILTIN_LOADDQU
:
17773 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
17774 case IX86_BUILTIN_STOREDQU
:
17775 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
17777 case IX86_BUILTIN_MONITOR
:
17778 arg0
= TREE_VALUE (arglist
);
17779 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17780 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17781 op0
= expand_normal (arg0
);
17782 op1
= expand_normal (arg1
);
17783 op2
= expand_normal (arg2
);
17785 op0
= copy_to_mode_reg (Pmode
, op0
);
17787 op1
= copy_to_mode_reg (SImode
, op1
);
17789 op2
= copy_to_mode_reg (SImode
, op2
);
17791 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
17793 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
17796 case IX86_BUILTIN_MWAIT
:
17797 arg0
= TREE_VALUE (arglist
);
17798 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17799 op0
= expand_normal (arg0
);
17800 op1
= expand_normal (arg1
);
17802 op0
= copy_to_mode_reg (SImode
, op0
);
17804 op1
= copy_to_mode_reg (SImode
, op1
);
17805 emit_insn (gen_sse3_mwait (op0
, op1
));
17808 case IX86_BUILTIN_LDDQU
:
17809 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
17812 case IX86_BUILTIN_PALIGNR
:
17813 case IX86_BUILTIN_PALIGNR128
:
17814 if (fcode
== IX86_BUILTIN_PALIGNR
)
17816 icode
= CODE_FOR_ssse3_palignrdi
;
17821 icode
= CODE_FOR_ssse3_palignrti
;
17824 arg0
= TREE_VALUE (arglist
);
17825 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17826 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17827 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
17828 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
17829 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
17830 tmode
= insn_data
[icode
].operand
[0].mode
;
17831 mode1
= insn_data
[icode
].operand
[1].mode
;
17832 mode2
= insn_data
[icode
].operand
[2].mode
;
17833 mode3
= insn_data
[icode
].operand
[3].mode
;
17835 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17837 op0
= copy_to_reg (op0
);
17838 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17840 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17842 op1
= copy_to_reg (op1
);
17843 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
17845 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
17847 error ("shift must be an immediate");
17850 target
= gen_reg_rtx (mode
);
17851 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
17858 case IX86_BUILTIN_MOVNTSD
:
17859 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, arglist
);
17861 case IX86_BUILTIN_MOVNTSS
:
17862 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, arglist
);
17864 case IX86_BUILTIN_INSERTQ
:
17865 case IX86_BUILTIN_EXTRQ
:
17866 icode
= (fcode
== IX86_BUILTIN_EXTRQ
17867 ? CODE_FOR_sse4a_extrq
17868 : CODE_FOR_sse4a_insertq
);
17869 arg0
= TREE_VALUE (arglist
);
17870 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17871 op0
= expand_normal (arg0
);
17872 op1
= expand_normal (arg1
);
17873 tmode
= insn_data
[icode
].operand
[0].mode
;
17874 mode1
= insn_data
[icode
].operand
[1].mode
;
17875 mode2
= insn_data
[icode
].operand
[2].mode
;
17876 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17877 op0
= copy_to_mode_reg (mode1
, op0
);
17878 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17879 op1
= copy_to_mode_reg (mode2
, op1
);
17880 if (optimize
|| target
== 0
17881 || GET_MODE (target
) != tmode
17882 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17883 target
= gen_reg_rtx (tmode
);
17884 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17890 case IX86_BUILTIN_EXTRQI
:
17891 icode
= CODE_FOR_sse4a_extrqi
;
17892 arg0
= TREE_VALUE (arglist
);
17893 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17894 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17895 op0
= expand_normal (arg0
);
17896 op1
= expand_normal (arg1
);
17897 op2
= expand_normal (arg2
);
17898 tmode
= insn_data
[icode
].operand
[0].mode
;
17899 mode1
= insn_data
[icode
].operand
[1].mode
;
17900 mode2
= insn_data
[icode
].operand
[2].mode
;
17901 mode3
= insn_data
[icode
].operand
[3].mode
;
17902 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17903 op0
= copy_to_mode_reg (mode1
, op0
);
17904 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17906 error ("index mask must be an immediate");
17907 return gen_reg_rtx (tmode
);
17909 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
17911 error ("length mask must be an immediate");
17912 return gen_reg_rtx (tmode
);
17914 if (optimize
|| target
== 0
17915 || GET_MODE (target
) != tmode
17916 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17917 target
= gen_reg_rtx (tmode
);
17918 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17924 case IX86_BUILTIN_INSERTQI
:
17925 icode
= CODE_FOR_sse4a_insertqi
;
17926 arg0
= TREE_VALUE (arglist
);
17927 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17928 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17929 arg3
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
17930 op0
= expand_normal (arg0
);
17931 op1
= expand_normal (arg1
);
17932 op2
= expand_normal (arg2
);
17933 op3
= expand_normal (arg3
);
17934 tmode
= insn_data
[icode
].operand
[0].mode
;
17935 mode1
= insn_data
[icode
].operand
[1].mode
;
17936 mode2
= insn_data
[icode
].operand
[2].mode
;
17937 mode3
= insn_data
[icode
].operand
[3].mode
;
17938 mode4
= insn_data
[icode
].operand
[4].mode
;
17940 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17941 op0
= copy_to_mode_reg (mode1
, op0
);
17943 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17944 op1
= copy_to_mode_reg (mode2
, op1
);
17946 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
17948 error ("index mask must be an immediate");
17949 return gen_reg_rtx (tmode
);
17951 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
17953 error ("length mask must be an immediate");
17954 return gen_reg_rtx (tmode
);
17956 if (optimize
|| target
== 0
17957 || GET_MODE (target
) != tmode
17958 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17959 target
= gen_reg_rtx (tmode
);
17960 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
17966 case IX86_BUILTIN_VEC_INIT_V2SI
:
17967 case IX86_BUILTIN_VEC_INIT_V4HI
:
17968 case IX86_BUILTIN_VEC_INIT_V8QI
:
17969 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
17971 case IX86_BUILTIN_VEC_EXT_V2DF
:
17972 case IX86_BUILTIN_VEC_EXT_V2DI
:
17973 case IX86_BUILTIN_VEC_EXT_V4SF
:
17974 case IX86_BUILTIN_VEC_EXT_V4SI
:
17975 case IX86_BUILTIN_VEC_EXT_V8HI
:
17976 case IX86_BUILTIN_VEC_EXT_V2SI
:
17977 case IX86_BUILTIN_VEC_EXT_V4HI
:
17978 return ix86_expand_vec_ext_builtin (arglist
, target
);
17980 case IX86_BUILTIN_VEC_SET_V8HI
:
17981 case IX86_BUILTIN_VEC_SET_V4HI
:
17982 return ix86_expand_vec_set_builtin (arglist
);
17988 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17989 if (d
->code
== fcode
)
17991 /* Compares are treated specially. */
17992 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17993 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
17994 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17995 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17996 return ix86_expand_sse_compare (d
, arglist
, target
);
17998 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
18001 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18002 if (d
->code
== fcode
)
18003 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
18005 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18006 if (d
->code
== fcode
)
18007 return ix86_expand_sse_comi (d
, arglist
, target
);
18009 gcc_unreachable ();
18012 /* Returns a function decl for a vectorized version of the builtin function
18013 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18014 if it is not available. */
18017 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18020 enum machine_mode in_mode
, out_mode
;
18023 if (TREE_CODE (type_out
) != VECTOR_TYPE
18024 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18027 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18028 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18029 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18030 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18034 case BUILT_IN_SQRT
:
18035 if (out_mode
== DFmode
&& out_n
== 2
18036 && in_mode
== DFmode
&& in_n
== 2)
18037 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18040 case BUILT_IN_SQRTF
:
18041 if (out_mode
== SFmode
&& out_n
== 4
18042 && in_mode
== SFmode
&& in_n
== 4)
18043 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18046 case BUILT_IN_LRINTF
:
18047 if (out_mode
== SImode
&& out_n
== 4
18048 && in_mode
== SFmode
&& in_n
== 4)
18049 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18059 /* Store OPERAND to the memory after reload is completed. This means
18060 that we can't easily use assign_stack_local. */
18062 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18066 gcc_assert (reload_completed
);
18067 if (TARGET_RED_ZONE
)
18069 result
= gen_rtx_MEM (mode
,
18070 gen_rtx_PLUS (Pmode
,
18072 GEN_INT (-RED_ZONE_SIZE
)));
18073 emit_move_insn (result
, operand
);
18075 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18081 operand
= gen_lowpart (DImode
, operand
);
18085 gen_rtx_SET (VOIDmode
,
18086 gen_rtx_MEM (DImode
,
18087 gen_rtx_PRE_DEC (DImode
,
18088 stack_pointer_rtx
)),
18092 gcc_unreachable ();
18094 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18103 split_di (&operand
, 1, operands
, operands
+ 1);
18105 gen_rtx_SET (VOIDmode
,
18106 gen_rtx_MEM (SImode
,
18107 gen_rtx_PRE_DEC (Pmode
,
18108 stack_pointer_rtx
)),
18111 gen_rtx_SET (VOIDmode
,
18112 gen_rtx_MEM (SImode
,
18113 gen_rtx_PRE_DEC (Pmode
,
18114 stack_pointer_rtx
)),
18119 /* Store HImodes as SImodes. */
18120 operand
= gen_lowpart (SImode
, operand
);
18124 gen_rtx_SET (VOIDmode
,
18125 gen_rtx_MEM (GET_MODE (operand
),
18126 gen_rtx_PRE_DEC (SImode
,
18127 stack_pointer_rtx
)),
18131 gcc_unreachable ();
18133 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18138 /* Free operand from the memory. */
18140 ix86_free_from_memory (enum machine_mode mode
)
18142 if (!TARGET_RED_ZONE
)
18146 if (mode
== DImode
|| TARGET_64BIT
)
18150 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18151 to pop or add instruction if registers are available. */
18152 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18153 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18158 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18159 QImode must go into class Q_REGS.
18160 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18161 movdf to do mem-to-mem moves through integer regs. */
18163 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18165 enum machine_mode mode
= GET_MODE (x
);
18167 /* We're only allowed to return a subclass of CLASS. Many of the
18168 following checks fail for NO_REGS, so eliminate that early. */
18169 if (class == NO_REGS
)
18172 /* All classes can load zeros. */
18173 if (x
== CONST0_RTX (mode
))
18176 /* Force constants into memory if we are loading a (nonzero) constant into
18177 an MMX or SSE register. This is because there are no MMX/SSE instructions
18178 to load from a constant. */
18180 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18183 /* Prefer SSE regs only, if we can use them for math. */
18184 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18185 return SSE_CLASS_P (class) ? class : NO_REGS
;
18187 /* Floating-point constants need more complex checks. */
18188 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18190 /* General regs can load everything. */
18191 if (reg_class_subset_p (class, GENERAL_REGS
))
18194 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18195 zero above. We only want to wind up preferring 80387 registers if
18196 we plan on doing computation with them. */
18198 && standard_80387_constant_p (x
))
18200 /* Limit class to non-sse. */
18201 if (class == FLOAT_SSE_REGS
)
18203 if (class == FP_TOP_SSE_REGS
)
18205 if (class == FP_SECOND_SSE_REGS
)
18206 return FP_SECOND_REG
;
18207 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18214 /* Generally when we see PLUS here, it's the function invariant
18215 (plus soft-fp const_int). Which can only be computed into general
18217 if (GET_CODE (x
) == PLUS
)
18218 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18220 /* QImode constants are easy to load, but non-constant QImode data
18221 must go into Q_REGS. */
18222 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18224 if (reg_class_subset_p (class, Q_REGS
))
18226 if (reg_class_subset_p (Q_REGS
, class))
18234 /* Discourage putting floating-point values in SSE registers unless
18235 SSE math is being used, and likewise for the 387 registers. */
18237 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18239 enum machine_mode mode
= GET_MODE (x
);
18241 /* Restrict the output reload class to the register bank that we are doing
18242 math on. If we would like not to return a subset of CLASS, reject this
18243 alternative: if reload cannot do this, it will still use its choice. */
18244 mode
= GET_MODE (x
);
18245 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18246 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18248 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18250 if (class == FP_TOP_SSE_REGS
)
18252 else if (class == FP_SECOND_SSE_REGS
)
18253 return FP_SECOND_REG
;
18255 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18261 /* If we are copying between general and FP registers, we need a memory
18262 location. The same is true for SSE and MMX registers.
18264 The macro can't work reliably when one of the CLASSES is class containing
18265 registers from multiple units (SSE, MMX, integer). We avoid this by never
18266 combining those units in single alternative in the machine description.
18267 Ensure that this constraint holds to avoid unexpected surprises.
18269 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18270 enforce these sanity checks. */
18273 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18274 enum machine_mode mode
, int strict
)
18276 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18277 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18278 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18279 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18280 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18281 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18283 gcc_assert (!strict
);
18287 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18290 /* ??? This is a lie. We do have moves between mmx/general, and for
18291 mmx/sse2. But by saying we need secondary memory we discourage the
18292 register allocator from using the mmx registers unless needed. */
18293 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18296 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18298 /* SSE1 doesn't have any direct moves from other classes. */
18302 /* If the target says that inter-unit moves are more expensive
18303 than moving through memory, then don't generate them. */
18304 if (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
)
18307 /* Between SSE and general, we have moves no larger than word size. */
18308 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18311 /* ??? For the cost of one register reformat penalty, we could use
18312 the same instructions to move SFmode and DFmode data, but the
18313 relevant move patterns don't support those alternatives. */
18314 if (mode
== SFmode
|| mode
== DFmode
)
18321 /* Return true if the registers in CLASS cannot represent the change from
18322 modes FROM to TO. */
18325 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18326 enum reg_class
class)
18331 /* x87 registers can't do subreg at all, as all values are reformatted
18332 to extended precision. */
18333 if (MAYBE_FLOAT_CLASS_P (class))
18336 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18338 /* Vector registers do not support QI or HImode loads. If we don't
18339 disallow a change to these modes, reload will assume it's ok to
18340 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18341 the vec_dupv4hi pattern. */
18342 if (GET_MODE_SIZE (from
) < 4)
18345 /* Vector registers do not support subreg with nonzero offsets, which
18346 are otherwise valid for integer registers. Since we can't see
18347 whether we have a nonzero offset from here, prohibit all
18348 nonparadoxical subregs changing size. */
18349 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18356 /* Return the cost of moving data from a register in class CLASS1 to
18357 one in class CLASS2.
18359 It is not required that the cost always equal 2 when FROM is the same as TO;
18360 on some machines it is expensive to move between registers if they are not
18361 general registers. */
18364 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18365 enum reg_class class2
)
18367 /* In case we require secondary memory, compute cost of the store followed
18368 by load. In order to avoid bad register allocation choices, we need
18369 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18371 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
18375 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18376 MEMORY_MOVE_COST (mode
, class1
, 1));
18377 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18378 MEMORY_MOVE_COST (mode
, class2
, 1));
18380 /* In case of copying from general_purpose_register we may emit multiple
18381 stores followed by single load causing memory size mismatch stall.
18382 Count this as arbitrarily high cost of 20. */
18383 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18386 /* In the case of FP/MMX moves, the registers actually overlap, and we
18387 have to switch modes in order to treat them differently. */
18388 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18389 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18395 /* Moves between SSE/MMX and integer unit are expensive. */
18396 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18397 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18398 return ix86_cost
->mmxsse_to_integer
;
18399 if (MAYBE_FLOAT_CLASS_P (class1
))
18400 return ix86_cost
->fp_move
;
18401 if (MAYBE_SSE_CLASS_P (class1
))
18402 return ix86_cost
->sse_move
;
18403 if (MAYBE_MMX_CLASS_P (class1
))
18404 return ix86_cost
->mmx_move
;
18408 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18411 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18413 /* Flags and only flags can only hold CCmode values. */
18414 if (CC_REGNO_P (regno
))
18415 return GET_MODE_CLASS (mode
) == MODE_CC
;
18416 if (GET_MODE_CLASS (mode
) == MODE_CC
18417 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18418 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18420 if (FP_REGNO_P (regno
))
18421 return VALID_FP_MODE_P (mode
);
18422 if (SSE_REGNO_P (regno
))
18424 /* We implement the move patterns for all vector modes into and
18425 out of SSE registers, even when no operation instructions
18427 return (VALID_SSE_REG_MODE (mode
)
18428 || VALID_SSE2_REG_MODE (mode
)
18429 || VALID_MMX_REG_MODE (mode
)
18430 || VALID_MMX_REG_MODE_3DNOW (mode
));
18432 if (MMX_REGNO_P (regno
))
18434 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18435 so if the register is available at all, then we can move data of
18436 the given mode into or out of it. */
18437 return (VALID_MMX_REG_MODE (mode
)
18438 || VALID_MMX_REG_MODE_3DNOW (mode
));
18441 if (mode
== QImode
)
18443 /* Take care for QImode values - they can be in non-QI regs,
18444 but then they do cause partial register stalls. */
18445 if (regno
< 4 || TARGET_64BIT
)
18447 if (!TARGET_PARTIAL_REG_STALL
)
18449 return reload_in_progress
|| reload_completed
;
18451 /* We handle both integer and floats in the general purpose registers. */
18452 else if (VALID_INT_MODE_P (mode
))
18454 else if (VALID_FP_MODE_P (mode
))
18456 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18457 on to use that value in smaller contexts, this can easily force a
18458 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18459 supporting DImode, allow it. */
18460 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18466 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18467 tieable integer mode. */
18470 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18479 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18482 return TARGET_64BIT
;
18489 /* Return true if MODE1 is accessible in a register that can hold MODE2
18490 without copying. That is, all register classes that can hold MODE2
18491 can also hold MODE1. */
18494 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18496 if (mode1
== mode2
)
18499 if (ix86_tieable_integer_mode_p (mode1
)
18500 && ix86_tieable_integer_mode_p (mode2
))
18503 /* MODE2 being XFmode implies fp stack or general regs, which means we
18504 can tie any smaller floating point modes to it. Note that we do not
18505 tie this with TFmode. */
18506 if (mode2
== XFmode
)
18507 return mode1
== SFmode
|| mode1
== DFmode
;
18509 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18510 that we can tie it with SFmode. */
18511 if (mode2
== DFmode
)
18512 return mode1
== SFmode
;
18514 /* If MODE2 is only appropriate for an SSE register, then tie with
18515 any other mode acceptable to SSE registers. */
18516 if (GET_MODE_SIZE (mode2
) >= 8
18517 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18518 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
18520 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18521 with any other mode acceptable to MMX registers. */
18522 if (GET_MODE_SIZE (mode2
) == 8
18523 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18524 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
18529 /* Return the cost of moving data of mode M between a
18530 register and memory. A value of 2 is the default; this cost is
18531 relative to those in `REGISTER_MOVE_COST'.
18533 If moving between registers and memory is more expensive than
18534 between two registers, you should define this macro to express the
18537 Model also increased moving costs of QImode registers in non
18541 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
18543 if (FLOAT_CLASS_P (class))
18560 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
18562 if (SSE_CLASS_P (class))
18565 switch (GET_MODE_SIZE (mode
))
18579 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
18581 if (MMX_CLASS_P (class))
18584 switch (GET_MODE_SIZE (mode
))
18595 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
18597 switch (GET_MODE_SIZE (mode
))
18601 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18602 : ix86_cost
->movzbl_load
);
18604 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18605 : ix86_cost
->int_store
[0] + 4);
18608 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18610 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18611 if (mode
== TFmode
)
18613 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18614 * (((int) GET_MODE_SIZE (mode
)
18615 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18619 /* Compute a (partial) cost for rtx X. Return true if the complete
18620 cost has been computed, and false if subexpressions should be
18621 scanned. In either case, *TOTAL contains the cost result. */
18624 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
18626 enum machine_mode mode
= GET_MODE (x
);
18634 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
18636 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
18638 else if (flag_pic
&& SYMBOLIC_CONST (x
)
18640 || (!GET_CODE (x
) != LABEL_REF
18641 && (GET_CODE (x
) != SYMBOL_REF
18642 || !SYMBOL_REF_LOCAL_P (x
)))))
18649 if (mode
== VOIDmode
)
18652 switch (standard_80387_constant_p (x
))
18657 default: /* Other constants */
18662 /* Start with (MEM (SYMBOL_REF)), since that's where
18663 it'll probably end up. Add a penalty for size. */
18664 *total
= (COSTS_N_INSNS (1)
18665 + (flag_pic
!= 0 && !TARGET_64BIT
)
18666 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
18672 /* The zero extensions is often completely free on x86_64, so make
18673 it as cheap as possible. */
18674 if (TARGET_64BIT
&& mode
== DImode
18675 && GET_MODE (XEXP (x
, 0)) == SImode
)
18677 else if (TARGET_ZERO_EXTEND_WITH_AND
)
18678 *total
= ix86_cost
->add
;
18680 *total
= ix86_cost
->movzx
;
18684 *total
= ix86_cost
->movsx
;
18688 if (CONST_INT_P (XEXP (x
, 1))
18689 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
18691 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18694 *total
= ix86_cost
->add
;
18697 if ((value
== 2 || value
== 3)
18698 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
18700 *total
= ix86_cost
->lea
;
18710 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
18712 if (CONST_INT_P (XEXP (x
, 1)))
18714 if (INTVAL (XEXP (x
, 1)) > 32)
18715 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
18717 *total
= ix86_cost
->shift_const
* 2;
18721 if (GET_CODE (XEXP (x
, 1)) == AND
)
18722 *total
= ix86_cost
->shift_var
* 2;
18724 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
18729 if (CONST_INT_P (XEXP (x
, 1)))
18730 *total
= ix86_cost
->shift_const
;
18732 *total
= ix86_cost
->shift_var
;
18737 if (FLOAT_MODE_P (mode
))
18739 *total
= ix86_cost
->fmul
;
18744 rtx op0
= XEXP (x
, 0);
18745 rtx op1
= XEXP (x
, 1);
18747 if (CONST_INT_P (XEXP (x
, 1)))
18749 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18750 for (nbits
= 0; value
!= 0; value
&= value
- 1)
18754 /* This is arbitrary. */
18757 /* Compute costs correctly for widening multiplication. */
18758 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
18759 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
18760 == GET_MODE_SIZE (mode
))
18762 int is_mulwiden
= 0;
18763 enum machine_mode inner_mode
= GET_MODE (op0
);
18765 if (GET_CODE (op0
) == GET_CODE (op1
))
18766 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
18767 else if (CONST_INT_P (op1
))
18769 if (GET_CODE (op0
) == SIGN_EXTEND
)
18770 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
18773 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
18777 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
18780 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
18781 + nbits
* ix86_cost
->mult_bit
18782 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
18791 if (FLOAT_MODE_P (mode
))
18792 *total
= ix86_cost
->fdiv
;
18794 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
18798 if (FLOAT_MODE_P (mode
))
18799 *total
= ix86_cost
->fadd
;
18800 else if (GET_MODE_CLASS (mode
) == MODE_INT
18801 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
18803 if (GET_CODE (XEXP (x
, 0)) == PLUS
18804 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
18805 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
18806 && CONSTANT_P (XEXP (x
, 1)))
18808 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
18809 if (val
== 2 || val
== 4 || val
== 8)
18811 *total
= ix86_cost
->lea
;
18812 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
18813 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
18815 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18819 else if (GET_CODE (XEXP (x
, 0)) == MULT
18820 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
18822 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
18823 if (val
== 2 || val
== 4 || val
== 8)
18825 *total
= ix86_cost
->lea
;
18826 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
18827 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18831 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
18833 *total
= ix86_cost
->lea
;
18834 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
18835 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
18836 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18843 if (FLOAT_MODE_P (mode
))
18845 *total
= ix86_cost
->fadd
;
18853 if (!TARGET_64BIT
&& mode
== DImode
)
18855 *total
= (ix86_cost
->add
* 2
18856 + (rtx_cost (XEXP (x
, 0), outer_code
)
18857 << (GET_MODE (XEXP (x
, 0)) != DImode
))
18858 + (rtx_cost (XEXP (x
, 1), outer_code
)
18859 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
18865 if (FLOAT_MODE_P (mode
))
18867 *total
= ix86_cost
->fchs
;
18873 if (!TARGET_64BIT
&& mode
== DImode
)
18874 *total
= ix86_cost
->add
* 2;
18876 *total
= ix86_cost
->add
;
18880 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
18881 && XEXP (XEXP (x
, 0), 1) == const1_rtx
18882 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
18883 && XEXP (x
, 1) == const0_rtx
)
18885 /* This kind of construct is implemented using test[bwl].
18886 Treat it as if we had an AND. */
18887 *total
= (ix86_cost
->add
18888 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
18889 + rtx_cost (const1_rtx
, outer_code
));
18895 if (!TARGET_SSE_MATH
18897 || (mode
== DFmode
&& !TARGET_SSE2
))
18902 if (FLOAT_MODE_P (mode
))
18903 *total
= ix86_cost
->fabs
;
18907 if (FLOAT_MODE_P (mode
))
18908 *total
= ix86_cost
->fsqrt
;
18912 if (XINT (x
, 1) == UNSPEC_TP
)
18923 static int current_machopic_label_num
;
18925 /* Given a symbol name and its associated stub, write out the
18926 definition of the stub. */
18929 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
18931 unsigned int length
;
18932 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
18933 int label
= ++current_machopic_label_num
;
18935 /* For 64-bit we shouldn't get here. */
18936 gcc_assert (!TARGET_64BIT
);
18938 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18939 symb
= (*targetm
.strip_name_encoding
) (symb
);
18941 length
= strlen (stub
);
18942 binder_name
= alloca (length
+ 32);
18943 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
18945 length
= strlen (symb
);
18946 symbol_name
= alloca (length
+ 32);
18947 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
18949 sprintf (lazy_ptr_name
, "L%d$lz", label
);
18952 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
18954 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
18956 fprintf (file
, "%s:\n", stub
);
18957 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
18961 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
18962 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
18963 fprintf (file
, "\tjmp\t*%%edx\n");
18966 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
18968 fprintf (file
, "%s:\n", binder_name
);
18972 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
18973 fprintf (file
, "\tpushl\t%%eax\n");
18976 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
18978 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
18980 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
18981 fprintf (file
, "%s:\n", lazy_ptr_name
);
18982 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
18983 fprintf (file
, "\t.long %s\n", binder_name
);
/* End-of-file hook for Darwin/x86: run the generic Darwin epilogue and
   then the common i386 one.  */
/* NOTE(review): the second call was restored from fragmented source;
   confirm against repository history.  */
static void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
18992 #endif /* TARGET_MACHO */
18994 /* Order the registers for register allocator. */
18997 x86_order_regs_for_local_alloc (void)
19002 /* First allocate the local general purpose registers. */
19003 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19004 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19005 reg_alloc_order
[pos
++] = i
;
19007 /* Global general purpose registers. */
19008 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19009 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19010 reg_alloc_order
[pos
++] = i
;
19012 /* x87 registers come first in case we are doing FP math
19014 if (!TARGET_SSE_MATH
)
19015 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19016 reg_alloc_order
[pos
++] = i
;
19018 /* SSE registers. */
19019 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19020 reg_alloc_order
[pos
++] = i
;
19021 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19022 reg_alloc_order
[pos
++] = i
;
19024 /* x87 registers. */
19025 if (TARGET_SSE_MATH
)
19026 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19027 reg_alloc_order
[pos
++] = i
;
19029 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19030 reg_alloc_order
[pos
++] = i
;
19032 /* Initialize the rest of array as we do not allocate some registers
19034 while (pos
< FIRST_PSEUDO_REGISTER
)
19035 reg_alloc_order
[pos
++] = 0;
19038 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19039 struct attribute_spec.handler. */
19041 ix86_handle_struct_attribute (tree
*node
, tree name
,
19042 tree args ATTRIBUTE_UNUSED
,
19043 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19046 if (DECL_P (*node
))
19048 if (TREE_CODE (*node
) == TYPE_DECL
)
19049 type
= &TREE_TYPE (*node
);
19054 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19055 || TREE_CODE (*type
) == UNION_TYPE
)))
19057 warning (OPT_Wattributes
, "%qs attribute ignored",
19058 IDENTIFIER_POINTER (name
));
19059 *no_add_attrs
= true;
19062 else if ((is_attribute_p ("ms_struct", name
)
19063 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19064 || ((is_attribute_p ("gcc_struct", name
)
19065 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19067 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19068 IDENTIFIER_POINTER (name
));
19069 *no_add_attrs
= true;
19076 ix86_ms_bitfield_layout_p (tree record_type
)
19078 return (TARGET_MS_BITFIELD_LAYOUT
&&
19079 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19080 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19083 /* Returns an expression indicating where the this parameter is
19084 located on entry to the FUNCTION. */
19087 x86_this_parameter (tree function
)
19089 tree type
= TREE_TYPE (function
);
19093 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19094 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19097 if (ix86_function_regparm (type
, function
) > 0)
19101 parm
= TYPE_ARG_TYPES (type
);
19102 /* Figure out whether or not the function has a variable number of
19104 for (; parm
; parm
= TREE_CHAIN (parm
))
19105 if (TREE_VALUE (parm
) == void_type_node
)
19107 /* If not, the this parameter is in the first argument. */
19111 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19113 return gen_rtx_REG (SImode
, regno
);
19117 if (aggregate_value_p (TREE_TYPE (type
), type
))
19118 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19120 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19123 /* Determine whether x86_output_mi_thunk can succeed. */
19126 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19127 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19128 HOST_WIDE_INT vcall_offset
, tree function
)
19130 /* 64-bit can handle anything. */
19134 /* For 32-bit, everything's fine if we have one free register. */
19135 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19138 /* Need a free register for vcall_offset. */
19142 /* Need a free register for GOT references. */
19143 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19146 /* Otherwise ok. */
19150 /* Output the assembler code for a thunk function. THUNK_DECL is the
19151 declaration for the thunk function itself, FUNCTION is the decl for
19152 the target function. DELTA is an immediate constant offset to be
19153 added to THIS. If VCALL_OFFSET is nonzero, the word at
19154 *(*this + vcall_offset) should be added to THIS. */
19157 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19158 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19159 HOST_WIDE_INT vcall_offset
, tree function
)
19162 rtx
this = x86_this_parameter (function
);
19165 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19166 pull it in now and let DELTA benefit. */
19169 else if (vcall_offset
)
19171 /* Put the this parameter into %eax. */
19173 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19174 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19177 this_reg
= NULL_RTX
;
19179 /* Adjust the this parameter by a fixed constant. */
19182 xops
[0] = GEN_INT (delta
);
19183 xops
[1] = this_reg
? this_reg
: this;
19186 if (!x86_64_general_operand (xops
[0], DImode
))
19188 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19190 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19194 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19197 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19200 /* Adjust the this parameter by a value stored in the vtable. */
19204 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19207 int tmp_regno
= 2 /* ECX */;
19208 if (lookup_attribute ("fastcall",
19209 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19210 tmp_regno
= 0 /* EAX */;
19211 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19214 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19217 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19219 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19221 /* Adjust the this parameter. */
19222 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19223 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19225 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19226 xops
[0] = GEN_INT (vcall_offset
);
19228 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19229 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19231 xops
[1] = this_reg
;
19233 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19235 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19238 /* If necessary, drop THIS back to its stack slot. */
19239 if (this_reg
&& this_reg
!= this)
19241 xops
[0] = this_reg
;
19243 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19246 xops
[0] = XEXP (DECL_RTL (function
), 0);
19249 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19250 output_asm_insn ("jmp\t%P0", xops
);
19253 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19254 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19255 tmp
= gen_rtx_MEM (QImode
, tmp
);
19257 output_asm_insn ("jmp\t%A0", xops
);
19262 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19263 output_asm_insn ("jmp\t%P0", xops
);
19268 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19269 tmp
= (gen_rtx_SYMBOL_REF
19271 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19272 tmp
= gen_rtx_MEM (QImode
, tmp
);
19274 output_asm_insn ("jmp\t%0", xops
);
19277 #endif /* TARGET_MACHO */
19279 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19280 output_set_got (tmp
, NULL_RTX
);
19283 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19284 output_asm_insn ("jmp\t{*}%1", xops
);
19290 x86_file_start (void)
19292 default_file_start ();
19294 darwin_file_start ();
19296 if (X86_FILE_START_VERSION_DIRECTIVE
)
19297 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19298 if (X86_FILE_START_FLTUSED
)
19299 fputs ("\t.global\t__fltused\n", asm_out_file
);
19300 if (ix86_asm_dialect
== ASM_INTEL
)
19301 fputs ("\t.intel_syntax\n", asm_out_file
);
19305 x86_field_alignment (tree field
, int computed
)
19307 enum machine_mode mode
;
19308 tree type
= TREE_TYPE (field
);
19310 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19312 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19313 ? get_inner_array_type (type
) : type
);
19314 if (mode
== DFmode
|| mode
== DCmode
19315 || GET_MODE_CLASS (mode
) == MODE_INT
19316 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19317 return MIN (32, computed
);
19321 /* Output assembler code to FILE to increment profiler label # LABELNO
19322 for profiling a function entry. */
19324 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19329 #ifndef NO_PROFILE_COUNTERS
19330 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19332 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19336 #ifndef NO_PROFILE_COUNTERS
19337 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19339 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19343 #ifndef NO_PROFILE_COUNTERS
19344 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19345 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19347 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19351 #ifndef NO_PROFILE_COUNTERS
19352 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19353 PROFILE_COUNT_REGISTER
);
19355 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19359 /* We don't have exact information about the insn sizes, but we may assume
19360 quite safely that we are informed about all 1 byte insns and memory
19361 address sizes. This is enough to eliminate unnecessary padding in
19365 min_insn_size (rtx insn
)
19369 if (!INSN_P (insn
) || !active_insn_p (insn
))
19372 /* Discard alignments we've emit and jump instructions. */
19373 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19374 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19377 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19378 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19381 /* Important case - calls are always 5 bytes.
19382 It is common to have many calls in the row. */
19384 && symbolic_reference_mentioned_p (PATTERN (insn
))
19385 && !SIBLING_CALL_P (insn
))
19387 if (get_attr_length (insn
) <= 1)
19390 /* For normal instructions we may rely on the sizes of addresses
19391 and the presence of symbol to require 4 bytes of encoding.
19392 This is not the case for jumps where references are PC relative. */
19393 if (!JUMP_P (insn
))
19395 l
= get_attr_length_address (insn
);
19396 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19405 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19409 ix86_avoid_jump_misspredicts (void)
19411 rtx insn
, start
= get_insns ();
19412 int nbytes
= 0, njumps
= 0;
19415 /* Look for all minimal intervals of instructions containing 4 jumps.
19416 The intervals are bounded by START and INSN. NBYTES is the total
19417 size of instructions in the interval including INSN and not including
19418 START. When the NBYTES is smaller than 16 bytes, it is possible
19419 that the end of START and INSN ends up in the same 16byte page.
19421 The smallest offset in the page INSN can start is the case where START
19422 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19423 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19425 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19428 nbytes
+= min_insn_size (insn
);
19430 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19431 INSN_UID (insn
), min_insn_size (insn
));
19433 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19434 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19442 start
= NEXT_INSN (start
);
19443 if ((JUMP_P (start
)
19444 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19445 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19447 njumps
--, isjump
= 1;
19450 nbytes
-= min_insn_size (start
);
19452 gcc_assert (njumps
>= 0);
19454 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19455 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19457 if (njumps
== 3 && isjump
&& nbytes
< 16)
19459 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19462 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19463 INSN_UID (insn
), padsize
);
19464 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19469 /* AMD Athlon works faster
19470 when RET is not destination of conditional jump or directly preceded
19471 by other jump instruction. We avoid the penalty by inserting NOP just
19472 before the RET instructions in such cases. */
19474 ix86_pad_returns (void)
19479 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19481 basic_block bb
= e
->src
;
19482 rtx ret
= BB_END (bb
);
19484 bool replace
= false;
19486 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19487 || !maybe_hot_bb_p (bb
))
19489 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19490 if (active_insn_p (prev
) || LABEL_P (prev
))
19492 if (prev
&& LABEL_P (prev
))
19497 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19498 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19499 && !(e
->flags
& EDGE_FALLTHRU
))
19504 prev
= prev_active_insn (ret
);
19506 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19509 /* Empty functions get branch mispredict even when the jump destination
19510 is not visible to us. */
19511 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19516 emit_insn_before (gen_return_internal_long (), ret
);
19522 /* Implement machine specific optimizations. We implement padding of returns
19523 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19527 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19528 ix86_pad_returns ();
19529 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19530 ix86_avoid_jump_misspredicts ();
19533 /* Return nonzero when QImode register that must be represented via REX prefix
19536 x86_extended_QIreg_mentioned_p (rtx insn
)
19539 extract_insn_cached (insn
);
19540 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19541 if (REG_P (recog_data
.operand
[i
])
19542 && REGNO (recog_data
.operand
[i
]) >= 4)
19547 /* Return nonzero when P points to register encoded via REX prefix.
19548 Called via for_each_rtx. */
19550 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19552 unsigned int regno
;
19555 regno
= REGNO (*p
);
19556 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19559 /* Return true when INSN mentions register that must be encoded using REX
19562 x86_extended_reg_mentioned_p (rtx insn
)
19564 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19567 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19568 optabs would emit if we didn't have TFmode patterns. */
19571 x86_emit_floatuns (rtx operands
[2])
19573 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19574 enum machine_mode mode
, inmode
;
19576 inmode
= GET_MODE (operands
[1]);
19577 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19580 in
= force_reg (inmode
, operands
[1]);
19581 mode
= GET_MODE (out
);
19582 neglab
= gen_label_rtx ();
19583 donelab
= gen_label_rtx ();
19584 i1
= gen_reg_rtx (Pmode
);
19585 f0
= gen_reg_rtx (mode
);
19587 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
19589 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
19590 emit_jump_insn (gen_jump (donelab
));
19593 emit_label (neglab
);
19595 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
19596 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
19597 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19598 expand_float (f0
, i0
, 0);
19599 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19601 emit_label (donelab
);
19604 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19605 with all elements equal to VAR. Return true if successful. */
19608 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19609 rtx target
, rtx val
)
19611 enum machine_mode smode
, wsmode
, wvmode
;
19626 val
= force_reg (GET_MODE_INNER (mode
), val
);
19627 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19628 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19634 if (TARGET_SSE
|| TARGET_3DNOW_A
)
19636 val
= gen_lowpart (SImode
, val
);
19637 x
= gen_rtx_TRUNCATE (HImode
, val
);
19638 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
19639 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19661 /* Extend HImode to SImode using a paradoxical SUBREG. */
19662 tmp1
= gen_reg_rtx (SImode
);
19663 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19664 /* Insert the SImode value as low element of V4SImode vector. */
19665 tmp2
= gen_reg_rtx (V4SImode
);
19666 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19667 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19668 CONST0_RTX (V4SImode
),
19670 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19671 /* Cast the V4SImode vector back to a V8HImode vector. */
19672 tmp1
= gen_reg_rtx (V8HImode
);
19673 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
19674 /* Duplicate the low short through the whole low SImode word. */
19675 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
19676 /* Cast the V8HImode vector back to a V4SImode vector. */
19677 tmp2
= gen_reg_rtx (V4SImode
);
19678 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19679 /* Replicate the low element of the V4SImode vector. */
19680 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19681 /* Cast the V2SImode back to V8HImode, and store in target. */
19682 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
19693 /* Extend QImode to SImode using a paradoxical SUBREG. */
19694 tmp1
= gen_reg_rtx (SImode
);
19695 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19696 /* Insert the SImode value as low element of V4SImode vector. */
19697 tmp2
= gen_reg_rtx (V4SImode
);
19698 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19699 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19700 CONST0_RTX (V4SImode
),
19702 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19703 /* Cast the V4SImode vector back to a V16QImode vector. */
19704 tmp1
= gen_reg_rtx (V16QImode
);
19705 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
19706 /* Duplicate the low byte through the whole low SImode word. */
19707 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19708 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19709 /* Cast the V16QImode vector back to a V4SImode vector. */
19710 tmp2
= gen_reg_rtx (V4SImode
);
19711 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19712 /* Replicate the low element of the V4SImode vector. */
19713 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19714 /* Cast the V2SImode back to V16QImode, and store in target. */
19715 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
19723 /* Replicate the value once into the next wider mode and recurse. */
19724 val
= convert_modes (wsmode
, smode
, val
, true);
19725 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
19726 GEN_INT (GET_MODE_BITSIZE (smode
)),
19727 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19728 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
19730 x
= gen_reg_rtx (wvmode
);
19731 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
19732 gcc_unreachable ();
19733 emit_move_insn (target
, gen_lowpart (mode
, x
));
19741 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19742 whose ONE_VAR element is VAR, and other elements are zero. Return true
19746 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
19747 rtx target
, rtx var
, int one_var
)
19749 enum machine_mode vsimode
;
19765 var
= force_reg (GET_MODE_INNER (mode
), var
);
19766 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
19767 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19772 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
19773 new_target
= gen_reg_rtx (mode
);
19775 new_target
= target
;
19776 var
= force_reg (GET_MODE_INNER (mode
), var
);
19777 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
19778 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
19779 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
19782 /* We need to shuffle the value to the correct position, so
19783 create a new pseudo to store the intermediate result. */
19785 /* With SSE2, we can use the integer shuffle insns. */
19786 if (mode
!= V4SFmode
&& TARGET_SSE2
)
19788 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
19790 GEN_INT (one_var
== 1 ? 0 : 1),
19791 GEN_INT (one_var
== 2 ? 0 : 1),
19792 GEN_INT (one_var
== 3 ? 0 : 1)));
19793 if (target
!= new_target
)
19794 emit_move_insn (target
, new_target
);
19798 /* Otherwise convert the intermediate result to V4SFmode and
19799 use the SSE1 shuffle instructions. */
19800 if (mode
!= V4SFmode
)
19802 tmp
= gen_reg_rtx (V4SFmode
);
19803 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
19808 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
19810 GEN_INT (one_var
== 1 ? 0 : 1),
19811 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
19812 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
19814 if (mode
!= V4SFmode
)
19815 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
19816 else if (tmp
!= target
)
19817 emit_move_insn (target
, tmp
);
19819 else if (target
!= new_target
)
19820 emit_move_insn (target
, new_target
);
19825 vsimode
= V4SImode
;
19831 vsimode
= V2SImode
;
19837 /* Zero extend the variable element to SImode and recurse. */
19838 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
19840 x
= gen_reg_rtx (vsimode
);
19841 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
19843 gcc_unreachable ();
19845 emit_move_insn (target
, gen_lowpart (mode
, x
));
19853 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19854 consisting of the values in VALS. It is known that all elements
19855 except ONE_VAR are constants. Return true if successful. */
19858 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
19859 rtx target
, rtx vals
, int one_var
)
19861 rtx var
= XVECEXP (vals
, 0, one_var
);
19862 enum machine_mode wmode
;
19865 const_vec
= copy_rtx (vals
);
19866 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
19867 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
19875 /* For the two element vectors, it's just as easy to use
19876 the general case. */
19892 /* There's no way to set one QImode entry easily. Combine
19893 the variable value with its adjacent constant value, and
19894 promote to an HImode set. */
19895 x
= XVECEXP (vals
, 0, one_var
^ 1);
19898 var
= convert_modes (HImode
, QImode
, var
, true);
19899 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
19900 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19901 x
= GEN_INT (INTVAL (x
) & 0xff);
19905 var
= convert_modes (HImode
, QImode
, var
, true);
19906 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
19908 if (x
!= const0_rtx
)
19909 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
19910 1, OPTAB_LIB_WIDEN
);
19912 x
= gen_reg_rtx (wmode
);
19913 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
19914 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
19916 emit_move_insn (target
, gen_lowpart (mode
, x
));
19923 emit_move_insn (target
, const_vec
);
19924 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
19928 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19929 all values variable, and none identical. */
19932 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
19933 rtx target
, rtx vals
)
19935 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
19936 rtx op0
= NULL
, op1
= NULL
;
19937 bool use_vec_concat
= false;
19943 if (!mmx_ok
&& !TARGET_SSE
)
19949 /* For the two element vectors, we always implement VEC_CONCAT. */
19950 op0
= XVECEXP (vals
, 0, 0);
19951 op1
= XVECEXP (vals
, 0, 1);
19952 use_vec_concat
= true;
19956 half_mode
= V2SFmode
;
19959 half_mode
= V2SImode
;
19965 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19966 Recurse to load the two halves. */
19968 op0
= gen_reg_rtx (half_mode
);
19969 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
19970 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
19972 op1
= gen_reg_rtx (half_mode
);
19973 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
19974 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
19976 use_vec_concat
= true;
19987 gcc_unreachable ();
19990 if (use_vec_concat
)
19992 if (!register_operand (op0
, half_mode
))
19993 op0
= force_reg (half_mode
, op0
);
19994 if (!register_operand (op1
, half_mode
))
19995 op1
= force_reg (half_mode
, op1
);
19997 emit_insn (gen_rtx_SET (VOIDmode
, target
,
19998 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20002 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20003 enum machine_mode inner_mode
;
20004 rtx words
[4], shift
;
20006 inner_mode
= GET_MODE_INNER (mode
);
20007 n_elts
= GET_MODE_NUNITS (mode
);
20008 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20009 n_elt_per_word
= n_elts
/ n_words
;
20010 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20012 for (i
= 0; i
< n_words
; ++i
)
20014 rtx word
= NULL_RTX
;
20016 for (j
= 0; j
< n_elt_per_word
; ++j
)
20018 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20019 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20025 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20026 word
, 1, OPTAB_LIB_WIDEN
);
20027 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20028 word
, 1, OPTAB_LIB_WIDEN
);
20036 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20037 else if (n_words
== 2)
20039 rtx tmp
= gen_reg_rtx (mode
);
20040 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20041 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20042 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20043 emit_move_insn (target
, tmp
);
20045 else if (n_words
== 4)
20047 rtx tmp
= gen_reg_rtx (V4SImode
);
20048 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20049 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20050 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20053 gcc_unreachable ();
20057 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20058 instructions unless MMX_OK is true. */
20061 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20063 enum machine_mode mode
= GET_MODE (target
);
20064 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20065 int n_elts
= GET_MODE_NUNITS (mode
);
20066 int n_var
= 0, one_var
= -1;
20067 bool all_same
= true, all_const_zero
= true;
20071 for (i
= 0; i
< n_elts
; ++i
)
20073 x
= XVECEXP (vals
, 0, i
);
20074 if (!CONSTANT_P (x
))
20075 n_var
++, one_var
= i
;
20076 else if (x
!= CONST0_RTX (inner_mode
))
20077 all_const_zero
= false;
20078 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20082 /* Constants are best loaded from the constant pool. */
20085 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20089 /* If all values are identical, broadcast the value. */
20091 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20092 XVECEXP (vals
, 0, 0)))
20095 /* Values where only one field is non-constant are best loaded from
20096 the pool and overwritten via move later. */
20100 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20101 XVECEXP (vals
, 0, one_var
),
20105 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20109 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20113 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20115 enum machine_mode mode
= GET_MODE (target
);
20116 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20117 bool use_vec_merge
= false;
20126 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20127 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20129 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20131 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20132 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20142 /* For the two element vectors, we implement a VEC_CONCAT with
20143 the extraction of the other element. */
20145 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20146 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20149 op0
= val
, op1
= tmp
;
20151 op0
= tmp
, op1
= val
;
20153 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20154 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20162 use_vec_merge
= true;
20166 /* tmp = target = A B C D */
20167 tmp
= copy_to_reg (target
);
20168 /* target = A A B B */
20169 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20170 /* target = X A B B */
20171 ix86_expand_vector_set (false, target
, val
, 0);
20172 /* target = A X C D */
20173 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20174 GEN_INT (1), GEN_INT (0),
20175 GEN_INT (2+4), GEN_INT (3+4)));
20179 /* tmp = target = A B C D */
20180 tmp
= copy_to_reg (target
);
20181 /* tmp = X B C D */
20182 ix86_expand_vector_set (false, tmp
, val
, 0);
20183 /* target = A B X D */
20184 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20185 GEN_INT (0), GEN_INT (1),
20186 GEN_INT (0+4), GEN_INT (3+4)));
20190 /* tmp = target = A B C D */
20191 tmp
= copy_to_reg (target
);
20192 /* tmp = X B C D */
20193 ix86_expand_vector_set (false, tmp
, val
, 0);
20194 /* target = A B X D */
20195 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20196 GEN_INT (0), GEN_INT (1),
20197 GEN_INT (2+4), GEN_INT (0+4)));
20201 gcc_unreachable ();
20206 /* Element 0 handled by vec_merge below. */
20209 use_vec_merge
= true;
20215 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20216 store into element 0, then shuffle them back. */
20220 order
[0] = GEN_INT (elt
);
20221 order
[1] = const1_rtx
;
20222 order
[2] = const2_rtx
;
20223 order
[3] = GEN_INT (3);
20224 order
[elt
] = const0_rtx
;
20226 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20227 order
[1], order
[2], order
[3]));
20229 ix86_expand_vector_set (false, target
, val
, 0);
20231 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20232 order
[1], order
[2], order
[3]));
20236 /* For SSE1, we have to reuse the V4SF code. */
20237 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20238 gen_lowpart (SFmode
, val
), elt
);
20243 use_vec_merge
= TARGET_SSE2
;
20246 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20257 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20258 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20259 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20263 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20265 emit_move_insn (mem
, target
);
20267 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20268 emit_move_insn (tmp
, val
);
20270 emit_move_insn (target
, mem
);
20275 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20277 enum machine_mode mode
= GET_MODE (vec
);
20278 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20279 bool use_vec_extr
= false;
20292 use_vec_extr
= true;
20304 tmp
= gen_reg_rtx (mode
);
20305 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20306 GEN_INT (elt
), GEN_INT (elt
),
20307 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20311 tmp
= gen_reg_rtx (mode
);
20312 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20316 gcc_unreachable ();
20319 use_vec_extr
= true;
20334 tmp
= gen_reg_rtx (mode
);
20335 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20336 GEN_INT (elt
), GEN_INT (elt
),
20337 GEN_INT (elt
), GEN_INT (elt
)));
20341 tmp
= gen_reg_rtx (mode
);
20342 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20346 gcc_unreachable ();
20349 use_vec_extr
= true;
20354 /* For SSE1, we have to reuse the V4SF code. */
20355 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20356 gen_lowpart (V4SFmode
, vec
), elt
);
20362 use_vec_extr
= TARGET_SSE2
;
20365 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20370 /* ??? Could extract the appropriate HImode element and shift. */
20377 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20378 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20380 /* Let the rtl optimizers know about the zero extension performed. */
20381 if (inner_mode
== HImode
)
20383 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20384 target
= gen_lowpart (SImode
, target
);
20387 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20391 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20393 emit_move_insn (mem
, vec
);
20395 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20396 emit_move_insn (target
, tmp
);
20400 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20401 pattern to reduce; DEST is the destination; IN is the input vector. */
20404 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20406 rtx tmp1
, tmp2
, tmp3
;
20408 tmp1
= gen_reg_rtx (V4SFmode
);
20409 tmp2
= gen_reg_rtx (V4SFmode
);
20410 tmp3
= gen_reg_rtx (V4SFmode
);
20412 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20413 emit_insn (fn (tmp2
, tmp1
, in
));
20415 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20416 GEN_INT (1), GEN_INT (1),
20417 GEN_INT (1+4), GEN_INT (1+4)));
20418 emit_insn (fn (dest
, tmp2
, tmp3
));
20421 /* Target hook for scalar_mode_supported_p. */
20423 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20425 if (DECIMAL_FLOAT_MODE_P (mode
))
20428 return default_scalar_mode_supported_p (mode
);
20431 /* Implements target hook vector_mode_supported_p. */
20433 ix86_vector_mode_supported_p (enum machine_mode mode
)
20435 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20437 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20439 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20441 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20446 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20448 We do this in the new i386 backend to maintain source compatibility
20449 with the old cc0-based compiler. */
20452 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20453 tree inputs ATTRIBUTE_UNUSED
,
20456 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20458 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20463 /* Return true if this goes in small data/bss. */
20466 ix86_in_large_data_p (tree exp
)
20468 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20471 /* Functions are never large data. */
20472 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20475 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20477 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20478 if (strcmp (section
, ".ldata") == 0
20479 || strcmp (section
, ".lbss") == 0)
20485 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20487 /* If this is an incomplete type with size 0, then we can't put it
20488 in data because it might be too big when completed. */
20489 if (!size
|| size
> ix86_section_threshold
)
20496 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20498 default_encode_section_info (decl
, rtl
, first
);
20500 if (TREE_CODE (decl
) == VAR_DECL
20501 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20502 && ix86_in_large_data_p (decl
))
20503 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20506 /* Worker function for REVERSE_CONDITION. */
20509 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20511 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20512 ? reverse_condition (code
)
20513 : reverse_condition_maybe_unordered (code
));
20516 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20520 output_387_reg_move (rtx insn
, rtx
*operands
)
20522 if (REG_P (operands
[1])
20523 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20525 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20526 return output_387_ffreep (operands
, 0);
20527 return "fstp\t%y0";
20529 if (STACK_TOP_P (operands
[0]))
20530 return "fld%z1\t%y1";
20534 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20535 FP status register is set. */
20538 ix86_emit_fp_unordered_jump (rtx label
)
20540 rtx reg
= gen_reg_rtx (HImode
);
20543 emit_insn (gen_x86_fnstsw_1 (reg
));
20545 if (TARGET_USE_SAHF
)
20547 emit_insn (gen_x86_sahf_1 (reg
));
20549 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20550 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
20554 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20556 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20557 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20560 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20561 gen_rtx_LABEL_REF (VOIDmode
, label
),
20563 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20564 emit_jump_insn (temp
);
20567 /* Output code to perform a log1p XFmode calculation. */
20569 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20571 rtx label1
= gen_label_rtx ();
20572 rtx label2
= gen_label_rtx ();
20574 rtx tmp
= gen_reg_rtx (XFmode
);
20575 rtx tmp2
= gen_reg_rtx (XFmode
);
20577 emit_insn (gen_absxf2 (tmp
, op1
));
20578 emit_insn (gen_cmpxf (tmp
,
20579 CONST_DOUBLE_FROM_REAL_VALUE (
20580 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20582 emit_jump_insn (gen_bge (label1
));
20584 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20585 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20586 emit_jump (label2
);
20588 emit_label (label1
);
20589 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20590 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20591 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20592 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20594 emit_label (label2
);
20597 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20600 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20603 /* With Binutils 2.15, the "@unwind" marker must be specified on
20604 every occurrence of the ".eh_frame" section, not just the first
20607 && strcmp (name
, ".eh_frame") == 0)
20609 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20610 flags
& SECTION_WRITE
? "aw" : "a");
20613 default_elf_asm_named_section (name
, flags
, decl
);
20616 /* Return the mangling of TYPE if it is an extended fundamental type. */
20618 static const char *
20619 ix86_mangle_fundamental_type (tree type
)
20621 switch (TYPE_MODE (type
))
20624 /* __float128 is "g". */
20627 /* "long double" or __float80 is "e". */
20634 /* For 32-bit code we can save PIC register setup by using
20635 __stack_chk_fail_local hidden function instead of calling
20636 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20637 register, so it is better to call __stack_chk_fail directly. */
20640 ix86_stack_protect_fail (void)
20642 return TARGET_64BIT
20643 ? default_external_stack_protect_fail ()
20644 : default_hidden_stack_protect_fail ();
20647 /* Select a format to encode pointers in exception handling data. CODE
20648 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20649 true if the symbol may be affected by dynamic relocations.
20651 ??? All x86 object file formats are capable of representing this.
20652 After all, the relocation needed is the same as for the call insn.
20653 Whether or not a particular assembler allows us to enter such, I
20654 guess we'll have to see. */
20656 asm_preferred_eh_data_format (int code
, int global
)
20660 int type
= DW_EH_PE_sdata8
;
20662 || ix86_cmodel
== CM_SMALL_PIC
20663 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
20664 type
= DW_EH_PE_sdata4
;
20665 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
20667 if (ix86_cmodel
== CM_SMALL
20668 || (ix86_cmodel
== CM_MEDIUM
&& code
))
20669 return DW_EH_PE_udata4
;
20670 return DW_EH_PE_absptr
;
20673 /* Expand copysign from SIGN to the positive value ABS_VALUE
20674 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20677 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
20679 enum machine_mode mode
= GET_MODE (sign
);
20680 rtx sgn
= gen_reg_rtx (mode
);
20681 if (mask
== NULL_RTX
)
20683 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
20684 if (!VECTOR_MODE_P (mode
))
20686 /* We need to generate a scalar mode mask in this case. */
20687 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20688 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20689 mask
= gen_reg_rtx (mode
);
20690 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20694 mask
= gen_rtx_NOT (mode
, mask
);
20695 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
20696 gen_rtx_AND (mode
, mask
, sign
)));
20697 emit_insn (gen_rtx_SET (VOIDmode
, result
,
20698 gen_rtx_IOR (mode
, abs_value
, sgn
)));
20701 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20702 mask for masking out the sign-bit is stored in *SMASK, if that is
20705 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
20707 enum machine_mode mode
= GET_MODE (op0
);
20710 xa
= gen_reg_rtx (mode
);
20711 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
20712 if (!VECTOR_MODE_P (mode
))
20714 /* We need to generate a scalar mode mask in this case. */
20715 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20716 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20717 mask
= gen_reg_rtx (mode
);
20718 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20720 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
20721 gen_rtx_AND (mode
, op0
, mask
)));
20729 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20730 swapping the operands if SWAP_OPERANDS is true. The expanded
20731 code is a forward jump to a newly created label in case the
20732 comparison is true. The generated label rtx is returned. */
20734 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
20735 bool swap_operands
)
20746 label
= gen_label_rtx ();
20747 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
20748 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20749 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
20750 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
20751 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
20752 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
20753 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
20754 JUMP_LABEL (tmp
) = label
;
20759 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
20760 using comparison code CODE. Operands are swapped for the comparison if
20761 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
20763 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
20764 bool swap_operands
)
20766 enum machine_mode mode
= GET_MODE (op0
);
20767 rtx mask
= gen_reg_rtx (mode
);
20776 if (mode
== DFmode
)
20777 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
20778 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
20780 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
20781 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
20786 /* Generate and return a rtx of mode MODE for 2**n where n is the number
20787 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
20789 ix86_gen_TWO52 (enum machine_mode mode
)
20791 REAL_VALUE_TYPE TWO52r
;
20794 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
20795 TWO52
= const_double_from_real_value (TWO52r
, mode
);
20796 TWO52
= force_reg (mode
, TWO52
);
20801 /* Expand SSE sequence for computing lround from OP1 storing
20804 ix86_expand_lround (rtx op0
, rtx op1
)
20806 /* C code for the stuff we're doing below:
20807 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
20810 enum machine_mode mode
= GET_MODE (op1
);
20811 const struct real_format
*fmt
;
20812 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
20815 /* load nextafter (0.5, 0.0) */
20816 fmt
= REAL_MODE_FORMAT (mode
);
20817 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
20818 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
20820 /* adj = copysign (0.5, op1) */
20821 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
20822 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
20824 /* adj = op1 + adj */
20825 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
20827 /* op0 = (imode)adj */
20828 expand_fix (op0
, adj
, 0);
20831 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
20834 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
20836 /* C code for the stuff we're doing below (for do_floor):
20838 xi -= (double)xi > op1 ? 1 : 0;
20841 enum machine_mode fmode
= GET_MODE (op1
);
20842 enum machine_mode imode
= GET_MODE (op0
);
20843 rtx ireg
, freg
, label
, tmp
;
20845 /* reg = (long)op1 */
20846 ireg
= gen_reg_rtx (imode
);
20847 expand_fix (ireg
, op1
, 0);
20849 /* freg = (double)reg */
20850 freg
= gen_reg_rtx (fmode
);
20851 expand_float (freg
, ireg
, 0);
20853 /* ireg = (freg > op1) ? ireg - 1 : ireg */
20854 label
= ix86_expand_sse_compare_and_jump (UNLE
,
20855 freg
, op1
, !do_floor
);
20856 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
20857 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
20858 emit_move_insn (ireg
, tmp
);
20860 emit_label (label
);
20861 LABEL_NUSES (label
) = 1;
20863 emit_move_insn (op0
, ireg
);
20866 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
20867 result in OPERAND0. */
20869 ix86_expand_rint (rtx operand0
, rtx operand1
)
20871 /* C code for the stuff we're doing below:
20872 xa = fabs (operand1);
20873 if (!isless (xa, 2**52))
20875 xa = xa + 2**52 - 2**52;
20876 return copysign (xa, operand1);
20878 enum machine_mode mode
= GET_MODE (operand0
);
20879 rtx res
, xa
, label
, TWO52
, mask
;
20881 res
= gen_reg_rtx (mode
);
20882 emit_move_insn (res
, operand1
);
20884 /* xa = abs (operand1) */
20885 xa
= ix86_expand_sse_fabs (res
, &mask
);
20887 /* if (!isless (xa, TWO52)) goto label; */
20888 TWO52
= ix86_gen_TWO52 (mode
);
20889 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20891 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20892 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
20894 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
20896 emit_label (label
);
20897 LABEL_NUSES (label
) = 1;
20899 emit_move_insn (operand0
, res
);
20902 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20905 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
20907 /* C code for the stuff we expand below.
20908 double xa = fabs (x), x2;
20909 if (!isless (xa, TWO52))
20911 xa = xa + TWO52 - TWO52;
20912 x2 = copysign (xa, x);
20921 enum machine_mode mode
= GET_MODE (operand0
);
20922 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
20924 TWO52
= ix86_gen_TWO52 (mode
);
20926 /* Temporary for holding the result, initialized to the input
20927 operand to ease control flow. */
20928 res
= gen_reg_rtx (mode
);
20929 emit_move_insn (res
, operand1
);
20931 /* xa = abs (operand1) */
20932 xa
= ix86_expand_sse_fabs (res
, &mask
);
20934 /* if (!isless (xa, TWO52)) goto label; */
20935 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20937 /* xa = xa + TWO52 - TWO52; */
20938 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20939 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
20941 /* xa = copysign (xa, operand1) */
20942 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
20944 /* generate 1.0 or -1.0 */
20945 one
= force_reg (mode
,
20946 const_double_from_real_value (do_floor
20947 ? dconst1
: dconstm1
, mode
));
20949 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20950 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
20951 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20952 gen_rtx_AND (mode
, one
, tmp
)));
20953 /* We always need to subtract here to preserve signed zero. */
20954 tmp
= expand_simple_binop (mode
, MINUS
,
20955 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
20956 emit_move_insn (res
, tmp
);
20958 emit_label (label
);
20959 LABEL_NUSES (label
) = 1;
20961 emit_move_insn (operand0
, res
);
20964 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20967 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
20969 /* C code for the stuff we expand below.
20970 double xa = fabs (x), x2;
20971 if (!isless (xa, TWO52))
20973 x2 = (double)(long)x;
20980 if (HONOR_SIGNED_ZEROS (mode))
20981 return copysign (x2, x);
20984 enum machine_mode mode
= GET_MODE (operand0
);
20985 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
20987 TWO52
= ix86_gen_TWO52 (mode
);
20989 /* Temporary for holding the result, initialized to the input
20990 operand to ease control flow. */
20991 res
= gen_reg_rtx (mode
);
20992 emit_move_insn (res
, operand1
);
20994 /* xa = abs (operand1) */
20995 xa
= ix86_expand_sse_fabs (res
, &mask
);
20997 /* if (!isless (xa, TWO52)) goto label; */
20998 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21000 /* xa = (double)(long)x */
21001 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21002 expand_fix (xi
, res
, 0);
21003 expand_float (xa
, xi
, 0);
21006 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21008 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21009 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21010 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21011 gen_rtx_AND (mode
, one
, tmp
)));
21012 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21013 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21014 emit_move_insn (res
, tmp
);
21016 if (HONOR_SIGNED_ZEROS (mode
))
21017 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21019 emit_label (label
);
21020 LABEL_NUSES (label
) = 1;
21022 emit_move_insn (operand0
, res
);
21025 /* Expand SSE sequence for computing round from OPERAND1 storing
21026 into OPERAND0. Sequence that works without relying on DImode truncation
21027 via cvttsd2siq that is only available on 64bit targets. */
21029 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21031 /* C code for the stuff we expand below.
21032 double xa = fabs (x), xa2, x2;
21033 if (!isless (xa, TWO52))
21035 Using the absolute value and copying back sign makes
21036 -0.0 -> -0.0 correct.
21037 xa2 = xa + TWO52 - TWO52;
21042 else if (dxa > 0.5)
21044 x2 = copysign (xa2, x);
21047 enum machine_mode mode
= GET_MODE (operand0
);
21048 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21050 TWO52
= ix86_gen_TWO52 (mode
);
21052 /* Temporary for holding the result, initialized to the input
21053 operand to ease control flow. */
21054 res
= gen_reg_rtx (mode
);
21055 emit_move_insn (res
, operand1
);
21057 /* xa = abs (operand1) */
21058 xa
= ix86_expand_sse_fabs (res
, &mask
);
21060 /* if (!isless (xa, TWO52)) goto label; */
21061 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21063 /* xa2 = xa + TWO52 - TWO52; */
21064 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21065 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21067 /* dxa = xa2 - xa; */
21068 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21070 /* generate 0.5, 1.0 and -0.5 */
21071 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21072 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21073 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21077 tmp
= gen_reg_rtx (mode
);
21078 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21079 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21080 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21081 gen_rtx_AND (mode
, one
, tmp
)));
21082 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21083 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21084 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21085 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21086 gen_rtx_AND (mode
, one
, tmp
)));
21087 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21089 /* res = copysign (xa2, operand1) */
21090 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21092 emit_label (label
);
21093 LABEL_NUSES (label
) = 1;
21095 emit_move_insn (operand0
, res
);
21098 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21101 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21103 /* C code for SSE variant we expand below.
21104 double xa = fabs (x), x2;
21105 if (!isless (xa, TWO52))
21107 x2 = (double)(long)x;
21108 if (HONOR_SIGNED_ZEROS (mode))
21109 return copysign (x2, x);
21112 enum machine_mode mode
= GET_MODE (operand0
);
21113 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21115 TWO52
= ix86_gen_TWO52 (mode
);
21117 /* Temporary for holding the result, initialized to the input
21118 operand to ease control flow. */
21119 res
= gen_reg_rtx (mode
);
21120 emit_move_insn (res
, operand1
);
21122 /* xa = abs (operand1) */
21123 xa
= ix86_expand_sse_fabs (res
, &mask
);
21125 /* if (!isless (xa, TWO52)) goto label; */
21126 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21128 /* x = (double)(long)x */
21129 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21130 expand_fix (xi
, res
, 0);
21131 expand_float (res
, xi
, 0);
21133 if (HONOR_SIGNED_ZEROS (mode
))
21134 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21136 emit_label (label
);
21137 LABEL_NUSES (label
) = 1;
21139 emit_move_insn (operand0
, res
);
21142 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21145 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21147 enum machine_mode mode
= GET_MODE (operand0
);
21148 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21150 /* C code for SSE variant we expand below.
21151 double xa = fabs (x), x2;
21152 if (!isless (xa, TWO52))
21154 xa2 = xa + TWO52 - TWO52;
21158 x2 = copysign (xa2, x);
21162 TWO52
= ix86_gen_TWO52 (mode
);
21164 /* Temporary for holding the result, initialized to the input
21165 operand to ease control flow. */
21166 res
= gen_reg_rtx (mode
);
21167 emit_move_insn (res
, operand1
);
21169 /* xa = abs (operand1) */
21170 xa
= ix86_expand_sse_fabs (res
, &smask
);
21172 /* if (!isless (xa, TWO52)) goto label; */
21173 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21175 /* res = xa + TWO52 - TWO52; */
21176 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21177 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21178 emit_move_insn (res
, tmp
);
21181 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21183 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21184 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21185 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21186 gen_rtx_AND (mode
, mask
, one
)));
21187 tmp
= expand_simple_binop (mode
, MINUS
,
21188 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21189 emit_move_insn (res
, tmp
);
21191 /* res = copysign (res, operand1) */
21192 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21194 emit_label (label
);
21195 LABEL_NUSES (label
) = 1;
21197 emit_move_insn (operand0
, res
);
21200 /* Expand SSE sequence for computing round from OPERAND1 storing
21203 ix86_expand_round (rtx operand0
, rtx operand1
)
21205 /* C code for the stuff we're doing below:
21206 double xa = fabs (x);
21207 if (!isless (xa, TWO52))
21209 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21210 return copysign (xa, x);
21212 enum machine_mode mode
= GET_MODE (operand0
);
21213 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21214 const struct real_format
*fmt
;
21215 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21217 /* Temporary for holding the result, initialized to the input
21218 operand to ease control flow. */
21219 res
= gen_reg_rtx (mode
);
21220 emit_move_insn (res
, operand1
);
21222 TWO52
= ix86_gen_TWO52 (mode
);
21223 xa
= ix86_expand_sse_fabs (res
, &mask
);
21224 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21226 /* load nextafter (0.5, 0.0) */
21227 fmt
= REAL_MODE_FORMAT (mode
);
21228 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21229 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21231 /* xa = xa + 0.5 */
21232 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21233 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21235 /* xa = (double)(int64_t)xa */
21236 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21237 expand_fix (xi
, xa
, 0);
21238 expand_float (xa
, xi
, 0);
21240 /* res = copysign (xa, operand1) */
21241 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21243 emit_label (label
);
21244 LABEL_NUSES (label
) = 1;
21246 emit_move_insn (operand0
, res
);
21249 #include "gt-i386.h"