/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-checking limit used when the target headers do not
   provide one.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   tables carry five entries: QImode, HImode, SImode, DImode and
   "other" (index 4), matching the {QI, HI, SI, DI, other} layout of
   the cost arrays below.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* Size-tuning cost unit: we assume COSTS_N_INSNS is defined as (N)*4
   and an addition is 2 bytes, so COSTS_N_BYTES expresses costs in
   bytes on the same scale.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of loading integer registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table for the processor currently being tuned for; statically
   initialized to point at the Pentium cost table.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro is a bit
   mask keyed by the PROCESSOR_* enumeration (or a union of such
   bits), so the per-CPU tuning flags below can be expressed as
   bitwise ORs of processor sets.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
/* References m_K6 before its definition below; harmless, since
   macros expand at their point of use.  */
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1002 /* Generic instruction choice should be common subset of supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
1006 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
1007 generic because it is not working well with PPro base chips. */
1008 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
/* Each x86_* constant below is a tuning flag: a mask of the m_*
   processor sets for which the named optimization is enabled.  */
/* Push memory operands directly instead of loading them into a
   register first (per flag name; confirm at use sites).  */
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
			    | m_NOCONA | m_CORE2 | m_GENERIC;
/* Zero extend with an AND insn rather than movzx on these CPUs
   (per flag name; confirm at use sites).  */
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Enable to zero extend integer registers to avoid partial dependencies */
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
		     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
/* ~m_386: enabled everywhere except the 386 (mask is a complement set).  */
const int x86_double_with_add = ~m_386;
/* Only the 386 uses bit-test style sequences.  */
const int x86_use_bit_test = m_386;
/* CPUs on which inline unrolled strlen is considered worthwhile.  */
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
			      | m_K6 | m_CORE2 | m_GENERIC;
1020 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
/* 3dNOW! "A" extensions are an AMD-only feature set.  */
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
			    | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation result.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  It also increases the code size.  As the result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA
			 | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
/* Integer/FP operations with HImode memory operands (per flag name).  */
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
/* Complement set: SImode fiop is used everywhere EXCEPT these CPUs.  */
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
				  | m_CORE2 | m_GENERIC);
/* Use mov $0, reg style zeroing only on K6 (per flag name).  */
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
/* Read-modify-write memory instructions: everywhere except Pentium.  */
const int x86_read_modify_write = ~m_PENT;
/* Read-modify memory instructions: everywhere except Pentium/PPro.  */
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
			       | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
/* Single-instruction (non-unrolled) string ops on these CPUs.  */
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
/* ~(0): QImode arithmetic is fine on every CPU.  */
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
			  | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
			  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4
			  | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
			  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Enable if integer moves are preferred for DFmode copies */
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4
				       | m_NOCONA | m_PPRO | m_CORE2
				       | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4
				       | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4
				      | m_NOCONA | m_CORE2 | m_GENERIC;
1078 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1079 for outgoing arguments will be computed and placed into the variable
1080 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1081 for each call; instead, the function prologue should increase the stack frame
1082 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1084 const int x86_accumulate_outgoing_args
= m_ATHLON_K8_AMDFAM10
| m_PENT4
1085 | m_NOCONA
| m_PPRO
| m_CORE2
/* Emit prologue register saves with mov instead of push on these CPUs.  */
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
/* Likewise for epilogue restores (mov instead of pop).  */
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
/* Implicit shift-by-one form: everywhere except the 486.  */
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
					   | m_ATHLON_K8_AMDFAM10 | m_PENT4
					   | m_NOCONA | m_CORE2 | m_GENERIC;
/* In Generic model we have a conflict here in between PPro/Pentium4 based
   chips that thread 128bit SSE registers as single units versus K8 based
   chips that divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results shows that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful
   scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO
					   | m_CORE2 | m_GENERIC | m_AMDFAM10;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
/* Code generation for scalar reg-reg moves and for aligned/unaligned
   packed loads of single and double precision data is selected by the
   three flags x86_sse_partial_reg_dependency, x86_sse_split_regs and
   x86_sse_unaligned_move_optimal; the last one overrides the former two
   for unaligned packed loads.  (The full decision table that documented
   each instruction sequence was truncated in this copy of the file.)  */
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
/* Materialize SSE zero via pxor on these CPUs (per flag name).  */
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
/* inc/dec carry a flags partial-write penalty on P4-class and generic
   tunings, hence the complement set.  */
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
/* Moves between integer and SSE/MMX units: everywhere except AMD
   K8-family and generic tunings.  */
const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
				    | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
				| m_NOCONA | m_CORE2 | m_GENERIC;
/* CPUs for which instruction scheduling is enabled.  */
const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
			 | m_CORE2 | m_GENERIC;
/* Use the bt instruction on AMD K8-family chips (per flag name).  */
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486.  */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
/* String operation algorithm forced by command line option, if any;
   no_stringop means no override.  */
static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1197 /* Array of the smallest class containing reg number REGNO, indexed by
1198 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1200 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1202 /* ax, dx, cx, bx */
1203 AREG
, DREG
, CREG
, BREG
,
1204 /* si, di, bp, sp */
1205 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1207 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1208 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1211 /* flags, fpsr, fpcr, frame */
1212 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1213 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1215 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1217 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1218 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1219 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1223 /* The "default" register map used in 32bit mode. */
1225 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1227 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1228 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1229 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1230 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1231 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1232 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1233 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1236 static int const x86_64_int_parameter_registers
[6] =
1238 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1239 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1242 static int const x86_64_int_return_registers
[4] =
1244 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1247 /* The "default" register map used in 64bit mode. */
1248 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1250 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1251 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1252 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1253 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1254 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1255 8,9,10,11,12,13,14,15, /* extended integer registers */
1256 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1259 /* Define the register numbers to be used in Dwarf debugging information.
1260 The SVR4 reference port C compiler uses the following register numbers
1261 in its Dwarf output code:
1262 0 for %eax (gcc regno = 0)
1263 1 for %ecx (gcc regno = 2)
1264 2 for %edx (gcc regno = 1)
1265 3 for %ebx (gcc regno = 3)
1266 4 for %esp (gcc regno = 7)
1267 5 for %ebp (gcc regno = 6)
1268 6 for %esi (gcc regno = 4)
1269 7 for %edi (gcc regno = 5)
1270 The following three DWARF register numbers are never generated by
1271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1272 believes these numbers have these meanings.
1273 8 for %eip (no gcc equivalent)
1274 9 for %eflags (gcc regno = 17)
1275 10 for %trapno (no gcc equivalent)
1276 It is not at all clear how we should number the FP stack registers
1277 for the x86 architecture. If the version of SDB on x86/svr4 were
1278 a bit less brain dead with respect to floating-point then we would
1279 have a precedent to follow with respect to DWARF register numbers
1280 for x86 FP registers, but the SDB on x86/svr4 is so completely
1281 broken with respect to FP registers that it is hardly worth thinking
1282 of it as something to strive for compatibility with.
1283 The version of x86/svr4 SDB I have at the moment does (partially)
1284 seem to believe that DWARF register number 11 is associated with
1285 the x86 register %st(0), but that's about all. Higher DWARF
1286 register numbers don't seem to be associated with anything in
1287 particular, and even for DWARF regno 11, SDB only seems to under-
1288 stand that it should say that a variable lives in %st(0) (when
1289 asked via an `=' command) if we said it was in DWARF regno 11,
1290 but SDB still prints garbage when asked for the value of the
1291 variable in question (via a `/' command).
1292 (Also note that the labels SDB prints for various FP stack regs
1293 when doing an `x' command are all wrong.)
1294 Note that these problems generally don't affect the native SVR4
1295 C compiler because it doesn't allow the use of -O with -g and
1296 because when it is *not* optimizing, it allocates a memory
1297 location for each floating-point variable, and the memory
1298 location is what gets described in the DWARF AT_location
1299 attribute for the variable in question.
1300 Regardless of the severe mental illness of the x86/svr4 SDB, we
1301 do something sensible here and we use the following DWARF
1302 register numbers. Note that these are all stack-top-relative
1304 11 for %st(0) (gcc regno = 8)
1305 12 for %st(1) (gcc regno = 9)
1306 13 for %st(2) (gcc regno = 10)
1307 14 for %st(3) (gcc regno = 11)
1308 15 for %st(4) (gcc regno = 12)
1309 16 for %st(5) (gcc regno = 13)
1310 17 for %st(6) (gcc regno = 14)
1311 18 for %st(7) (gcc regno = 15)
1313 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1317 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1320 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1321 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1334 /* Define the structure for the machine field in struct function. */
1336 struct stack_local_entry
GTY(())
1338 unsigned short mode
;
1341 struct stack_local_entry
*next
;
1344 /* Structure describing stack frame layout.
1345 Stack grows downward:
1351 saved frame pointer if frame_pointer_needed
1352 <- HARD_FRAME_POINTER
1357 [va_arg registers] (
1358 > to_allocate <- FRAME_POINTER
1368 HOST_WIDE_INT frame
;
1370 int outgoing_arguments_size
;
1373 HOST_WIDE_INT to_allocate
;
1374 /* The offsets relative to ARG_POINTER. */
1375 HOST_WIDE_INT frame_pointer_offset
;
1376 HOST_WIDE_INT hard_frame_pointer_offset
;
1377 HOST_WIDE_INT stack_pointer_offset
;
1379 /* When save_regs_using_mov is set, emit prologue using
1380 move instead of push instructions. */
1381 bool save_regs_using_mov
;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Assembler dialect; AT&T syntax is the default.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect; GNU is the default.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* true if cmpxchg16b is supported.  */

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1427 static bool ix86_handle_option (size_t, const char *, int);
1428 static void output_pic_addr_const (FILE *, rtx
, int);
1429 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1431 static const char *get_some_local_dynamic_name (void);
1432 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1433 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1434 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1436 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1437 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1439 static rtx
get_thread_pointer (int);
1440 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1441 static void get_pc_thunk_name (char [32], unsigned int);
1442 static rtx
gen_push (rtx
);
1443 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1444 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1445 static struct machine_function
* ix86_init_machine_status (void);
1446 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1447 static int ix86_nsaved_regs (void);
1448 static void ix86_emit_save_regs (void);
1449 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1450 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1451 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1452 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1453 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1454 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1455 static int ix86_issue_rate (void);
1456 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1457 static int ia32_multipass_dfa_lookahead (void);
1458 static void ix86_init_mmx_sse_builtins (void);
1459 static rtx
x86_this_parameter (tree
);
1460 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1461 HOST_WIDE_INT
, tree
);
1462 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1463 static void x86_file_start (void);
1464 static void ix86_reorg (void);
1465 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1466 static tree
ix86_build_builtin_va_list (void);
1467 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1469 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1470 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1471 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1473 static int ix86_address_cost (rtx
);
1474 static bool ix86_cannot_force_const_mem (rtx
);
1475 static rtx
ix86_delegitimize_address (rtx
);
1477 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1479 struct builtin_description
;
1480 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1482 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1484 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1485 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1486 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1487 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1488 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1489 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1490 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1491 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1492 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1493 static int ix86_fp_comparison_cost (enum rtx_code code
);
1494 static unsigned int ix86_select_alt_pic_regnum (void);
1495 static int ix86_save_reg (unsigned int, int);
1496 static void ix86_compute_frame_layout (struct ix86_frame
*);
1497 static int ix86_comp_type_attributes (tree
, tree
);
1498 static int ix86_function_regparm (tree
, tree
);
1499 const struct attribute_spec ix86_attribute_table
[];
1500 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1501 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1502 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1503 static bool contains_128bit_aligned_vector_p (tree
);
1504 static rtx
ix86_struct_value_rtx (tree
, int);
1505 static bool ix86_ms_bitfield_layout_p (tree
);
1506 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1507 static int extended_reg_mentioned_1 (rtx
*, void *);
1508 static bool ix86_rtx_costs (rtx
, int, int, int *);
1509 static int min_insn_size (rtx
);
1510 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1511 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1512 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1514 static void ix86_init_builtins (void);
1515 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1516 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1517 static const char *ix86_mangle_fundamental_type (tree
);
1518 static tree
ix86_stack_protect_fail (void);
1519 static rtx
ix86_internal_arg_pointer (void);
1520 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1522 /* This function is only used on Solaris. */
1523 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1526 /* Register class used for passing given 64bit part of the argument.
1527 These represent classes as documented by the PS ABI, with the exception
1528 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1529 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1531 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1532 whenever possible (upper half does contain padding).
1534 enum x86_64_reg_class
1537 X86_64_INTEGER_CLASS
,
1538 X86_64_INTEGERSI_CLASS
,
1545 X86_64_COMPLEX_X87_CLASS
,
1548 static const char * const x86_64_reg_class_name
[] = {
1549 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1550 "sseup", "x87", "x87up", "cplx87", "no"
/* Maximum number of 64bit eightbyte classes an argument can span.  */
#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
/* Lazily set by init_ext_80387_constants when the table is filled in.  */
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1562 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1563 unsigned HOST_WIDE_INT align
)
/* Initialize the GCC target structure: each hook below is redirected
   to its i386-specific implementation.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1569 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1570 # undef TARGET_MERGE_DECL_ATTRIBUTES
1571 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
/* Type-attribute comparison and builtin expansion hooks.  */
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1588 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1589 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1591 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1594 #undef TARGET_ASM_OPEN_PAREN
1595 #define TARGET_ASM_OPEN_PAREN ""
1596 #undef TARGET_ASM_CLOSE_PAREN
1597 #define TARGET_ASM_CLOSE_PAREN ""
1599 #undef TARGET_ASM_ALIGNED_HI_OP
1600 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1601 #undef TARGET_ASM_ALIGNED_SI_OP
1602 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1604 #undef TARGET_ASM_ALIGNED_DI_OP
1605 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1608 #undef TARGET_ASM_UNALIGNED_HI_OP
1609 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1610 #undef TARGET_ASM_UNALIGNED_SI_OP
1611 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1612 #undef TARGET_ASM_UNALIGNED_DI_OP
1613 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1615 #undef TARGET_SCHED_ADJUST_COST
1616 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1617 #undef TARGET_SCHED_ISSUE_RATE
1618 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1619 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1620 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1621 ia32_multipass_dfa_lookahead
1623 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1624 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1627 #undef TARGET_HAVE_TLS
1628 #define TARGET_HAVE_TLS true
1630 #undef TARGET_CANNOT_FORCE_CONST_MEM
1631 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1632 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1633 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1635 #undef TARGET_DELEGITIMIZE_ADDRESS
1636 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1638 #undef TARGET_MS_BITFIELD_LAYOUT_P
1639 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1642 #undef TARGET_BINDS_LOCAL_P
1643 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1646 #undef TARGET_ASM_OUTPUT_MI_THUNK
1647 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1648 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1649 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1651 #undef TARGET_ASM_FILE_START
1652 #define TARGET_ASM_FILE_START x86_file_start
1654 #undef TARGET_DEFAULT_TARGET_FLAGS
1655 #define TARGET_DEFAULT_TARGET_FLAGS \
1657 | TARGET_64BIT_DEFAULT \
1658 | TARGET_SUBTARGET_DEFAULT \
1659 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1661 #undef TARGET_HANDLE_OPTION
1662 #define TARGET_HANDLE_OPTION ix86_handle_option
1664 #undef TARGET_RTX_COSTS
1665 #define TARGET_RTX_COSTS ix86_rtx_costs
1666 #undef TARGET_ADDRESS_COST
1667 #define TARGET_ADDRESS_COST ix86_address_cost
1669 #undef TARGET_FIXED_CONDITION_CODE_REGS
1670 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1671 #undef TARGET_CC_MODES_COMPATIBLE
1672 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1674 #undef TARGET_MACHINE_DEPENDENT_REORG
1675 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1677 #undef TARGET_BUILD_BUILTIN_VA_LIST
1678 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1680 #undef TARGET_MD_ASM_CLOBBERS
1681 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1683 #undef TARGET_PROMOTE_PROTOTYPES
1684 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1685 #undef TARGET_STRUCT_VALUE_RTX
1686 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1687 #undef TARGET_SETUP_INCOMING_VARARGS
1688 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1689 #undef TARGET_MUST_PASS_IN_STACK
1690 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1691 #undef TARGET_PASS_BY_REFERENCE
1692 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1693 #undef TARGET_INTERNAL_ARG_POINTER
1694 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1695 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1696 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1698 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1699 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1701 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1702 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1704 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1705 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1708 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1709 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1712 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1713 #undef TARGET_INSERT_ATTRIBUTES
1714 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1717 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1718 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1720 #undef TARGET_STACK_PROTECT_FAIL
1721 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1723 #undef TARGET_FUNCTION_VALUE
1724 #define TARGET_FUNCTION_VALUE ix86_function_value
1726 struct gcc_target targetm
= TARGET_INITIALIZER
;
1729 /* The svr4 ABI for the i386 says that records and unions are returned
1731 #ifndef DEFAULT_PCC_STRUCT_RETURN
1732 #define DEFAULT_PCC_STRUCT_RETURN 1
/* NOTE(review): the body of this option handler is incomplete in this
   extraction -- the enclosing 'switch (code)', its 'case OPT_m...'
   labels, the 'if (!value)' guards, braces and the final return were all
   dropped.  Each surviving pair of statements below clears the mask(s)
   of an ISA extension (and everything that implies it) from
   target_flags and records the bits in target_flags_explicit so
   override_options will not re-enable them -- presumably one pair per
   '-mno-...' option; verify against a clean i386.c.  */
1735 /* Implement TARGET_HANDLE_OPTION. */
1738 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1745 target_flags
&= ~MASK_3DNOW_A
;
1746 target_flags_explicit
|= MASK_3DNOW_A
;
1753 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1754 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
/* Disabling SSE also disables everything built on top of it.  */
1761 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1762 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1769 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1770 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1777 target_flags
&= ~MASK_SSE4A
;
1778 target_flags_explicit
|= MASK_SSE4A
;
1787 /* Sometimes certain combinations of command options do not make
1788 sense on a particular target machine. You can define a macro
1789 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1790 defined, is executed once just after all the command options have
1793 Don't use this macro to turn on various extra optimizations for
1794 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1797 override_options (void)
1800 int ix86_tune_defaulted
= 0;
1802 /* Comes from final.c -- no real reason to change it. */
1803 #define MAX_CODE_ALIGN 16
1807 const struct processor_costs
*cost
; /* Processor costs */
1808 const int target_enable
; /* Target flags to enable. */
1809 const int target_disable
; /* Target flags to disable. */
1810 const int align_loop
; /* Default alignments. */
1811 const int align_loop_max_skip
;
1812 const int align_jump
;
1813 const int align_jump_max_skip
;
1814 const int align_func
;
1816 const processor_target_table
[PROCESSOR_max
] =
1818 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1819 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1820 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1821 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1822 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1823 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1824 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1825 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1826 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1827 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1828 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1829 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1830 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1831 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1834 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1837 const char *const name
; /* processor name or nickname. */
1838 const enum processor_type processor
;
1839 const enum pta_flags
1845 PTA_PREFETCH_SSE
= 16,
1856 const processor_alias_table
[] =
1858 {"i386", PROCESSOR_I386
, 0},
1859 {"i486", PROCESSOR_I486
, 0},
1860 {"i586", PROCESSOR_PENTIUM
, 0},
1861 {"pentium", PROCESSOR_PENTIUM
, 0},
1862 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1863 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1864 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1865 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1866 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1867 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1868 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1869 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1870 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1871 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1872 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1873 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1874 | PTA_MMX
| PTA_PREFETCH_SSE
},
1875 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1876 | PTA_MMX
| PTA_PREFETCH_SSE
},
1877 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1878 | PTA_MMX
| PTA_PREFETCH_SSE
},
1879 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1880 | PTA_MMX
| PTA_PREFETCH_SSE
| PTA_CX16
},
1881 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1882 | PTA_64BIT
| PTA_MMX
1883 | PTA_PREFETCH_SSE
| PTA_CX16
},
1884 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1886 {"k6", PROCESSOR_K6
, PTA_MMX
},
1887 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1888 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1889 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1891 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1892 | PTA_3DNOW
| PTA_3DNOW_A
},
1893 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1894 | PTA_3DNOW_A
| PTA_SSE
},
1895 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1896 | PTA_3DNOW_A
| PTA_SSE
},
1897 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1898 | PTA_3DNOW_A
| PTA_SSE
},
1899 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1900 | PTA_SSE
| PTA_SSE2
},
1901 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1902 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1903 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1904 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1905 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1906 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1907 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1908 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1909 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1910 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1911 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1912 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1913 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1914 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1917 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1919 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1920 SUBTARGET_OVERRIDE_OPTIONS
;
1923 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1924 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1927 /* -fPIC is the default for x86_64. */
1928 if (TARGET_MACHO
&& TARGET_64BIT
)
1931 /* Set the default values for switches whose default depends on TARGET_64BIT
1932 in case they weren't overwritten by command line options. */
1935 /* Mach-O doesn't support omitting the frame pointer for now. */
1936 if (flag_omit_frame_pointer
== 2)
1937 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1938 if (flag_asynchronous_unwind_tables
== 2)
1939 flag_asynchronous_unwind_tables
= 1;
1940 if (flag_pcc_struct_return
== 2)
1941 flag_pcc_struct_return
= 0;
1945 if (flag_omit_frame_pointer
== 2)
1946 flag_omit_frame_pointer
= 0;
1947 if (flag_asynchronous_unwind_tables
== 2)
1948 flag_asynchronous_unwind_tables
= 0;
1949 if (flag_pcc_struct_return
== 2)
1950 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1953 /* Need to check -mtune=generic first. */
1954 if (ix86_tune_string
)
1956 if (!strcmp (ix86_tune_string
, "generic")
1957 || !strcmp (ix86_tune_string
, "i686")
1958 /* As special support for cross compilers we read -mtune=native
1959 as -mtune=generic. With native compilers we won't see the
1960 -mtune=native, as it was changed by the driver. */
1961 || !strcmp (ix86_tune_string
, "native"))
1964 ix86_tune_string
= "generic64";
1966 ix86_tune_string
= "generic32";
1968 else if (!strncmp (ix86_tune_string
, "generic", 7))
1969 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1973 if (ix86_arch_string
)
1974 ix86_tune_string
= ix86_arch_string
;
1975 if (!ix86_tune_string
)
1977 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1978 ix86_tune_defaulted
= 1;
1981 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1982 need to use a sensible tune option. */
1983 if (!strcmp (ix86_tune_string
, "generic")
1984 || !strcmp (ix86_tune_string
, "x86-64")
1985 || !strcmp (ix86_tune_string
, "i686"))
1988 ix86_tune_string
= "generic64";
1990 ix86_tune_string
= "generic32";
1993 if (ix86_stringop_string
)
1995 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1996 stringop_alg
= rep_prefix_1_byte
;
1997 else if (!strcmp (ix86_stringop_string
, "libcall"))
1998 stringop_alg
= libcall
;
1999 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2000 stringop_alg
= rep_prefix_4_byte
;
2001 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2002 stringop_alg
= rep_prefix_8_byte
;
2003 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2004 stringop_alg
= loop_1_byte
;
2005 else if (!strcmp (ix86_stringop_string
, "loop"))
2006 stringop_alg
= loop
;
2007 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2008 stringop_alg
= unrolled_loop
;
2010 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2012 if (!strcmp (ix86_tune_string
, "x86-64"))
2013 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2014 "-mtune=generic instead as appropriate.");
2016 if (!ix86_arch_string
)
2017 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2018 if (!strcmp (ix86_arch_string
, "generic"))
2019 error ("generic CPU can be used only for -mtune= switch");
2020 if (!strncmp (ix86_arch_string
, "generic", 7))
2021 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2023 if (ix86_cmodel_string
!= 0)
2025 if (!strcmp (ix86_cmodel_string
, "small"))
2026 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2027 else if (!strcmp (ix86_cmodel_string
, "medium"))
2028 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2030 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
2031 else if (!strcmp (ix86_cmodel_string
, "32"))
2032 ix86_cmodel
= CM_32
;
2033 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2034 ix86_cmodel
= CM_KERNEL
;
2035 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
2036 ix86_cmodel
= CM_LARGE
;
2038 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2042 ix86_cmodel
= CM_32
;
2044 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2046 if (ix86_asm_string
!= 0)
2049 && !strcmp (ix86_asm_string
, "intel"))
2050 ix86_asm_dialect
= ASM_INTEL
;
2051 else if (!strcmp (ix86_asm_string
, "att"))
2052 ix86_asm_dialect
= ASM_ATT
;
2054 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2056 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2057 error ("code model %qs not supported in the %s bit mode",
2058 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2059 if (ix86_cmodel
== CM_LARGE
)
2060 sorry ("code model %<large%> not supported yet");
2061 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2062 sorry ("%i-bit mode not compiled in",
2063 (target_flags
& MASK_64BIT
) ? 64 : 32);
2065 for (i
= 0; i
< pta_size
; i
++)
2066 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2068 ix86_arch
= processor_alias_table
[i
].processor
;
2069 /* Default cpu tuning to the architecture. */
2070 ix86_tune
= ix86_arch
;
2071 if (processor_alias_table
[i
].flags
& PTA_MMX
2072 && !(target_flags_explicit
& MASK_MMX
))
2073 target_flags
|= MASK_MMX
;
2074 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2075 && !(target_flags_explicit
& MASK_3DNOW
))
2076 target_flags
|= MASK_3DNOW
;
2077 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2078 && !(target_flags_explicit
& MASK_3DNOW_A
))
2079 target_flags
|= MASK_3DNOW_A
;
2080 if (processor_alias_table
[i
].flags
& PTA_SSE
2081 && !(target_flags_explicit
& MASK_SSE
))
2082 target_flags
|= MASK_SSE
;
2083 if (processor_alias_table
[i
].flags
& PTA_SSE2
2084 && !(target_flags_explicit
& MASK_SSE2
))
2085 target_flags
|= MASK_SSE2
;
2086 if (processor_alias_table
[i
].flags
& PTA_SSE3
2087 && !(target_flags_explicit
& MASK_SSE3
))
2088 target_flags
|= MASK_SSE3
;
2089 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2090 && !(target_flags_explicit
& MASK_SSSE3
))
2091 target_flags
|= MASK_SSSE3
;
2092 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2093 x86_prefetch_sse
= true;
2094 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2095 x86_cmpxchg16b
= true;
2096 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2097 && !(target_flags_explicit
& MASK_POPCNT
))
2098 target_flags
|= MASK_POPCNT
;
2099 if (processor_alias_table
[i
].flags
& PTA_ABM
2100 && !(target_flags_explicit
& MASK_ABM
))
2101 target_flags
|= MASK_ABM
;
2102 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2103 && !(target_flags_explicit
& MASK_SSE4A
))
2104 target_flags
|= MASK_SSE4A
;
2105 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2106 error ("CPU you selected does not support x86-64 "
2112 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2114 for (i
= 0; i
< pta_size
; i
++)
2115 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2117 ix86_tune
= processor_alias_table
[i
].processor
;
2118 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2120 if (ix86_tune_defaulted
)
2122 ix86_tune_string
= "x86-64";
2123 for (i
= 0; i
< pta_size
; i
++)
2124 if (! strcmp (ix86_tune_string
,
2125 processor_alias_table
[i
].name
))
2127 ix86_tune
= processor_alias_table
[i
].processor
;
2130 error ("CPU you selected does not support x86-64 "
2133 /* Intel CPUs have always interpreted SSE prefetch instructions as
2134 NOPs; so, we can enable SSE prefetch instructions even when
2135 -mtune (rather than -march) points us to a processor that has them.
2136 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2137 higher processors. */
2138 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2139 x86_prefetch_sse
= true;
2143 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2146 ix86_cost
= &size_cost
;
2148 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2149 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2150 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2152 /* Arrange to set up i386_stack_locals for all functions. */
2153 init_machine_status
= ix86_init_machine_status
;
2155 /* Validate -mregparm= value. */
2156 if (ix86_regparm_string
)
2158 i
= atoi (ix86_regparm_string
);
2159 if (i
< 0 || i
> REGPARM_MAX
)
2160 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2166 ix86_regparm
= REGPARM_MAX
;
2168 /* If the user has provided any of the -malign-* options,
2169 warn and use that value only if -falign-* is not set.
2170 Remove this code in GCC 3.2 or later. */
2171 if (ix86_align_loops_string
)
2173 warning (0, "-malign-loops is obsolete, use -falign-loops");
2174 if (align_loops
== 0)
2176 i
= atoi (ix86_align_loops_string
);
2177 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2178 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2180 align_loops
= 1 << i
;
2184 if (ix86_align_jumps_string
)
2186 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2187 if (align_jumps
== 0)
2189 i
= atoi (ix86_align_jumps_string
);
2190 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2191 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2193 align_jumps
= 1 << i
;
2197 if (ix86_align_funcs_string
)
2199 warning (0, "-malign-functions is obsolete, use -falign-functions");
2200 if (align_functions
== 0)
2202 i
= atoi (ix86_align_funcs_string
);
2203 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2204 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2206 align_functions
= 1 << i
;
2210 /* Default align_* from the processor table. */
2211 if (align_loops
== 0)
2213 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2214 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2216 if (align_jumps
== 0)
2218 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2219 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2221 if (align_functions
== 0)
2223 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2226 /* Validate -mbranch-cost= value, or provide default. */
2227 ix86_branch_cost
= ix86_cost
->branch_cost
;
2228 if (ix86_branch_cost_string
)
2230 i
= atoi (ix86_branch_cost_string
);
2232 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2234 ix86_branch_cost
= i
;
2236 if (ix86_section_threshold_string
)
2238 i
= atoi (ix86_section_threshold_string
);
2240 error ("-mlarge-data-threshold=%d is negative", i
);
2242 ix86_section_threshold
= i
;
2245 if (ix86_tls_dialect_string
)
2247 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2248 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2249 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2250 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2251 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2252 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2254 error ("bad value (%s) for -mtls-dialect= switch",
2255 ix86_tls_dialect_string
);
2258 /* Keep nonleaf frame pointers. */
2259 if (flag_omit_frame_pointer
)
2260 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2261 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2262 flag_omit_frame_pointer
= 1;
2264 /* If we're doing fast math, we don't care about comparison order
2265 wrt NaNs. This lets us use a shorter comparison sequence. */
2266 if (flag_finite_math_only
)
2267 target_flags
&= ~MASK_IEEE_FP
;
2269 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2270 since the insns won't need emulation. */
2271 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
2272 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2274 /* Likewise, if the target doesn't have a 387, or we've specified
2275 software floating point, don't use 387 inline intrinsics. */
2277 target_flags
|= MASK_NO_FANCY_MATH_387
;
2279 /* Turn on SSE3 builtins for -mssse3. */
2281 target_flags
|= MASK_SSE3
;
2283 /* Turn on SSE3 builtins for -msse4a. */
2285 target_flags
|= MASK_SSE3
;
2287 /* Turn on SSE2 builtins for -msse3. */
2289 target_flags
|= MASK_SSE2
;
2291 /* Turn on SSE builtins for -msse2. */
2293 target_flags
|= MASK_SSE
;
2295 /* Turn on MMX builtins for -msse. */
2298 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2299 x86_prefetch_sse
= true;
2302 /* Turn on MMX builtins for 3Dnow. */
2304 target_flags
|= MASK_MMX
;
2306 /* Turn on POPCNT builtins for -mabm. */
2308 target_flags
|= MASK_POPCNT
;
2312 if (TARGET_ALIGN_DOUBLE
)
2313 error ("-malign-double makes no sense in the 64bit mode");
2315 error ("-mrtd calling convention not supported in the 64bit mode");
2317 /* Enable by default the SSE and MMX builtins. Do allow the user to
2318 explicitly disable any of these. In particular, disabling SSE and
2319 MMX for kernel code is extremely useful. */
2321 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2322 & ~target_flags_explicit
);
2326 /* i386 ABI does not specify red zone. It still makes sense to use it
2327 when programmer takes care to stack from being destroyed. */
2328 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2329 target_flags
|= MASK_NO_RED_ZONE
;
2332 /* Validate -mpreferred-stack-boundary= value, or provide default.
2333 The default of 128 bits is for Pentium III's SSE __m128. We can't
2334 change it because of optimize_size. Otherwise, we can't mix object
2335 files compiled with -Os and -On. */
2336 ix86_preferred_stack_boundary
= 128;
2337 if (ix86_preferred_stack_boundary_string
)
2339 i
= atoi (ix86_preferred_stack_boundary_string
);
2340 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2341 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2342 TARGET_64BIT
? 4 : 2);
2344 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2347 /* Accept -msseregparm only if at least SSE support is enabled. */
2348 if (TARGET_SSEREGPARM
2350 error ("-msseregparm used without SSE enabled");
2352 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2354 if (ix86_fpmath_string
!= 0)
2356 if (! strcmp (ix86_fpmath_string
, "387"))
2357 ix86_fpmath
= FPMATH_387
;
2358 else if (! strcmp (ix86_fpmath_string
, "sse"))
2362 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2363 ix86_fpmath
= FPMATH_387
;
2366 ix86_fpmath
= FPMATH_SSE
;
2368 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2369 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2373 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2374 ix86_fpmath
= FPMATH_387
;
2376 else if (!TARGET_80387
)
2378 warning (0, "387 instruction set disabled, using SSE arithmetics");
2379 ix86_fpmath
= FPMATH_SSE
;
2382 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2385 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2388 /* If the i387 is disabled, then do not return values in it. */
2390 target_flags
&= ~MASK_FLOAT_RETURNS
;
2392 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2393 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2395 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2397 /* ??? Unwind info is not correct around the CFG unless either a frame
2398 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2399 unwind info generation to be aware of the CFG and propagating states
2401 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2402 || flag_exceptions
|| flag_non_call_exceptions
)
2403 && flag_omit_frame_pointer
2404 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2406 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2407 warning (0, "unwind tables currently require either a frame pointer "
2408 "or -maccumulate-outgoing-args for correctness");
2409 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2412 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2415 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2416 p
= strchr (internal_label_prefix
, 'X');
2417 internal_label_prefix_len
= p
- internal_label_prefix
;
2421 /* When scheduling description is not available, disable scheduler pass
2422 so it won't slow down the compilation and make x87 code slower. */
2423 if (!TARGET_SCHEDULE
)
2424 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2426 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2427 set_param_value ("simultaneous-prefetches",
2428 ix86_cost
->simultaneous_prefetches
);
2429 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2430 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2433 /* switch to the appropriate section for output of DECL.
2434 DECL is either a `VAR_DECL' node or a constant of some sort.
2435 RELOC indicates whether forming the initial value of DECL requires
2436 link-time relocations. */
/* NOTE(review): for the medium code model, large data objects go into
   '.ldata.*' sections instead of the default '.data.*' ones.  Braces,
   several 'case'/'break' lines and the function's closing brace were
   dropped in extraction; text preserved byte-for-byte.  */
2439 x86_64_elf_select_section (tree decl
, int reloc
,
2440 unsigned HOST_WIDE_INT align
)
2442 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2443 && ix86_in_large_data_p (decl
))
2445 const char *sname
= NULL
;
2446 unsigned int flags
= SECTION_WRITE
;
2447 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2452 case SECCAT_DATA_REL
:
2453 sname
= ".ldata.rel";
2455 case SECCAT_DATA_REL_LOCAL
:
2456 sname
= ".ldata.rel.local";
2458 case SECCAT_DATA_REL_RO
:
2459 sname
= ".ldata.rel.ro";
2461 case SECCAT_DATA_REL_RO_LOCAL
:
2462 sname
= ".ldata.rel.ro.local";
2466 flags
|= SECTION_BSS
;
2469 case SECCAT_RODATA_MERGE_STR
:
2470 case SECCAT_RODATA_MERGE_STR_INIT
:
2471 case SECCAT_RODATA_MERGE_CONST
:
2475 case SECCAT_SRODATA
:
2482 /* We don't split these for medium model. Place them into
2483 default sections and hope for best. */
2488 /* We might get called with string constants, but get_named_section
2489 doesn't like them as they are not DECLs. Also, we need to set
2490 flags in that case. */
2492 return get_section (sname
, flags
, NULL
);
2493 return get_named_section (decl
, sname
, reloc
);
/* Not large data (or not medium model): fall back to the generic
   ELF section-selection logic.  */
2496 return default_elf_select_section (decl
, reloc
, align
);
2499 /* Build up a unique section name, expressed as a
2500 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2501 RELOC indicates whether the initial value of EXP requires
2502 link-time relocations. */
/* NOTE(review): medium-model counterpart of default_unique_section:
   large-data decls get '.ldata.'/'.lbss.'/'.lrodata.' (or their
   '.gnu.linkonce.l*' one-only forms) prefixes.  Braces, local variable
   declarations (plen/nlen/name/string), 'break's and the closing brace
   were dropped in extraction; text preserved byte-for-byte.  */
2505 x86_64_elf_unique_section (tree decl
, int reloc
)
2507 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2508 && ix86_in_large_data_p (decl
))
2510 const char *prefix
= NULL
;
2511 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2512 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2514 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2517 case SECCAT_DATA_REL
:
2518 case SECCAT_DATA_REL_LOCAL
:
2519 case SECCAT_DATA_REL_RO
:
2520 case SECCAT_DATA_REL_RO_LOCAL
:
2521 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2524 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2527 case SECCAT_RODATA_MERGE_STR
:
2528 case SECCAT_RODATA_MERGE_STR_INIT
:
2529 case SECCAT_RODATA_MERGE_CONST
:
2530 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2532 case SECCAT_SRODATA
:
2539 /* We don't split these for medium model. Place them into
2540 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer.  */
2548 plen
= strlen (prefix
);
2550 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2551 name
= targetm
.strip_name_encoding (name
);
2552 nlen
= strlen (name
);
2554 string
= alloca (nlen
+ plen
+ 1);
2555 memcpy (string
, prefix
, plen
);
2556 memcpy (string
+ plen
, name
, nlen
+ 1);
2558 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
/* Otherwise defer to the generic unique-section naming.  */
2562 default_unique_section (decl
, reloc
);
2565 #ifdef COMMON_ASM_OP
2566 /* This says how to output assembler code to declare an
2567 uninitialized external linkage data object.
2569 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): emits either '.largecomm' (medium model, object larger
   than -mlarge-data-threshold) or the target's COMMON_ASM_OP, followed
   by 'name,size,align-in-bytes'.  The 'else' line, braces and closing
   brace were dropped in extraction; text preserved byte-for-byte.  */
2572 x86_elf_aligned_common (FILE *file
,
2573 const char *name
, unsigned HOST_WIDE_INT size
,
2576 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2577 && size
> (unsigned int)ix86_section_threshold
)
2578 fprintf (file
, ".largecomm\t");
2580 fprintf (file
, "%s", COMMON_ASM_OP
);
2581 assemble_name (file
, name
);
2582 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2583 size
, align
/ BITS_PER_UNIT
);
2586 /* Utility function for targets to use in implementing
2587 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): places medium-model large objects into '.lbss',
   everything else into the regular bss section, then emits alignment,
   the object's label, and a skip of 'size' bytes (at least 1 so the
   label refers to real storage).  The '#else' of the
   ASM_DECLARE_OBJECT_NAME conditional and the closing brace were
   dropped in extraction; text preserved byte-for-byte.  */
2590 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2591 const char *name
, unsigned HOST_WIDE_INT size
,
2594 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2595 && size
> (unsigned int)ix86_section_threshold
)
2596 switch_to_section (get_named_section (decl
, ".lbss", 0));
2598 switch_to_section (bss_section
);
2599 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2600 #ifdef ASM_DECLARE_OBJECT_NAME
2601 last_assemble_variable_decl
= decl
;
2602 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2604 /* Standard thing is just output label for the object. */
2605 ASM_OUTPUT_LABEL (file
, name
);
2606 #endif /* ASM_DECLARE_OBJECT_NAME */
2607 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* Set default optimization-related flags for the given -O LEVEL.
   NOTE(review): the '#if'/'#endif' guards around the Darwin errno tweak
   and the scheduler disable, plus the closing brace, were dropped in
   extraction; code text preserved byte-for-byte.  */
2611 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2613 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2614 make the problem with not enough registers even worse. */
2615 #ifdef INSN_SCHEDULING
2617 flag_schedule_insns
= 0;
2621 /* The Darwin libraries never set errno, so we might as well
2622 avoid calling them when that's the only reason we would. */
2623 flag_errno_math
= 0;
2625 /* The default values of these switches depend on the TARGET_64BIT
2626 that is not known at this moment. Mark these values with 2 and
2627 let the user override these. In case there is no command line option
2628 specifying them, we will set the defaults in override_options. */
2630 flag_omit_frame_pointer
= 2;
2631 flag_pcc_struct_return
= 2;
2632 flag_asynchronous_unwind_tables
= 2;
2633 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2634 SUBTARGET_OPTIMIZATION_OPTIONS
;
2638 /* Table of valid machine attributes. */
2639 const struct attribute_spec ix86_attribute_table
[] =
2641 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2642 /* Stdcall attribute says callee is responsible for popping arguments
2643 if they are not variable. */
2644 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2645 /* Fastcall attribute says callee is responsible for popping arguments
2646 if they are not variable. */
2647 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2648 /* Cdecl attribute says the callee is a normal C declaration */
2649 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2650 /* Regparm attribute specifies how many integer arguments are to be
2651 passed in registers. */
2652 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2653 /* Sseregparm attribute says we are using x86_64 calling conventions
2654 for FP arguments. */
2655 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2656 /* force_align_arg_pointer says this function realigns the stack at entry. */
2657 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2658 false, true, true, ix86_handle_cconv_attribute
},
2659 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2660 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2661 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2662 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2664 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2665 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2666 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2667 SUBTARGET_ATTRIBUTE_TABLE
,
2669 { NULL
, 0, 0, false, false, false, NULL
}
2672 /* Decide whether we can make a sibling call to a function. DECL is the
2673 declaration of the function being targeted by the call and EXP is the
2674 CALL_EXPR representing the call. */
2677 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2682 /* If we are generating position-independent code, we cannot sibcall
2683 optimize any indirect call, or a direct call to a global function,
2684 as the PLT requires %ebx be live. */
2685 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2692 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2693 if (POINTER_TYPE_P (func
))
2694 func
= TREE_TYPE (func
);
2697 /* Check that the return value locations are the same. Like
2698 if we are returning floats on the 80387 register stack, we cannot
2699 make a sibcall from a function that doesn't return a float to a
2700 function that does or, conversely, from a function that does return
2701 a float to a function that doesn't; the necessary stack adjustment
2702 would not be executed. This is also the place we notice
2703 differences in the return value ABI. Note that it is ok for one
2704 of the functions to have void return type as long as the return
2705 value of the other is passed in a register. */
2706 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2707 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2709 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2711 if (!rtx_equal_p (a
, b
))
2714 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2716 else if (!rtx_equal_p (a
, b
))
2719 /* If this call is indirect, we'll need to be able to use a call-clobbered
2720 register for the address of the target function. Make sure that all
2721 such registers are not used for passing parameters. */
2722 if (!decl
&& !TARGET_64BIT
)
2726 /* We're looking at the CALL_EXPR, we need the type of the function. */
2727 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2728 type
= TREE_TYPE (type
); /* pointer type */
2729 type
= TREE_TYPE (type
); /* function type */
2731 if (ix86_function_regparm (type
, NULL
) >= 3)
2733 /* ??? Need to count the actual number of registers to be used,
2734 not the possible number of registers. Fix later. */
2739 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2740 /* Dllimport'd functions are also called indirectly. */
2741 if (decl
&& DECL_DLLIMPORT_P (decl
)
2742 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2746 /* If we forced aligned the stack, then sibcalling would unalign the
2747 stack, which may break the called function. */
2748 if (cfun
->machine
->force_align_arg_pointer
)
2751 /* Otherwise okay. That also includes certain types of indirect calls. */
2755 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2756 calling convention attributes;
2757 arguments as in struct attribute_spec.handler. */
2760 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2762 int flags ATTRIBUTE_UNUSED
,
2765 if (TREE_CODE (*node
) != FUNCTION_TYPE
2766 && TREE_CODE (*node
) != METHOD_TYPE
2767 && TREE_CODE (*node
) != FIELD_DECL
2768 && TREE_CODE (*node
) != TYPE_DECL
)
2770 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2771 IDENTIFIER_POINTER (name
));
2772 *no_add_attrs
= true;
2776 /* Can combine regparm with all attributes but fastcall. */
2777 if (is_attribute_p ("regparm", name
))
2781 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2783 error ("fastcall and regparm attributes are not compatible");
2786 cst
= TREE_VALUE (args
);
2787 if (TREE_CODE (cst
) != INTEGER_CST
)
2789 warning (OPT_Wattributes
,
2790 "%qs attribute requires an integer constant argument",
2791 IDENTIFIER_POINTER (name
));
2792 *no_add_attrs
= true;
2794 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2796 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2797 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2798 *no_add_attrs
= true;
2802 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2803 TYPE_ATTRIBUTES (*node
))
2804 && compare_tree_int (cst
, REGPARM_MAX
-1))
2806 error ("%s functions limited to %d register parameters",
2807 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2815 warning (OPT_Wattributes
, "%qs attribute ignored",
2816 IDENTIFIER_POINTER (name
));
2817 *no_add_attrs
= true;
2821 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2822 if (is_attribute_p ("fastcall", name
))
2824 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2826 error ("fastcall and cdecl attributes are not compatible");
2828 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2830 error ("fastcall and stdcall attributes are not compatible");
2832 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2834 error ("fastcall and regparm attributes are not compatible");
2838 /* Can combine stdcall with fastcall (redundant), regparm and
2840 else if (is_attribute_p ("stdcall", name
))
2842 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2844 error ("stdcall and cdecl attributes are not compatible");
2846 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2848 error ("stdcall and fastcall attributes are not compatible");
2852 /* Can combine cdecl with regparm and sseregparm. */
2853 else if (is_attribute_p ("cdecl", name
))
2855 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2857 error ("stdcall and cdecl attributes are not compatible");
2859 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2861 error ("fastcall and cdecl attributes are not compatible");
2865 /* Can combine sseregparm with all attributes. */
2870 /* Return 0 if the attributes for two types are incompatible, 1 if they
2871 are compatible, and 2 if they are nearly compatible (which causes a
2872 warning to be generated). */
2875 ix86_comp_type_attributes (tree type1
, tree type2
)
2877 /* Check for mismatch of non-default calling convention. */
2878 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2880 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2883 /* Check for mismatched fastcall/regparm types. */
2884 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2885 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2886 || (ix86_function_regparm (type1
, NULL
)
2887 != ix86_function_regparm (type2
, NULL
)))
2890 /* Check for mismatched sseregparm types. */
2891 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2892 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2895 /* Check for mismatched return types (cdecl vs stdcall). */
2896 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2897 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2903 /* Return the regparm value for a function with the indicated TYPE and DECL.
2904 DECL may be NULL when calling function indirectly
2905 or considering a libcall. */
2908 ix86_function_regparm (tree type
, tree decl
)
2911 int regparm
= ix86_regparm
;
2912 bool user_convention
= false;
2916 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2919 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2920 user_convention
= true;
2923 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2926 user_convention
= true;
2929 /* Use register calling convention for local functions when possible. */
2930 if (!TARGET_64BIT
&& !user_convention
&& decl
2931 && flag_unit_at_a_time
&& !profile_flag
)
2933 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2936 int local_regparm
, globals
= 0, regno
;
2938 /* Make sure no regparm register is taken by a global register
2940 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2941 if (global_regs
[local_regparm
])
2943 /* We can't use regparm(3) for nested functions as these use
2944 static chain pointer in third argument. */
2945 if (local_regparm
== 3
2946 && decl_function_context (decl
)
2947 && !DECL_NO_STATIC_CHAIN (decl
))
2949 /* If the function realigns its stackpointer, the
2950 prologue will clobber %ecx. If we've already
2951 generated code for the callee, the callee
2952 DECL_STRUCT_FUNCTION is gone, so we fall back to
2953 scanning the attributes for the self-realigning
2955 if ((DECL_STRUCT_FUNCTION (decl
)
2956 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2957 || (!DECL_STRUCT_FUNCTION (decl
)
2958 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2959 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2961 /* Each global register variable increases register preassure,
2962 so the more global reg vars there are, the smaller regparm
2963 optimization use, unless requested by the user explicitly. */
2964 for (regno
= 0; regno
< 6; regno
++)
2965 if (global_regs
[regno
])
2968 = globals
< local_regparm
? local_regparm
- globals
: 0;
2970 if (local_regparm
> regparm
)
2971 regparm
= local_regparm
;
2978 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2979 DFmode (2) arguments in SSE registers for a function with the
2980 indicated TYPE and DECL. DECL may be NULL when calling function
2981 indirectly or considering a libcall. Otherwise return 0. */
2984 ix86_function_sseregparm (tree type
, tree decl
)
2986 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2987 by the sseregparm attribute. */
2988 if (TARGET_SSEREGPARM
2990 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2995 error ("Calling %qD with attribute sseregparm without "
2996 "SSE/SSE2 enabled", decl
);
2998 error ("Calling %qT with attribute sseregparm without "
2999 "SSE/SSE2 enabled", type
);
3006 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3007 (and DFmode for SSE2) arguments in SSE registers,
3008 even for 32-bit targets. */
3009 if (!TARGET_64BIT
&& decl
3010 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3012 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3014 return TARGET_SSE2
? 2 : 1;
3020 /* Return true if EAX is live at the start of the function. Used by
3021 ix86_expand_prologue to determine if we need special help before
3022 calling allocate_stack_worker. */
3025 ix86_eax_live_at_start_p (void)
3027 /* Cheat. Don't bother working forward from ix86_function_regparm
3028 to the function type to whether an actual argument is located in
3029 eax. Instead just look at cfg info, which is still close enough
3030 to correct at this point. This gives false positives for broken
3031 functions that might use uninitialized data that happens to be
3032 allocated in eax, but who cares? */
3033 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3036 /* Value is the number of bytes of arguments automatically
3037 popped when returning from a subroutine call.
3038 FUNDECL is the declaration node of the function (as a tree),
3039 FUNTYPE is the data type of the function (as a tree),
3040 or for a library call it is an identifier node for the subroutine name.
3041 SIZE is the number of bytes of arguments passed on the stack.
3043 On the 80386, the RTD insn may be used to pop them if the number
3044 of args is fixed, but if the number is variable then the caller
3045 must pop them all. RTD can't be used for library calls now
3046 because the library is compiled with the Unix compiler.
3047 Use of RTD is a selectable option, since it is incompatible with
3048 standard Unix calling sequences. If the option is not selected,
3049 the caller must always pop the args.
3051 The attribute stdcall is equivalent to RTD on a per module basis. */
3054 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3056 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3058 /* Cdecl functions override -mrtd, and never pop the stack. */
3059 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3061 /* Stdcall and fastcall functions will pop the stack if not
3063 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3064 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3068 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3069 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3070 == void_type_node
)))
3074 /* Lose any fake structure return argument if it is passed on the stack. */
3075 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3077 && !KEEP_AGGREGATE_RETURN_POINTER
)
3079 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3082 return GET_MODE_SIZE (Pmode
);
3088 /* Argument support functions. */
3090 /* Return true when register may be used to pass function parameters. */
3092 ix86_function_arg_regno_p (int regno
)
3098 return (regno
< REGPARM_MAX
3099 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3101 return (regno
< REGPARM_MAX
3102 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3103 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3104 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3105 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3110 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3115 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3116 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3119 /* RAX is used as hidden argument to va_arg functions. */
3122 for (i
= 0; i
< REGPARM_MAX
; i
++)
3123 if (regno
== x86_64_int_parameter_registers
[i
])
3128 /* Return if we do not know how to pass TYPE solely in registers. */
3131 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3133 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3136 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3137 The layout_type routine is crafty and tries to trick us into passing
3138 currently unsupported vector types on the stack by using TImode. */
3139 return (!TARGET_64BIT
&& mode
== TImode
3140 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3143 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3144 for a call to a function whose data type is FNTYPE.
3145 For a library call, FNTYPE is 0. */
3148 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3149 tree fntype
, /* tree ptr for function decl */
3150 rtx libname
, /* SYMBOL_REF of library name or 0 */
3153 static CUMULATIVE_ARGS zero_cum
;
3154 tree param
, next_param
;
3156 if (TARGET_DEBUG_ARG
)
3158 fprintf (stderr
, "\ninit_cumulative_args (");
3160 fprintf (stderr
, "fntype code = %s, ret code = %s",
3161 tree_code_name
[(int) TREE_CODE (fntype
)],
3162 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3164 fprintf (stderr
, "no fntype");
3167 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3172 /* Set up the number of registers to use for passing arguments. */
3173 cum
->nregs
= ix86_regparm
;
3175 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3177 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3178 cum
->warn_sse
= true;
3179 cum
->warn_mmx
= true;
3180 cum
->maybe_vaarg
= false;
3182 /* Use ecx and edx registers if function has fastcall attribute,
3183 else look for regparm information. */
3184 if (fntype
&& !TARGET_64BIT
)
3186 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3192 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3195 /* Set up the number of SSE registers used for passing SFmode
3196 and DFmode arguments. Warn for mismatching ABI. */
3197 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3199 /* Determine if this function has variable arguments. This is
3200 indicated by the last argument being 'void_type_mode' if there
3201 are no variable arguments. If there are variable arguments, then
3202 we won't pass anything in registers in 32-bit mode. */
3204 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3206 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3207 param
!= 0; param
= next_param
)
3209 next_param
= TREE_CHAIN (param
);
3210 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3220 cum
->float_in_sse
= 0;
3222 cum
->maybe_vaarg
= true;
3226 if ((!fntype
&& !libname
)
3227 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3228 cum
->maybe_vaarg
= true;
3230 if (TARGET_DEBUG_ARG
)
3231 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3236 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3237 But in the case of vector types, it is some vector mode.
3239 When we have only some of our vector isa extensions enabled, then there
3240 are some modes for which vector_mode_supported_p is false. For these
3241 modes, the generic vector support in gcc will choose some non-vector mode
3242 in order to implement the type. By computing the natural mode, we'll
3243 select the proper ABI location for the operand and not depend on whatever
3244 the middle-end decides to do with these vector types. */
3246 static enum machine_mode
3247 type_natural_mode (tree type
)
3249 enum machine_mode mode
= TYPE_MODE (type
);
3251 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3253 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3254 if ((size
== 8 || size
== 16)
3255 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3256 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3258 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3260 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3261 mode
= MIN_MODE_VECTOR_FLOAT
;
3263 mode
= MIN_MODE_VECTOR_INT
;
3265 /* Get the mode which has this inner mode and number of units. */
3266 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3267 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3268 && GET_MODE_INNER (mode
) == innermode
)
3278 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3279 this may not agree with the mode that the type system has chosen for the
3280 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3281 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3284 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3289 if (orig_mode
!= BLKmode
)
3290 tmp
= gen_rtx_REG (orig_mode
, regno
);
3293 tmp
= gen_rtx_REG (mode
, regno
);
3294 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3295 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3301 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3302 of this code is to classify each 8bytes of incoming argument by the register
3303 class and assign registers accordingly. */
3305 /* Return the union class of CLASS1 and CLASS2.
3306 See the x86-64 PS ABI for details. */
3308 static enum x86_64_reg_class
3309 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3311 /* Rule #1: If both classes are equal, this is the resulting class. */
3312 if (class1
== class2
)
3315 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3317 if (class1
== X86_64_NO_CLASS
)
3319 if (class2
== X86_64_NO_CLASS
)
3322 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3323 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3324 return X86_64_MEMORY_CLASS
;
3326 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3327 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3328 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3329 return X86_64_INTEGERSI_CLASS
;
3330 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3331 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3332 return X86_64_INTEGER_CLASS
;
3334 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3336 if (class1
== X86_64_X87_CLASS
3337 || class1
== X86_64_X87UP_CLASS
3338 || class1
== X86_64_COMPLEX_X87_CLASS
3339 || class2
== X86_64_X87_CLASS
3340 || class2
== X86_64_X87UP_CLASS
3341 || class2
== X86_64_COMPLEX_X87_CLASS
)
3342 return X86_64_MEMORY_CLASS
;
3344 /* Rule #6: Otherwise class SSE is used. */
3345 return X86_64_SSE_CLASS
;
/* NOTE(review): this span is a garbled extraction -- statements are split
   across physical lines, and interior lines (the function's return type,
   braces, case labels such as RECORD_TYPE/ARRAY_TYPE, and most return
   statements) are missing entirely -- so the text is kept byte-identical
   rather than rewritten.  It is recognizably the x86-64 psABI argument
   classifier (classify_argument): it walks aggregates field by field,
   merges per-8-byte register classes via merge_classes, then classifies
   scalar and vector modes.  TODO: restore this function from a pristine
   copy of the file rather than editing it in place.  */
3348 /* Classify the argument of type TYPE and mode MODE.
3349 CLASSES will be filled by the register class used to pass each word
3350 of the operand. The number of words is returned. In case the parameter
3351 should be passed in memory, 0 is returned. As a special case for zero
3352 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3354 BIT_OFFSET is used internally for handling records and specifies offset
3355 of the offset in bits modulo 256 to avoid overflow cases.
3357 See the x86-64 PS ABI for details.
3361 classify_argument (enum machine_mode mode
, tree type
,
3362 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3364 HOST_WIDE_INT bytes
=
3365 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3366 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3368 /* Variable sized entities are always passed/returned in memory. */
3372 if (mode
!= VOIDmode
3373 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3376 if (type
&& AGGREGATE_TYPE_P (type
))
3380 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3382 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3386 for (i
= 0; i
< words
; i
++)
3387 classes
[i
] = X86_64_NO_CLASS
;
3389 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3390 signalize memory class, so handle it as special case. */
3393 classes
[0] = X86_64_NO_CLASS
;
3397 /* Classify each field of record and merge classes. */
3398 switch (TREE_CODE (type
))
3401 /* And now merge the fields of structure. */
3402 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3404 if (TREE_CODE (field
) == FIELD_DECL
)
3408 if (TREE_TYPE (field
) == error_mark_node
)
3411 /* Bitfields are always classified as integer. Handle them
3412 early, since later code would consider them to be
3413 misaligned integers. */
3414 if (DECL_BIT_FIELD (field
))
3416 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3417 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3418 + tree_low_cst (DECL_SIZE (field
), 0)
3421 merge_classes (X86_64_INTEGER_CLASS
,
3426 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3427 TREE_TYPE (field
), subclasses
,
3428 (int_bit_position (field
)
3429 + bit_offset
) % 256);
3432 for (i
= 0; i
< num
; i
++)
3435 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3437 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3445 /* Arrays are handled as small records. */
3448 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3449 TREE_TYPE (type
), subclasses
, bit_offset
);
3453 /* The partial classes are now full classes. */
3454 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3455 subclasses
[0] = X86_64_SSE_CLASS
;
3456 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3457 subclasses
[0] = X86_64_INTEGER_CLASS
;
3459 for (i
= 0; i
< words
; i
++)
3460 classes
[i
] = subclasses
[i
% num
];
3465 case QUAL_UNION_TYPE
:
3466 /* Unions are similar to RECORD_TYPE but offset is always 0.
3468 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3470 if (TREE_CODE (field
) == FIELD_DECL
)
3474 if (TREE_TYPE (field
) == error_mark_node
)
3477 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3478 TREE_TYPE (field
), subclasses
,
3482 for (i
= 0; i
< num
; i
++)
3483 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3492 /* Final merger cleanup. */
3493 for (i
= 0; i
< words
; i
++)
3495 /* If one class is MEMORY, everything should be passed in
3497 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3500 /* The X86_64_SSEUP_CLASS should be always preceded by
3501 X86_64_SSE_CLASS. */
3502 if (classes
[i
] == X86_64_SSEUP_CLASS
3503 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3504 classes
[i
] = X86_64_SSE_CLASS
;
3506 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3507 if (classes
[i
] == X86_64_X87UP_CLASS
3508 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3509 classes
[i
] = X86_64_SSE_CLASS
;
3514 /* Compute alignment needed. We align all types to natural boundaries with
3515 exception of XFmode that is aligned to 64bits. */
3516 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3518 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3521 mode_alignment
= 128;
3522 else if (mode
== XCmode
)
3523 mode_alignment
= 256;
3524 if (COMPLEX_MODE_P (mode
))
3525 mode_alignment
/= 2;
3526 /* Misaligned fields are always returned in memory. */
3527 if (bit_offset
% mode_alignment
)
3531 /* for V1xx modes, just use the base mode */
3532 if (VECTOR_MODE_P (mode
)
3533 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3534 mode
= GET_MODE_INNER (mode
);
3536 /* Classification of atomic types. */
/* NOTE(review): the case labels for the per-mode switch below were lost
   in extraction; the assignments match the psABI per-mode classes.  */
3541 classes
[0] = X86_64_SSE_CLASS
;
3544 classes
[0] = X86_64_SSE_CLASS
;
3545 classes
[1] = X86_64_SSEUP_CLASS
;
3554 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3555 classes
[0] = X86_64_INTEGERSI_CLASS
;
3557 classes
[0] = X86_64_INTEGER_CLASS
;
3561 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3566 if (!(bit_offset
% 64))
3567 classes
[0] = X86_64_SSESF_CLASS
;
3569 classes
[0] = X86_64_SSE_CLASS
;
3572 classes
[0] = X86_64_SSEDF_CLASS
;
3575 classes
[0] = X86_64_X87_CLASS
;
3576 classes
[1] = X86_64_X87UP_CLASS
;
3579 classes
[0] = X86_64_SSE_CLASS
;
3580 classes
[1] = X86_64_SSEUP_CLASS
;
3583 classes
[0] = X86_64_SSE_CLASS
;
3586 classes
[0] = X86_64_SSEDF_CLASS
;
3587 classes
[1] = X86_64_SSEDF_CLASS
;
3590 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3593 /* This modes is larger than 16 bytes. */
3601 classes
[0] = X86_64_SSE_CLASS
;
3602 classes
[1] = X86_64_SSEUP_CLASS
;
3608 classes
[0] = X86_64_SSE_CLASS
;
3614 gcc_assert (VECTOR_MODE_P (mode
));
3619 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3621 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3622 classes
[0] = X86_64_INTEGERSI_CLASS
;
3624 classes
[0] = X86_64_INTEGER_CLASS
;
3625 classes
[1] = X86_64_INTEGER_CLASS
;
3626 return 1 + (bytes
> 8);
3630 /* Examine the argument and return set number of register required in each
3631 class. Return 0 iff parameter should be passed in memory. */
3633 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3634 int *int_nregs
, int *sse_nregs
)
3636 enum x86_64_reg_class
class[MAX_CLASSES
];
3637 int n
= classify_argument (mode
, type
, class, 0);
3643 for (n
--; n
>= 0; n
--)
3646 case X86_64_INTEGER_CLASS
:
3647 case X86_64_INTEGERSI_CLASS
:
3650 case X86_64_SSE_CLASS
:
3651 case X86_64_SSESF_CLASS
:
3652 case X86_64_SSEDF_CLASS
:
3655 case X86_64_NO_CLASS
:
3656 case X86_64_SSEUP_CLASS
:
3658 case X86_64_X87_CLASS
:
3659 case X86_64_X87UP_CLASS
:
3663 case X86_64_COMPLEX_X87_CLASS
:
3664 return in_return
? 2 : 0;
3665 case X86_64_MEMORY_CLASS
:
/* NOTE(review): this span is a garbled extraction -- statements are split
   across physical lines and interior lines (braces, some case labels,
   returns, local declarations) are missing -- so the text is kept
   byte-identical rather than rewritten.  It is recognizably
   construct_container: it classifies the argument via classify_argument,
   diagnoses SSE/x87 use when those units are disabled, emits single-REG
   fast paths, and otherwise builds a PARALLEL of (reg, offset) pairs.
   TODO: restore this function from a pristine copy of the file.  */
3671 /* Construct container for the argument used by GCC interface. See
3672 FUNCTION_ARG for the detailed description. */
3675 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3676 tree type
, int in_return
, int nintregs
, int nsseregs
,
3677 const int *intreg
, int sse_regno
)
3679 /* The following variables hold the static issued_error state. */
3680 static bool issued_sse_arg_error
;
3681 static bool issued_sse_ret_error
;
3682 static bool issued_x87_ret_error
;
3684 enum machine_mode tmpmode
;
3686 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3687 enum x86_64_reg_class
class[MAX_CLASSES
];
3691 int needed_sseregs
, needed_intregs
;
3692 rtx exp
[MAX_CLASSES
];
3695 n
= classify_argument (mode
, type
, class, 0);
3696 if (TARGET_DEBUG_ARG
)
3699 fprintf (stderr
, "Memory class\n");
3702 fprintf (stderr
, "Classes:");
3703 for (i
= 0; i
< n
; i
++)
3705 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3707 fprintf (stderr
, "\n");
3712 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3715 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3718 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3719 some less clueful developer tries to use floating-point anyway. */
3720 if (needed_sseregs
&& !TARGET_SSE
)
3724 if (!issued_sse_ret_error
)
3726 error ("SSE register return with SSE disabled");
3727 issued_sse_ret_error
= true;
3730 else if (!issued_sse_arg_error
)
3732 error ("SSE register argument with SSE disabled");
3733 issued_sse_arg_error
= true;
3738 /* Likewise, error if the ABI requires us to return values in the
3739 x87 registers and the user specified -mno-80387. */
3740 if (!TARGET_80387
&& in_return
)
3741 for (i
= 0; i
< n
; i
++)
3742 if (class[i
] == X86_64_X87_CLASS
3743 || class[i
] == X86_64_X87UP_CLASS
3744 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3746 if (!issued_x87_ret_error
)
3748 error ("x87 register return with x87 disabled");
3749 issued_x87_ret_error
= true;
3754 /* First construct simple cases. Avoid SCmode, since we want to use
3755 single register to pass this type. */
3756 if (n
== 1 && mode
!= SCmode
)
3759 case X86_64_INTEGER_CLASS
:
3760 case X86_64_INTEGERSI_CLASS
:
3761 return gen_rtx_REG (mode
, intreg
[0]);
3762 case X86_64_SSE_CLASS
:
3763 case X86_64_SSESF_CLASS
:
3764 case X86_64_SSEDF_CLASS
:
3765 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3766 case X86_64_X87_CLASS
:
3767 case X86_64_COMPLEX_X87_CLASS
:
3768 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3769 case X86_64_NO_CLASS
:
3770 /* Zero sized array, struct or class. */
3775 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3777 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3779 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3780 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3781 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3782 && class[1] == X86_64_INTEGER_CLASS
3783 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3784 && intreg
[0] + 1 == intreg
[1])
3785 return gen_rtx_REG (mode
, intreg
[0]);
3787 /* Otherwise figure out the entries of the PARALLEL. */
3788 for (i
= 0; i
< n
; i
++)
3792 case X86_64_NO_CLASS
:
3794 case X86_64_INTEGER_CLASS
:
3795 case X86_64_INTEGERSI_CLASS
:
3796 /* Merge TImodes on aligned occasions here too. */
3797 if (i
* 8 + 8 > bytes
)
3798 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3799 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3803 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3804 if (tmpmode
== BLKmode
)
3806 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3807 gen_rtx_REG (tmpmode
, *intreg
),
3811 case X86_64_SSESF_CLASS
:
3812 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3813 gen_rtx_REG (SFmode
,
3814 SSE_REGNO (sse_regno
)),
3818 case X86_64_SSEDF_CLASS
:
3819 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3820 gen_rtx_REG (DFmode
,
3821 SSE_REGNO (sse_regno
)),
3825 case X86_64_SSE_CLASS
:
3826 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3830 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3831 gen_rtx_REG (tmpmode
,
3832 SSE_REGNO (sse_regno
)),
3834 if (tmpmode
== TImode
)
3843 /* Empty aligned struct, union or class. */
3847 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3848 for (i
= 0; i
< nexps
; i
++)
3849 XVECEXP (ret
, 0, i
) = exp
[i
];
3853 /* Update the data in CUM to advance over an argument
3854 of mode MODE and data type TYPE.
3855 (TYPE is null for libcalls where that information may not be available.) */
3858 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3859 tree type
, int named
)
3862 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3863 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3866 mode
= type_natural_mode (type
);
3868 if (TARGET_DEBUG_ARG
)
3869 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3870 "mode=%s, named=%d)\n\n",
3871 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3872 GET_MODE_NAME (mode
), named
);
3876 int int_nregs
, sse_nregs
;
3877 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3878 cum
->words
+= words
;
3879 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3881 cum
->nregs
-= int_nregs
;
3882 cum
->sse_nregs
-= sse_nregs
;
3883 cum
->regno
+= int_nregs
;
3884 cum
->sse_regno
+= sse_nregs
;
3887 cum
->words
+= words
;
3905 cum
->words
+= words
;
3906 cum
->nregs
-= words
;
3907 cum
->regno
+= words
;
3909 if (cum
->nregs
<= 0)
3917 if (cum
->float_in_sse
< 2)
3920 if (cum
->float_in_sse
< 1)
3931 if (!type
|| !AGGREGATE_TYPE_P (type
))
3933 cum
->sse_words
+= words
;
3934 cum
->sse_nregs
-= 1;
3935 cum
->sse_regno
+= 1;
3936 if (cum
->sse_nregs
<= 0)
3948 if (!type
|| !AGGREGATE_TYPE_P (type
))
3950 cum
->mmx_words
+= words
;
3951 cum
->mmx_nregs
-= 1;
3952 cum
->mmx_regno
+= 1;
3953 if (cum
->mmx_nregs
<= 0)
3964 /* Define where to put the arguments to a function.
3965 Value is zero to push the argument on the stack,
3966 or a hard register in which to store the argument.
3968 MODE is the argument's machine mode.
3969 TYPE is the data type of the argument (as a tree).
3970 This is null for libcalls where that information may
3972 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3973 the preceding args and about the function being called.
3974 NAMED is nonzero if this argument is a named parameter
3975 (otherwise it is an extra parameter matching an ellipsis). */
3978 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3979 tree type
, int named
)
3981 enum machine_mode mode
= orig_mode
;
3984 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3985 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3986 static bool warnedsse
, warnedmmx
;
3988 /* To simplify the code below, represent vector types with a vector mode
3989 even if MMX/SSE are not active. */
3990 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3991 mode
= type_natural_mode (type
);
3993 /* Handle a hidden AL argument containing number of registers for varargs
3994 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3996 if (mode
== VOIDmode
)
3999 return GEN_INT (cum
->maybe_vaarg
4000 ? (cum
->sse_nregs
< 0
4008 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4010 &x86_64_int_parameter_registers
[cum
->regno
],
4015 /* For now, pass fp/complex values on the stack. */
4027 if (words
<= cum
->nregs
)
4029 int regno
= cum
->regno
;
4031 /* Fastcall allocates the first two DWORD (SImode) or
4032 smaller arguments to ECX and EDX. */
4035 if (mode
== BLKmode
|| mode
== DImode
)
4038 /* ECX not EAX is the first allocated register. */
4042 ret
= gen_rtx_REG (mode
, regno
);
4046 if (cum
->float_in_sse
< 2)
4049 if (cum
->float_in_sse
< 1)
4059 if (!type
|| !AGGREGATE_TYPE_P (type
))
4061 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4064 warning (0, "SSE vector argument without SSE enabled "
4068 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4069 cum
->sse_regno
+ FIRST_SSE_REG
);
4076 if (!type
|| !AGGREGATE_TYPE_P (type
))
4078 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4081 warning (0, "MMX vector argument without MMX enabled "
4085 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4086 cum
->mmx_regno
+ FIRST_MMX_REG
);
4091 if (TARGET_DEBUG_ARG
)
4094 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4095 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4098 print_simple_rtl (stderr
, ret
);
4100 fprintf (stderr
, ", stack");
4102 fprintf (stderr
, " )\n");
4108 /* A C expression that indicates when an argument must be passed by
4109 reference. If nonzero for an argument, a copy of that argument is
4110 made in memory and a pointer to the argument is passed instead of
4111 the argument itself. The pointer is passed in whatever way is
4112 appropriate for passing a pointer to that type. */
4115 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4116 enum machine_mode mode ATTRIBUTE_UNUSED
,
4117 tree type
, bool named ATTRIBUTE_UNUSED
)
4122 if (type
&& int_size_in_bytes (type
) == -1)
4124 if (TARGET_DEBUG_ARG
)
4125 fprintf (stderr
, "function_arg_pass_by_reference\n");
4132 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4133 ABI. Only called if TARGET_SSE. */
4135 contains_128bit_aligned_vector_p (tree type
)
4137 enum machine_mode mode
= TYPE_MODE (type
);
4138 if (SSE_REG_MODE_P (mode
)
4139 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4141 if (TYPE_ALIGN (type
) < 128)
4144 if (AGGREGATE_TYPE_P (type
))
4146 /* Walk the aggregates recursively. */
4147 switch (TREE_CODE (type
))
4151 case QUAL_UNION_TYPE
:
4155 /* Walk all the structure fields. */
4156 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4158 if (TREE_CODE (field
) == FIELD_DECL
4159 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4166 /* Just for use if some languages passes arrays by value. */
4167 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4178 /* Gives the alignment boundary, in bits, of an argument with the
4179 specified mode and type. */
4182 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4186 align
= TYPE_ALIGN (type
);
4188 align
= GET_MODE_ALIGNMENT (mode
);
4189 if (align
< PARM_BOUNDARY
)
4190 align
= PARM_BOUNDARY
;
4193 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4194 make an exception for SSE modes since these require 128bit
4197 The handling here differs from field_alignment. ICC aligns MMX
4198 arguments to 4 byte boundaries, while structure fields are aligned
4199 to 8 byte boundaries. */
4201 align
= PARM_BOUNDARY
;
4204 if (!SSE_REG_MODE_P (mode
))
4205 align
= PARM_BOUNDARY
;
4209 if (!contains_128bit_aligned_vector_p (type
))
4210 align
= PARM_BOUNDARY
;
4218 /* Return true if N is a possible register number of function value. */
4220 ix86_function_value_regno_p (int regno
)
4226 return ((regno
) == 0
4227 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4228 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4230 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4231 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4232 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4237 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4238 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4242 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4249 /* Define how to find the value returned by a function.
4250 VALTYPE is the data type of the value (as a tree).
4251 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4252 otherwise, FUNC is 0. */
4254 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4255 bool outgoing ATTRIBUTE_UNUSED
)
4257 enum machine_mode natmode
= type_natural_mode (valtype
);
4261 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4262 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4263 x86_64_int_return_registers
, 0);
4264 /* For zero sized structures, construct_container return NULL, but we
4265 need to keep rest of compiler happy by returning meaningful value. */
4267 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4272 tree fn
= NULL_TREE
, fntype
;
4274 && DECL_P (fntype_or_decl
))
4275 fn
= fntype_or_decl
;
4276 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4277 return gen_rtx_REG (TYPE_MODE (valtype
),
4278 ix86_value_regno (natmode
, fn
, fntype
));
4282 /* Return true iff type is returned in memory. */
4284 ix86_return_in_memory (tree type
)
4286 int needed_intregs
, needed_sseregs
, size
;
4287 enum machine_mode mode
= type_natural_mode (type
);
4290 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4292 if (mode
== BLKmode
)
4295 size
= int_size_in_bytes (type
);
4297 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4300 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4302 /* User-created vectors small enough to fit in EAX. */
4306 /* MMX/3dNow values are returned in MM0,
4307 except when it doesn't exits. */
4309 return (TARGET_MMX
? 0 : 1);
4311 /* SSE values are returned in XMM0, except when it doesn't exist. */
4313 return (TARGET_SSE
? 0 : 1);
4327 /* When returning SSE vector types, we have a choice of either
4328 (1) being abi incompatible with a -march switch, or
4329 (2) generating an error.
4330 Given no good solution, I think the safest thing is one warning.
4331 The user won't be able to use -Werror, but....
4333 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4334 called in response to actually generating a caller or callee that
4335 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4336 via aggregate_value_p for general type probing from tree-ssa. */
4339 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4341 static bool warnedsse
, warnedmmx
;
4345 /* Look at the return type of the function, not the function type. */
4346 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4348 if (!TARGET_SSE
&& !warnedsse
)
4351 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4354 warning (0, "SSE vector return without SSE enabled "
4359 if (!TARGET_MMX
&& !warnedmmx
)
4361 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4364 warning (0, "MMX vector return without MMX enabled "
4373 /* Define how to find the value returned by a library function
4374 assuming the value has mode MODE. */
4376 ix86_libcall_value (enum machine_mode mode
)
4390 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4393 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4397 return gen_rtx_REG (mode
, 0);
4401 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4404 /* Given a mode, return the register to use for a return value. */
4407 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4409 gcc_assert (!TARGET_64BIT
);
4411 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4412 we normally prevent this case when mmx is not available. However
4413 some ABIs may require the result to be returned like DImode. */
4414 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4415 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4417 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4418 we prevent this case when sse is not available. However some ABIs
4419 may require the result to be returned like integer TImode. */
4420 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4421 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4423 /* Decimal floating point values can go in %eax, unlike other float modes. */
4424 if (DECIMAL_FLOAT_MODE_P (mode
))
4427 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4428 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4431 /* Floating point return values in %st(0), except for local functions when
4432 SSE math is enabled or for functions with sseregparm attribute. */
4433 if ((func
|| fntype
)
4434 && (mode
== SFmode
|| mode
== DFmode
))
4436 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4437 if ((sse_level
>= 1 && mode
== SFmode
)
4438 || (sse_level
== 2 && mode
== DFmode
))
4439 return FIRST_SSE_REG
;
4442 return FIRST_FLOAT_REG
;
4445 /* Create the va_list data type. */
4448 ix86_build_builtin_va_list (void)
4450 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4452 /* For i386 we use plain pointer to argument area. */
4454 return build_pointer_type (char_type_node
);
4456 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4457 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4459 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4460 unsigned_type_node
);
4461 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4462 unsigned_type_node
);
4463 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4465 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4468 va_list_gpr_counter_field
= f_gpr
;
4469 va_list_fpr_counter_field
= f_fpr
;
4471 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4472 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4473 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4474 DECL_FIELD_CONTEXT (f_sav
) = record
;
4476 TREE_CHAIN (record
) = type_decl
;
4477 TYPE_NAME (record
) = type_decl
;
4478 TYPE_FIELDS (record
) = f_gpr
;
4479 TREE_CHAIN (f_gpr
) = f_fpr
;
4480 TREE_CHAIN (f_fpr
) = f_ovf
;
4481 TREE_CHAIN (f_ovf
) = f_sav
;
4483 layout_type (record
);
4485 /* The correct type is an array type of one element. */
4486 return build_array_type (record
, build_index_type (size_zero_node
));
4489 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4492 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4493 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4496 CUMULATIVE_ARGS next_cum
;
4497 rtx save_area
= NULL_RTX
, mem
;
4510 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4513 /* Indicate to allocate space on the stack for varargs save area. */
4514 ix86_save_varrargs_registers
= 1;
4516 cfun
->stack_alignment_needed
= 128;
4518 fntype
= TREE_TYPE (current_function_decl
);
4519 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4520 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4521 != void_type_node
));
4523 /* For varargs, we do not want to skip the dummy va_dcl argument.
4524 For stdargs, we do want to skip the last named argument. */
4527 function_arg_advance (&next_cum
, mode
, type
, 1);
4530 save_area
= frame_pointer_rtx
;
4532 set
= get_varargs_alias_set ();
4534 for (i
= next_cum
.regno
;
4536 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4539 mem
= gen_rtx_MEM (Pmode
,
4540 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4541 MEM_NOTRAP_P (mem
) = 1;
4542 set_mem_alias_set (mem
, set
);
4543 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4544 x86_64_int_parameter_registers
[i
]));
4547 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4549 /* Now emit code to save SSE registers. The AX parameter contains number
4550 of SSE parameter registers used to call this function. We use
4551 sse_prologue_save insn template that produces computed jump across
4552 SSE saves. We need some preparation work to get this working. */
4554 label
= gen_label_rtx ();
4555 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4557 /* Compute address to jump to :
4558 label - 5*eax + nnamed_sse_arguments*5 */
4559 tmp_reg
= gen_reg_rtx (Pmode
);
4560 nsse_reg
= gen_reg_rtx (Pmode
);
4561 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4562 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4563 gen_rtx_MULT (Pmode
, nsse_reg
,
4565 if (next_cum
.sse_regno
)
4568 gen_rtx_CONST (DImode
,
4569 gen_rtx_PLUS (DImode
,
4571 GEN_INT (next_cum
.sse_regno
* 4))));
4573 emit_move_insn (nsse_reg
, label_ref
);
4574 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4576 /* Compute address of memory block we save into. We always use pointer
4577 pointing 127 bytes after first byte to store - this is needed to keep
4578 instruction size limited by 4 bytes. */
4579 tmp_reg
= gen_reg_rtx (Pmode
);
4580 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4581 plus_constant (save_area
,
4582 8 * REGPARM_MAX
+ 127)));
4583 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4584 MEM_NOTRAP_P (mem
) = 1;
4585 set_mem_alias_set (mem
, set
);
4586 set_mem_align (mem
, BITS_PER_WORD
);
4588 /* And finally do the dirty job! */
4589 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4590 GEN_INT (next_cum
.sse_regno
), label
));
4595 /* Implement va_start. */
4598 ix86_va_start (tree valist
, rtx nextarg
)
4600 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4601 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4602 tree gpr
, fpr
, ovf
, sav
, t
;
4605 /* Only 64bit target needs something special. */
4608 std_expand_builtin_va_start (valist
, nextarg
);
4612 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4613 f_fpr
= TREE_CHAIN (f_gpr
);
4614 f_ovf
= TREE_CHAIN (f_fpr
);
4615 f_sav
= TREE_CHAIN (f_ovf
);
4617 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4618 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4619 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4620 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4621 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4623 /* Count number of gp and fp argument registers used. */
4624 words
= current_function_args_info
.words
;
4625 n_gpr
= current_function_args_info
.regno
;
4626 n_fpr
= current_function_args_info
.sse_regno
;
4628 if (TARGET_DEBUG_ARG
)
4629 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4630 (int) words
, (int) n_gpr
, (int) n_fpr
);
4632 if (cfun
->va_list_gpr_size
)
4634 type
= TREE_TYPE (gpr
);
4635 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4636 build_int_cst (type
, n_gpr
* 8));
4637 TREE_SIDE_EFFECTS (t
) = 1;
4638 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4641 if (cfun
->va_list_fpr_size
)
4643 type
= TREE_TYPE (fpr
);
4644 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4645 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4646 TREE_SIDE_EFFECTS (t
) = 1;
4647 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4650 /* Find the overflow area. */
4651 type
= TREE_TYPE (ovf
);
4652 t
= make_tree (type
, virtual_incoming_args_rtx
);
4654 t
= build2 (PLUS_EXPR
, type
, t
,
4655 build_int_cst (type
, words
* UNITS_PER_WORD
));
4656 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4657 TREE_SIDE_EFFECTS (t
) = 1;
4658 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4660 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4662 /* Find the register save area.
4663 Prologue of the function save it right above stack frame. */
4664 type
= TREE_TYPE (sav
);
4665 t
= make_tree (type
, frame_pointer_rtx
);
4666 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4667 TREE_SIDE_EFFECTS (t
) = 1;
4668 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4672 /* Implement va_arg. */
4675 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4677 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4678 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4679 tree gpr
, fpr
, ovf
, sav
, t
;
4681 tree lab_false
, lab_over
= NULL_TREE
;
4686 enum machine_mode nat_mode
;
4688 /* Only 64bit target needs something special. */
4690 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4692 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4693 f_fpr
= TREE_CHAIN (f_gpr
);
4694 f_ovf
= TREE_CHAIN (f_fpr
);
4695 f_sav
= TREE_CHAIN (f_ovf
);
4697 valist
= build_va_arg_indirect_ref (valist
);
4698 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4699 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4700 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4701 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4703 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4705 type
= build_pointer_type (type
);
4706 size
= int_size_in_bytes (type
);
4707 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4709 nat_mode
= type_natural_mode (type
);
4710 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4711 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4713 /* Pull the value out of the saved registers. */
4715 addr
= create_tmp_var (ptr_type_node
, "addr");
4716 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4720 int needed_intregs
, needed_sseregs
;
4722 tree int_addr
, sse_addr
;
4724 lab_false
= create_artificial_label ();
4725 lab_over
= create_artificial_label ();
4727 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4729 need_temp
= (!REG_P (container
)
4730 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4731 || TYPE_ALIGN (type
) > 128));
4733 /* In case we are passing structure, verify that it is consecutive block
4734 on the register save area. If not we need to do moves. */
4735 if (!need_temp
&& !REG_P (container
))
4737 /* Verify that all registers are strictly consecutive */
4738 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4742 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4744 rtx slot
= XVECEXP (container
, 0, i
);
4745 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4746 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4754 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4756 rtx slot
= XVECEXP (container
, 0, i
);
4757 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4758 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4770 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4771 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4772 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4773 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4776 /* First ensure that we fit completely in registers. */
4779 t
= build_int_cst (TREE_TYPE (gpr
),
4780 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4781 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4782 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4783 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4784 gimplify_and_add (t
, pre_p
);
4788 t
= build_int_cst (TREE_TYPE (fpr
),
4789 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4791 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4792 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4793 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4794 gimplify_and_add (t
, pre_p
);
4797 /* Compute index to start of area used for integer regs. */
4800 /* int_addr = gpr + sav; */
4801 t
= fold_convert (ptr_type_node
, gpr
);
4802 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4803 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4804 gimplify_and_add (t
, pre_p
);
4808 /* sse_addr = fpr + sav; */
4809 t
= fold_convert (ptr_type_node
, fpr
);
4810 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4811 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4812 gimplify_and_add (t
, pre_p
);
4817 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4820 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4821 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4822 gimplify_and_add (t
, pre_p
);
4824 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4826 rtx slot
= XVECEXP (container
, 0, i
);
4827 rtx reg
= XEXP (slot
, 0);
4828 enum machine_mode mode
= GET_MODE (reg
);
4829 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4830 tree addr_type
= build_pointer_type (piece_type
);
4833 tree dest_addr
, dest
;
4835 if (SSE_REGNO_P (REGNO (reg
)))
4837 src_addr
= sse_addr
;
4838 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4842 src_addr
= int_addr
;
4843 src_offset
= REGNO (reg
) * 8;
4845 src_addr
= fold_convert (addr_type
, src_addr
);
4846 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4847 size_int (src_offset
)));
4848 src
= build_va_arg_indirect_ref (src_addr
);
4850 dest_addr
= fold_convert (addr_type
, addr
);
4851 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4852 size_int (INTVAL (XEXP (slot
, 1)))));
4853 dest
= build_va_arg_indirect_ref (dest_addr
);
4855 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4856 gimplify_and_add (t
, pre_p
);
4862 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4863 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4864 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4865 gimplify_and_add (t
, pre_p
);
4869 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4870 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4871 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4872 gimplify_and_add (t
, pre_p
);
4875 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4876 gimplify_and_add (t
, pre_p
);
4878 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4879 append_to_statement_list (t
, pre_p
);
4882 /* ... otherwise out of the overflow area. */
4884 /* Care for on-stack alignment if needed. */
4885 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4886 || integer_zerop (TYPE_SIZE (type
)))
4890 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4891 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4892 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4893 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4894 build_int_cst (TREE_TYPE (t
), -align
));
4896 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4898 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4899 gimplify_and_add (t2
, pre_p
);
4901 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4902 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4903 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4904 gimplify_and_add (t
, pre_p
);
4908 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4909 append_to_statement_list (t
, pre_p
);
4912 ptrtype
= build_pointer_type (type
);
4913 addr
= fold_convert (ptrtype
, addr
);
4916 addr
= build_va_arg_indirect_ref (addr
);
4917 return build_va_arg_indirect_ref (addr
);
4920 /* Return nonzero if OPNUM's MEM should be matched
4921 in movabs* patterns. */
4924 ix86_check_movabs (rtx insn
, int opnum
)
4928 set
= PATTERN (insn
);
4929 if (GET_CODE (set
) == PARALLEL
)
4930 set
= XVECEXP (set
, 0, 0);
4931 gcc_assert (GET_CODE (set
) == SET
);
4932 mem
= XEXP (set
, opnum
);
4933 while (GET_CODE (mem
) == SUBREG
)
4934 mem
= SUBREG_REG (mem
);
4935 gcc_assert (MEM_P (mem
));
4936 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4939 /* Initialize the table of extra 80387 mathematical constants. */
4942 init_ext_80387_constants (void)
4944 static const char * cst
[5] =
4946 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4947 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4948 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4949 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4950 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4954 for (i
= 0; i
< 5; i
++)
4956 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4957 /* Ensure each constant is rounded to XFmode precision. */
4958 real_convert (&ext_80387_constants_table
[i
],
4959 XFmode
, &ext_80387_constants_table
[i
]);
4962 ext_80387_constants_init
= 1;
4965 /* Return true if the constant is something that can be loaded with
4966 a special instruction. */
4969 standard_80387_constant_p (rtx x
)
4973 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4976 if (x
== CONST0_RTX (GET_MODE (x
)))
4978 if (x
== CONST1_RTX (GET_MODE (x
)))
4981 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4983 /* For XFmode constants, try to find a special 80387 instruction when
4984 optimizing for size or on those CPUs that benefit from them. */
4985 if (GET_MODE (x
) == XFmode
4986 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4990 if (! ext_80387_constants_init
)
4991 init_ext_80387_constants ();
4993 for (i
= 0; i
< 5; i
++)
4994 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4998 /* Load of the constant -0.0 or -1.0 will be split as
4999 fldz;fchs or fld1;fchs sequence. */
5000 if (real_isnegzero (&r
))
5002 if (real_identical (&r
, &dconstm1
))
5008 /* Return the opcode of the special instruction to be used to load
5012 standard_80387_constant_opcode (rtx x
)
5014 switch (standard_80387_constant_p (x
))
5038 /* Return the CONST_DOUBLE representing the 80387 constant that is
5039 loaded by the specified special instruction. The argument IDX
5040 matches the return value from standard_80387_constant_p. */
5043 standard_80387_constant_rtx (int idx
)
5047 if (! ext_80387_constants_init
)
5048 init_ext_80387_constants ();
5064 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5068 /* Return 1 if mode is a valid mode for sse. */
5070 standard_sse_mode_p (enum machine_mode mode
)
5087 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5090 standard_sse_constant_p (rtx x
)
5092 enum machine_mode mode
= GET_MODE (x
);
5094 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5096 if (vector_all_ones_operand (x
, mode
)
5097 && standard_sse_mode_p (mode
))
5098 return TARGET_SSE2
? 2 : -1;
5103 /* Return the opcode of the special instruction to be used to load
5107 standard_sse_constant_opcode (rtx insn
, rtx x
)
5109 switch (standard_sse_constant_p (x
))
5112 if (get_attr_mode (insn
) == MODE_V4SF
)
5113 return "xorps\t%0, %0";
5114 else if (get_attr_mode (insn
) == MODE_V2DF
)
5115 return "xorpd\t%0, %0";
5117 return "pxor\t%0, %0";
5119 return "pcmpeqd\t%0, %0";
5124 /* Returns 1 if OP contains a symbol reference */
5127 symbolic_reference_mentioned_p (rtx op
)
5132 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5135 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5136 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5142 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5143 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5147 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5154 /* Return 1 if it is appropriate to emit `ret' instructions in the
5155 body of a function. Do this only if the epilogue is simple, needing a
5156 couple of insns. Prior to reloading, we can't tell how many registers
5157 must be saved, so return 0 then. Return 0 if there is no frame
5158 marker to de-allocate. */
5161 ix86_can_use_return_insn_p (void)
5163 struct ix86_frame frame
;
5165 if (! reload_completed
|| frame_pointer_needed
)
5168 /* Don't allow more than 32 pop, since that's all we can do
5169 with one instruction. */
5170 if (current_function_pops_args
5171 && current_function_args_size
>= 32768)
5174 ix86_compute_frame_layout (&frame
);
5175 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5178 /* Value should be nonzero if functions must have frame pointers.
5179 Zero means the frame pointer need not be set up (and parms may
5180 be accessed via the stack pointer) in functions that seem suitable. */
5183 ix86_frame_pointer_required (void)
5185 /* If we accessed previous frames, then the generated code expects
5186 to be able to access the saved ebp value in our frame. */
5187 if (cfun
->machine
->accesses_prev_frame
)
5190 /* Several x86 os'es need a frame pointer for other reasons,
5191 usually pertaining to setjmp. */
5192 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5195 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5196 the frame pointer by default. Turn it back on now if we've not
5197 got a leaf function. */
5198 if (TARGET_OMIT_LEAF_FRAME_POINTER
5199 && (!current_function_is_leaf
5200 || ix86_current_function_calls_tls_descriptor
))
5203 if (current_function_profile
)
5209 /* Record that the current function accesses previous call frames. */
5212 ix86_setup_frame_addresses (void)
5214 cfun
->machine
->accesses_prev_frame
= 1;
5217 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5218 # define USE_HIDDEN_LINKONCE 1
5220 # define USE_HIDDEN_LINKONCE 0
5223 static int pic_labels_used
;
5225 /* Fills in the label name that should be used for a pc thunk for
5226 the given register. */
5229 get_pc_thunk_name (char name
[32], unsigned int regno
)
5231 gcc_assert (!TARGET_64BIT
);
5233 if (USE_HIDDEN_LINKONCE
)
5234 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5236 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5240 /* This function generates code for -fpic that loads %ebx with
5241 the return address of the caller and then returns. */
5244 ix86_file_end (void)
5249 for (regno
= 0; regno
< 8; ++regno
)
5253 if (! ((pic_labels_used
>> regno
) & 1))
5256 get_pc_thunk_name (name
, regno
);
5261 switch_to_section (darwin_sections
[text_coal_section
]);
5262 fputs ("\t.weak_definition\t", asm_out_file
);
5263 assemble_name (asm_out_file
, name
);
5264 fputs ("\n\t.private_extern\t", asm_out_file
);
5265 assemble_name (asm_out_file
, name
);
5266 fputs ("\n", asm_out_file
);
5267 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5271 if (USE_HIDDEN_LINKONCE
)
5275 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5277 TREE_PUBLIC (decl
) = 1;
5278 TREE_STATIC (decl
) = 1;
5279 DECL_ONE_ONLY (decl
) = 1;
5281 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5282 switch_to_section (get_named_section (decl
, NULL
, 0));
5284 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5285 fputs ("\t.hidden\t", asm_out_file
);
5286 assemble_name (asm_out_file
, name
);
5287 fputc ('\n', asm_out_file
);
5288 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5292 switch_to_section (text_section
);
5293 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5296 xops
[0] = gen_rtx_REG (SImode
, regno
);
5297 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5298 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5299 output_asm_insn ("ret", xops
);
5302 if (NEED_INDICATE_EXEC_STACK
)
5303 file_end_indicate_exec_stack ();
5306 /* Emit code for the SET_GOT patterns. */
5309 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5314 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5316 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5318 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5321 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5323 output_asm_insn ("call\t%a2", xops
);
5326 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5327 is what will be referenced by the Mach-O PIC subsystem. */
5329 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5332 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5333 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5336 output_asm_insn ("pop{l}\t%0", xops
);
5341 get_pc_thunk_name (name
, REGNO (dest
));
5342 pic_labels_used
|= 1 << REGNO (dest
);
5344 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5345 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5346 output_asm_insn ("call\t%X2", xops
);
5347 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5348 is what will be referenced by the Mach-O PIC subsystem. */
5351 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5353 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5354 CODE_LABEL_NUMBER (label
));
5361 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5362 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5364 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5369 /* Generate an "push" pattern for input ARG. */
5374 return gen_rtx_SET (VOIDmode
,
5376 gen_rtx_PRE_DEC (Pmode
,
5377 stack_pointer_rtx
)),
5381 /* Return >= 0 if there is an unused call-clobbered register available
5382 for the entire function. */
5385 ix86_select_alt_pic_regnum (void)
5387 if (current_function_is_leaf
&& !current_function_profile
5388 && !ix86_current_function_calls_tls_descriptor
)
5391 for (i
= 2; i
>= 0; --i
)
5392 if (!regs_ever_live
[i
])
5396 return INVALID_REGNUM
;
5399 /* Return 1 if we need to save REGNO. */
5401 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5403 if (pic_offset_table_rtx
5404 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5405 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5406 || current_function_profile
5407 || current_function_calls_eh_return
5408 || current_function_uses_const_pool
))
5410 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5415 if (current_function_calls_eh_return
&& maybe_eh_return
)
5420 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5421 if (test
== INVALID_REGNUM
)
5428 if (cfun
->machine
->force_align_arg_pointer
5429 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5432 return (regs_ever_live
[regno
]
5433 && !call_used_regs
[regno
]
5434 && !fixed_regs
[regno
]
5435 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5438 /* Return number of registers to be saved on the stack. */
5441 ix86_nsaved_regs (void)
5446 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5447 if (ix86_save_reg (regno
, true))
5452 /* Return the offset between two registers, one to be eliminated, and the other
5453 its replacement, at the start of a routine. */
5456 ix86_initial_elimination_offset (int from
, int to
)
5458 struct ix86_frame frame
;
5459 ix86_compute_frame_layout (&frame
);
5461 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5462 return frame
.hard_frame_pointer_offset
;
5463 else if (from
== FRAME_POINTER_REGNUM
5464 && to
== HARD_FRAME_POINTER_REGNUM
)
5465 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5468 gcc_assert (to
== STACK_POINTER_REGNUM
);
5470 if (from
== ARG_POINTER_REGNUM
)
5471 return frame
.stack_pointer_offset
;
5473 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5474 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5478 /* Fill structure ix86_frame about frame of currently computed function. */
5481 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5483 HOST_WIDE_INT total_size
;
5484 unsigned int stack_alignment_needed
;
5485 HOST_WIDE_INT offset
;
5486 unsigned int preferred_alignment
;
5487 HOST_WIDE_INT size
= get_frame_size ();
5489 frame
->nregs
= ix86_nsaved_regs ();
5492 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5493 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5495 /* During reload iteration the amount of registers saved can change.
5496 Recompute the value as needed. Do not recompute when amount of registers
5497 didn't change as reload does multiple calls to the function and does not
5498 expect the decision to change within single iteration. */
5500 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5502 int count
= frame
->nregs
;
5504 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5505 /* The fast prologue uses move instead of push to save registers. This
5506 is significantly longer, but also executes faster as modern hardware
5507 can execute the moves in parallel, but can't do that for push/pop.
5509 Be careful about choosing what prologue to emit: When function takes
5510 many instructions to execute we may use slow version as well as in
5511 case function is known to be outside hot spot (this is known with
5512 feedback only). Weight the size of function by number of registers
5513 to save as it is cheap to use one or two push instructions but very
5514 slow to use many of them. */
5516 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5517 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5518 || (flag_branch_probabilities
5519 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5520 cfun
->machine
->use_fast_prologue_epilogue
= false;
5522 cfun
->machine
->use_fast_prologue_epilogue
5523 = !expensive_function_p (count
);
5525 if (TARGET_PROLOGUE_USING_MOVE
5526 && cfun
->machine
->use_fast_prologue_epilogue
)
5527 frame
->save_regs_using_mov
= true;
5529 frame
->save_regs_using_mov
= false;
5532 /* Skip return address and saved base pointer. */
5533 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5535 frame
->hard_frame_pointer_offset
= offset
;
5537 /* Do some sanity checking of stack_alignment_needed and
5538 preferred_alignment, since i386 port is the only using those features
5539 that may break easily. */
5541 gcc_assert (!size
|| stack_alignment_needed
);
5542 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5543 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5544 gcc_assert (stack_alignment_needed
5545 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5547 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5548 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5550 /* Register save area */
5551 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5554 if (ix86_save_varrargs_registers
)
5556 offset
+= X86_64_VARARGS_SIZE
;
5557 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5560 frame
->va_arg_size
= 0;
5562 /* Align start of frame for local function. */
5563 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5564 & -stack_alignment_needed
) - offset
;
5566 offset
+= frame
->padding1
;
5568 /* Frame pointer points here. */
5569 frame
->frame_pointer_offset
= offset
;
5573 /* Add outgoing arguments area. Can be skipped if we eliminated
5574 all the function calls as dead code.
5575 Skipping is however impossible when function calls alloca. Alloca
5576 expander assumes that last current_function_outgoing_args_size
5577 of stack frame are unused. */
5578 if (ACCUMULATE_OUTGOING_ARGS
5579 && (!current_function_is_leaf
|| current_function_calls_alloca
5580 || ix86_current_function_calls_tls_descriptor
))
5582 offset
+= current_function_outgoing_args_size
;
5583 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5586 frame
->outgoing_arguments_size
= 0;
5588 /* Align stack boundary. Only needed if we're calling another function
5590 if (!current_function_is_leaf
|| current_function_calls_alloca
5591 || ix86_current_function_calls_tls_descriptor
)
5592 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5593 & -preferred_alignment
) - offset
;
5595 frame
->padding2
= 0;
5597 offset
+= frame
->padding2
;
5599 /* We've reached end of stack frame. */
5600 frame
->stack_pointer_offset
= offset
;
5602 /* Size prologue needs to allocate. */
5603 frame
->to_allocate
=
5604 (size
+ frame
->padding1
+ frame
->padding2
5605 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5607 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5608 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5609 frame
->save_regs_using_mov
= false;
5611 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5612 && current_function_is_leaf
5613 && !ix86_current_function_calls_tls_descriptor
)
5615 frame
->red_zone_size
= frame
->to_allocate
;
5616 if (frame
->save_regs_using_mov
)
5617 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5618 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5619 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5622 frame
->red_zone_size
= 0;
5623 frame
->to_allocate
-= frame
->red_zone_size
;
5624 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5626 fprintf (stderr
, "\n");
5627 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5628 fprintf (stderr
, "size: %ld\n", (long)size
);
5629 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5630 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5631 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5632 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5633 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5634 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5635 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5636 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5637 (long)frame
->hard_frame_pointer_offset
);
5638 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5639 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5640 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5641 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5645 /* Emit code to save registers in the prologue. */
5648 ix86_emit_save_regs (void)
5653 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5654 if (ix86_save_reg (regno
, true))
5656 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5657 RTX_FRAME_RELATED_P (insn
) = 1;
5661 /* Emit code to save registers using MOV insns. First register
5662 is restored from POINTER + OFFSET. */
5664 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5669 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5670 if (ix86_save_reg (regno
, true))
5672 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5674 gen_rtx_REG (Pmode
, regno
));
5675 RTX_FRAME_RELATED_P (insn
) = 1;
5676 offset
+= UNITS_PER_WORD
;
5680 /* Expand prologue or epilogue stack adjustment.
5681 The pattern exist to put a dependency on all ebp-based memory accesses.
5682 STYLE should be negative if instructions should be marked as frame related,
5683 zero if %r11 register is live and cannot be freely used and positive
5687 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5692 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5693 else if (x86_64_immediate_operand (offset
, DImode
))
5694 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5698 /* r11 is used by indirect sibcall return as well, set before the
5699 epilogue and used after the epilogue. ATM indirect sibcall
5700 shouldn't be used together with huge frame sizes in one
5701 function because of the frame_size check in sibcall.c. */
5703 r11
= gen_rtx_REG (DImode
, R11_REG
);
5704 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5706 RTX_FRAME_RELATED_P (insn
) = 1;
5707 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5711 RTX_FRAME_RELATED_P (insn
) = 1;
5714 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5717 ix86_internal_arg_pointer (void)
5719 bool has_force_align_arg_pointer
=
5720 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5721 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5722 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5723 && DECL_NAME (current_function_decl
)
5724 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5725 && DECL_FILE_SCOPE_P (current_function_decl
))
5726 || ix86_force_align_arg_pointer
5727 || has_force_align_arg_pointer
)
5729 /* Nested functions can't realign the stack due to a register
5731 if (DECL_CONTEXT (current_function_decl
)
5732 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5734 if (ix86_force_align_arg_pointer
)
5735 warning (0, "-mstackrealign ignored for nested functions");
5736 if (has_force_align_arg_pointer
)
5737 error ("%s not supported for nested functions",
5738 ix86_force_align_arg_pointer_string
);
5739 return virtual_incoming_args_rtx
;
5741 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5742 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5745 return virtual_incoming_args_rtx
;
5748 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5749 This is called from dwarf2out.c to emit call frame instructions
5750 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5752 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5754 rtx unspec
= SET_SRC (pattern
);
5755 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5759 case UNSPEC_REG_SAVE
:
5760 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5761 SET_DEST (pattern
));
5763 case UNSPEC_DEF_CFA
:
5764 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5765 INTVAL (XVECEXP (unspec
, 0, 0)));
5772 /* Expand the prologue into a bunch of separate insns. */
5775 ix86_expand_prologue (void)
5779 struct ix86_frame frame
;
5780 HOST_WIDE_INT allocate
;
5782 ix86_compute_frame_layout (&frame
);
5784 if (cfun
->machine
->force_align_arg_pointer
)
5788 /* Grab the argument pointer. */
5789 x
= plus_constant (stack_pointer_rtx
, 4);
5790 y
= cfun
->machine
->force_align_arg_pointer
;
5791 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5792 RTX_FRAME_RELATED_P (insn
) = 1;
5794 /* The unwind info consists of two parts: install the fafp as the cfa,
5795 and record the fafp as the "save register" of the stack pointer.
5796 The later is there in order that the unwinder can see where it
5797 should restore the stack pointer across the and insn. */
5798 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5799 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5800 RTX_FRAME_RELATED_P (x
) = 1;
5801 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5803 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5804 RTX_FRAME_RELATED_P (y
) = 1;
5805 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5806 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5807 REG_NOTES (insn
) = x
;
5809 /* Align the stack. */
5810 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5813 /* And here we cheat like madmen with the unwind info. We force the
5814 cfa register back to sp+4, which is exactly what it was at the
5815 start of the function. Re-pushing the return address results in
5816 the return at the same spot relative to the cfa, and thus is
5817 correct wrt the unwind info. */
5818 x
= cfun
->machine
->force_align_arg_pointer
;
5819 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5820 insn
= emit_insn (gen_push (x
));
5821 RTX_FRAME_RELATED_P (insn
) = 1;
5824 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5825 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5826 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5827 REG_NOTES (insn
) = x
;
5830 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5831 slower on all targets. Also sdb doesn't like it. */
5833 if (frame_pointer_needed
)
5835 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5836 RTX_FRAME_RELATED_P (insn
) = 1;
5838 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5839 RTX_FRAME_RELATED_P (insn
) = 1;
5842 allocate
= frame
.to_allocate
;
5844 if (!frame
.save_regs_using_mov
)
5845 ix86_emit_save_regs ();
5847 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5849 /* When using red zone we may start register saving before allocating
5850 the stack frame saving one cycle of the prologue. */
5851 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5852 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5853 : stack_pointer_rtx
,
5854 -frame
.nregs
* UNITS_PER_WORD
);
5858 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5859 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5860 GEN_INT (-allocate
), -1);
5863 /* Only valid for Win32. */
5864 rtx eax
= gen_rtx_REG (SImode
, 0);
5865 bool eax_live
= ix86_eax_live_at_start_p ();
5868 gcc_assert (!TARGET_64BIT
);
5872 emit_insn (gen_push (eax
));
5876 emit_move_insn (eax
, GEN_INT (allocate
));
5878 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5879 RTX_FRAME_RELATED_P (insn
) = 1;
5880 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5881 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5882 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5883 t
, REG_NOTES (insn
));
5887 if (frame_pointer_needed
)
5888 t
= plus_constant (hard_frame_pointer_rtx
,
5891 - frame
.nregs
* UNITS_PER_WORD
);
5893 t
= plus_constant (stack_pointer_rtx
, allocate
);
5894 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5898 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5900 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5901 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5903 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5904 -frame
.nregs
* UNITS_PER_WORD
);
5907 pic_reg_used
= false;
5908 if (pic_offset_table_rtx
5909 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5910 || current_function_profile
))
5912 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5914 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5915 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5917 pic_reg_used
= true;
5923 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5925 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5927 /* Even with accurate pre-reload life analysis, we can wind up
5928 deleting all references to the pic register after reload.
5929 Consider if cross-jumping unifies two sides of a branch
5930 controlled by a comparison vs the only read from a global.
5931 In which case, allow the set_got to be deleted, though we're
5932 too late to do anything about the ebx save in the prologue. */
5933 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5936 /* Prevent function calls from be scheduled before the call to mcount.
5937 In the pic_reg_used case, make sure that the got load isn't deleted. */
5938 if (current_function_profile
)
5939 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5942 /* Emit code to restore saved registers using MOV insns. First register
5943 is restored from POINTER + OFFSET. */
5945 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5946 int maybe_eh_return
)
5949 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5951 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5952 if (ix86_save_reg (regno
, maybe_eh_return
))
5954 /* Ensure that adjust_address won't be forced to produce pointer
5955 out of range allowed by x86-64 instruction set. */
5956 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5960 r11
= gen_rtx_REG (DImode
, R11_REG
);
5961 emit_move_insn (r11
, GEN_INT (offset
));
5962 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5963 base_address
= gen_rtx_MEM (Pmode
, r11
);
5966 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5967 adjust_address (base_address
, Pmode
, offset
));
5968 offset
+= UNITS_PER_WORD
;
5972 /* Restore function stack, frame, and registers. */
5975 ix86_expand_epilogue (int style
)
5978 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5979 struct ix86_frame frame
;
5980 HOST_WIDE_INT offset
;
5982 ix86_compute_frame_layout (&frame
);
5984 /* Calculate start of saved registers relative to ebp. Special care
5985 must be taken for the normal return case of a function using
5986 eh_return: the eax and edx registers are marked as saved, but not
5987 restored along this path. */
5988 offset
= frame
.nregs
;
5989 if (current_function_calls_eh_return
&& style
!= 2)
5991 offset
*= -UNITS_PER_WORD
;
5993 /* If we're only restoring one register and sp is not valid then
5994 using a move instruction to restore the register since it's
5995 less work than reloading sp and popping the register.
5997 The default code result in stack adjustment using add/lea instruction,
5998 while this code results in LEAVE instruction (or discrete equivalent),
5999 so it is profitable in some other cases as well. Especially when there
6000 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6001 and there is exactly one register to pop. This heuristic may need some
6002 tuning in future. */
6003 if ((!sp_valid
&& frame
.nregs
<= 1)
6004 || (TARGET_EPILOGUE_USING_MOVE
6005 && cfun
->machine
->use_fast_prologue_epilogue
6006 && (frame
.nregs
> 1 || frame
.to_allocate
))
6007 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6008 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6009 && cfun
->machine
->use_fast_prologue_epilogue
6010 && frame
.nregs
== 1)
6011 || current_function_calls_eh_return
)
6013 /* Restore registers. We can use ebp or esp to address the memory
6014 locations. If both are available, default to ebp, since offsets
6015 are known to be small. Only exception is esp pointing directly to the
6016 end of block of saved registers, where we may simplify addressing
6019 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6020 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6021 frame
.to_allocate
, style
== 2);
6023 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6024 offset
, style
== 2);
6026 /* eh_return epilogues need %ecx added to the stack pointer. */
6029 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6031 if (frame_pointer_needed
)
6033 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6034 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6035 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6037 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6038 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6040 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6045 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6046 tmp
= plus_constant (tmp
, (frame
.to_allocate
6047 + frame
.nregs
* UNITS_PER_WORD
));
6048 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6051 else if (!frame_pointer_needed
)
6052 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6053 GEN_INT (frame
.to_allocate
6054 + frame
.nregs
* UNITS_PER_WORD
),
6056 /* If not an i386, mov & pop is faster than "leave". */
6057 else if (TARGET_USE_LEAVE
|| optimize_size
6058 || !cfun
->machine
->use_fast_prologue_epilogue
)
6059 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6062 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6063 hard_frame_pointer_rtx
,
6066 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6068 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6073 /* First step is to deallocate the stack frame so that we can
6074 pop the registers. */
6077 gcc_assert (frame_pointer_needed
);
6078 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6079 hard_frame_pointer_rtx
,
6080 GEN_INT (offset
), style
);
6082 else if (frame
.to_allocate
)
6083 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6084 GEN_INT (frame
.to_allocate
), style
);
6086 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6087 if (ix86_save_reg (regno
, false))
6090 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6092 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6094 if (frame_pointer_needed
)
6096 /* Leave results in shorter dependency chains on CPUs that are
6097 able to grok it fast. */
6098 if (TARGET_USE_LEAVE
)
6099 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6100 else if (TARGET_64BIT
)
6101 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6103 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6107 if (cfun
->machine
->force_align_arg_pointer
)
6109 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6110 cfun
->machine
->force_align_arg_pointer
,
6114 /* Sibcall epilogues don't want a return instruction. */
6118 if (current_function_pops_args
&& current_function_args_size
)
6120 rtx popc
= GEN_INT (current_function_pops_args
);
6122 /* i386 can only pop 64K bytes. If asked to pop more, pop
6123 return address, do explicit add, and jump indirectly to the
6126 if (current_function_pops_args
>= 65536)
6128 rtx ecx
= gen_rtx_REG (SImode
, 2);
6130 /* There is no "pascal" calling convention in 64bit ABI. */
6131 gcc_assert (!TARGET_64BIT
);
6133 emit_insn (gen_popsi1 (ecx
));
6134 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6135 emit_jump_insn (gen_return_indirect_internal (ecx
));
6138 emit_jump_insn (gen_return_pop_internal (popc
));
6141 emit_jump_insn (gen_return_internal ());
6144 /* Reset from the function's potential modifications. */
6147 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6148 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6150 if (pic_offset_table_rtx
)
6151 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6153 /* Mach-O doesn't support labels at the end of objects, so if
6154 it looks like we might want one, insert a NOP. */
6156 rtx insn
= get_last_insn ();
6159 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6160 insn
= PREV_INSN (insn
);
6164 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6165 fputs ("\tnop\n", file
);
6171 /* Extract the parts of an RTL expression that is a valid memory address
6172 for an instruction. Return 0 if the structure of the address is
6173 grossly off. Return -1 if the address contains ASHIFT, so it is not
6174 strictly valid, but still used for computing length of lea instruction. */
/* Decompose the RTL address ADDR into base, index, scale, displacement
   and segment parts, storing them through *OUT.  Per the (truncated)
   comment above, the return value is -1 when ADDR contains ASHIFT —
   not strictly valid, but still usable for lea length computation.
   NOTE(review): this text is a damaged extraction — many original lines
   (braces, returns, declarations; see gaps in the embedded numbering,
   e.g. 6188->6198) are missing.  Do not compile as-is.  */
6177 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6179 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6180 rtx base_reg
, index_reg
;
6181 HOST_WIDE_INT scale
= 1;
6182 rtx scale_rtx
= NULL_RTX
;
6184 enum ix86_address_seg seg
= SEG_DEFAULT
;
/* Classify the top-level RTX code of ADDR: plain register/subreg,
   sum of terms, index*scale, shifted index, or bare displacement.  */
6186 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6188 else if (GET_CODE (addr
) == PLUS
)
/* PLUS case: addends are collected into a small array (declarations and
   loop header missing from this extraction) and then dispatched below.  */
6198 addends
[n
++] = XEXP (op
, 1);
6201 while (GET_CODE (op
) == PLUS
);
6206 for (i
= n
; i
>= 0; --i
)
6209 switch (GET_CODE (op
))
6214 index
= XEXP (op
, 0);
6215 scale_rtx
= XEXP (op
, 1);
/* An UNSPEC_TP addend selects the thread-pointer segment override
   (%fs on 64-bit, %gs on 32-bit) when direct seg refs are enabled.  */
6219 if (XINT (op
, 1) == UNSPEC_TP
6220 && TARGET_TLS_DIRECT_SEG_REFS
6221 && seg
== SEG_DEFAULT
)
6222 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6251 else if (GET_CODE (addr
) == MULT
)
6253 index
= XEXP (addr
, 0); /* index*scale */
6254 scale_rtx
= XEXP (addr
, 1);
6256 else if (GET_CODE (addr
) == ASHIFT
)
6260 /* We're called for lea too, which implements ashift on occasion. */
6261 index
= XEXP (addr
, 0);
6262 tmp
= XEXP (addr
, 1);
6263 if (!CONST_INT_P (tmp
))
6265 scale
= INTVAL (tmp
);
/* Shift counts above 3 cannot map to the x86 scale factors 1/2/4/8.  */
6266 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6272 disp
= addr
; /* displacement */
6274 /* Extract the integral value of scale. */
6277 if (!CONST_INT_P (scale_rtx
))
6279 scale
= INTVAL (scale_rtx
);
/* Look through SUBREGs so the checks below see the underlying hard/pseudo
   register.  */
6282 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6283 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6285 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6286 if (base_reg
&& index_reg
&& scale
== 1
6287 && (index_reg
== arg_pointer_rtx
6288 || index_reg
== frame_pointer_rtx
6289 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
/* Swap base and index (both rtx and looked-through forms) so the
   special register lands in the base position.  */
6292 tmp
= base
, base
= index
, index
= tmp
;
6293 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6296 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6297 if ((base_reg
== hard_frame_pointer_rtx
6298 || base_reg
== frame_pointer_rtx
6299 || base_reg
== arg_pointer_rtx
) && !disp
)
6302 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6303 Avoid this by transforming to [%esi+0]. */
6304 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6305 && base_reg
&& !index_reg
&& !disp
6307 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6310 /* Special case: encode reg+reg instead of reg*2. */
6311 if (!base
&& index
&& scale
&& scale
== 2)
6312 base
= index
, base_reg
= index_reg
, scale
= 1;
6314 /* Special case: scaling cannot be encoded without base or displacement. */
6315 if (!base
&& !disp
&& index
&& scale
!= 1)
6327 /* Return cost of the memory address x.
6328 For i386, it is better to use a complex address than let gcc copy
6329 the address into a reg and make a new pseudo. But not if the address
6330 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): damaged extraction — the function's braces, return
   statements and the cost accumulator manipulation lines are missing
   (gaps in embedded numbering, e.g. 6330->6333, 6347->6349).  */
6333 ix86_address_cost (rtx x
)
6335 struct ix86_address parts
;
6337 int ok
= ix86_decompose_address (x
, &parts
);
/* Strip SUBREGs so the REG_P/REGNO tests below see the inner register.  */
6341 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6342 parts
.base
= SUBREG_REG (parts
.base
);
6343 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6344 parts
.index
= SUBREG_REG (parts
.index
);
6346 /* More complex memory references are better. */
6347 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6349 if (parts
.seg
!= SEG_DEFAULT
)
6352 /* Attempt to minimize number of registers in the address. */
6354 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6356 && (!REG_P (parts
.index
)
6357 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6361 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6363 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6364 && parts
.base
!= parts
.index
)
6367 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6368 since it's predecode logic can't detect the length of instructions
6369 and it degenerates to vector decoded. Increase cost of such
6370 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6371 to split such addresses or even refuse such addresses at all.
6373 Following addressing modes are affected:
6378 The first and last case may be avoidable by explicitly coding the zero in
6379 memory address, but I don't have AMD-K6 machine handy to check this
6383 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6384 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6385 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6391 /* If X is a machine specific address (i.e. a symbol or label being
6392 referenced as a displacement from the GOT implemented using an
6393 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): damaged extraction — return type, braces, the TARGET_64BIT
   guard and several return statements are missing from this text.  */
6396 ix86_find_base_term (rtx x
)
6402 if (GET_CODE (x
) != CONST
)
/* Peel an optional constant offset off the CONST before checking for
   the GOTPCREL unspec wrapper.  */
6405 if (GET_CODE (term
) == PLUS
6406 && (CONST_INT_P (XEXP (term
, 1))
6407 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6408 term
= XEXP (term
, 0);
6409 if (GET_CODE (term
) != UNSPEC
6410 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6413 term
= XVECEXP (term
, 0, 0);
6415 if (GET_CODE (term
) != SYMBOL_REF
6416 && GET_CODE (term
) != LABEL_REF
)
/* Non-64-bit path (presumably): fall back to full delegitimization.  */
6422 term
= ix86_delegitimize_address (x
);
6424 if (GET_CODE (term
) != SYMBOL_REF
6425 && GET_CODE (term
) != LABEL_REF
)
6431 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6432 this is used for to form addresses to local data when -fPIC is in
/* NOTE(review): damaged extraction — the tail of the comment above, the
   return type, braces and return statements are missing.  Recognizes a
   (minus (label/symbol) (symbol "<pic base>")) displacement form.  */
6436 darwin_local_data_pic (rtx disp
)
6438 if (GET_CODE (disp
) == MINUS
)
6440 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6441 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6442 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6444 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
/* The subtrahend must be the Mach-O PIC base symbol by name.  */
6445 if (! strcmp (sym_name
, "<pic base>"))
6453 /* Determine if a given RTX is a valid constant. We already know this
6454 satisfies CONSTANT_P. */
/* NOTE(review): damaged extraction — case labels, braces and many return
   statements are missing; the visible switch arms handle CONST/PLUS
   drill-down, Mach-O local data, TLS unspecs, and zero constants.  */
6457 legitimate_constant_p (rtx x
)
6459 switch (GET_CODE (x
))
6464 if (GET_CODE (x
) == PLUS
)
6466 if (!CONST_INT_P (XEXP (x
, 1)))
6471 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6474 /* Only some unspecs are valid as "constants". */
6475 if (GET_CODE (x
) == UNSPEC
)
6476 switch (XINT (x
, 1))
6479 return TARGET_64BIT
;
/* TLS unspecs are constant only for the matching symbol TLS model.  */
6482 x
= XVECEXP (x
, 0, 0);
6483 return (GET_CODE (x
) == SYMBOL_REF
6484 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6486 x
= XVECEXP (x
, 0, 0);
6487 return (GET_CODE (x
) == SYMBOL_REF
6488 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6493 /* We must have drilled down to a symbol. */
6494 if (GET_CODE (x
) == LABEL_REF
)
6496 if (GET_CODE (x
) != SYMBOL_REF
)
6501 /* TLS symbols are never valid. */
6502 if (SYMBOL_REF_TLS_MODEL (x
))
6507 if (GET_MODE (x
) == TImode
6508 && x
!= CONST0_RTX (TImode
)
6514 if (x
== CONST0_RTX (GET_MODE (x
)))
6522 /* Otherwise we handle everything else in the move patterns. */
6526 /* Determine if it's legal to put X into the constant pool. This
6527 is not possible for the address of thread-local symbols, which
6528 is checked above. */
/* NOTE(review): damaged extraction — return type, braces, and the
   CONST_INT/CONST_DOUBLE/VECTOR case labels are missing; the default
   path defers to legitimate_constant_p.  */
6531 ix86_cannot_force_const_mem (rtx x
)
6533 /* We can always put integral constants and vectors in memory. */
6534 switch (GET_CODE (x
))
6544 return !legitimate_constant_p (x
);
6547 /* Determine if a given RTX is a valid constant address. */
/* NOTE(review): damaged extraction — return type and braces missing.
   A constant address must satisfy CONSTANT_P and be a strictly (third
   argument 1) legitimate Pmode address.  */
6550 constant_address_p (rtx x
)
6552 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6555 /* Nonzero if the constant value X is a legitimate general operand
6556 when generating PIC code. It is given that flag_pic is on and
6557 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): damaged extraction — return type, braces, case labels
   and several returns are missing from this text.  */
6560 legitimate_pic_operand_p (rtx x
)
6564 switch (GET_CODE (x
))
/* CONST case: strip an optional (plus inner const_int) wrapper.  */
6567 inner
= XEXP (x
, 0);
6568 if (GET_CODE (inner
) == PLUS
6569 && CONST_INT_P (XEXP (inner
, 1)))
6570 inner
= XEXP (inner
, 0);
6572 /* Only some unspecs are valid as "constants". */
6573 if (GET_CODE (inner
) == UNSPEC
)
6574 switch (XINT (inner
, 1))
6577 return TARGET_64BIT
;
6579 x
= XVECEXP (inner
, 0, 0);
6580 return (GET_CODE (x
) == SYMBOL_REF
6581 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
/* SYMBOL_REF/LABEL_REF default path: defer to the displacement check.  */
6589 return legitimate_pic_address_disp_p (x
);
6596 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): damaged extraction — the rest of the comment above, the
   return type, braces, several case labels and returns are missing.
   Validates PIC displacements: direct 64-bit local symbols, then
   GOT/GOTOFF/TLS unspec forms.  */
6600 legitimate_pic_address_disp_p (rtx disp
)
6604 /* In 64bit mode we can allow direct addresses of symbols and labels
6605 when they are not dynamic symbols. */
6608 rtx op0
= disp
, op1
;
6610 switch (GET_CODE (disp
))
/* CONST case: require (plus symbol const_int) with the offset inside
   +/-16MB so it stays addressable in the small code model.  */
6616 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6618 op0
= XEXP (XEXP (disp
, 0), 0);
6619 op1
= XEXP (XEXP (disp
, 0), 1);
6620 if (!CONST_INT_P (op1
)
6621 || INTVAL (op1
) >= 16*1024*1024
6622 || INTVAL (op1
) < -16*1024*1024)
6624 if (GET_CODE (op0
) == LABEL_REF
)
6626 if (GET_CODE (op0
) != SYMBOL_REF
)
6631 /* TLS references should always be enclosed in UNSPEC. */
6632 if (SYMBOL_REF_TLS_MODEL (op0
))
6634 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6642 if (GET_CODE (disp
) != CONST
)
6644 disp
= XEXP (disp
, 0);
6648 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6649 of GOT tables. We should not need these anyway. */
6650 if (GET_CODE (disp
) != UNSPEC
6651 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6652 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6655 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6656 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
/* 32-bit path: strip an optional constant offset before unspec checks.  */
6662 if (GET_CODE (disp
) == PLUS
)
6664 if (!CONST_INT_P (XEXP (disp
, 1)))
6666 disp
= XEXP (disp
, 0);
6670 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6673 if (GET_CODE (disp
) != UNSPEC
)
6676 switch (XINT (disp
, 1))
6681 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6683 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6684 While ABI specify also 32bit relocation but we don't produce it in
6685 small PIC model at all. */
6686 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6687 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6689 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6691 case UNSPEC_GOTTPOFF
:
6692 case UNSPEC_GOTNTPOFF
:
6693 case UNSPEC_INDNTPOFF
:
/* Each TLS unspec is valid only for its matching symbol TLS model.  */
6696 disp
= XVECEXP (disp
, 0, 0);
6697 return (GET_CODE (disp
) == SYMBOL_REF
6698 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6700 disp
= XVECEXP (disp
, 0, 0);
6701 return (GET_CODE (disp
) == SYMBOL_REF
6702 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6704 disp
= XVECEXP (disp
, 0, 0);
6705 return (GET_CODE (disp
) == SYMBOL_REF
6706 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6712 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6713 memory address for an instruction. The MODE argument is the machine mode
6714 for the MEM expression that wants to use this address.
6716 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6717 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): damaged extraction — return type, braces, success/error
   labels and return statements are missing.  REASON/REASON_RTX record a
   diagnostic for the TARGET_DEBUG_ADDR failure path at the bottom.  */
6721 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6723 struct ix86_address parts
;
6724 rtx base
, index
, disp
;
6725 HOST_WIDE_INT scale
;
6726 const char *reason
= NULL
;
6727 rtx reason_rtx
= NULL_RTX
;
6729 if (TARGET_DEBUG_ADDR
)
6732 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6733 GET_MODE_NAME (mode
), strict
);
6737 if (ix86_decompose_address (addr
, &parts
) <= 0)
6739 reason
= "decomposition failed";
6744 index
= parts
.index
;
6746 scale
= parts
.scale
;
6748 /* Validate base register.
6750 Don't allow SUBREG's that span more than a word here. It can lead to spill
6751 failures when the base is one word out of a two word structure, which is
6752 represented internally as a DImode int. */
6761 else if (GET_CODE (base
) == SUBREG
6762 && REG_P (SUBREG_REG (base
))
6763 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6765 reg
= SUBREG_REG (base
);
6768 reason
= "base is not a register";
6772 if (GET_MODE (base
) != Pmode
)
6774 reason
= "base is not in Pmode";
/* Strict checking requires hard registers valid as base; non-strict
   also accepts pseudos (before register allocation).  */
6778 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6779 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6781 reason
= "base is not valid";
6786 /* Validate index register.
6788 Don't allow SUBREG's that span more than a word here -- same as above. */
6797 else if (GET_CODE (index
) == SUBREG
6798 && REG_P (SUBREG_REG (index
))
6799 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6801 reg
= SUBREG_REG (index
);
6804 reason
= "index is not a register";
6808 if (GET_MODE (index
) != Pmode
)
6810 reason
= "index is not in Pmode";
6814 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6815 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6817 reason
= "index is not valid";
6822 /* Validate scale factor. */
6825 reason_rtx
= GEN_INT (scale
);
6828 reason
= "scale without index";
/* Hardware SIB encoding only supports scale factors 1, 2, 4, 8.  */
6832 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6834 reason
= "scale is not a valid multiplier";
6839 /* Validate displacement. */
6844 if (GET_CODE (disp
) == CONST
6845 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6846 switch (XINT (XEXP (disp
, 0), 1))
6848 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6849 used. While ABI specify also 32bit relocations, we don't produce
6850 them at all and use IP relative instead. */
6853 gcc_assert (flag_pic
);
6855 goto is_legitimate_pic
;
6856 reason
= "64bit address unspec";
6859 case UNSPEC_GOTPCREL
:
6860 gcc_assert (flag_pic
);
6861 goto is_legitimate_pic
;
6863 case UNSPEC_GOTTPOFF
:
6864 case UNSPEC_GOTNTPOFF
:
6865 case UNSPEC_INDNTPOFF
:
6871 reason
= "invalid address unspec";
6875 else if (SYMBOLIC_CONST (disp
)
6879 && MACHOPIC_INDIRECT
6880 && !machopic_operand_p (disp
)
/* is_legitimate_pic path (label line missing from extraction).  */
6886 if (TARGET_64BIT
&& (index
|| base
))
6888 /* foo@dtpoff(%rX) is ok. */
6889 if (GET_CODE (disp
) != CONST
6890 || GET_CODE (XEXP (disp
, 0)) != PLUS
6891 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6892 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6893 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6894 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6896 reason
= "non-constant pic memory reference";
6900 else if (! legitimate_pic_address_disp_p (disp
))
6902 reason
= "displacement is an invalid pic construct";
6906 /* This code used to verify that a symbolic pic displacement
6907 includes the pic_offset_table_rtx register.
6909 While this is good idea, unfortunately these constructs may
6910 be created by "adds using lea" optimization for incorrect
6919 This code is nonsensical, but results in addressing
6920 GOT table with pic_offset_table_rtx base. We can't
6921 just refuse it easily, since it gets matched by
6922 "addsi3" pattern, that later gets split to lea in the
6923 case output register differs from input. While this
6924 can be handled by separate addsi pattern for this case
6925 that never results in lea, this seems to be easier and
6926 correct fix for crash to disable this test. */
6928 else if (GET_CODE (disp
) != LABEL_REF
6929 && !CONST_INT_P (disp
)
6930 && (GET_CODE (disp
) != CONST
6931 || !legitimate_constant_p (disp
))
6932 && (GET_CODE (disp
) != SYMBOL_REF
6933 || !legitimate_constant_p (disp
)))
6935 reason
= "displacement is not constant";
6938 else if (TARGET_64BIT
6939 && !x86_64_immediate_operand (disp
, VOIDmode
))
6941 reason
= "displacement is out of range";
6946 /* Everything looks valid. */
6947 if (TARGET_DEBUG_ADDR
)
6948 fprintf (stderr
, "Success.\n");
/* Failure path (error label missing): dump the recorded reason.  */
6952 if (TARGET_DEBUG_ADDR
)
6954 fprintf (stderr
, "Error: %s\n", reason
);
6955 debug_rtx (reason_rtx
);
6960 /* Return a unique alias set for the GOT. */
/* NOTE(review): damaged extraction — braces, the -1 guard around the
   lazy initialization, and the return statement are missing.  SET is
   allocated once and cached in a function-local static.  */
6962 static HOST_WIDE_INT
6963 ix86_GOT_alias_set (void)
6965 static HOST_WIDE_INT set
= -1;
6967 set
= new_alias_set ();
6971 /* Return a legitimate reference for ORIG (an address) using the
6972 register REG. If REG is 0, a new pseudo is generated.
6974 There are two types of references that must be handled:
6976 1. Global data references must load the address from the GOT, via
6977 the PIC reg. An insn is emitted to do this load, and the reg is
6980 2. Static data references, constant pool addresses, and code labels
6981 compute the address as an offset from the GOT, whose base is in
6982 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6983 differentiate them from global data objects. The returned
6984 address is the PIC reg + an unspec constant.
6986 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6987 reg also appears in the address. */
/* NOTE(review): damaged extraction — return type, braces, several local
   declarations (addr, new, tmpreg) and return statements are missing.
   Pre-C++-keyword GCC sources legitimately used `new' as an identifier.  */
6990 legitimize_pic_address (rtx orig
, rtx reg
)
6997 if (TARGET_MACHO
&& !TARGET_64BIT
)
7000 reg
= gen_reg_rtx (Pmode
);
7001 /* Use the generic Mach-O PIC machinery. */
7002 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7006 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7008 else if (TARGET_64BIT
7009 && ix86_cmodel
!= CM_SMALL_PIC
7010 && local_symbolic_operand (addr
, Pmode
))
7013 /* This symbol may be referenced via a displacement from the PIC
7014 base address (@GOTOFF). */
7016 if (reload_in_progress
)
7017 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7018 if (GET_CODE (addr
) == CONST
)
7019 addr
= XEXP (addr
, 0);
7020 if (GET_CODE (addr
) == PLUS
)
7022 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7023 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7026 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7027 new = gen_rtx_CONST (Pmode
, new);
7029 tmpreg
= gen_reg_rtx (Pmode
);
7032 emit_move_insn (tmpreg
, new);
7036 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7037 tmpreg
, 1, OPTAB_DIRECT
);
7040 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7042 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
7044 /* This symbol may be referenced via a displacement from the PIC
7045 base address (@GOTOFF). */
7047 if (reload_in_progress
)
7048 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7049 if (GET_CODE (addr
) == CONST
)
7050 addr
= XEXP (addr
, 0);
7051 if (GET_CODE (addr
) == PLUS
)
7053 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7054 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7057 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7058 new = gen_rtx_CONST (Pmode
, new);
7059 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7063 emit_move_insn (reg
, new);
/* 64-bit small-PIC global symbol: build a @GOTPCREL RIP-relative load.  */
7067 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7071 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7072 new = gen_rtx_CONST (Pmode
, new);
7073 new = gen_const_mem (Pmode
, new);
7074 set_mem_alias_set (new, ix86_GOT_alias_set ());
7077 reg
= gen_reg_rtx (Pmode
);
7078 /* Use directly gen_movsi, otherwise the address is loaded
7079 into register for CSE. We don't want to CSE this addresses,
7080 instead we CSE addresses from the GOT table, so skip this. */
7081 emit_insn (gen_movsi (reg
, new));
7086 /* This symbol must be referenced via a load from the
7087 Global Offset Table (@GOT). */
7089 if (reload_in_progress
)
7090 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7091 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7092 new = gen_rtx_CONST (Pmode
, new);
7093 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7094 new = gen_const_mem (Pmode
, new);
7095 set_mem_alias_set (new, ix86_GOT_alias_set ());
7098 reg
= gen_reg_rtx (Pmode
);
7099 emit_move_insn (reg
, new);
7105 if (CONST_INT_P (addr
)
7106 && !x86_64_immediate_operand (addr
, VOIDmode
))
7110 emit_move_insn (reg
, addr
);
7114 new = force_reg (Pmode
, addr
);
7116 else if (GET_CODE (addr
) == CONST
)
7118 addr
= XEXP (addr
, 0);
7120 /* We must match stuff we generate before. Assume the only
7121 unspecs that can get here are ours. Not that we could do
7122 anything with them anyway.... */
7123 if (GET_CODE (addr
) == UNSPEC
7124 || (GET_CODE (addr
) == PLUS
7125 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7127 gcc_assert (GET_CODE (addr
) == PLUS
);
7129 if (GET_CODE (addr
) == PLUS
)
7131 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7133 /* Check first to see if this is a constant offset from a @GOTOFF
7134 symbol reference. */
7135 if (local_symbolic_operand (op0
, Pmode
)
7136 && CONST_INT_P (op1
))
7140 if (reload_in_progress
)
7141 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7142 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7144 new = gen_rtx_PLUS (Pmode
, new, op1
);
7145 new = gen_rtx_CONST (Pmode
, new);
7146 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7150 emit_move_insn (reg
, new);
/* Offsets outside +/-16MB cannot stay in the small-model immediate.  */
7156 if (INTVAL (op1
) < -16*1024*1024
7157 || INTVAL (op1
) >= 16*1024*1024)
7159 if (!x86_64_immediate_operand (op1
, Pmode
))
7160 op1
= force_reg (Pmode
, op1
);
7161 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
/* General PLUS: legitimize both halves recursively, then recombine.  */
7167 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7168 new = legitimize_pic_address (XEXP (addr
, 1),
7169 base
== reg
? NULL_RTX
: reg
);
7171 if (CONST_INT_P (new))
7172 new = plus_constant (base
, INTVAL (new));
7175 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7177 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7178 new = XEXP (new, 1);
7180 new = gen_rtx_PLUS (Pmode
, base
, new);
7188 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* NOTE(review): damaged extraction — return type, braces, the to_reg
   early-return and the final return are missing.  Builds an UNSPEC_TP
   rtx and, when requested, copies it into a fresh pseudo.  */
7191 get_thread_pointer (int to_reg
)
7195 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7199 reg
= gen_reg_rtx (Pmode
);
7200 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7201 insn
= emit_insn (insn
);
7206 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7207 false if we expect this to be used for a memory address and true if
7208 we expect to load the address into a register. */
/* NOTE(review): damaged extraction — return type, braces, the switch
   header on MODEL, start_sequence/end_sequence calls and several
   returns are missing.  One case per TLS model.  */
7211 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7213 rtx dest
, base
, off
, pic
, tp
;
7218 case TLS_MODEL_GLOBAL_DYNAMIC
:
7219 dest
= gen_reg_rtx (Pmode
);
7220 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7222 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
/* Classic 64-bit GD: call __tls_get_addr with the result in %rax.  */
7224 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7227 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7228 insns
= get_insns ();
7231 emit_libcall_block (insns
, dest
, rax
, x
);
7233 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7234 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7236 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7238 if (TARGET_GNU2_TLS
)
7240 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7242 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7246 case TLS_MODEL_LOCAL_DYNAMIC
:
7247 base
= gen_reg_rtx (Pmode
);
7248 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7250 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7252 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7255 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7256 insns
= get_insns ();
7259 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7260 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7261 emit_libcall_block (insns
, base
, rax
, note
);
7263 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7264 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7266 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7268 if (TARGET_GNU2_TLS
)
7270 rtx x
= ix86_tls_module_base ();
7272 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7273 gen_rtx_MINUS (Pmode
, x
, tp
));
/* LD completes as module base + @DTPOFF offset of the symbol.  */
7276 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7277 off
= gen_rtx_CONST (Pmode
, off
);
7279 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7281 if (TARGET_GNU2_TLS
)
7283 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7285 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7290 case TLS_MODEL_INITIAL_EXEC
:
7294 type
= UNSPEC_GOTNTPOFF
;
7298 if (reload_in_progress
)
7299 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7300 pic
= pic_offset_table_rtx
;
7301 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7303 else if (!TARGET_ANY_GNU_TLS
)
7305 pic
= gen_reg_rtx (Pmode
);
7306 emit_insn (gen_set_got (pic
));
7307 type
= UNSPEC_GOTTPOFF
;
7312 type
= UNSPEC_INDNTPOFF
;
/* IE: load the TP offset from the GOT entry selected above.  */
7315 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7316 off
= gen_rtx_CONST (Pmode
, off
);
7318 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7319 off
= gen_const_mem (Pmode
, off
);
7320 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7322 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7324 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7325 off
= force_reg (Pmode
, off
);
7326 return gen_rtx_PLUS (Pmode
, base
, off
);
7330 base
= get_thread_pointer (true);
7331 dest
= gen_reg_rtx (Pmode
);
7332 emit_insn (gen_subsi3 (dest
, base
, off
));
7336 case TLS_MODEL_LOCAL_EXEC
:
7337 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7338 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7339 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7340 off
= gen_rtx_CONST (Pmode
, off
);
7342 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7344 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7345 return gen_rtx_PLUS (Pmode
, base
, off
);
7349 base
= get_thread_pointer (true);
7350 dest
= gen_reg_rtx (Pmode
);
7351 emit_insn (gen_subsi3 (dest
, base
, off
));
7362 /* Try machine-dependent ways of modifying an illegitimate address
7363 to be legitimate. If we find one, return the new, valid address.
7364 This macro is used in only one place: `memory_address' in explow.c.
7366 OLDX is the address as it was before break_out_memory_refs was called.
7367 In some cases it is useful to look at this to decide what needs to be done.
7369 MODE and WIN are passed so that this macro can use
7370 GO_IF_LEGITIMATE_ADDRESS.
7372 It is always safe for this macro to do nothing. It exists to recognize
7373 opportunities to optimize the output.
7375 For the 80386, we handle X+REG by loading X into a register R and
7376 using R+REG. R will go in a general reg and indexing will be used.
7377 However, if REG is a broken-out memory address or multiplication,
7378 nothing needs to be done because REG can certainly go in a general reg.
7380 When -fpic is used, special handling is needed for symbolic references.
7381 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): damaged extraction — return type, braces, the `changed'
   flag declaration/updates and several returns are missing.  */
7384 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7389 if (TARGET_DEBUG_ADDR
)
7391 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7392 GET_MODE_NAME (mode
));
/* TLS symbols are routed to legitimize_tls_address first.  */
7396 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7398 return legitimize_tls_address (x
, log
, false);
7399 if (GET_CODE (x
) == CONST
7400 && GET_CODE (XEXP (x
, 0)) == PLUS
7401 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7402 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7404 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7405 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7408 if (flag_pic
&& SYMBOLIC_CONST (x
))
7409 return legitimize_pic_address (x
, 0);
7411 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7412 if (GET_CODE (x
) == ASHIFT
7413 && CONST_INT_P (XEXP (x
, 1))
7414 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7417 log
= INTVAL (XEXP (x
, 1));
7418 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7419 GEN_INT (1 << log
));
7422 if (GET_CODE (x
) == PLUS
)
7424 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7426 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7427 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7428 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7431 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7432 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7433 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7434 GEN_INT (1 << log
));
7437 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7438 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7439 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7442 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7443 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7444 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7445 GEN_INT (1 << log
));
7448 /* Put multiply first if it isn't already. */
7449 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7451 rtx tmp
= XEXP (x
, 0);
7452 XEXP (x
, 0) = XEXP (x
, 1);
7457 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7458 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7459 created by virtual register instantiation, register elimination, and
7460 similar optimizations. */
7461 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7464 x
= gen_rtx_PLUS (Pmode
,
7465 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7466 XEXP (XEXP (x
, 1), 0)),
7467 XEXP (XEXP (x
, 1), 1));
7471 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7472 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7473 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7474 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7475 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7476 && CONSTANT_P (XEXP (x
, 1)))
7479 rtx other
= NULL_RTX
;
7481 if (CONST_INT_P (XEXP (x
, 1)))
7483 constant
= XEXP (x
, 1);
7484 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7486 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7488 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7489 other
= XEXP (x
, 1);
7497 x
= gen_rtx_PLUS (Pmode
,
7498 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7499 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7500 plus_constant (other
, INTVAL (constant
)));
7504 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7507 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7510 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7513 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7516 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7520 && REG_P (XEXP (x
, 1))
7521 && REG_P (XEXP (x
, 0)))
7524 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7527 x
= legitimize_pic_address (x
, 0);
7530 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
/* Last resort: force one operand into a temp register and re-add.  */
7533 if (REG_P (XEXP (x
, 0)))
7535 rtx temp
= gen_reg_rtx (Pmode
);
7536 rtx val
= force_operand (XEXP (x
, 1), temp
);
7538 emit_move_insn (temp
, val
);
7544 else if (REG_P (XEXP (x
, 1)))
7546 rtx temp
= gen_reg_rtx (Pmode
);
7547 rtx val
= force_operand (XEXP (x
, 0), temp
);
7549 emit_move_insn (temp
, val
);
7559 /* Print an integer constant expression in assembler syntax. Addition
7560 and subtraction are the only arithmetic that may appear in these
7561 expressions. FILE is the stdio stream to write to, X is the rtx, and
7562 CODE is the operand print code from the output string. */
/* NOTE(review): damaged extraction — braces, case labels (PC, SYMBOL_REF,
   LABEL_REF, CONST_INT, CONST, CONST_DOUBLE, PLUS, MINUS, UNSPEC) and
   break statements are missing; only the case bodies survive.  */
7565 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7569 switch (GET_CODE (x
))
7572 gcc_assert (flag_pic
);
7577 output_addr_const (file
, x
);
7578 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7579 fputs ("@PLT", file
);
7586 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7587 assemble_name (asm_out_file
, buf
);
7591 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7595 /* This used to output parentheses around the expression,
7596 but that does not work on the 386 (either ATT or BSD assembler). */
7597 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7601 if (GET_MODE (x
) == VOIDmode
)
7603 /* We can use %d if the number is <32 bits and positive. */
7604 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7605 fprintf (file
, "0x%lx%08lx",
7606 (unsigned long) CONST_DOUBLE_HIGH (x
),
7607 (unsigned long) CONST_DOUBLE_LOW (x
));
7609 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7612 /* We can't handle floating point constants;
7613 PRINT_OPERAND must handle them. */
7614 output_operand_lossage ("floating constant misused");
7618 /* Some assemblers need integer constants to appear first. */
7619 if (CONST_INT_P (XEXP (x
, 0)))
7621 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7623 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7627 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7628 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7630 output_pic_addr_const (file
, XEXP (x
, 0), code
);
/* MINUS case: Intel dialect brackets differ from AT&T.  */
7636 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7637 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7639 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7641 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
/* UNSPEC case: print the operand followed by its relocation suffix.  */
7645 gcc_assert (XVECLEN (x
, 0) == 1);
7646 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7647 switch (XINT (x
, 1))
7650 fputs ("@GOT", file
);
7653 fputs ("@GOTOFF", file
);
7655 case UNSPEC_GOTPCREL
:
7656 fputs ("@GOTPCREL(%rip)", file
);
7658 case UNSPEC_GOTTPOFF
:
7659 /* FIXME: This might be @TPOFF in Sun ld too. */
7660 fputs ("@GOTTPOFF", file
);
7663 fputs ("@TPOFF", file
);
7667 fputs ("@TPOFF", file
);
7669 fputs ("@NTPOFF", file
);
7672 fputs ("@DTPOFF", file
);
7674 case UNSPEC_GOTNTPOFF
:
7676 fputs ("@GOTTPOFF(%rip)", file
);
7678 fputs ("@GOTNTPOFF", file
);
7680 case UNSPEC_INDNTPOFF
:
7681 fputs ("@INDNTPOFF", file
);
7684 output_operand_lossage ("invalid UNSPEC as operand");
7690 output_operand_lossage ("invalid expression as operand");
7694 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7695 We need to emit DTP-relative relocations. */
/* NOTE(review): damaged extraction — return type, braces, and the
   switch on SIZE (presumably selecting between the 4- and 8-byte
   directive forms) are missing.  */
7698 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7700 fputs (ASM_LONG
, file
);
7701 output_addr_const (file
, x
);
7702 fputs ("@DTPOFF", file
);
7708 fputs (", 0", file
);
7715 /* In the name of slightly smaller debug output, and to cater to
7716 general assembler lossage, recognize PIC+GOTOFF and turn it back
7717 into a direct symbol reference.
7719 On Darwin, this is necessary to avoid a crash, because Darwin
7720 has a different PIC label for each routine but the DWARF debugging
7721 information is not associated with any particular routine, so it's
7722 necessary to remove references to the PIC label from RTL stored by
7723 the DWARF output code. */
/* NOTE(review): damaged extraction — return type, braces, the initial
   assignment of X from ORIG_X, several guards and the final return
   are missing.  */
7726 ix86_delegitimize_address (rtx orig_x
)
7729 /* reg_addend is NULL or a multiple of some register. */
7730 rtx reg_addend
= NULL_RTX
;
7731 /* const_addend is NULL or a const_int. */
7732 rtx const_addend
= NULL_RTX
;
7733 /* This is the result, or NULL. */
7734 rtx result
= NULL_RTX
;
/* 64-bit shape: (const (unspec [symbol] UNSPEC_GOTPCREL)).  */
7741 if (GET_CODE (x
) != CONST
7742 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7743 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7746 return XVECEXP (XEXP (x
, 0), 0, 0);
7749 if (GET_CODE (x
) != PLUS
7750 || GET_CODE (XEXP (x
, 1)) != CONST
)
7753 if (REG_P (XEXP (x
, 0))
7754 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7755 /* %ebx + GOT/GOTOFF */
7757 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7759 /* %ebx + %reg * scale + GOT/GOTOFF */
7760 reg_addend
= XEXP (x
, 0);
7761 if (REG_P (XEXP (reg_addend
, 0))
7762 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7763 reg_addend
= XEXP (reg_addend
, 1);
7764 else if (REG_P (XEXP (reg_addend
, 1))
7765 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7766 reg_addend
= XEXP (reg_addend
, 0);
7769 if (!REG_P (reg_addend
)
7770 && GET_CODE (reg_addend
) != MULT
7771 && GET_CODE (reg_addend
) != ASHIFT
)
/* Strip the CONST wrapper and an optional constant offset.  */
7777 x
= XEXP (XEXP (x
, 1), 0);
7778 if (GET_CODE (x
) == PLUS
7779 && CONST_INT_P (XEXP (x
, 1)))
7781 const_addend
= XEXP (x
, 1);
/* @GOT is only meaningful behind a MEM; @GOTOFF only outside one.  */
7785 if (GET_CODE (x
) == UNSPEC
7786 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7787 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7788 result
= XVECEXP (x
, 0, 0);
7790 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7792 result
= XEXP (x
, 0);
/* Re-attach any stripped constant and register addends.  */
7798 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7800 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7805 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7810 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7812 enum rtx_code second_code
, bypass_code
;
7813 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7814 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7815 code
= ix86_fp_compare_code_to_integer (code
);
7819 code
= reverse_condition (code
);
7830 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7834 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7835 Those same assemblers have the same but opposite lossage on cmov. */
7836 gcc_assert (mode
== CCmode
);
7837 suffix
= fp
? "nbe" : "a";
7857 gcc_assert (mode
== CCmode
);
7879 gcc_assert (mode
== CCmode
);
7880 suffix
= fp
? "nb" : "ae";
7883 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7887 gcc_assert (mode
== CCmode
);
7891 suffix
= fp
? "u" : "p";
7894 suffix
= fp
? "nu" : "np";
7899 fputs (suffix
, file
);
7902 /* Print the name of register X to FILE based on its machine mode and number.
7903 If CODE is 'w', pretend the mode is HImode.
7904 If CODE is 'b', pretend the mode is QImode.
7905 If CODE is 'k', pretend the mode is SImode.
7906 If CODE is 'q', pretend the mode is DImode.
7907 If CODE is 'h', pretend the reg is the 'high' byte register.
7908 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7911 print_reg (rtx x
, int code
, FILE *file
)
7913 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7914 && REGNO (x
) != FRAME_POINTER_REGNUM
7915 && REGNO (x
) != FLAGS_REG
7916 && REGNO (x
) != FPSR_REG
7917 && REGNO (x
) != FPCR_REG
);
7919 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7922 if (code
== 'w' || MMX_REG_P (x
))
7924 else if (code
== 'b')
7926 else if (code
== 'k')
7928 else if (code
== 'q')
7930 else if (code
== 'y')
7932 else if (code
== 'h')
7935 code
= GET_MODE_SIZE (GET_MODE (x
));
7937 /* Irritatingly, AMD extended registers use different naming convention
7938 from the normal registers. */
7939 if (REX_INT_REG_P (x
))
7941 gcc_assert (TARGET_64BIT
);
7945 error ("extended registers have no high halves");
7948 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7951 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7954 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7957 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7960 error ("unsupported operand size for extended register");
7968 if (STACK_TOP_P (x
))
7970 fputs ("st(0)", file
);
7977 if (! ANY_FP_REG_P (x
))
7978 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7983 fputs (hi_reg_name
[REGNO (x
)], file
);
7986 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7988 fputs (qi_reg_name
[REGNO (x
)], file
);
7991 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7993 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8000 /* Locate some local-dynamic symbol still in use by this function
8001 so that we can print its name in some tls_local_dynamic_base
8005 get_some_local_dynamic_name (void)
8009 if (cfun
->machine
->some_ld_name
)
8010 return cfun
->machine
->some_ld_name
;
8012 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8014 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8015 return cfun
->machine
->some_ld_name
;
8021 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8025 if (GET_CODE (x
) == SYMBOL_REF
8026 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8028 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8036 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8037 C -- print opcode suffix for set/cmov insn.
8038 c -- like C, but print reversed condition
8039 F,f -- likewise, but for floating-point.
8040 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8042 R -- print the prefix for register names.
8043 z -- print the opcode suffix for the size of the current operand.
8044 * -- print a star (in certain assembler syntax)
8045 A -- print an absolute memory reference.
8046 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8047 s -- print a shift double count, followed by the assemblers argument
8049 b -- print the QImode name of the register for the indicated operand.
8050 %b0 would print %al if operands[0] is reg 0.
8051 w -- likewise, print the HImode name of the register.
8052 k -- likewise, print the SImode name of the register.
8053 q -- likewise, print the DImode name of the register.
8054 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8055 y -- print "st(0)" instead of "st" as a register.
8056 D -- print condition for SSE cmp instruction.
8057 P -- if PIC, print an @PLT suffix.
8058 X -- don't print any sort of PIC '@' suffix for a symbol.
8059 & -- print some in-use local-dynamic symbol name.
8060 H -- print a memory address offset by 8; used for sse high-parts
8064 print_operand (FILE *file
, rtx x
, int code
)
8071 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8076 assemble_name (file
, get_some_local_dynamic_name ());
8080 switch (ASSEMBLER_DIALECT
)
8087 /* Intel syntax. For absolute addresses, registers should not
8088 be surrounded by braces. */
8092 PRINT_OPERAND (file
, x
, 0);
8102 PRINT_OPERAND (file
, x
, 0);
8107 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8112 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8117 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8122 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8127 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8132 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8137 /* 387 opcodes don't get size suffixes if the operands are
8139 if (STACK_REG_P (x
))
8142 /* Likewise if using Intel opcodes. */
8143 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8146 /* This is the size of op from size of operand. */
8147 switch (GET_MODE_SIZE (GET_MODE (x
)))
8154 #ifdef HAVE_GAS_FILDS_FISTS
8160 if (GET_MODE (x
) == SFmode
)
8175 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8177 #ifdef GAS_MNEMONICS
8203 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8205 PRINT_OPERAND (file
, x
, 0);
8211 /* Little bit of braindamage here. The SSE compare instructions
8212 does use completely different names for the comparisons that the
8213 fp conditional moves. */
8214 switch (GET_CODE (x
))
8229 fputs ("unord", file
);
8233 fputs ("neq", file
);
8237 fputs ("nlt", file
);
8241 fputs ("nle", file
);
8244 fputs ("ord", file
);
8251 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8252 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8254 switch (GET_MODE (x
))
8256 case HImode
: putc ('w', file
); break;
8258 case SFmode
: putc ('l', file
); break;
8260 case DFmode
: putc ('q', file
); break;
8261 default: gcc_unreachable ();
8268 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8271 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8272 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8275 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8278 /* Like above, but reverse condition */
8280 /* Check to see if argument to %c is really a constant
8281 and not a condition code which needs to be reversed. */
8282 if (!COMPARISON_P (x
))
8284 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8287 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8290 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8291 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8294 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8298 /* It doesn't actually matter what mode we use here, as we're
8299 only going to use this for printing. */
8300 x
= adjust_address_nv (x
, DImode
, 8);
8307 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8310 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8313 int pred_val
= INTVAL (XEXP (x
, 0));
8315 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8316 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8318 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8319 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8321 /* Emit hints only in the case default branch prediction
8322 heuristics would fail. */
8323 if (taken
!= cputaken
)
8325 /* We use 3e (DS) prefix for taken branches and
8326 2e (CS) prefix for not taken branches. */
8328 fputs ("ds ; ", file
);
8330 fputs ("cs ; ", file
);
8337 output_operand_lossage ("invalid operand code '%c'", code
);
8342 print_reg (x
, code
, file
);
8346 /* No `byte ptr' prefix for call instructions. */
8347 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8350 switch (GET_MODE_SIZE (GET_MODE (x
)))
8352 case 1: size
= "BYTE"; break;
8353 case 2: size
= "WORD"; break;
8354 case 4: size
= "DWORD"; break;
8355 case 8: size
= "QWORD"; break;
8356 case 12: size
= "XWORD"; break;
8357 case 16: size
= "XMMWORD"; break;
8362 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8365 else if (code
== 'w')
8367 else if (code
== 'k')
8371 fputs (" PTR ", file
);
8375 /* Avoid (%rip) for call operands. */
8376 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8377 && !CONST_INT_P (x
))
8378 output_addr_const (file
, x
);
8379 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8380 output_operand_lossage ("invalid constraints for operand");
8385 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8390 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8391 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8393 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8395 fprintf (file
, "0x%08lx", l
);
8398 /* These float cases don't actually occur as immediate operands. */
8399 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8403 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8404 fprintf (file
, "%s", dstr
);
8407 else if (GET_CODE (x
) == CONST_DOUBLE
8408 && GET_MODE (x
) == XFmode
)
8412 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8413 fprintf (file
, "%s", dstr
);
8418 /* We have patterns that allow zero sets of memory, for instance.
8419 In 64-bit mode, we should probably support all 8-byte vectors,
8420 since we can in fact encode that into an immediate. */
8421 if (GET_CODE (x
) == CONST_VECTOR
)
8423 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8429 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8431 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8434 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8435 || GET_CODE (x
) == LABEL_REF
)
8437 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8440 fputs ("OFFSET FLAT:", file
);
8443 if (CONST_INT_P (x
))
8444 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8446 output_pic_addr_const (file
, x
, code
);
8448 output_addr_const (file
, x
);
8452 /* Print a memory operand whose address is ADDR. */
8455 print_operand_address (FILE *file
, rtx addr
)
8457 struct ix86_address parts
;
8458 rtx base
, index
, disp
;
8460 int ok
= ix86_decompose_address (addr
, &parts
);
8465 index
= parts
.index
;
8467 scale
= parts
.scale
;
8475 if (USER_LABEL_PREFIX
[0] == 0)
8477 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8483 if (!base
&& !index
)
8485 /* Displacement only requires special attention. */
8487 if (CONST_INT_P (disp
))
8489 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8491 if (USER_LABEL_PREFIX
[0] == 0)
8493 fputs ("ds:", file
);
8495 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8498 output_pic_addr_const (file
, disp
, 0);
8500 output_addr_const (file
, disp
);
8502 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8505 if (GET_CODE (disp
) == CONST
8506 && GET_CODE (XEXP (disp
, 0)) == PLUS
8507 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8508 disp
= XEXP (XEXP (disp
, 0), 0);
8509 if (GET_CODE (disp
) == LABEL_REF
8510 || (GET_CODE (disp
) == SYMBOL_REF
8511 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8512 fputs ("(%rip)", file
);
8517 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8522 output_pic_addr_const (file
, disp
, 0);
8523 else if (GET_CODE (disp
) == LABEL_REF
)
8524 output_asm_label (disp
);
8526 output_addr_const (file
, disp
);
8531 print_reg (base
, 0, file
);
8535 print_reg (index
, 0, file
);
8537 fprintf (file
, ",%d", scale
);
8543 rtx offset
= NULL_RTX
;
8547 /* Pull out the offset of a symbol; print any symbol itself. */
8548 if (GET_CODE (disp
) == CONST
8549 && GET_CODE (XEXP (disp
, 0)) == PLUS
8550 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8552 offset
= XEXP (XEXP (disp
, 0), 1);
8553 disp
= gen_rtx_CONST (VOIDmode
,
8554 XEXP (XEXP (disp
, 0), 0));
8558 output_pic_addr_const (file
, disp
, 0);
8559 else if (GET_CODE (disp
) == LABEL_REF
)
8560 output_asm_label (disp
);
8561 else if (CONST_INT_P (disp
))
8564 output_addr_const (file
, disp
);
8570 print_reg (base
, 0, file
);
8573 if (INTVAL (offset
) >= 0)
8575 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8579 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8586 print_reg (index
, 0, file
);
8588 fprintf (file
, "*%d", scale
);
8596 output_addr_const_extra (FILE *file
, rtx x
)
8600 if (GET_CODE (x
) != UNSPEC
)
8603 op
= XVECEXP (x
, 0, 0);
8604 switch (XINT (x
, 1))
8606 case UNSPEC_GOTTPOFF
:
8607 output_addr_const (file
, op
);
8608 /* FIXME: This might be @TPOFF in Sun ld. */
8609 fputs ("@GOTTPOFF", file
);
8612 output_addr_const (file
, op
);
8613 fputs ("@TPOFF", file
);
8616 output_addr_const (file
, op
);
8618 fputs ("@TPOFF", file
);
8620 fputs ("@NTPOFF", file
);
8623 output_addr_const (file
, op
);
8624 fputs ("@DTPOFF", file
);
8626 case UNSPEC_GOTNTPOFF
:
8627 output_addr_const (file
, op
);
8629 fputs ("@GOTTPOFF(%rip)", file
);
8631 fputs ("@GOTNTPOFF", file
);
8633 case UNSPEC_INDNTPOFF
:
8634 output_addr_const (file
, op
);
8635 fputs ("@INDNTPOFF", file
);
8645 /* Split one or more DImode RTL references into pairs of SImode
8646 references. The RTL can be REG, offsettable MEM, integer constant, or
8647 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8648 split and "num" is its length. lo_half and hi_half are output arrays
8649 that parallel "operands". */
8652 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8656 rtx op
= operands
[num
];
8658 /* simplify_subreg refuse to split volatile memory addresses,
8659 but we still have to handle it. */
8662 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8663 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8667 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8668 GET_MODE (op
) == VOIDmode
8669 ? DImode
: GET_MODE (op
), 0);
8670 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8671 GET_MODE (op
) == VOIDmode
8672 ? DImode
: GET_MODE (op
), 4);
8676 /* Split one or more TImode RTL references into pairs of DImode
8677 references. The RTL can be REG, offsettable MEM, integer constant, or
8678 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8679 split and "num" is its length. lo_half and hi_half are output arrays
8680 that parallel "operands". */
8683 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8687 rtx op
= operands
[num
];
8689 /* simplify_subreg refuse to split volatile memory addresses, but we
8690 still have to handle it. */
8693 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8694 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8698 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8699 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8704 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8705 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8706 is the expression of the binary operation. The output may either be
8707 emitted here, or returned to the caller, like all output_* functions.
8709 There is no guarantee that the operands are the same mode, as they
8710 might be within FLOAT or FLOAT_EXTEND expressions. */
8712 #ifndef SYSV386_COMPAT
8713 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8714 wants to fix the assemblers because that causes incompatibility
8715 with gcc. No-one wants to fix gcc because that causes
8716 incompatibility with assemblers... You can use the option of
8717 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8718 #define SYSV386_COMPAT 1
8722 output_387_binary_op (rtx insn
, rtx
*operands
)
8724 static char buf
[30];
8727 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8729 #ifdef ENABLE_CHECKING
8730 /* Even if we do not want to check the inputs, this documents input
8731 constraints. Which helps in understanding the following code. */
8732 if (STACK_REG_P (operands
[0])
8733 && ((REG_P (operands
[1])
8734 && REGNO (operands
[0]) == REGNO (operands
[1])
8735 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8736 || (REG_P (operands
[2])
8737 && REGNO (operands
[0]) == REGNO (operands
[2])
8738 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8739 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8742 gcc_assert (is_sse
);
8745 switch (GET_CODE (operands
[3]))
8748 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8749 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8757 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8758 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8766 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8767 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8775 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8776 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8790 if (GET_MODE (operands
[0]) == SFmode
)
8791 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8793 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8798 switch (GET_CODE (operands
[3]))
8802 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8804 rtx temp
= operands
[2];
8805 operands
[2] = operands
[1];
8809 /* know operands[0] == operands[1]. */
8811 if (MEM_P (operands
[2]))
8817 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8819 if (STACK_TOP_P (operands
[0]))
8820 /* How is it that we are storing to a dead operand[2]?
8821 Well, presumably operands[1] is dead too. We can't
8822 store the result to st(0) as st(0) gets popped on this
8823 instruction. Instead store to operands[2] (which I
8824 think has to be st(1)). st(1) will be popped later.
8825 gcc <= 2.8.1 didn't have this check and generated
8826 assembly code that the Unixware assembler rejected. */
8827 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8829 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8833 if (STACK_TOP_P (operands
[0]))
8834 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8836 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8841 if (MEM_P (operands
[1]))
8847 if (MEM_P (operands
[2]))
8853 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8856 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8857 derived assemblers, confusingly reverse the direction of
8858 the operation for fsub{r} and fdiv{r} when the
8859 destination register is not st(0). The Intel assembler
8860 doesn't have this brain damage. Read !SYSV386_COMPAT to
8861 figure out what the hardware really does. */
8862 if (STACK_TOP_P (operands
[0]))
8863 p
= "{p\t%0, %2|rp\t%2, %0}";
8865 p
= "{rp\t%2, %0|p\t%0, %2}";
8867 if (STACK_TOP_P (operands
[0]))
8868 /* As above for fmul/fadd, we can't store to st(0). */
8869 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8871 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8876 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8879 if (STACK_TOP_P (operands
[0]))
8880 p
= "{rp\t%0, %1|p\t%1, %0}";
8882 p
= "{p\t%1, %0|rp\t%0, %1}";
8884 if (STACK_TOP_P (operands
[0]))
8885 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8887 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8892 if (STACK_TOP_P (operands
[0]))
8894 if (STACK_TOP_P (operands
[1]))
8895 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8897 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8900 else if (STACK_TOP_P (operands
[1]))
8903 p
= "{\t%1, %0|r\t%0, %1}";
8905 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8911 p
= "{r\t%2, %0|\t%0, %2}";
8913 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8926 /* Return needed mode for entity in optimize_mode_switching pass. */
8929 ix86_mode_needed (int entity
, rtx insn
)
8931 enum attr_i387_cw mode
;
8933 /* The mode UNINITIALIZED is used to store control word after a
8934 function call or ASM pattern. The mode ANY specify that function
8935 has no requirements on the control word and make no changes in the
8936 bits we are interested in. */
8939 || (NONJUMP_INSN_P (insn
)
8940 && (asm_noperands (PATTERN (insn
)) >= 0
8941 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8942 return I387_CW_UNINITIALIZED
;
8944 if (recog_memoized (insn
) < 0)
8947 mode
= get_attr_i387_cw (insn
);
8952 if (mode
== I387_CW_TRUNC
)
8957 if (mode
== I387_CW_FLOOR
)
8962 if (mode
== I387_CW_CEIL
)
8967 if (mode
== I387_CW_MASK_PM
)
8978 /* Output code to initialize control word copies used by trunc?f?i and
8979 rounding patterns. CURRENT_MODE is set to current control word,
8980 while NEW_MODE is set to new control word. */
8983 emit_i387_cw_initialization (int mode
)
8985 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8990 rtx reg
= gen_reg_rtx (HImode
);
8992 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8993 emit_move_insn (reg
, copy_rtx (stored_mode
));
8995 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9000 /* round toward zero (truncate) */
9001 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9002 slot
= SLOT_CW_TRUNC
;
9006 /* round down toward -oo */
9007 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9008 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9009 slot
= SLOT_CW_FLOOR
;
9013 /* round up toward +oo */
9014 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9015 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9016 slot
= SLOT_CW_CEIL
;
9019 case I387_CW_MASK_PM
:
9020 /* mask precision exception for nearbyint() */
9021 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9022 slot
= SLOT_CW_MASK_PM
;
9034 /* round toward zero (truncate) */
9035 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9036 slot
= SLOT_CW_TRUNC
;
9040 /* round down toward -oo */
9041 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9042 slot
= SLOT_CW_FLOOR
;
9046 /* round up toward +oo */
9047 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9048 slot
= SLOT_CW_CEIL
;
9051 case I387_CW_MASK_PM
:
9052 /* mask precision exception for nearbyint() */
9053 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9054 slot
= SLOT_CW_MASK_PM
;
9062 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9064 new_mode
= assign_386_stack_local (HImode
, slot
);
9065 emit_move_insn (new_mode
, reg
);
9068 /* Output code for INSN to convert a float to a signed int. OPERANDS
9069 are the insn operands. The output may be [HSD]Imode and the input
9070 operand may be [SDX]Fmode. */
9073 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9075 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9076 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9077 int round_mode
= get_attr_i387_cw (insn
);
9079 /* Jump through a hoop or two for DImode, since the hardware has no
9080 non-popping instruction. We used to do this a different way, but
9081 that was somewhat fragile and broke with post-reload splitters. */
9082 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9083 output_asm_insn ("fld\t%y1", operands
);
9085 gcc_assert (STACK_TOP_P (operands
[1]));
9086 gcc_assert (MEM_P (operands
[0]));
9089 output_asm_insn ("fisttp%z0\t%0", operands
);
9092 if (round_mode
!= I387_CW_ANY
)
9093 output_asm_insn ("fldcw\t%3", operands
);
9094 if (stack_top_dies
|| dimode_p
)
9095 output_asm_insn ("fistp%z0\t%0", operands
);
9097 output_asm_insn ("fist%z0\t%0", operands
);
9098 if (round_mode
!= I387_CW_ANY
)
9099 output_asm_insn ("fldcw\t%2", operands
);
9105 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9106 have the values zero or one, indicates the ffreep insn's operand
9107 from the OPERANDS array. */
9110 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9112 if (TARGET_USE_FFREEP
)
9113 #if HAVE_AS_IX86_FFREEP
9114 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9117 static char retval
[] = ".word\t0xc_df";
9118 int regno
= REGNO (operands
[opno
]);
9120 gcc_assert (FP_REGNO_P (regno
));
9122 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9127 return opno
? "fstp\t%y1" : "fstp\t%y0";
9131 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9132 should be used. UNORDERED_P is true when fucom should be used. */
9135 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9138 rtx cmp_op0
, cmp_op1
;
9139 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9143 cmp_op0
= operands
[0];
9144 cmp_op1
= operands
[1];
9148 cmp_op0
= operands
[1];
9149 cmp_op1
= operands
[2];
9154 if (GET_MODE (operands
[0]) == SFmode
)
9156 return "ucomiss\t{%1, %0|%0, %1}";
9158 return "comiss\t{%1, %0|%0, %1}";
9161 return "ucomisd\t{%1, %0|%0, %1}";
9163 return "comisd\t{%1, %0|%0, %1}";
9166 gcc_assert (STACK_TOP_P (cmp_op0
));
9168 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9170 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9174 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9175 return output_387_ffreep (operands
, 1);
9178 return "ftst\n\tfnstsw\t%0";
9181 if (STACK_REG_P (cmp_op1
)
9183 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9184 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9186 /* If both the top of the 387 stack dies, and the other operand
9187 is also a stack register that dies, then this must be a
9188 `fcompp' float compare */
9192 /* There is no double popping fcomi variant. Fortunately,
9193 eflags is immune from the fstp's cc clobbering. */
9195 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9197 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9198 return output_387_ffreep (operands
, 0);
9203 return "fucompp\n\tfnstsw\t%0";
9205 return "fcompp\n\tfnstsw\t%0";
9210 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9212 static const char * const alt
[16] =
9214 "fcom%z2\t%y2\n\tfnstsw\t%0",
9215 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9216 "fucom%z2\t%y2\n\tfnstsw\t%0",
9217 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9219 "ficom%z2\t%y2\n\tfnstsw\t%0",
9220 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9224 "fcomi\t{%y1, %0|%0, %y1}",
9225 "fcomip\t{%y1, %0|%0, %y1}",
9226 "fucomi\t{%y1, %0|%0, %y1}",
9227 "fucomip\t{%y1, %0|%0, %y1}",
9238 mask
= eflags_p
<< 3;
9239 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9240 mask
|= unordered_p
<< 1;
9241 mask
|= stack_top_dies
;
9243 gcc_assert (mask
< 16);
9252 ix86_output_addr_vec_elt (FILE *file
, int value
)
9254 const char *directive
= ASM_LONG
;
9258 directive
= ASM_QUAD
;
9260 gcc_assert (!TARGET_64BIT
);
9263 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9267 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9270 fprintf (file
, "%s%s%d-%s%d\n",
9271 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9272 else if (HAVE_AS_GOTOFF_IN_DATA
)
9273 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9275 else if (TARGET_MACHO
)
9277 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9278 machopic_output_function_base_name (file
);
9279 fprintf(file
, "\n");
9283 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9284 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9287 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9291 ix86_expand_clear (rtx dest
)
9295 /* We play register width games, which are only valid after reload. */
9296 gcc_assert (reload_completed
);
9298 /* Avoid HImode and its attendant prefix byte. */
9299 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9300 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9302 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9304 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9305 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9307 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9308 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9314 /* X is an unchanging MEM. If it is a constant pool reference, return
9315 the constant pool rtx, else NULL. */
9318 maybe_get_pool_constant (rtx x
)
9320 x
= ix86_delegitimize_address (XEXP (x
, 0));
9322 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9323 return get_pool_constant (x
);
9329 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9331 int strict
= (reload_in_progress
|| reload_completed
);
9333 enum tls_model model
;
9338 if (GET_CODE (op1
) == SYMBOL_REF
)
9340 model
= SYMBOL_REF_TLS_MODEL (op1
);
9343 op1
= legitimize_tls_address (op1
, model
, true);
9344 op1
= force_operand (op1
, op0
);
9349 else if (GET_CODE (op1
) == CONST
9350 && GET_CODE (XEXP (op1
, 0)) == PLUS
9351 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9353 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9356 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9357 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9358 op1
= force_operand (op1
, NULL
);
9359 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9360 op0
, 1, OPTAB_DIRECT
);
9366 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9368 if (TARGET_MACHO
&& !TARGET_64BIT
)
9373 rtx temp
= ((reload_in_progress
9374 || ((op0
&& REG_P (op0
))
9376 ? op0
: gen_reg_rtx (Pmode
));
9377 op1
= machopic_indirect_data_reference (op1
, temp
);
9378 op1
= machopic_legitimize_pic_address (op1
, mode
,
9379 temp
== op1
? 0 : temp
);
9381 else if (MACHOPIC_INDIRECT
)
9382 op1
= machopic_indirect_data_reference (op1
, 0);
9390 op1
= force_reg (Pmode
, op1
);
9392 op1
= legitimize_address (op1
, op1
, Pmode
);
9398 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9399 || !push_operand (op0
, mode
))
9401 op1
= force_reg (mode
, op1
);
9403 if (push_operand (op0
, mode
)
9404 && ! general_no_elim_operand (op1
, mode
))
9405 op1
= copy_to_mode_reg (mode
, op1
);
9407 /* Force large constants in 64bit compilation into register
9408 to get them CSEed. */
9409 if (TARGET_64BIT
&& mode
== DImode
9410 && immediate_operand (op1
, mode
)
9411 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9412 && !register_operand (op0
, mode
)
9413 && optimize
&& !reload_completed
&& !reload_in_progress
)
9414 op1
= copy_to_mode_reg (mode
, op1
);
9416 if (FLOAT_MODE_P (mode
))
9418 /* If we are loading a floating point constant to a register,
9419 force the value to memory now, since we'll get better code
9420 out the back end. */
9424 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9426 op1
= validize_mem (force_const_mem (mode
, op1
));
9427 if (!register_operand (op0
, mode
))
9429 rtx temp
= gen_reg_rtx (mode
);
9430 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9431 emit_move_insn (op0
, temp
);
9438 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9442 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9444 rtx op0
= operands
[0], op1
= operands
[1];
9446 /* Force constants other than zero into memory. We do not know how
9447 the instructions used to build constants modify the upper 64 bits
9448 of the register, once we have that information we may be able
9449 to handle some of them more efficiently. */
9450 if ((reload_in_progress
| reload_completed
) == 0
9451 && register_operand (op0
, mode
)
9453 && standard_sse_constant_p (op1
) <= 0)
9454 op1
= validize_mem (force_const_mem (mode
, op1
));
9456 /* Make operand1 a register if it isn't already. */
9458 && !register_operand (op0
, mode
)
9459 && !register_operand (op1
, mode
))
9461 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9465 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9468 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9469 straight to ix86_expand_vector_move. */
9472 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9481 /* If we're optimizing for size, movups is the smallest. */
9484 op0
= gen_lowpart (V4SFmode
, op0
);
9485 op1
= gen_lowpart (V4SFmode
, op1
);
9486 emit_insn (gen_sse_movups (op0
, op1
));
9490 /* ??? If we have typed data, then it would appear that using
9491 movdqu is the only way to get unaligned data loaded with
9493 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9495 op0
= gen_lowpart (V16QImode
, op0
);
9496 op1
= gen_lowpart (V16QImode
, op1
);
9497 emit_insn (gen_sse2_movdqu (op0
, op1
));
9501 if (TARGET_SSE2
&& mode
== V2DFmode
)
9505 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9507 op0
= gen_lowpart (V2DFmode
, op0
);
9508 op1
= gen_lowpart (V2DFmode
, op1
);
9509 emit_insn (gen_sse2_movupd (op0
, op1
));
9513 /* When SSE registers are split into halves, we can avoid
9514 writing to the top half twice. */
9515 if (TARGET_SSE_SPLIT_REGS
)
9517 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9522 /* ??? Not sure about the best option for the Intel chips.
9523 The following would seem to satisfy; the register is
9524 entirely cleared, breaking the dependency chain. We
9525 then store to the upper half, with a dependency depth
9526 of one. A rumor has it that Intel recommends two movsd
9527 followed by an unpacklpd, but this is unconfirmed. And
9528 given that the dependency depth of the unpacklpd would
9529 still be one, I'm not sure why this would be better. */
9530 zero
= CONST0_RTX (V2DFmode
);
9533 m
= adjust_address (op1
, DFmode
, 0);
9534 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9535 m
= adjust_address (op1
, DFmode
, 8);
9536 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9540 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9542 op0
= gen_lowpart (V4SFmode
, op0
);
9543 op1
= gen_lowpart (V4SFmode
, op1
);
9544 emit_insn (gen_sse_movups (op0
, op1
));
9548 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9549 emit_move_insn (op0
, CONST0_RTX (mode
));
9551 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9553 if (mode
!= V4SFmode
)
9554 op0
= gen_lowpart (V4SFmode
, op0
);
9555 m
= adjust_address (op1
, V2SFmode
, 0);
9556 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9557 m
= adjust_address (op1
, V2SFmode
, 8);
9558 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9561 else if (MEM_P (op0
))
9563 /* If we're optimizing for size, movups is the smallest. */
9566 op0
= gen_lowpart (V4SFmode
, op0
);
9567 op1
= gen_lowpart (V4SFmode
, op1
);
9568 emit_insn (gen_sse_movups (op0
, op1
));
9572 /* ??? Similar to above, only less clear because of quote
9573 typeless stores unquote. */
9574 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9575 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9577 op0
= gen_lowpart (V16QImode
, op0
);
9578 op1
= gen_lowpart (V16QImode
, op1
);
9579 emit_insn (gen_sse2_movdqu (op0
, op1
));
9583 if (TARGET_SSE2
&& mode
== V2DFmode
)
9585 m
= adjust_address (op0
, DFmode
, 0);
9586 emit_insn (gen_sse2_storelpd (m
, op1
));
9587 m
= adjust_address (op0
, DFmode
, 8);
9588 emit_insn (gen_sse2_storehpd (m
, op1
));
9592 if (mode
!= V4SFmode
)
9593 op1
= gen_lowpart (V4SFmode
, op1
);
9594 m
= adjust_address (op0
, V2SFmode
, 0);
9595 emit_insn (gen_sse_storelps (m
, op1
));
9596 m
= adjust_address (op0
, V2SFmode
, 8);
9597 emit_insn (gen_sse_storehps (m
, op1
));
9604 /* Expand a push in MODE. This is some mode for which we do not support
9605 proper push instructions, at least from the registers that we expect
9606 the value to live in. */
9609 ix86_expand_push (enum machine_mode mode
, rtx x
)
9613 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9614 GEN_INT (-GET_MODE_SIZE (mode
)),
9615 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9616 if (tmp
!= stack_pointer_rtx
)
9617 emit_move_insn (stack_pointer_rtx
, tmp
);
9619 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9620 emit_move_insn (tmp
, x
);
9623 /* Helper function of ix86_fixup_binary_operands to canonicalize
9624 operand order. Returns true if the operands should be swapped. */
9627 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9630 rtx dst
= operands
[0];
9631 rtx src1
= operands
[1];
9632 rtx src2
= operands
[2];
9634 /* If the operation is not commutative, we can't do anything. */
9635 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9638 /* Highest priority is that src1 should match dst. */
9639 if (rtx_equal_p (dst
, src1
))
9641 if (rtx_equal_p (dst
, src2
))
9644 /* Next highest priority is that immediate constants come second. */
9645 if (immediate_operand (src2
, mode
))
9647 if (immediate_operand (src1
, mode
))
9650 /* Lowest priority is that memory references should come second. */
9660 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9661 destination to use for the operation. If different from the true
9662 destination in operands[0], a copy operation will be required. */
9665 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9668 rtx dst
= operands
[0];
9669 rtx src1
= operands
[1];
9670 rtx src2
= operands
[2];
9672 /* Canonicalize operand order. */
9673 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9680 /* Both source operands cannot be in memory. */
9681 if (MEM_P (src1
) && MEM_P (src2
))
9683 /* Optimization: Only read from memory once. */
9684 if (rtx_equal_p (src1
, src2
))
9686 src2
= force_reg (mode
, src2
);
9690 src2
= force_reg (mode
, src2
);
9693 /* If the destination is memory, and we do not have matching source
9694 operands, do things in registers. */
9695 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9696 dst
= gen_reg_rtx (mode
);
9698 /* Source 1 cannot be a constant. */
9699 if (CONSTANT_P (src1
))
9700 src1
= force_reg (mode
, src1
);
9702 /* Source 1 cannot be a non-matching memory. */
9703 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9704 src1
= force_reg (mode
, src1
);
9711 /* Similarly, but assume that the destination has already been
9715 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9716 enum machine_mode mode
, rtx operands
[])
9718 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9719 gcc_assert (dst
== operands
[0]);
9722 /* Attempt to expand a binary operator. Make the expansion closer to the
9723 actual machine, then just general_operand, which will allow 3 separate
9724 memory references (one output, two input) in a single insn. */
9727 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9730 rtx src1
, src2
, dst
, op
, clob
;
9732 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9736 /* Emit the instruction. */
9738 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9739 if (reload_in_progress
)
9741 /* Reload doesn't know about the flags register, and doesn't know that
9742 it doesn't want to clobber it. We can only do this with PLUS. */
9743 gcc_assert (code
== PLUS
);
9748 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9749 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9752 /* Fix up the destination if needed. */
9753 if (dst
!= operands
[0])
9754 emit_move_insn (operands
[0], dst
);
9757 /* Return TRUE or FALSE depending on whether the binary operator meets the
9758 appropriate constraints. */
9761 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9764 rtx dst
= operands
[0];
9765 rtx src1
= operands
[1];
9766 rtx src2
= operands
[2];
9768 /* Both source operands cannot be in memory. */
9769 if (MEM_P (src1
) && MEM_P (src2
))
9772 /* Canonicalize operand order for commutative operators. */
9773 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9780 /* If the destination is memory, we must have a matching source operand. */
9781 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9784 /* Source 1 cannot be a constant. */
9785 if (CONSTANT_P (src1
))
9788 /* Source 1 cannot be a non-matching memory. */
9789 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9795 /* Attempt to expand a unary operator. Make the expansion closer to the
9796 actual machine, then just general_operand, which will allow 2 separate
9797 memory references (one output, one input) in a single insn. */
9800 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9803 int matching_memory
;
9804 rtx src
, dst
, op
, clob
;
9809 /* If the destination is memory, and we do not have matching source
9810 operands, do things in registers. */
9811 matching_memory
= 0;
9814 if (rtx_equal_p (dst
, src
))
9815 matching_memory
= 1;
9817 dst
= gen_reg_rtx (mode
);
9820 /* When source operand is memory, destination must match. */
9821 if (MEM_P (src
) && !matching_memory
)
9822 src
= force_reg (mode
, src
);
9824 /* Emit the instruction. */
9826 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9827 if (reload_in_progress
|| code
== NOT
)
9829 /* Reload doesn't know about the flags register, and doesn't know that
9830 it doesn't want to clobber it. */
9831 gcc_assert (code
== NOT
);
9836 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9837 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9840 /* Fix up the destination if needed. */
9841 if (dst
!= operands
[0])
9842 emit_move_insn (operands
[0], dst
);
9845 /* Return TRUE or FALSE depending on whether the unary operator meets the
9846 appropriate constraints. */
9849 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9850 enum machine_mode mode ATTRIBUTE_UNUSED
,
9851 rtx operands
[2] ATTRIBUTE_UNUSED
)
9853 /* If one of operands is memory, source and destination must match. */
9854 if ((MEM_P (operands
[0])
9855 || MEM_P (operands
[1]))
9856 && ! rtx_equal_p (operands
[0], operands
[1]))
9861 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9862 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9863 true, then replicate the mask for all elements of the vector register.
9864 If INVERT is true, then create a mask excluding the sign bit. */
9867 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9869 enum machine_mode vec_mode
;
9870 HOST_WIDE_INT hi
, lo
;
9875 /* Find the sign bit, sign extended to 2*HWI. */
9877 lo
= 0x80000000, hi
= lo
< 0;
9878 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9879 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9881 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9886 /* Force this value into the low part of a fp vector constant. */
9887 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9888 mask
= gen_lowpart (mode
, mask
);
9893 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9895 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9896 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9897 vec_mode
= V4SFmode
;
9902 v
= gen_rtvec (2, mask
, mask
);
9904 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9905 vec_mode
= V2DFmode
;
9908 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
9911 /* Generate code for floating point ABS or NEG. */
9914 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9917 rtx mask
, set
, use
, clob
, dst
, src
;
9918 bool matching_memory
;
9919 bool use_sse
= false;
9920 bool vector_mode
= VECTOR_MODE_P (mode
);
9921 enum machine_mode elt_mode
= mode
;
9925 elt_mode
= GET_MODE_INNER (mode
);
9928 else if (TARGET_SSE_MATH
)
9929 use_sse
= SSE_FLOAT_MODE_P (mode
);
9931 /* NEG and ABS performed with SSE use bitwise mask operations.
9932 Create the appropriate mask now. */
9934 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9941 /* If the destination is memory, and we don't have matching source
9942 operands or we're using the x87, do things in registers. */
9943 matching_memory
= false;
9946 if (use_sse
&& rtx_equal_p (dst
, src
))
9947 matching_memory
= true;
9949 dst
= gen_reg_rtx (mode
);
9951 if (MEM_P (src
) && !matching_memory
)
9952 src
= force_reg (mode
, src
);
9956 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9957 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9962 set
= gen_rtx_fmt_e (code
, mode
, src
);
9963 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9966 use
= gen_rtx_USE (VOIDmode
, mask
);
9967 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9968 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9969 gen_rtvec (3, set
, use
, clob
)));
9975 if (dst
!= operands
[0])
9976 emit_move_insn (operands
[0], dst
);
9979 /* Expand a copysign operation. Special case operand 0 being a constant. */
9982 ix86_expand_copysign (rtx operands
[])
9984 enum machine_mode mode
, vmode
;
9985 rtx dest
, op0
, op1
, mask
, nmask
;
9991 mode
= GET_MODE (dest
);
9992 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9994 if (GET_CODE (op0
) == CONST_DOUBLE
)
9998 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
9999 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10001 if (op0
== CONST0_RTX (mode
))
10002 op0
= CONST0_RTX (vmode
);
10005 if (mode
== SFmode
)
10006 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10007 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10009 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10010 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10013 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10015 if (mode
== SFmode
)
10016 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10018 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10022 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10023 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10025 if (mode
== SFmode
)
10026 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10028 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10032 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10033 be a constant, and so has already been expanded into a vector constant. */
10036 ix86_split_copysign_const (rtx operands
[])
10038 enum machine_mode mode
, vmode
;
10039 rtx dest
, op0
, op1
, mask
, x
;
10041 dest
= operands
[0];
10044 mask
= operands
[3];
10046 mode
= GET_MODE (dest
);
10047 vmode
= GET_MODE (mask
);
10049 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10050 x
= gen_rtx_AND (vmode
, dest
, mask
);
10051 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10053 if (op0
!= CONST0_RTX (vmode
))
10055 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10056 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10060 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10061 so we have to do two masks. */
10064 ix86_split_copysign_var (rtx operands
[])
10066 enum machine_mode mode
, vmode
;
10067 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10069 dest
= operands
[0];
10070 scratch
= operands
[1];
10073 nmask
= operands
[4];
10074 mask
= operands
[5];
10076 mode
= GET_MODE (dest
);
10077 vmode
= GET_MODE (mask
);
10079 if (rtx_equal_p (op0
, op1
))
10081 /* Shouldn't happen often (it's useless, obviously), but when it does
10082 we'd generate incorrect code if we continue below. */
10083 emit_move_insn (dest
, op0
);
10087 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10089 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10091 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10092 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10095 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10096 x
= gen_rtx_NOT (vmode
, dest
);
10097 x
= gen_rtx_AND (vmode
, x
, op0
);
10098 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10102 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10104 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10106 else /* alternative 2,4 */
10108 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10109 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10110 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10112 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10114 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10116 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10117 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10119 else /* alternative 3,4 */
10121 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10123 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10124 x
= gen_rtx_AND (vmode
, dest
, op0
);
10126 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10129 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10130 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10133 /* Return TRUE or FALSE depending on whether the first SET in INSN
10134 has source and destination with matching CC modes, and that the
10135 CC mode is at least as constrained as REQ_MODE. */
10138 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10141 enum machine_mode set_mode
;
10143 set
= PATTERN (insn
);
10144 if (GET_CODE (set
) == PARALLEL
)
10145 set
= XVECEXP (set
, 0, 0);
10146 gcc_assert (GET_CODE (set
) == SET
);
10147 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10149 set_mode
= GET_MODE (SET_DEST (set
));
10153 if (req_mode
!= CCNOmode
10154 && (req_mode
!= CCmode
10155 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10159 if (req_mode
== CCGCmode
)
10163 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10167 if (req_mode
== CCZmode
)
10174 gcc_unreachable ();
10177 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10180 /* Generate insn patterns to do an integer compare of OPERANDS. */
10183 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10185 enum machine_mode cmpmode
;
10188 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10189 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10191 /* This is very simple, but making the interface the same as in the
10192 FP case makes the rest of the code easier. */
10193 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10194 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10196 /* Return the test that should be put into the flags user, i.e.
10197 the bcc, scc, or cmov instruction. */
10198 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10201 /* Figure out whether to use ordered or unordered fp comparisons.
10202 Return the appropriate mode to use. */
10205 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10207 /* ??? In order to make all comparisons reversible, we do all comparisons
10208 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10209 all forms trapping and nontrapping comparisons, we can make inequality
10210 comparisons trapping again, since it results in better code when using
10211 FCOM based compares. */
10212 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10216 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10218 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10219 return ix86_fp_compare_mode (code
);
10222 /* Only zero flag is needed. */
10223 case EQ
: /* ZF=0 */
10224 case NE
: /* ZF!=0 */
10226 /* Codes needing carry flag. */
10227 case GEU
: /* CF=0 */
10228 case GTU
: /* CF=0 & ZF=0 */
10229 case LTU
: /* CF=1 */
10230 case LEU
: /* CF=1 | ZF=1 */
10232 /* Codes possibly doable only with sign flag when
10233 comparing against zero. */
10234 case GE
: /* SF=OF or SF=0 */
10235 case LT
: /* SF<>OF or SF=1 */
10236 if (op1
== const0_rtx
)
10239 /* For other cases Carry flag is not required. */
10241 /* Codes doable only with sign flag when comparing
10242 against zero, but we miss jump instruction for it
10243 so we need to use relational tests against overflow
10244 that thus needs to be zero. */
10245 case GT
: /* ZF=0 & SF=OF */
10246 case LE
: /* ZF=1 | SF<>OF */
10247 if (op1
== const0_rtx
)
10251 /* strcmp pattern do (use flags) and combine may ask us for proper
10256 gcc_unreachable ();
10260 /* Return the fixed registers used for condition codes. */
10263 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10270 /* If two condition code modes are compatible, return a condition code
10271 mode which is compatible with both. Otherwise, return
10274 static enum machine_mode
10275 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10280 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10283 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10284 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10290 gcc_unreachable ();
10312 /* These are only compatible with themselves, which we already
10318 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10321 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10323 enum rtx_code swapped_code
= swap_condition (code
);
10324 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10325 || (ix86_fp_comparison_cost (swapped_code
)
10326 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10329 /* Swap, force into registers, or otherwise massage the two operands
10330 to a fp comparison. The operands are updated in place; the new
10331 comparison code is returned. */
10333 static enum rtx_code
10334 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10336 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10337 rtx op0
= *pop0
, op1
= *pop1
;
10338 enum machine_mode op_mode
= GET_MODE (op0
);
10339 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10341 /* All of the unordered compare instructions only work on registers.
10342 The same is true of the fcomi compare instructions. The XFmode
10343 compare instructions require registers except when comparing
10344 against zero or when converting operand 1 from fixed point to
10348 && (fpcmp_mode
== CCFPUmode
10349 || (op_mode
== XFmode
10350 && ! (standard_80387_constant_p (op0
) == 1
10351 || standard_80387_constant_p (op1
) == 1)
10352 && GET_CODE (op1
) != FLOAT
)
10353 || ix86_use_fcomi_compare (code
)))
10355 op0
= force_reg (op_mode
, op0
);
10356 op1
= force_reg (op_mode
, op1
);
10360 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10361 things around if they appear profitable, otherwise force op0
10362 into a register. */
10364 if (standard_80387_constant_p (op0
) == 0
10366 && ! (standard_80387_constant_p (op1
) == 0
10370 tmp
= op0
, op0
= op1
, op1
= tmp
;
10371 code
= swap_condition (code
);
10375 op0
= force_reg (op_mode
, op0
);
10377 if (CONSTANT_P (op1
))
10379 int tmp
= standard_80387_constant_p (op1
);
10381 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10385 op1
= force_reg (op_mode
, op1
);
10388 op1
= force_reg (op_mode
, op1
);
10392 /* Try to rearrange the comparison to make it cheaper. */
10393 if (ix86_fp_comparison_cost (code
)
10394 > ix86_fp_comparison_cost (swap_condition (code
))
10395 && (REG_P (op1
) || !no_new_pseudos
))
10398 tmp
= op0
, op0
= op1
, op1
= tmp
;
10399 code
= swap_condition (code
);
10401 op0
= force_reg (op_mode
, op0
);
10409 /* Convert comparison codes we use to represent FP comparison to integer
10410 code that will result in proper branch. Return UNKNOWN if no such code
10414 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10443 /* Split comparison code CODE into comparisons we can do using branch
10444 instructions. BYPASS_CODE is comparison code for branch that will
10445 branch around FIRST_CODE and SECOND_CODE. If some of branches
10446 is not required, set value to UNKNOWN.
10447 We never require more than two branches. */
10450 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10451 enum rtx_code
*first_code
,
10452 enum rtx_code
*second_code
)
10454 *first_code
= code
;
10455 *bypass_code
= UNKNOWN
;
10456 *second_code
= UNKNOWN
;
10458 /* The fcomi comparison sets flags as follows:
10468 case GT
: /* GTU - CF=0 & ZF=0 */
10469 case GE
: /* GEU - CF=0 */
10470 case ORDERED
: /* PF=0 */
10471 case UNORDERED
: /* PF=1 */
10472 case UNEQ
: /* EQ - ZF=1 */
10473 case UNLT
: /* LTU - CF=1 */
10474 case UNLE
: /* LEU - CF=1 | ZF=1 */
10475 case LTGT
: /* EQ - ZF=0 */
10477 case LT
: /* LTU - CF=1 - fails on unordered */
10478 *first_code
= UNLT
;
10479 *bypass_code
= UNORDERED
;
10481 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10482 *first_code
= UNLE
;
10483 *bypass_code
= UNORDERED
;
10485 case EQ
: /* EQ - ZF=1 - fails on unordered */
10486 *first_code
= UNEQ
;
10487 *bypass_code
= UNORDERED
;
10489 case NE
: /* NE - ZF=0 - fails on unordered */
10490 *first_code
= LTGT
;
10491 *second_code
= UNORDERED
;
10493 case UNGE
: /* GEU - CF=0 - fails on unordered */
10495 *second_code
= UNORDERED
;
10497 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10499 *second_code
= UNORDERED
;
10502 gcc_unreachable ();
10504 if (!TARGET_IEEE_FP
)
10506 *second_code
= UNKNOWN
;
10507 *bypass_code
= UNKNOWN
;
10511 /* Return cost of comparison done fcom + arithmetics operations on AX.
10512 All following functions do use number of instructions as a cost metrics.
10513 In future this should be tweaked to compute bytes for optimize_size and
10514 take into account performance of various instructions on various CPUs. */
10516 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10518 if (!TARGET_IEEE_FP
)
10520 /* The cost of code output by ix86_expand_fp_compare. */
10544 gcc_unreachable ();
10548 /* Return cost of comparison done using fcomi operation.
10549 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10551 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10553 enum rtx_code bypass_code
, first_code
, second_code
;
10554 /* Return arbitrarily high cost when instruction is not supported - this
10555 prevents gcc from using it. */
10558 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10559 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10562 /* Return cost of comparison done using sahf operation.
10563 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10565 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10567 enum rtx_code bypass_code
, first_code
, second_code
;
10568 /* Return arbitrarily high cost when instruction is not preferred - this
10569 avoids gcc from using it. */
10570 if (!TARGET_USE_SAHF
&& !optimize_size
)
10572 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10573 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10576 /* Compute cost of the comparison done using any method.
10577 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10579 ix86_fp_comparison_cost (enum rtx_code code
)
10581 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10584 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10585 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10587 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10588 if (min
> sahf_cost
)
10590 if (min
> fcomi_cost
)
10595 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10598 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10599 rtx
*second_test
, rtx
*bypass_test
)
10601 enum machine_mode fpcmp_mode
, intcmp_mode
;
10603 int cost
= ix86_fp_comparison_cost (code
);
10604 enum rtx_code bypass_code
, first_code
, second_code
;
10606 fpcmp_mode
= ix86_fp_compare_mode (code
);
10607 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10610 *second_test
= NULL_RTX
;
10612 *bypass_test
= NULL_RTX
;
10614 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10616 /* Do fcomi/sahf based test when profitable. */
10617 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10618 && (second_code
== UNKNOWN
|| second_test
)
10619 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10623 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10624 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10630 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10631 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10633 scratch
= gen_reg_rtx (HImode
);
10634 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10635 emit_insn (gen_x86_sahf_1 (scratch
));
10638 /* The FP codes work out to act like unsigned. */
10639 intcmp_mode
= fpcmp_mode
;
10641 if (bypass_code
!= UNKNOWN
)
10642 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10643 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10645 if (second_code
!= UNKNOWN
)
10646 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10647 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10652 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10653 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10654 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10656 scratch
= gen_reg_rtx (HImode
);
10657 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10659 /* In the unordered case, we have to check C2 for NaN's, which
10660 doesn't happen to work out to anything nice combination-wise.
10661 So do some bit twiddling on the value we've got in AH to come
10662 up with an appropriate set of condition codes. */
10664 intcmp_mode
= CCNOmode
;
10669 if (code
== GT
|| !TARGET_IEEE_FP
)
10671 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10676 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10677 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10678 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10679 intcmp_mode
= CCmode
;
10685 if (code
== LT
&& TARGET_IEEE_FP
)
10687 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10688 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10689 intcmp_mode
= CCmode
;
10694 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10700 if (code
== GE
|| !TARGET_IEEE_FP
)
10702 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10707 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10708 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10715 if (code
== LE
&& TARGET_IEEE_FP
)
10717 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10718 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10719 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10720 intcmp_mode
= CCmode
;
10725 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10731 if (code
== EQ
&& TARGET_IEEE_FP
)
10733 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10734 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10735 intcmp_mode
= CCmode
;
10740 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10747 if (code
== NE
&& TARGET_IEEE_FP
)
10749 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10750 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10756 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10762 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10766 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10771 gcc_unreachable ();
10775 /* Return the test that should be put into the flags user, i.e.
10776 the bcc, scc, or cmov instruction. */
10777 return gen_rtx_fmt_ee (code
, VOIDmode
,
10778 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10783 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10786 op0
= ix86_compare_op0
;
10787 op1
= ix86_compare_op1
;
10790 *second_test
= NULL_RTX
;
10792 *bypass_test
= NULL_RTX
;
10794 if (ix86_compare_emitted
)
10796 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10797 ix86_compare_emitted
= NULL_RTX
;
10799 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10800 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10801 second_test
, bypass_test
);
10803 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10808 /* Return true if the CODE will result in nontrivial jump sequence. */
10810 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10812 enum rtx_code bypass_code
, first_code
, second_code
;
10815 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10816 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10820 ix86_expand_branch (enum rtx_code code
, rtx label
)
10824 /* If we have emitted a compare insn, go straight to simple.
10825 ix86_expand_compare won't emit anything if ix86_compare_emitted
10827 if (ix86_compare_emitted
)
10830 switch (GET_MODE (ix86_compare_op0
))
10836 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10837 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10838 gen_rtx_LABEL_REF (VOIDmode
, label
),
10840 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10849 enum rtx_code bypass_code
, first_code
, second_code
;
10851 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10852 &ix86_compare_op1
);
10854 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10856 /* Check whether we will use the natural sequence with one jump. If
10857 so, we can expand jump early. Otherwise delay expansion by
10858 creating compound insn to not confuse optimizers. */
10859 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10862 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10863 gen_rtx_LABEL_REF (VOIDmode
, label
),
10864 pc_rtx
, NULL_RTX
, NULL_RTX
);
10868 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10869 ix86_compare_op0
, ix86_compare_op1
);
10870 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10871 gen_rtx_LABEL_REF (VOIDmode
, label
),
10873 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10875 use_fcomi
= ix86_use_fcomi_compare (code
);
10876 vec
= rtvec_alloc (3 + !use_fcomi
);
10877 RTVEC_ELT (vec
, 0) = tmp
;
10879 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10881 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10884 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10886 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10895 /* Expand DImode branch into multiple compare+branch. */
10897 rtx lo
[2], hi
[2], label2
;
10898 enum rtx_code code1
, code2
, code3
;
10899 enum machine_mode submode
;
10901 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10903 tmp
= ix86_compare_op0
;
10904 ix86_compare_op0
= ix86_compare_op1
;
10905 ix86_compare_op1
= tmp
;
10906 code
= swap_condition (code
);
10908 if (GET_MODE (ix86_compare_op0
) == DImode
)
10910 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10911 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10916 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10917 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10921 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10922 avoid two branches. This costs one extra insn, so disable when
10923 optimizing for size. */
10925 if ((code
== EQ
|| code
== NE
)
10927 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10932 if (hi
[1] != const0_rtx
)
10933 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10934 NULL_RTX
, 0, OPTAB_WIDEN
);
10937 if (lo
[1] != const0_rtx
)
10938 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10939 NULL_RTX
, 0, OPTAB_WIDEN
);
10941 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10942 NULL_RTX
, 0, OPTAB_WIDEN
);
10944 ix86_compare_op0
= tmp
;
10945 ix86_compare_op1
= const0_rtx
;
10946 ix86_expand_branch (code
, label
);
10950 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10951 op1 is a constant and the low word is zero, then we can just
10952 examine the high word. */
10954 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
10957 case LT
: case LTU
: case GE
: case GEU
:
10958 ix86_compare_op0
= hi
[0];
10959 ix86_compare_op1
= hi
[1];
10960 ix86_expand_branch (code
, label
);
10966 /* Otherwise, we need two or three jumps. */
10968 label2
= gen_label_rtx ();
10971 code2
= swap_condition (code
);
10972 code3
= unsigned_condition (code
);
10976 case LT
: case GT
: case LTU
: case GTU
:
10979 case LE
: code1
= LT
; code2
= GT
; break;
10980 case GE
: code1
= GT
; code2
= LT
; break;
10981 case LEU
: code1
= LTU
; code2
= GTU
; break;
10982 case GEU
: code1
= GTU
; code2
= LTU
; break;
10984 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10985 case NE
: code2
= UNKNOWN
; break;
10988 gcc_unreachable ();
10993 * if (hi(a) < hi(b)) goto true;
10994 * if (hi(a) > hi(b)) goto false;
10995 * if (lo(a) < lo(b)) goto true;
10999 ix86_compare_op0
= hi
[0];
11000 ix86_compare_op1
= hi
[1];
11002 if (code1
!= UNKNOWN
)
11003 ix86_expand_branch (code1
, label
);
11004 if (code2
!= UNKNOWN
)
11005 ix86_expand_branch (code2
, label2
);
11007 ix86_compare_op0
= lo
[0];
11008 ix86_compare_op1
= lo
[1];
11009 ix86_expand_branch (code3
, label
);
11011 if (code2
!= UNKNOWN
)
11012 emit_label (label2
);
11017 gcc_unreachable ();
11021 /* Split branch based on floating point condition. */
11023 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11024 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11026 rtx second
, bypass
;
11027 rtx label
= NULL_RTX
;
11029 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11032 if (target2
!= pc_rtx
)
11035 code
= reverse_condition_maybe_unordered (code
);
11040 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11041 tmp
, &second
, &bypass
);
11043 /* Remove pushed operand from stack. */
11045 ix86_free_from_memory (GET_MODE (pushed
));
11047 if (split_branch_probability
>= 0)
11049 /* Distribute the probabilities across the jumps.
11050 Assume the BYPASS and SECOND to be always test
11052 probability
= split_branch_probability
;
11054 /* Value of 1 is low enough to make no need for probability
11055 to be updated. Later we may run some experiments and see
11056 if unordered values are more frequent in practice. */
11058 bypass_probability
= 1;
11060 second_probability
= 1;
11062 if (bypass
!= NULL_RTX
)
11064 label
= gen_label_rtx ();
11065 i
= emit_jump_insn (gen_rtx_SET
11067 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11069 gen_rtx_LABEL_REF (VOIDmode
,
11072 if (bypass_probability
>= 0)
11074 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11075 GEN_INT (bypass_probability
),
11078 i
= emit_jump_insn (gen_rtx_SET
11080 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11081 condition
, target1
, target2
)));
11082 if (probability
>= 0)
11084 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11085 GEN_INT (probability
),
11087 if (second
!= NULL_RTX
)
11089 i
= emit_jump_insn (gen_rtx_SET
11091 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11093 if (second_probability
>= 0)
11095 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11096 GEN_INT (second_probability
),
11099 if (label
!= NULL_RTX
)
11100 emit_label (label
);
11104 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11106 rtx ret
, tmp
, tmpreg
, equiv
;
11107 rtx second_test
, bypass_test
;
11109 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11110 return 0; /* FAIL */
11112 gcc_assert (GET_MODE (dest
) == QImode
);
11114 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11115 PUT_MODE (ret
, QImode
);
11120 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11121 if (bypass_test
|| second_test
)
11123 rtx test
= second_test
;
11125 rtx tmp2
= gen_reg_rtx (QImode
);
11128 gcc_assert (!second_test
);
11129 test
= bypass_test
;
11131 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11133 PUT_MODE (test
, QImode
);
11134 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11137 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11139 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11142 /* Attach a REG_EQUAL note describing the comparison result. */
11143 if (ix86_compare_op0
&& ix86_compare_op1
)
11145 equiv
= simplify_gen_relational (code
, QImode
,
11146 GET_MODE (ix86_compare_op0
),
11147 ix86_compare_op0
, ix86_compare_op1
);
11148 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11151 return 1; /* DONE */
11154 /* Expand comparison setting or clearing carry flag. Return true when
11155 successful and set pop for the operation. */
11157 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11159 enum machine_mode mode
=
11160 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11162 /* Do not handle DImode compares that go through special path. Also we can't
11163 deal with FP compares yet. This is possible to add. */
11164 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11166 if (FLOAT_MODE_P (mode
))
11168 rtx second_test
= NULL
, bypass_test
= NULL
;
11169 rtx compare_op
, compare_seq
;
11171 /* Shortcut: following common codes never translate into carry flag compares. */
11172 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11173 || code
== ORDERED
|| code
== UNORDERED
)
11176 /* These comparisons require zero flag; swap operands so they won't. */
11177 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11178 && !TARGET_IEEE_FP
)
11183 code
= swap_condition (code
);
11186 /* Try to expand the comparison and verify that we end up with carry flag
11187 based comparison. This is fails to be true only when we decide to expand
11188 comparison using arithmetic that is not too common scenario. */
11190 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11191 &second_test
, &bypass_test
);
11192 compare_seq
= get_insns ();
11195 if (second_test
|| bypass_test
)
11197 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11198 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11199 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11201 code
= GET_CODE (compare_op
);
11202 if (code
!= LTU
&& code
!= GEU
)
11204 emit_insn (compare_seq
);
11208 if (!INTEGRAL_MODE_P (mode
))
11216 /* Convert a==0 into (unsigned)a<1. */
11219 if (op1
!= const0_rtx
)
11222 code
= (code
== EQ
? LTU
: GEU
);
11225 /* Convert a>b into b<a or a>=b-1. */
11228 if (CONST_INT_P (op1
))
11230 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11231 /* Bail out on overflow. We still can swap operands but that
11232 would force loading of the constant into register. */
11233 if (op1
== const0_rtx
11234 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11236 code
= (code
== GTU
? GEU
: LTU
);
11243 code
= (code
== GTU
? LTU
: GEU
);
11247 /* Convert a>=0 into (unsigned)a<0x80000000. */
11250 if (mode
== DImode
|| op1
!= const0_rtx
)
11252 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11253 code
= (code
== LT
? GEU
: LTU
);
11257 if (mode
== DImode
|| op1
!= constm1_rtx
)
11259 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11260 code
= (code
== LE
? GEU
: LTU
);
11266 /* Swapping operands may cause constant to appear as first operand. */
11267 if (!nonimmediate_operand (op0
, VOIDmode
))
11269 if (no_new_pseudos
)
11271 op0
= force_reg (mode
, op0
);
11273 ix86_compare_op0
= op0
;
11274 ix86_compare_op1
= op1
;
11275 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11276 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11281 ix86_expand_int_movcc (rtx operands
[])
11283 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11284 rtx compare_seq
, compare_op
;
11285 rtx second_test
, bypass_test
;
11286 enum machine_mode mode
= GET_MODE (operands
[0]);
11287 bool sign_bit_compare_p
= false;;
11290 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11291 compare_seq
= get_insns ();
11294 compare_code
= GET_CODE (compare_op
);
11296 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11297 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11298 sign_bit_compare_p
= true;
11300 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11301 HImode insns, we'd be swallowed in word prefix ops. */
11303 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11304 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11305 && CONST_INT_P (operands
[2])
11306 && CONST_INT_P (operands
[3]))
11308 rtx out
= operands
[0];
11309 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11310 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11311 HOST_WIDE_INT diff
;
11314 /* Sign bit compares are better done using shifts than we do by using
11316 if (sign_bit_compare_p
11317 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11318 ix86_compare_op1
, &compare_op
))
11320 /* Detect overlap between destination and compare sources. */
11323 if (!sign_bit_compare_p
)
11325 bool fpcmp
= false;
11327 compare_code
= GET_CODE (compare_op
);
11329 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11330 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11333 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11336 /* To simplify rest of code, restrict to the GEU case. */
11337 if (compare_code
== LTU
)
11339 HOST_WIDE_INT tmp
= ct
;
11342 compare_code
= reverse_condition (compare_code
);
11343 code
= reverse_condition (code
);
11348 PUT_CODE (compare_op
,
11349 reverse_condition_maybe_unordered
11350 (GET_CODE (compare_op
)));
11352 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11356 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11357 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11358 tmp
= gen_reg_rtx (mode
);
11360 if (mode
== DImode
)
11361 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11363 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11367 if (code
== GT
|| code
== GE
)
11368 code
= reverse_condition (code
);
11371 HOST_WIDE_INT tmp
= ct
;
11376 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11377 ix86_compare_op1
, VOIDmode
, 0, -1);
11390 tmp
= expand_simple_binop (mode
, PLUS
,
11392 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11403 tmp
= expand_simple_binop (mode
, IOR
,
11405 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11407 else if (diff
== -1 && ct
)
11417 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11419 tmp
= expand_simple_binop (mode
, PLUS
,
11420 copy_rtx (tmp
), GEN_INT (cf
),
11421 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11429 * andl cf - ct, dest
11439 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11442 tmp
= expand_simple_binop (mode
, AND
,
11444 gen_int_mode (cf
- ct
, mode
),
11445 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11447 tmp
= expand_simple_binop (mode
, PLUS
,
11448 copy_rtx (tmp
), GEN_INT (ct
),
11449 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11452 if (!rtx_equal_p (tmp
, out
))
11453 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11455 return 1; /* DONE */
11461 tmp
= ct
, ct
= cf
, cf
= tmp
;
11463 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11465 /* We may be reversing unordered compare to normal compare, that
11466 is not valid in general (we may convert non-trapping condition
11467 to trapping one), however on i386 we currently emit all
11468 comparisons unordered. */
11469 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11470 code
= reverse_condition_maybe_unordered (code
);
11474 compare_code
= reverse_condition (compare_code
);
11475 code
= reverse_condition (code
);
11479 compare_code
= UNKNOWN
;
11480 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11481 && CONST_INT_P (ix86_compare_op1
))
11483 if (ix86_compare_op1
== const0_rtx
11484 && (code
== LT
|| code
== GE
))
11485 compare_code
= code
;
11486 else if (ix86_compare_op1
== constm1_rtx
)
11490 else if (code
== GT
)
11495 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11496 if (compare_code
!= UNKNOWN
11497 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11498 && (cf
== -1 || ct
== -1))
11500 /* If lea code below could be used, only optimize
11501 if it results in a 2 insn sequence. */
11503 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11504 || diff
== 3 || diff
== 5 || diff
== 9)
11505 || (compare_code
== LT
&& ct
== -1)
11506 || (compare_code
== GE
&& cf
== -1))
11509 * notl op1 (if necessary)
11517 code
= reverse_condition (code
);
11520 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11521 ix86_compare_op1
, VOIDmode
, 0, -1);
11523 out
= expand_simple_binop (mode
, IOR
,
11525 out
, 1, OPTAB_DIRECT
);
11526 if (out
!= operands
[0])
11527 emit_move_insn (operands
[0], out
);
11529 return 1; /* DONE */
11534 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11535 || diff
== 3 || diff
== 5 || diff
== 9)
11536 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11538 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11544 * lea cf(dest*(ct-cf)),dest
11548 * This also catches the degenerate setcc-only case.
11554 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11555 ix86_compare_op1
, VOIDmode
, 0, 1);
11558 /* On x86_64 the lea instruction operates on Pmode, so we need
11559 to get arithmetics done in proper mode to match. */
11561 tmp
= copy_rtx (out
);
11565 out1
= copy_rtx (out
);
11566 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11570 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11576 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11579 if (!rtx_equal_p (tmp
, out
))
11582 out
= force_operand (tmp
, copy_rtx (out
));
11584 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11586 if (!rtx_equal_p (out
, operands
[0]))
11587 emit_move_insn (operands
[0], copy_rtx (out
));
11589 return 1; /* DONE */
11593 * General case: Jumpful:
11594 * xorl dest,dest cmpl op1, op2
11595 * cmpl op1, op2 movl ct, dest
11596 * setcc dest jcc 1f
11597 * decl dest movl cf, dest
11598 * andl (cf-ct),dest 1:
11601 * Size 20. Size 14.
11603 * This is reasonably steep, but branch mispredict costs are
11604 * high on modern cpus, so consider failing only if optimizing
11608 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11609 && BRANCH_COST
>= 2)
11615 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11616 /* We may be reversing unordered compare to normal compare,
11617 that is not valid in general (we may convert non-trapping
11618 condition to trapping one), however on i386 we currently
11619 emit all comparisons unordered. */
11620 code
= reverse_condition_maybe_unordered (code
);
11623 code
= reverse_condition (code
);
11624 if (compare_code
!= UNKNOWN
)
11625 compare_code
= reverse_condition (compare_code
);
11629 if (compare_code
!= UNKNOWN
)
11631 /* notl op1 (if needed)
11636 For x < 0 (resp. x <= -1) there will be no notl,
11637 so if possible swap the constants to get rid of the
11639 True/false will be -1/0 while code below (store flag
11640 followed by decrement) is 0/-1, so the constants need
11641 to be exchanged once more. */
11643 if (compare_code
== GE
|| !cf
)
11645 code
= reverse_condition (code
);
11650 HOST_WIDE_INT tmp
= cf
;
11655 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11656 ix86_compare_op1
, VOIDmode
, 0, -1);
11660 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11661 ix86_compare_op1
, VOIDmode
, 0, 1);
11663 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11664 copy_rtx (out
), 1, OPTAB_DIRECT
);
11667 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11668 gen_int_mode (cf
- ct
, mode
),
11669 copy_rtx (out
), 1, OPTAB_DIRECT
);
11671 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11672 copy_rtx (out
), 1, OPTAB_DIRECT
);
11673 if (!rtx_equal_p (out
, operands
[0]))
11674 emit_move_insn (operands
[0], copy_rtx (out
));
11676 return 1; /* DONE */
11680 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11682 /* Try a few things more with specific constants and a variable. */
11685 rtx var
, orig_out
, out
, tmp
;
11687 if (BRANCH_COST
<= 2)
11688 return 0; /* FAIL */
11690 /* If one of the two operands is an interesting constant, load a
11691 constant with the above and mask it in with a logical operation. */
11693 if (CONST_INT_P (operands
[2]))
11696 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11697 operands
[3] = constm1_rtx
, op
= and_optab
;
11698 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11699 operands
[3] = const0_rtx
, op
= ior_optab
;
11701 return 0; /* FAIL */
11703 else if (CONST_INT_P (operands
[3]))
11706 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11707 operands
[2] = constm1_rtx
, op
= and_optab
;
11708 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11709 operands
[2] = const0_rtx
, op
= ior_optab
;
11711 return 0; /* FAIL */
11714 return 0; /* FAIL */
11716 orig_out
= operands
[0];
11717 tmp
= gen_reg_rtx (mode
);
11720 /* Recurse to get the constant loaded. */
11721 if (ix86_expand_int_movcc (operands
) == 0)
11722 return 0; /* FAIL */
11724 /* Mask in the interesting variable. */
11725 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11727 if (!rtx_equal_p (out
, orig_out
))
11728 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11730 return 1; /* DONE */
11734 * For comparison with above,
11744 if (! nonimmediate_operand (operands
[2], mode
))
11745 operands
[2] = force_reg (mode
, operands
[2]);
11746 if (! nonimmediate_operand (operands
[3], mode
))
11747 operands
[3] = force_reg (mode
, operands
[3]);
11749 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11751 rtx tmp
= gen_reg_rtx (mode
);
11752 emit_move_insn (tmp
, operands
[3]);
11755 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11757 rtx tmp
= gen_reg_rtx (mode
);
11758 emit_move_insn (tmp
, operands
[2]);
11762 if (! register_operand (operands
[2], VOIDmode
)
11764 || ! register_operand (operands
[3], VOIDmode
)))
11765 operands
[2] = force_reg (mode
, operands
[2]);
11768 && ! register_operand (operands
[3], VOIDmode
))
11769 operands
[3] = force_reg (mode
, operands
[3]);
11771 emit_insn (compare_seq
);
11772 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11773 gen_rtx_IF_THEN_ELSE (mode
,
11774 compare_op
, operands
[2],
11777 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11778 gen_rtx_IF_THEN_ELSE (mode
,
11780 copy_rtx (operands
[3]),
11781 copy_rtx (operands
[0]))));
11783 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11784 gen_rtx_IF_THEN_ELSE (mode
,
11786 copy_rtx (operands
[2]),
11787 copy_rtx (operands
[0]))));
11789 return 1; /* DONE */
11792 /* Swap, force into registers, or otherwise massage the two operands
11793 to an sse comparison with a mask result. Thus we differ a bit from
11794 ix86_prepare_fp_compare_args which expects to produce a flags result.
11796 The DEST operand exists to help determine whether to commute commutative
11797 operators. The POP0/POP1 operands are updated in place. The new
11798 comparison code is returned, or UNKNOWN if not implementable. */
11800 static enum rtx_code
11801 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11802 rtx
*pop0
, rtx
*pop1
)
11810 /* We have no LTGT as an operator. We could implement it with
11811 NE & ORDERED, but this requires an extra temporary. It's
11812 not clear that it's worth it. */
11819 /* These are supported directly. */
11826 /* For commutative operators, try to canonicalize the destination
11827 operand to be first in the comparison - this helps reload to
11828 avoid extra moves. */
11829 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11837 /* These are not supported directly. Swap the comparison operands
11838 to transform into something that is supported. */
11842 code
= swap_condition (code
);
11846 gcc_unreachable ();
11852 /* Detect conditional moves that exactly match min/max operational
11853 semantics. Note that this is IEEE safe, as long as we don't
11854 interchange the operands.
11856 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11857 and TRUE if the operation is successful and instructions are emitted. */
11860 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
11861 rtx cmp_op1
, rtx if_true
, rtx if_false
)
11863 enum machine_mode mode
;
11869 else if (code
== UNGE
)
11872 if_true
= if_false
;
11878 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
11880 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
11885 mode
= GET_MODE (dest
);
11887 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11888 but MODE may be a vector mode and thus not appropriate. */
11889 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
11891 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
11894 if_true
= force_reg (mode
, if_true
);
11895 v
= gen_rtvec (2, if_true
, if_false
);
11896 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
11900 code
= is_min
? SMIN
: SMAX
;
11901 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
11904 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
11908 /* Expand an sse vector comparison. Return the register with the result. */
11911 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
11912 rtx op_true
, rtx op_false
)
11914 enum machine_mode mode
= GET_MODE (dest
);
11917 cmp_op0
= force_reg (mode
, cmp_op0
);
11918 if (!nonimmediate_operand (cmp_op1
, mode
))
11919 cmp_op1
= force_reg (mode
, cmp_op1
);
11922 || reg_overlap_mentioned_p (dest
, op_true
)
11923 || reg_overlap_mentioned_p (dest
, op_false
))
11924 dest
= gen_reg_rtx (mode
);
11926 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
11927 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11932 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11933 operations. This is used for both scalar and vector conditional moves. */
11936 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
11938 enum machine_mode mode
= GET_MODE (dest
);
11941 if (op_false
== CONST0_RTX (mode
))
11943 op_true
= force_reg (mode
, op_true
);
11944 x
= gen_rtx_AND (mode
, cmp
, op_true
);
11945 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11947 else if (op_true
== CONST0_RTX (mode
))
11949 op_false
= force_reg (mode
, op_false
);
11950 x
= gen_rtx_NOT (mode
, cmp
);
11951 x
= gen_rtx_AND (mode
, x
, op_false
);
11952 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11956 op_true
= force_reg (mode
, op_true
);
11957 op_false
= force_reg (mode
, op_false
);
11959 t2
= gen_reg_rtx (mode
);
11961 t3
= gen_reg_rtx (mode
);
11965 x
= gen_rtx_AND (mode
, op_true
, cmp
);
11966 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
11968 x
= gen_rtx_NOT (mode
, cmp
);
11969 x
= gen_rtx_AND (mode
, x
, op_false
);
11970 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
11972 x
= gen_rtx_IOR (mode
, t3
, t2
);
11973 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11977 /* Expand a floating-point conditional move. Return true if successful. */
11980 ix86_expand_fp_movcc (rtx operands
[])
11982 enum machine_mode mode
= GET_MODE (operands
[0]);
11983 enum rtx_code code
= GET_CODE (operands
[1]);
11984 rtx tmp
, compare_op
, second_test
, bypass_test
;
11986 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11988 enum machine_mode cmode
;
11990 /* Since we've no cmove for sse registers, don't force bad register
11991 allocation just to gain access to it. Deny movcc when the
11992 comparison mode doesn't match the move mode. */
11993 cmode
= GET_MODE (ix86_compare_op0
);
11994 if (cmode
== VOIDmode
)
11995 cmode
= GET_MODE (ix86_compare_op1
);
11999 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12001 &ix86_compare_op1
);
12002 if (code
== UNKNOWN
)
12005 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12006 ix86_compare_op1
, operands
[2],
12010 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12011 ix86_compare_op1
, operands
[2], operands
[3]);
12012 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12016 /* The floating point conditional move instructions don't directly
12017 support conditions resulting from a signed integer comparison. */
12019 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12021 /* The floating point conditional move instructions don't directly
12022 support signed integer comparisons. */
12024 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12026 gcc_assert (!second_test
&& !bypass_test
);
12027 tmp
= gen_reg_rtx (QImode
);
12028 ix86_expand_setcc (code
, tmp
);
12030 ix86_compare_op0
= tmp
;
12031 ix86_compare_op1
= const0_rtx
;
12032 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12034 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12036 tmp
= gen_reg_rtx (mode
);
12037 emit_move_insn (tmp
, operands
[3]);
12040 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12042 tmp
= gen_reg_rtx (mode
);
12043 emit_move_insn (tmp
, operands
[2]);
12047 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12048 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12049 operands
[2], operands
[3])));
12051 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12052 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12053 operands
[3], operands
[0])));
12055 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12056 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12057 operands
[2], operands
[0])));
12062 /* Expand a floating-point vector conditional move; a vcond operation
12063 rather than a movcc operation. */
12066 ix86_expand_fp_vcond (rtx operands
[])
12068 enum rtx_code code
= GET_CODE (operands
[3]);
12071 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12072 &operands
[4], &operands
[5]);
12073 if (code
== UNKNOWN
)
12076 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12077 operands
[5], operands
[1], operands
[2]))
12080 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12081 operands
[1], operands
[2]);
12082 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12086 /* Expand a signed integral vector conditional move. */
12089 ix86_expand_int_vcond (rtx operands
[])
12091 enum machine_mode mode
= GET_MODE (operands
[0]);
12092 enum rtx_code code
= GET_CODE (operands
[3]);
12093 bool negate
= false;
12096 cop0
= operands
[4];
12097 cop1
= operands
[5];
12099 /* Canonicalize the comparison to EQ, GT, GTU. */
12110 code
= reverse_condition (code
);
12116 code
= reverse_condition (code
);
12122 code
= swap_condition (code
);
12123 x
= cop0
, cop0
= cop1
, cop1
= x
;
12127 gcc_unreachable ();
12130 /* Unsigned parallel compare is not supported by the hardware. Play some
12131 tricks to turn this into a signed comparison against 0. */
12134 cop0
= force_reg (mode
, cop0
);
12142 /* Perform a parallel modulo subtraction. */
12143 t1
= gen_reg_rtx (mode
);
12144 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12146 /* Extract the original sign bit of op0. */
12147 mask
= GEN_INT (-0x80000000);
12148 mask
= gen_rtx_CONST_VECTOR (mode
,
12149 gen_rtvec (4, mask
, mask
, mask
, mask
));
12150 mask
= force_reg (mode
, mask
);
12151 t2
= gen_reg_rtx (mode
);
12152 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12154 /* XOR it back into the result of the subtraction. This results
12155 in the sign bit set iff we saw unsigned underflow. */
12156 x
= gen_reg_rtx (mode
);
12157 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12165 /* Perform a parallel unsigned saturating subtraction. */
12166 x
= gen_reg_rtx (mode
);
12167 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12168 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12175 gcc_unreachable ();
12179 cop1
= CONST0_RTX (mode
);
12182 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12183 operands
[1+negate
], operands
[2-negate
]);
12185 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12186 operands
[2-negate
]);
12190 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12191 true if we should do zero extension, else sign extension. HIGH_P is
12192 true if we want the N/2 high elements, else the low elements. */
12195 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12197 enum machine_mode imode
= GET_MODE (operands
[1]);
12198 rtx (*unpack
)(rtx
, rtx
, rtx
);
12205 unpack
= gen_vec_interleave_highv16qi
;
12207 unpack
= gen_vec_interleave_lowv16qi
;
12211 unpack
= gen_vec_interleave_highv8hi
;
12213 unpack
= gen_vec_interleave_lowv8hi
;
12217 unpack
= gen_vec_interleave_highv4si
;
12219 unpack
= gen_vec_interleave_lowv4si
;
12222 gcc_unreachable ();
12225 dest
= gen_lowpart (imode
, operands
[0]);
12228 se
= force_reg (imode
, CONST0_RTX (imode
));
12230 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12231 operands
[1], pc_rtx
, pc_rtx
);
12233 emit_insn (unpack (dest
, operands
[1], se
));
12236 /* Expand conditional increment or decrement using adb/sbb instructions.
12237 The default case using setcc followed by the conditional move can be
12238 done by generic code. */
12240 ix86_expand_int_addcc (rtx operands
[])
12242 enum rtx_code code
= GET_CODE (operands
[1]);
12244 rtx val
= const0_rtx
;
12245 bool fpcmp
= false;
12246 enum machine_mode mode
= GET_MODE (operands
[0]);
12248 if (operands
[3] != const1_rtx
12249 && operands
[3] != constm1_rtx
)
12251 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12252 ix86_compare_op1
, &compare_op
))
12254 code
= GET_CODE (compare_op
);
12256 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12257 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12260 code
= ix86_fp_compare_code_to_integer (code
);
12267 PUT_CODE (compare_op
,
12268 reverse_condition_maybe_unordered
12269 (GET_CODE (compare_op
)));
12271 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12273 PUT_MODE (compare_op
, mode
);
12275 /* Construct either adc or sbb insn. */
12276 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12278 switch (GET_MODE (operands
[0]))
12281 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12284 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12287 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12290 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12293 gcc_unreachable ();
12298 switch (GET_MODE (operands
[0]))
12301 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12304 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12307 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12310 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12313 gcc_unreachable ();
12316 return 1; /* DONE */
12320 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12321 works for floating pointer parameters and nonoffsetable memories.
12322 For pushes, it returns just stack offsets; the values will be saved
12323 in the right order. Maximally three parts are generated. */
12326 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12331 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12333 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12335 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12336 gcc_assert (size
>= 2 && size
<= 3);
12338 /* Optimize constant pool reference to immediates. This is used by fp
12339 moves, that force all constants to memory to allow combining. */
12340 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12342 rtx tmp
= maybe_get_pool_constant (operand
);
12347 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12349 /* The only non-offsetable memories we handle are pushes. */
12350 int ok
= push_operand (operand
, VOIDmode
);
12354 operand
= copy_rtx (operand
);
12355 PUT_MODE (operand
, Pmode
);
12356 parts
[0] = parts
[1] = parts
[2] = operand
;
12360 if (GET_CODE (operand
) == CONST_VECTOR
)
12362 enum machine_mode imode
= int_mode_for_mode (mode
);
12363 /* Caution: if we looked through a constant pool memory above,
12364 the operand may actually have a different mode now. That's
12365 ok, since we want to pun this all the way back to an integer. */
12366 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12367 gcc_assert (operand
!= NULL
);
12373 if (mode
== DImode
)
12374 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12377 if (REG_P (operand
))
12379 gcc_assert (reload_completed
);
12380 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12381 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12383 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12385 else if (offsettable_memref_p (operand
))
12387 operand
= adjust_address (operand
, SImode
, 0);
12388 parts
[0] = operand
;
12389 parts
[1] = adjust_address (operand
, SImode
, 4);
12391 parts
[2] = adjust_address (operand
, SImode
, 8);
12393 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12398 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12402 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12403 parts
[2] = gen_int_mode (l
[2], SImode
);
12406 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12409 gcc_unreachable ();
12411 parts
[1] = gen_int_mode (l
[1], SImode
);
12412 parts
[0] = gen_int_mode (l
[0], SImode
);
12415 gcc_unreachable ();
12420 if (mode
== TImode
)
12421 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12422 if (mode
== XFmode
|| mode
== TFmode
)
12424 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12425 if (REG_P (operand
))
12427 gcc_assert (reload_completed
);
12428 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12429 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12431 else if (offsettable_memref_p (operand
))
12433 operand
= adjust_address (operand
, DImode
, 0);
12434 parts
[0] = operand
;
12435 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12437 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12442 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12443 real_to_target (l
, &r
, mode
);
12445 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12446 if (HOST_BITS_PER_WIDE_INT
>= 64)
12449 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12450 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12453 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12455 if (upper_mode
== SImode
)
12456 parts
[1] = gen_int_mode (l
[2], SImode
);
12457 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12460 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12461 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12464 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12467 gcc_unreachable ();
12474 /* Emit insns to perform a move or push of DI, DF, and XF values.
12475 Return false when normal moves are needed; true when all required
12476 insns have been emitted. Operands 2-4 contain the input values
12477 int the correct order; operands 5-7 contain the output values. */
12480 ix86_split_long_move (rtx operands
[])
12485 int collisions
= 0;
12486 enum machine_mode mode
= GET_MODE (operands
[0]);
12488 /* The DFmode expanders may ask us to move double.
12489 For 64bit target this is single move. By hiding the fact
12490 here we simplify i386.md splitters. */
12491 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12493 /* Optimize constant pool reference to immediates. This is used by
12494 fp moves, that force all constants to memory to allow combining. */
12496 if (MEM_P (operands
[1])
12497 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12498 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12499 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12500 if (push_operand (operands
[0], VOIDmode
))
12502 operands
[0] = copy_rtx (operands
[0]);
12503 PUT_MODE (operands
[0], Pmode
);
12506 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12507 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12508 emit_move_insn (operands
[0], operands
[1]);
12512 /* The only non-offsettable memory we handle is push. */
12513 if (push_operand (operands
[0], VOIDmode
))
12516 gcc_assert (!MEM_P (operands
[0])
12517 || offsettable_memref_p (operands
[0]));
12519 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12520 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12522 /* When emitting push, take care for source operands on the stack. */
12523 if (push
&& MEM_P (operands
[1])
12524 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12527 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12528 XEXP (part
[1][2], 0));
12529 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12530 XEXP (part
[1][1], 0));
12533 /* We need to do copy in the right order in case an address register
12534 of the source overlaps the destination. */
12535 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12537 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12539 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12542 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12545 /* Collision in the middle part can be handled by reordering. */
12546 if (collisions
== 1 && nparts
== 3
12547 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12550 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12551 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12554 /* If there are more collisions, we can't handle it by reordering.
12555 Do an lea to the last part and use only one colliding move. */
12556 else if (collisions
> 1)
12562 base
= part
[0][nparts
- 1];
12564 /* Handle the case when the last part isn't valid for lea.
12565 Happens in 64-bit mode storing the 12-byte XFmode. */
12566 if (GET_MODE (base
) != Pmode
)
12567 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12569 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12570 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12571 part
[1][1] = replace_equiv_address (part
[1][1],
12572 plus_constant (base
, UNITS_PER_WORD
));
12574 part
[1][2] = replace_equiv_address (part
[1][2],
12575 plus_constant (base
, 8));
12585 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12586 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12587 emit_move_insn (part
[0][2], part
[1][2]);
12592 /* In 64bit mode we don't have 32bit push available. In case this is
12593 register, it is OK - we will just use larger counterpart. We also
12594 retype memory - these comes from attempt to avoid REX prefix on
12595 moving of second half of TFmode value. */
12596 if (GET_MODE (part
[1][1]) == SImode
)
12598 switch (GET_CODE (part
[1][1]))
12601 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12605 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12609 gcc_unreachable ();
12612 if (GET_MODE (part
[1][0]) == SImode
)
12613 part
[1][0] = part
[1][1];
12616 emit_move_insn (part
[0][1], part
[1][1]);
12617 emit_move_insn (part
[0][0], part
[1][0]);
12621 /* Choose correct order to not overwrite the source before it is copied. */
12622 if ((REG_P (part
[0][0])
12623 && REG_P (part
[1][1])
12624 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12626 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12628 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12632 operands
[2] = part
[0][2];
12633 operands
[3] = part
[0][1];
12634 operands
[4] = part
[0][0];
12635 operands
[5] = part
[1][2];
12636 operands
[6] = part
[1][1];
12637 operands
[7] = part
[1][0];
12641 operands
[2] = part
[0][1];
12642 operands
[3] = part
[0][0];
12643 operands
[5] = part
[1][1];
12644 operands
[6] = part
[1][0];
12651 operands
[2] = part
[0][0];
12652 operands
[3] = part
[0][1];
12653 operands
[4] = part
[0][2];
12654 operands
[5] = part
[1][0];
12655 operands
[6] = part
[1][1];
12656 operands
[7] = part
[1][2];
12660 operands
[2] = part
[0][0];
12661 operands
[3] = part
[0][1];
12662 operands
[5] = part
[1][0];
12663 operands
[6] = part
[1][1];
12667 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12670 if (CONST_INT_P (operands
[5])
12671 && operands
[5] != const0_rtx
12672 && REG_P (operands
[2]))
12674 if (CONST_INT_P (operands
[6])
12675 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12676 operands
[6] = operands
[2];
12679 && CONST_INT_P (operands
[7])
12680 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12681 operands
[7] = operands
[2];
12685 && CONST_INT_P (operands
[6])
12686 && operands
[6] != const0_rtx
12687 && REG_P (operands
[3])
12688 && CONST_INT_P (operands
[7])
12689 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12690 operands
[7] = operands
[3];
12693 emit_move_insn (operands
[2], operands
[5]);
12694 emit_move_insn (operands
[3], operands
[6]);
12696 emit_move_insn (operands
[4], operands
[7]);
12701 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12702 left shift by a constant, either using a single shift or
12703 a sequence of add instructions. */
12706 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12710 emit_insn ((mode
== DImode
12712 : gen_adddi3
) (operand
, operand
, operand
));
12714 else if (!optimize_size
12715 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12718 for (i
=0; i
<count
; i
++)
12720 emit_insn ((mode
== DImode
12722 : gen_adddi3
) (operand
, operand
, operand
));
12726 emit_insn ((mode
== DImode
12728 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12732 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12734 rtx low
[2], high
[2];
12736 const int single_width
= mode
== DImode
? 32 : 64;
12738 if (CONST_INT_P (operands
[2]))
12740 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12741 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12743 if (count
>= single_width
)
12745 emit_move_insn (high
[0], low
[1]);
12746 emit_move_insn (low
[0], const0_rtx
);
12748 if (count
> single_width
)
12749 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12753 if (!rtx_equal_p (operands
[0], operands
[1]))
12754 emit_move_insn (operands
[0], operands
[1]);
12755 emit_insn ((mode
== DImode
12757 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12758 ix86_expand_ashl_const (low
[0], count
, mode
);
12763 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12765 if (operands
[1] == const1_rtx
)
12767 /* Assuming we've chosen a QImode capable registers, then 1 << N
12768 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12769 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12771 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12773 ix86_expand_clear (low
[0]);
12774 ix86_expand_clear (high
[0]);
12775 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12777 d
= gen_lowpart (QImode
, low
[0]);
12778 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12779 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12780 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12782 d
= gen_lowpart (QImode
, high
[0]);
12783 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12784 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12785 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12788 /* Otherwise, we can get the same results by manually performing
12789 a bit extract operation on bit 5/6, and then performing the two
12790 shifts. The two methods of getting 0/1 into low/high are exactly
12791 the same size. Avoiding the shift in the bit extract case helps
12792 pentium4 a bit; no one else seems to care much either way. */
12797 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12798 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12800 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12801 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12803 emit_insn ((mode
== DImode
12805 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12806 emit_insn ((mode
== DImode
12808 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12809 emit_move_insn (low
[0], high
[0]);
12810 emit_insn ((mode
== DImode
12812 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12815 emit_insn ((mode
== DImode
12817 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12818 emit_insn ((mode
== DImode
12820 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12824 if (operands
[1] == constm1_rtx
)
12826 /* For -1 << N, we can avoid the shld instruction, because we
12827 know that we're shifting 0...31/63 ones into a -1. */
12828 emit_move_insn (low
[0], constm1_rtx
);
12830 emit_move_insn (high
[0], low
[0]);
12832 emit_move_insn (high
[0], constm1_rtx
);
12836 if (!rtx_equal_p (operands
[0], operands
[1]))
12837 emit_move_insn (operands
[0], operands
[1]);
12839 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12840 emit_insn ((mode
== DImode
12842 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12845 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12847 if (TARGET_CMOVE
&& scratch
)
12849 ix86_expand_clear (scratch
);
12850 emit_insn ((mode
== DImode
12851 ? gen_x86_shift_adj_1
12852 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12855 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12859 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12861 rtx low
[2], high
[2];
12863 const int single_width
= mode
== DImode
? 32 : 64;
12865 if (CONST_INT_P (operands
[2]))
12867 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12868 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12870 if (count
== single_width
* 2 - 1)
12872 emit_move_insn (high
[0], high
[1]);
12873 emit_insn ((mode
== DImode
12875 : gen_ashrdi3
) (high
[0], high
[0],
12876 GEN_INT (single_width
- 1)));
12877 emit_move_insn (low
[0], high
[0]);
12880 else if (count
>= single_width
)
12882 emit_move_insn (low
[0], high
[1]);
12883 emit_move_insn (high
[0], low
[0]);
12884 emit_insn ((mode
== DImode
12886 : gen_ashrdi3
) (high
[0], high
[0],
12887 GEN_INT (single_width
- 1)));
12888 if (count
> single_width
)
12889 emit_insn ((mode
== DImode
12891 : gen_ashrdi3
) (low
[0], low
[0],
12892 GEN_INT (count
- single_width
)));
12896 if (!rtx_equal_p (operands
[0], operands
[1]))
12897 emit_move_insn (operands
[0], operands
[1]);
12898 emit_insn ((mode
== DImode
12900 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12901 emit_insn ((mode
== DImode
12903 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12908 if (!rtx_equal_p (operands
[0], operands
[1]))
12909 emit_move_insn (operands
[0], operands
[1]);
12911 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12913 emit_insn ((mode
== DImode
12915 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12916 emit_insn ((mode
== DImode
12918 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
12920 if (TARGET_CMOVE
&& scratch
)
12922 emit_move_insn (scratch
, high
[0]);
12923 emit_insn ((mode
== DImode
12925 : gen_ashrdi3
) (scratch
, scratch
,
12926 GEN_INT (single_width
- 1)));
12927 emit_insn ((mode
== DImode
12928 ? gen_x86_shift_adj_1
12929 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12933 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12938 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12940 rtx low
[2], high
[2];
12942 const int single_width
= mode
== DImode
? 32 : 64;
12944 if (CONST_INT_P (operands
[2]))
12946 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12947 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12949 if (count
>= single_width
)
12951 emit_move_insn (low
[0], high
[1]);
12952 ix86_expand_clear (high
[0]);
12954 if (count
> single_width
)
12955 emit_insn ((mode
== DImode
12957 : gen_lshrdi3
) (low
[0], low
[0],
12958 GEN_INT (count
- single_width
)));
12962 if (!rtx_equal_p (operands
[0], operands
[1]))
12963 emit_move_insn (operands
[0], operands
[1]);
12964 emit_insn ((mode
== DImode
12966 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12967 emit_insn ((mode
== DImode
12969 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12974 if (!rtx_equal_p (operands
[0], operands
[1]))
12975 emit_move_insn (operands
[0], operands
[1]);
12977 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12979 emit_insn ((mode
== DImode
12981 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12982 emit_insn ((mode
== DImode
12984 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12986 /* Heh. By reversing the arguments, we can reuse this pattern. */
12987 if (TARGET_CMOVE
&& scratch
)
12989 ix86_expand_clear (scratch
);
12990 emit_insn ((mode
== DImode
12991 ? gen_x86_shift_adj_1
12992 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12996 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13000 /* Predict just emitted jump instruction to be taken with probability PROB. */
13002 predict_jump (int prob
)
13004 rtx insn
= get_last_insn ();
13005 gcc_assert (JUMP_P (insn
));
13007 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13012 /* Helper function for the string operations below. Dest VARIABLE whether
13013 it is aligned to VALUE bytes. If true, jump to the label. */
13015 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13017 rtx label
= gen_label_rtx ();
13018 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13019 if (GET_MODE (variable
) == DImode
)
13020 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13022 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13023 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13026 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13028 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13032 /* Adjust COUNTER by the VALUE. */
13034 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13036 if (GET_MODE (countreg
) == DImode
)
13037 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13039 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13042 /* Zero extend possibly SImode EXP to Pmode register. */
13044 ix86_zero_extend_to_Pmode (rtx exp
)
13047 if (GET_MODE (exp
) == VOIDmode
)
13048 return force_reg (Pmode
, exp
);
13049 if (GET_MODE (exp
) == Pmode
)
13050 return copy_to_mode_reg (Pmode
, exp
);
13051 r
= gen_reg_rtx (Pmode
);
13052 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13056 /* Divide COUNTREG by SCALE. */
13058 scale_counter (rtx countreg
, int scale
)
13061 rtx piece_size_mask
;
13065 if (CONST_INT_P (countreg
))
13066 return GEN_INT (INTVAL (countreg
) / scale
);
13067 gcc_assert (REG_P (countreg
));
13069 piece_size_mask
= GEN_INT (scale
- 1);
13070 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13071 GEN_INT (exact_log2 (scale
)),
13072 NULL
, 1, OPTAB_DIRECT
);
13076 /* When SRCPTR is non-NULL, output simple loop to move memory
13077 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13078 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13079 equivalent loop to set memory by VALUE (supposed to be in MODE).
13081 The size is rounded down to whole number of chunk size moved at once.
13082 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13086 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13087 rtx destptr
, rtx srcptr
, rtx value
,
13088 rtx count
, enum machine_mode mode
, int unroll
,
13091 rtx out_label
, top_label
, iter
, tmp
;
13092 enum machine_mode iter_mode
;
13093 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13094 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13100 iter_mode
= GET_MODE (count
);
13101 if (iter_mode
== VOIDmode
)
13102 iter_mode
= word_mode
;
13104 top_label
= gen_label_rtx ();
13105 out_label
= gen_label_rtx ();
13106 iter
= gen_reg_rtx (iter_mode
);
13108 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13109 NULL
, 1, OPTAB_DIRECT
);
13110 /* Those two should combine. */
13111 if (piece_size
== const1_rtx
)
13113 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13115 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13117 emit_move_insn (iter
, const0_rtx
);
13119 emit_label (top_label
);
13121 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13122 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13123 destmem
= change_address (destmem
, mode
, x_addr
);
13127 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13128 srcmem
= change_address (srcmem
, mode
, y_addr
);
13130 /* When unrolling for chips that reorder memory reads and writes,
13131 we can save registers by using single temporary.
13132 Also using 4 temporaries is overkill in 32bit mode. */
13133 if (!TARGET_64BIT
&& 0)
13135 for (i
= 0; i
< unroll
; i
++)
13140 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13142 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13144 emit_move_insn (destmem
, srcmem
);
13150 gcc_assert (unroll
<= 4);
13151 for (i
= 0; i
< unroll
; i
++)
13153 tmpreg
[i
] = gen_reg_rtx (mode
);
13157 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13159 emit_move_insn (tmpreg
[i
], srcmem
);
13161 for (i
= 0; i
< unroll
; i
++)
13166 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13168 emit_move_insn (destmem
, tmpreg
[i
]);
13173 for (i
= 0; i
< unroll
; i
++)
13177 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13178 emit_move_insn (destmem
, value
);
13181 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13182 true, OPTAB_LIB_WIDEN
);
13184 emit_move_insn (iter
, tmp
);
13186 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13188 if (expected_size
!= -1)
13190 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13191 if (expected_size
== 0)
13193 else if (expected_size
> REG_BR_PROB_BASE
)
13194 predict_jump (REG_BR_PROB_BASE
- 1);
13196 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13199 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13200 iter
= ix86_zero_extend_to_Pmode (iter
);
13201 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13202 true, OPTAB_LIB_WIDEN
);
13203 if (tmp
!= destptr
)
13204 emit_move_insn (destptr
, tmp
);
13207 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13208 true, OPTAB_LIB_WIDEN
);
13210 emit_move_insn (srcptr
, tmp
);
13212 emit_label (out_label
);
13215 /* Output "rep; mov" instruction.
13216 Arguments have same meaning as for previous function */
13218 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13219 rtx destptr
, rtx srcptr
,
13221 enum machine_mode mode
)
13227 /* If the size is known, it is shorter to use rep movs. */
13228 if (mode
== QImode
&& CONST_INT_P (count
)
13229 && !(INTVAL (count
) & 3))
13232 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13233 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13234 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13235 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13236 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13237 if (mode
!= QImode
)
13239 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13240 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13241 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13242 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13243 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13244 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13248 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13249 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13251 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13255 /* Output "rep; stos" instruction.
13256 Arguments have same meaning as for previous function */
13258 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13260 enum machine_mode mode
)
13265 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13266 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13267 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13268 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13269 if (mode
!= QImode
)
13271 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13272 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13273 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13276 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13277 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13281 emit_strmov (rtx destmem
, rtx srcmem
,
13282 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13284 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13285 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13286 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13289 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13291 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13292 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13295 if (CONST_INT_P (count
))
13297 HOST_WIDE_INT countval
= INTVAL (count
);
13300 if ((countval
& 0x16) && max_size
> 16)
13304 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13305 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13308 gcc_unreachable ();
13311 if ((countval
& 0x08) && max_size
> 8)
13314 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13317 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13318 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 4);
13322 if ((countval
& 0x04) && max_size
> 4)
13324 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13327 if ((countval
& 0x02) && max_size
> 2)
13329 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13332 if ((countval
& 0x01) && max_size
> 1)
13334 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13341 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13342 count
, 1, OPTAB_DIRECT
);
13343 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13344 count
, QImode
, 1, 4);
13348 /* When there are stringops, we can cheaply increase dest and src pointers.
13349 Otherwise we save code size by maintaining offset (zero is readily
13350 available from preceding rep operation) and using x86 addressing modes.
13352 if (TARGET_SINGLE_STRINGOP
)
13356 rtx label
= ix86_expand_aligntest (count
, 4, true);
13357 src
= change_address (srcmem
, SImode
, srcptr
);
13358 dest
= change_address (destmem
, SImode
, destptr
);
13359 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13360 emit_label (label
);
13361 LABEL_NUSES (label
) = 1;
13365 rtx label
= ix86_expand_aligntest (count
, 2, true);
13366 src
= change_address (srcmem
, HImode
, srcptr
);
13367 dest
= change_address (destmem
, HImode
, destptr
);
13368 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13369 emit_label (label
);
13370 LABEL_NUSES (label
) = 1;
13374 rtx label
= ix86_expand_aligntest (count
, 1, true);
13375 src
= change_address (srcmem
, QImode
, srcptr
);
13376 dest
= change_address (destmem
, QImode
, destptr
);
13377 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13378 emit_label (label
);
13379 LABEL_NUSES (label
) = 1;
13384 rtx offset
= force_reg (Pmode
, const0_rtx
);
13389 rtx label
= ix86_expand_aligntest (count
, 4, true);
13390 src
= change_address (srcmem
, SImode
, srcptr
);
13391 dest
= change_address (destmem
, SImode
, destptr
);
13392 emit_move_insn (dest
, src
);
13393 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13394 true, OPTAB_LIB_WIDEN
);
13396 emit_move_insn (offset
, tmp
);
13397 emit_label (label
);
13398 LABEL_NUSES (label
) = 1;
13402 rtx label
= ix86_expand_aligntest (count
, 2, true);
13403 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13404 src
= change_address (srcmem
, HImode
, tmp
);
13405 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13406 dest
= change_address (destmem
, HImode
, tmp
);
13407 emit_move_insn (dest
, src
);
13408 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13409 true, OPTAB_LIB_WIDEN
);
13411 emit_move_insn (offset
, tmp
);
13412 emit_label (label
);
13413 LABEL_NUSES (label
) = 1;
13417 rtx label
= ix86_expand_aligntest (count
, 1, true);
13418 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13419 src
= change_address (srcmem
, QImode
, tmp
);
13420 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13421 dest
= change_address (destmem
, QImode
, tmp
);
13422 emit_move_insn (dest
, src
);
13423 emit_label (label
);
13424 LABEL_NUSES (label
) = 1;
13429 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13431 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13432 rtx count
, int max_size
)
13435 expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13436 count
, 1, OPTAB_DIRECT
);
13437 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13438 gen_lowpart (QImode
, value
), count
, QImode
,
13442 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13444 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13448 if (CONST_INT_P (count
))
13450 HOST_WIDE_INT countval
= INTVAL (count
);
13453 if ((countval
& 0x16) && max_size
> 16)
13457 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13458 emit_insn (gen_strset (destptr
, dest
, value
));
13459 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13460 emit_insn (gen_strset (destptr
, dest
, value
));
13463 gcc_unreachable ();
13466 if ((countval
& 0x08) && max_size
> 8)
13470 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13471 emit_insn (gen_strset (destptr
, dest
, value
));
13475 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13476 emit_insn (gen_strset (destptr
, dest
, value
));
13477 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13478 emit_insn (gen_strset (destptr
, dest
, value
));
13482 if ((countval
& 0x04) && max_size
> 4)
13484 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13485 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13488 if ((countval
& 0x02) && max_size
> 2)
13490 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13491 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13494 if ((countval
& 0x01) && max_size
> 1)
13496 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13497 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13504 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13509 rtx label
= ix86_expand_aligntest (count
, 16, true);
13512 dest
= change_address (destmem
, DImode
, destptr
);
13513 emit_insn (gen_strset (destptr
, dest
, value
));
13514 emit_insn (gen_strset (destptr
, dest
, value
));
13518 dest
= change_address (destmem
, SImode
, destptr
);
13519 emit_insn (gen_strset (destptr
, dest
, value
));
13520 emit_insn (gen_strset (destptr
, dest
, value
));
13521 emit_insn (gen_strset (destptr
, dest
, value
));
13522 emit_insn (gen_strset (destptr
, dest
, value
));
13524 emit_label (label
);
13525 LABEL_NUSES (label
) = 1;
13529 rtx label
= ix86_expand_aligntest (count
, 8, true);
13532 dest
= change_address (destmem
, DImode
, destptr
);
13533 emit_insn (gen_strset (destptr
, dest
, value
));
13537 dest
= change_address (destmem
, SImode
, destptr
);
13538 emit_insn (gen_strset (destptr
, dest
, value
));
13539 emit_insn (gen_strset (destptr
, dest
, value
));
13541 emit_label (label
);
13542 LABEL_NUSES (label
) = 1;
13546 rtx label
= ix86_expand_aligntest (count
, 4, true);
13547 dest
= change_address (destmem
, SImode
, destptr
);
13548 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13549 emit_label (label
);
13550 LABEL_NUSES (label
) = 1;
13554 rtx label
= ix86_expand_aligntest (count
, 2, true);
13555 dest
= change_address (destmem
, HImode
, destptr
);
13556 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13557 emit_label (label
);
13558 LABEL_NUSES (label
) = 1;
13562 rtx label
= ix86_expand_aligntest (count
, 1, true);
13563 dest
= change_address (destmem
, QImode
, destptr
);
13564 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13565 emit_label (label
);
13566 LABEL_NUSES (label
) = 1;
13570 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13571 DESIRED_ALIGNMENT. */
13573 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13574 rtx destptr
, rtx srcptr
, rtx count
,
13575 int align
, int desired_alignment
)
13577 if (align
<= 1 && desired_alignment
> 1)
13579 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13580 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13581 destmem
= change_address (destmem
, QImode
, destptr
);
13582 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13583 ix86_adjust_counter (count
, 1);
13584 emit_label (label
);
13585 LABEL_NUSES (label
) = 1;
13587 if (align
<= 2 && desired_alignment
> 2)
13589 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13590 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13591 destmem
= change_address (destmem
, HImode
, destptr
);
13592 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13593 ix86_adjust_counter (count
, 2);
13594 emit_label (label
);
13595 LABEL_NUSES (label
) = 1;
13597 if (align
<= 4 && desired_alignment
> 4)
13599 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13600 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13601 destmem
= change_address (destmem
, SImode
, destptr
);
13602 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13603 ix86_adjust_counter (count
, 4);
13604 emit_label (label
);
13605 LABEL_NUSES (label
) = 1;
13607 gcc_assert (desired_alignment
<= 8);
13610 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13611 DESIRED_ALIGNMENT. */
13613 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13614 int align
, int desired_alignment
)
13616 if (align
<= 1 && desired_alignment
> 1)
13618 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13619 destmem
= change_address (destmem
, QImode
, destptr
);
13620 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13621 ix86_adjust_counter (count
, 1);
13622 emit_label (label
);
13623 LABEL_NUSES (label
) = 1;
13625 if (align
<= 2 && desired_alignment
> 2)
13627 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13628 destmem
= change_address (destmem
, HImode
, destptr
);
13629 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13630 ix86_adjust_counter (count
, 2);
13631 emit_label (label
);
13632 LABEL_NUSES (label
) = 1;
13634 if (align
<= 4 && desired_alignment
> 4)
13636 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13637 destmem
= change_address (destmem
, SImode
, destptr
);
13638 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13639 ix86_adjust_counter (count
, 4);
13640 emit_label (label
);
13641 LABEL_NUSES (label
) = 1;
13643 gcc_assert (desired_alignment
<= 8);
13646 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13647 static enum stringop_alg
13648 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
13649 int *dynamic_check
)
13651 const struct stringop_algs
* algs
;
13653 *dynamic_check
= -1;
13655 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
13657 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
13658 if (stringop_alg
!= no_stringop
)
13659 return stringop_alg
;
13660 /* rep; movq or rep; movl is the smallest variant. */
13661 else if (optimize_size
)
13663 if (!count
|| (count
& 3))
13664 return rep_prefix_1_byte
;
13666 return rep_prefix_4_byte
;
13668 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13670 else if (expected_size
!= -1 && expected_size
< 4)
13671 return loop_1_byte
;
13672 else if (expected_size
!= -1)
13675 enum stringop_alg alg
= libcall
;
13676 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13678 gcc_assert (algs
->size
[i
].max
);
13679 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
13681 if (algs
->size
[i
].alg
!= libcall
)
13682 alg
= algs
->size
[i
].alg
;
13683 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13684 last non-libcall inline algorithm. */
13685 if (TARGET_INLINE_ALL_STRINGOPS
)
13687 /* When the current size is best to be copied by a libcall,
13688 but we are still forced to inline, run the heuristic bellow
13689 that will pick code for medium sized blocks. */
13690 if (alg
!= libcall
)
13695 return algs
->size
[i
].alg
;
13698 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
13700 /* When asked to inline the call anyway, try to pick meaningful choice.
13701 We look for maximal size of block that is faster to copy by hand and
13702 take blocks of at most of that size guessing that average size will
13703 be roughly half of the block.
13705 If this turns out to be bad, we might simply specify the preferred
13706 choice in ix86_costs. */
13707 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13708 && algs
->unknown_size
== libcall
)
13711 enum stringop_alg alg
;
13714 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13715 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
13716 max
= algs
->size
[i
].max
;
13719 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
13720 gcc_assert (*dynamic_check
== -1);
13721 gcc_assert (alg
!= libcall
);
13722 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13723 *dynamic_check
= max
;
13726 return algs
->unknown_size
;
13729 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13730 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13732 decide_alignment (int align
,
13733 enum stringop_alg alg
,
13736 int desired_align
= 0;
13740 gcc_unreachable ();
13742 case unrolled_loop
:
13743 desired_align
= GET_MODE_SIZE (Pmode
);
13745 case rep_prefix_8_byte
:
13748 case rep_prefix_4_byte
:
13749 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13750 copying whole cacheline at once. */
13751 if (TARGET_PENTIUMPRO
)
13756 case rep_prefix_1_byte
:
13757 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13758 copying whole cacheline at once. */
13759 if (TARGET_PENTIUMPRO
)
13773 if (desired_align
< align
)
13774 desired_align
= align
;
13775 if (expected_size
!= -1 && expected_size
< 4)
13776 desired_align
= align
;
13777 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
13790 /* Expand string move (memcpy) operation. Use i386 string operations when
13791 profitable. expand_clrmem contains similar code. The code depends upon
13792 architecture, block size and alignment, but always has the same
13795 1) Prologue guard: Conditional that jumps up to epilogues for small
13796 blocks that can be handled by epilogue alone. This is faster but
13797 also needed for correctness, since prologue assume the block is larger
13798 than the desired alignment.
13800 Optional dynamic check for size and libcall for large
13801 blocks is emitted here too, with -minline-stringops-dynamically.
13803 2) Prologue: copy first few bytes in order to get destination aligned
13804 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
13805 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
13806 We emit either a jump tree on power of two sized blocks, or a byte loop.
13808 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
13809 with specified algorithm.
13811 4) Epilogue: code copying tail of the block that is too small to be
13812 handled by main body (or up to size guarded by prologue guard). */
13815 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
13816 rtx expected_align_exp
, rtx expected_size_exp
)
13822 rtx jump_around_label
= NULL
;
13823 HOST_WIDE_INT align
= 1;
13824 unsigned HOST_WIDE_INT count
= 0;
13825 HOST_WIDE_INT expected_size
= -1;
13826 int size_needed
= 0, epilogue_size_needed
;
13827 int desired_align
= 0;
13828 enum stringop_alg alg
;
13831 if (CONST_INT_P (align_exp
))
13832 align
= INTVAL (align_exp
);
13833 /* i386 can do misaligned access on reasonably increased cost. */
13834 if (CONST_INT_P (expected_align_exp
)
13835 && INTVAL (expected_align_exp
) > align
)
13836 align
= INTVAL (expected_align_exp
);
13837 if (CONST_INT_P (count_exp
))
13838 count
= expected_size
= INTVAL (count_exp
);
13839 if (CONST_INT_P (expected_size_exp
) && count
== 0)
13840 expected_size
= INTVAL (expected_size_exp
);
13842 /* Step 0: Decide on preferred algorithm, desired alignment and
13843 size of chunks to be copied by main loop. */
13845 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
13846 desired_align
= decide_alignment (align
, alg
, expected_size
);
13848 if (!TARGET_ALIGN_STRINGOPS
)
13849 align
= desired_align
;
13851 if (alg
== libcall
)
13853 gcc_assert (alg
!= no_stringop
);
13855 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
13856 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13857 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
13862 gcc_unreachable ();
13864 size_needed
= GET_MODE_SIZE (Pmode
);
13866 case unrolled_loop
:
13867 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
13869 case rep_prefix_8_byte
:
13872 case rep_prefix_4_byte
:
13875 case rep_prefix_1_byte
:
13881 epilogue_size_needed
= size_needed
;
13883 /* Step 1: Prologue guard. */
13885 /* Alignment code needs count to be in register. */
13886 if (CONST_INT_P (count_exp
) && desired_align
> align
)
13888 enum machine_mode mode
= SImode
;
13889 if (TARGET_64BIT
&& (count
& ~0xffffffff))
13891 count_exp
= force_reg (mode
, count_exp
);
13893 gcc_assert (desired_align
>= 1 && align
>= 1);
13895 /* Ensure that alignment prologue won't copy past end of block. */
13896 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
13899 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
13901 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
13902 Make sure it is power of 2. */
13903 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
13905 label
= gen_label_rtx ();
13906 emit_cmp_and_jump_insns (count_exp
,
13907 GEN_INT (epilogue_size_needed
),
13908 LTU
, 0, GET_MODE (count_exp
), 1, label
);
13909 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
13910 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
13912 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
13914 /* Emit code to decide on runtime whether library call or inline should be
13916 if (dynamic_check
!= -1)
13918 rtx hot_label
= gen_label_rtx ();
13919 jump_around_label
= gen_label_rtx ();
13920 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
13921 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
13922 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13923 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
13924 emit_jump (jump_around_label
);
13925 emit_label (hot_label
);
13928 /* Step 2: Alignment prologue. */
13930 if (desired_align
> align
)
13932 /* Except for the first move in epilogue, we no longer know
13933 constant offset in aliasing info. It don't seems to worth
13934 the pain to maintain it for the first move, so throw away
13936 src
= change_address (src
, BLKmode
, srcreg
);
13937 dst
= change_address (dst
, BLKmode
, destreg
);
13938 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
13941 if (label
&& size_needed
== 1)
13943 emit_label (label
);
13944 LABEL_NUSES (label
) = 1;
13948 /* Step 3: Main loop. */
13954 gcc_unreachable ();
13956 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13957 count_exp
, QImode
, 1, expected_size
);
13960 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13961 count_exp
, Pmode
, 1, expected_size
);
13963 case unrolled_loop
:
13964 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
13965 registers for 4 temporaries anyway. */
13966 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
13967 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
13970 case rep_prefix_8_byte
:
13971 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13974 case rep_prefix_4_byte
:
13975 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13978 case rep_prefix_1_byte
:
13979 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
13983 /* Adjust properly the offset of src and dest memory for aliasing. */
13984 if (CONST_INT_P (count_exp
))
13986 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
13987 (count
/ size_needed
) * size_needed
);
13988 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
13989 (count
/ size_needed
) * size_needed
);
13993 src
= change_address (src
, BLKmode
, srcreg
);
13994 dst
= change_address (dst
, BLKmode
, destreg
);
13997 /* Step 4: Epilogue to copy the remaining bytes. */
14001 /* When the main loop is done, COUNT_EXP might hold original count,
14002 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14003 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14004 bytes. Compensate if needed. */
14006 if (size_needed
< epilogue_size_needed
)
14009 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14010 GEN_INT (size_needed
- 1), count_exp
, 1,
14012 if (tmp
!= count_exp
)
14013 emit_move_insn (count_exp
, tmp
);
14015 emit_label (label
);
14016 LABEL_NUSES (label
) = 1;
14019 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14020 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14021 epilogue_size_needed
);
14022 if (jump_around_label
)
14023 emit_label (jump_around_label
);
14027 /* Helper function for memcpy. For QImode value 0xXY produce
14028 0xXYXYXYXY of wide specified by MODE. This is essentially
14029 a * 0x10101010, but we can do slightly better than
14030 synth_mult by unwinding the sequence by hand on CPUs with
14033 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14035 enum machine_mode valmode
= GET_MODE (val
);
14037 int nops
= mode
== DImode
? 3 : 2;
14039 gcc_assert (mode
== SImode
|| mode
== DImode
);
14040 if (val
== const0_rtx
)
14041 return copy_to_mode_reg (mode
, const0_rtx
);
14042 if (CONST_INT_P (val
))
14044 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14048 if (mode
== DImode
)
14049 v
|= (v
<< 16) << 16;
14050 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14053 if (valmode
== VOIDmode
)
14055 if (valmode
!= QImode
)
14056 val
= gen_lowpart (QImode
, val
);
14057 if (mode
== QImode
)
14059 if (!TARGET_PARTIAL_REG_STALL
)
14061 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14062 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14063 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14064 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14066 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14067 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14068 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14073 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14075 if (!TARGET_PARTIAL_REG_STALL
)
14076 if (mode
== SImode
)
14077 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14079 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14082 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14083 NULL
, 1, OPTAB_DIRECT
);
14085 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14087 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14088 NULL
, 1, OPTAB_DIRECT
);
14089 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14090 if (mode
== SImode
)
14092 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14093 NULL
, 1, OPTAB_DIRECT
);
14094 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14099 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14100 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14101 alignment from ALIGN to DESIRED_ALIGN. */
14103 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14108 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14109 promoted_val
= promote_duplicated_reg (DImode
, val
);
14110 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14111 promoted_val
= promote_duplicated_reg (SImode
, val
);
14112 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14113 promoted_val
= promote_duplicated_reg (HImode
, val
);
14115 promoted_val
= val
;
14117 return promoted_val
;
14120 /* Expand string clear operation (bzero). Use i386 string operations when
14121 profitable. See expand_movmem comment for explanation of individual
14122 steps performed. */
14124 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14125 rtx expected_align_exp
, rtx expected_size_exp
)
14130 rtx jump_around_label
= NULL
;
14131 HOST_WIDE_INT align
= 1;
14132 unsigned HOST_WIDE_INT count
= 0;
14133 HOST_WIDE_INT expected_size
= -1;
14134 int size_needed
= 0, epilogue_size_needed
;
14135 int desired_align
= 0;
14136 enum stringop_alg alg
;
14137 rtx promoted_val
= NULL
;
14138 bool force_loopy_epilogue
= false;
14141 if (CONST_INT_P (align_exp
))
14142 align
= INTVAL (align_exp
);
14143 /* i386 can do misaligned access on reasonably increased cost. */
14144 if (CONST_INT_P (expected_align_exp
)
14145 && INTVAL (expected_align_exp
) > align
)
14146 align
= INTVAL (expected_align_exp
);
14147 if (CONST_INT_P (count_exp
))
14148 count
= expected_size
= INTVAL (count_exp
);
14149 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14150 expected_size
= INTVAL (expected_size_exp
);
14152 /* Step 0: Decide on preferred algorithm, desired alignment and
14153 size of chunks to be copied by main loop. */
14155 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14156 desired_align
= decide_alignment (align
, alg
, expected_size
);
14158 if (!TARGET_ALIGN_STRINGOPS
)
14159 align
= desired_align
;
14161 if (alg
== libcall
)
14163 gcc_assert (alg
!= no_stringop
);
14165 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14166 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14171 gcc_unreachable ();
14173 size_needed
= GET_MODE_SIZE (Pmode
);
14175 case unrolled_loop
:
14176 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14178 case rep_prefix_8_byte
:
14181 case rep_prefix_4_byte
:
14184 case rep_prefix_1_byte
:
14189 epilogue_size_needed
= size_needed
;
14191 /* Step 1: Prologue guard. */
14193 /* Alignment code needs count to be in register. */
14194 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14196 enum machine_mode mode
= SImode
;
14197 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14199 count_exp
= force_reg (mode
, count_exp
);
14201 /* Do the cheap promotion to allow better CSE across the
14202 main loop and epilogue (ie one load of the big constant in the
14203 front of all code. */
14204 if (CONST_INT_P (val_exp
))
14205 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14206 desired_align
, align
);
14207 /* Ensure that alignment prologue won't copy past end of block. */
14208 if ((size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14211 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14213 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14214 Make sure it is power of 2. */
14215 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14217 /* To improve performance of small blocks, we jump around the VAL
14218 promoting mode. This mean that if the promoted VAL is not constant,
14219 we might not use it in the epilogue and have to use byte
14221 if (epilogue_size_needed
> 2 && !promoted_val
)
14222 force_loopy_epilogue
= true;
14223 label
= gen_label_rtx ();
14224 emit_cmp_and_jump_insns (count_exp
,
14225 GEN_INT (epilogue_size_needed
),
14226 LTU
, 0, GET_MODE (count_exp
), 1, label
);
14227 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14228 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14230 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14232 if (dynamic_check
!= -1)
14234 rtx hot_label
= gen_label_rtx ();
14235 jump_around_label
= gen_label_rtx ();
14236 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14237 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14238 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14239 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14240 emit_jump (jump_around_label
);
14241 emit_label (hot_label
);
14244 /* Step 2: Alignment prologue. */
14246 /* Do the expensive promotion once we branched off the small blocks. */
14248 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14249 desired_align
, align
);
14250 gcc_assert (desired_align
>= 1 && align
>= 1);
14252 if (desired_align
> align
)
14254 /* Except for the first move in epilogue, we no longer know
14255 constant offset in aliasing info. It don't seems to worth
14256 the pain to maintain it for the first move, so throw away
14258 dst
= change_address (dst
, BLKmode
, destreg
);
14259 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14262 if (label
&& size_needed
== 1)
14264 emit_label (label
);
14265 LABEL_NUSES (label
) = 1;
14269 /* Step 3: Main loop. */
14275 gcc_unreachable ();
14277 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14278 count_exp
, QImode
, 1, expected_size
);
14281 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14282 count_exp
, Pmode
, 1, expected_size
);
14284 case unrolled_loop
:
14285 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14286 count_exp
, Pmode
, 4, expected_size
);
14288 case rep_prefix_8_byte
:
14289 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14292 case rep_prefix_4_byte
:
14293 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14296 case rep_prefix_1_byte
:
14297 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14301 /* Adjust properly the offset of src and dest memory for aliasing. */
14302 if (CONST_INT_P (count_exp
))
14303 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14304 (count
/ size_needed
) * size_needed
);
14306 dst
= change_address (dst
, BLKmode
, destreg
);
14308 /* Step 4: Epilogue to copy the remaining bytes. */
14312 /* When the main loop is done, COUNT_EXP might hold original count,
14313 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14314 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14315 bytes. Compensate if needed. */
14317 if (size_needed
< desired_align
- align
)
14320 expand_simple_binop (GET_MODE (count_exp
), AND
, count_exp
,
14321 GEN_INT (size_needed
- 1), count_exp
, 1,
14323 size_needed
= desired_align
- align
+ 1;
14324 if (tmp
!= count_exp
)
14325 emit_move_insn (count_exp
, tmp
);
14327 emit_label (label
);
14328 LABEL_NUSES (label
) = 1;
14330 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14332 if (force_loopy_epilogue
)
14333 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14336 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14339 if (jump_around_label
)
14340 emit_label (jump_around_label
);
14344 /* Expand strlen. */
14346 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14348 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14350 /* The generic case of strlen expander is long. Avoid it's
14351 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14353 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14354 && !TARGET_INLINE_ALL_STRINGOPS
14356 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14359 addr
= force_reg (Pmode
, XEXP (src
, 0));
14360 scratch1
= gen_reg_rtx (Pmode
);
14362 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14365 /* Well it seems that some optimizer does not combine a call like
14366 foo(strlen(bar), strlen(bar));
14367 when the move and the subtraction is done here. It does calculate
14368 the length just once when these instructions are done inside of
14369 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14370 often used and I use one fewer register for the lifetime of
14371 output_strlen_unroll() this is better. */
14373 emit_move_insn (out
, addr
);
14375 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14377 /* strlensi_unroll_1 returns the address of the zero at the end of
14378 the string, like memchr(), so compute the length by subtracting
14379 the start address. */
14381 emit_insn (gen_subdi3 (out
, out
, addr
));
14383 emit_insn (gen_subsi3 (out
, out
, addr
));
14388 scratch2
= gen_reg_rtx (Pmode
);
14389 scratch3
= gen_reg_rtx (Pmode
);
14390 scratch4
= force_reg (Pmode
, constm1_rtx
);
14392 emit_move_insn (scratch3
, addr
);
14393 eoschar
= force_reg (QImode
, eoschar
);
14395 src
= replace_equiv_address_nv (src
, scratch3
);
14397 /* If .md starts supporting :P, this can be done in .md. */
14398 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14399 scratch4
), UNSPEC_SCAS
);
14400 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14403 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14404 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14408 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14409 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
/* NOTE(review): this chunk is a lossy extraction of gcc/config/i386/i386.c.
   Original line numbers are embedded in the text and the extractor dropped
   many lines (braces, `else`, TARGET_64BIT conditionals, `return`s) — see
   the gaps in the embedded numbering.  Code is left byte-identical below;
   only comments are added.  Verify any fix against the upstream file.

   Purpose (from the visible comments/code): emit the unrolled-strlen body —
   byte-compare until the pointer is 4-byte aligned, then scan a word at a
   time using the (x - 0x01010101) & ~x & 0x80808080 zero-byte trick, and
   finally fix OUT back to point at the terminating zero byte.  */
14415 /* Expand the appropriate insns for doing strlen if not just doing
14418 out = result, initialized with the start address
14419 align_rtx = alignment of the address.
14420 scratch = scratch register, initialized with the startaddress when
14421 not aligned, otherwise undefined
14423 This is just the body. It needs the initializations mentioned above and
14424 some address computing at the end. These things are done in i386.md. */
14427 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14431 rtx align_2_label
= NULL_RTX
;
14432 rtx align_3_label
= NULL_RTX
;
14433 rtx align_4_label
= gen_label_rtx ();
14434 rtx end_0_label
= gen_label_rtx ();
14436 rtx tmpreg
= gen_reg_rtx (SImode
);
14437 rtx scratch
= gen_reg_rtx (SImode
);
14441 if (CONST_INT_P (align_rtx
))
14442 align
= INTVAL (align_rtx
);
14444 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14446 /* Is there a known alignment and is it less than 4? */
14449 rtx scratch1
= gen_reg_rtx (Pmode
);
14450 emit_move_insn (scratch1
, out
);
14451 /* Is there a known alignment and is it not 2? */
14454 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14455 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14457 /* Leave just the 3 lower bits. */
14458 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14459 NULL_RTX
, 0, OPTAB_WIDEN
);
14461 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14462 Pmode
, 1, align_4_label
);
14463 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14464 Pmode
, 1, align_2_label
);
14465 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14466 Pmode
, 1, align_3_label
);
14470 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14471 check if is aligned to 4 - byte. */
14473 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14474 NULL_RTX
, 0, OPTAB_WIDEN
);
14476 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14477 Pmode
, 1, align_4_label
);
14480 mem
= change_address (src
, QImode
, out
);
14482 /* Now compare the bytes. */
14484 /* Compare the first n unaligned byte on a byte per byte basis. */
14485 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14486 QImode
, 1, end_0_label
);
14488 /* Increment the address. */
14490 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14492 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14494 /* Not needed with an alignment of 2 */
14497 emit_label (align_2_label
);
14499 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14503 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14505 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14507 emit_label (align_3_label
);
14510 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14514 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14516 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14519 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14520 align this loop. It gives only huge programs, but does not help to
14522 emit_label (align_4_label
);
14524 mem
= change_address (src
, SImode
, out
);
14525 emit_move_insn (scratch
, mem
);
14527 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14529 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14531 /* This formula yields a nonzero result iff one of the bytes is zero.
14532 This saves three branches inside loop and many cycles. */
14534 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14535 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14536 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14537 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14538 gen_int_mode (0x80808080, SImode
)));
14539 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14544 rtx reg
= gen_reg_rtx (SImode
);
14545 rtx reg2
= gen_reg_rtx (Pmode
);
14546 emit_move_insn (reg
, tmpreg
);
14547 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14549 /* If zero is not in the first two bytes, move two bytes forward. */
14550 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14551 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14552 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14553 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14554 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14557 /* Emit lea manually to avoid clobbering of flags. */
14558 emit_insn (gen_rtx_SET (SImode
, reg2
,
14559 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14561 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14562 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14563 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14564 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14571 rtx end_2_label
= gen_label_rtx ();
14572 /* Is zero in the first two bytes? */
14574 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14575 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14576 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14577 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14578 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14580 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14581 JUMP_LABEL (tmp
) = end_2_label
;
14583 /* Not in the first two. Move two bytes forward. */
14584 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14586 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14588 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14590 emit_label (end_2_label
);
14594 /* Avoid branch in fixing the byte. */
14595 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14596 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
/* NOTE(review): hard register 17 below is FLAGS_REG on i386 — presumably
   intentional, but the symbolic name is used everywhere else; verify.  */
14597 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14599 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14601 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14603 emit_label (end_0_label
);
/* Emit the RTL for a call: builds a CALL (wrapped in a SET when RETVAL is
   live, and in a PARALLEL with a stack-pointer adjustment when POP is
   nonzero), loads AL with the SSE-register count for 64-bit varargs, and
   records hard-register uses (PIC register, AL) in
   CALL_INSN_FUNCTION_USAGE.  NOTE(review): extraction dropped interior
   lines (braces, `if (TARGET_64BIT)` guards, `pop = NULL;` etc.) — the
   embedded line numbers are non-contiguous; code left byte-identical.  */
14607 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14608 rtx callarg2 ATTRIBUTE_UNUSED
,
14609 rtx pop
, int sibcall
)
14611 rtx use
= NULL
, call
;
14613 if (pop
== const0_rtx
)
14615 gcc_assert (!TARGET_64BIT
|| !pop
);
14617 if (TARGET_MACHO
&& !TARGET_64BIT
)
14620 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14621 fnaddr
= machopic_indirect_call_target (fnaddr
);
14626 /* Static functions and indirect calls don't need the pic register. */
14627 if (! TARGET_64BIT
&& flag_pic
14628 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14629 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14630 use_reg (&use
, pic_offset_table_rtx
);
14633 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14635 rtx al
= gen_rtx_REG (QImode
, 0);
14636 emit_move_insn (al
, callarg2
);
14637 use_reg (&use
, al
);
14640 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14642 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14643 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
/* 64-bit sibcalls must go through a call-clobbered register (r11),
   since argument registers cannot hold the target address.  */
14645 if (sibcall
&& TARGET_64BIT
14646 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
14649 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14650 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
14651 emit_move_insn (fnaddr
, addr
);
14652 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14655 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
14657 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
14660 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
14661 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
14662 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
14665 call
= emit_call_insn (call
);
14667 CALL_INSN_FUNCTION_USAGE (call
) = use
;
/* Allocate and zero-initialize the per-function machine_function record;
   fields that must start at a non-zero sentinel are set explicitly.
   NOTE(review): the trailing `return f;` and closing brace were dropped by
   the extraction (embedded line numbers jump past them).  */
14671 /* Clear stack slot assignments remembered from previous functions.
14672 This is called from INIT_EXPANDERS once before RTL is emitted for each
14675 static struct machine_function
*
14676 ix86_init_machine_status (void)
14678 struct machine_function
*f
;
14680 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
14681 f
->use_fast_prologue_epilogue_nregs
= -1;
14682 f
->tls_descriptor_call_expanded_p
= 0;
/* Look up (or create) the cached stack slot of MODE for slot index N.
   Cache hits return a copy of the remembered MEM; misses allocate a new
   stack_local_entry, link it onto ix86_stack_locals and allocate the slot.
   NOTE(review): extraction dropped the `s->n = n; s->mode = mode;`
   assignments and the final `return s->rtl;` (embedded numbering jumps
   14705->14708 and 14711->14715); verify against upstream i386.c.  */
14687 /* Return a MEM corresponding to a stack slot with mode MODE.
14688 Allocate a new slot if necessary.
14690 The RTL for a function can have several slots available: N is
14691 which slot to use. */
14694 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
14696 struct stack_local_entry
*s
;
14698 gcc_assert (n
< MAX_386_STACK_LOCALS
);
14700 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
14701 if (s
->mode
== mode
&& s
->n
== n
)
14702 return copy_rtx (s
->rtl
);
14704 s
= (struct stack_local_entry
*)
14705 ggc_alloc (sizeof (struct stack_local_entry
));
14708 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
14710 s
->next
= ix86_stack_locals
;
14711 ix86_stack_locals
= s
;
/* Lazily build and cache the SYMBOL_REF for the TLS helper routine;
   GNU-TLS targets use the ___tls_get_addr spelling, others __tls_get_addr.
   The cached rtx lives in GC-rooted file-scope storage (GTY).  */
14715 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14717 static GTY(()) rtx ix86_tls_symbol
;
14719 ix86_tls_get_addr (void)
14722 if (!ix86_tls_symbol
)
14724 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14725 (TARGET_ANY_GNU_TLS
14727 ? "___tls_get_addr"
14728 : "__tls_get_addr");
14731 return ix86_tls_symbol
;
/* Lazily build and cache the _TLS_MODULE_BASE_ SYMBOL_REF, tagging it with
   the global-dynamic TLS model in its symbol flags so later code treats it
   as a TLS reference.  */
14734 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14736 static GTY(()) rtx ix86_tls_module_base_symbol
;
14738 ix86_tls_module_base (void)
14741 if (!ix86_tls_module_base_symbol
)
14743 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14744 "_TLS_MODULE_BASE_");
14745 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
14746 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
14749 return ix86_tls_module_base_symbol
;
/* Compute the byte length of ADDR's encoding beyond modrm/opcode/prefix:
   decomposes the address into base/index/disp, strips SUBREGs, then
   classifies it (register-indirect, direct, base+disp, indexed) to decide
   whether a SIB byte and/or 1- or 4-byte displacement is needed.
   NOTE(review): extraction dropped the `len` accumulator updates and
   `return` lines (embedded numbering has gaps); left byte-identical.  */
14752 /* Calculate the length of the memory address in the instruction
14753 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14756 memory_address_length (rtx addr
)
14758 struct ix86_address parts
;
14759 rtx base
, index
, disp
;
14763 if (GET_CODE (addr
) == PRE_DEC
14764 || GET_CODE (addr
) == POST_INC
14765 || GET_CODE (addr
) == PRE_MODIFY
14766 || GET_CODE (addr
) == POST_MODIFY
)
14769 ok
= ix86_decompose_address (addr
, &parts
);
14772 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14773 parts
.base
= SUBREG_REG (parts
.base
);
14774 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14775 parts
.index
= SUBREG_REG (parts
.index
);
14778 index
= parts
.index
;
14783 - esp as the base always wants an index,
14784 - ebp as the base always wants a displacement. */
14786 /* Register Indirect. */
14787 if (base
&& !index
&& !disp
)
14789 /* esp (for its index) and ebp (for its displacement) need
14790 the two-byte modrm form. */
14791 if (addr
== stack_pointer_rtx
14792 || addr
== arg_pointer_rtx
14793 || addr
== frame_pointer_rtx
14794 || addr
== hard_frame_pointer_rtx
)
14798 /* Direct Addressing. */
14799 else if (disp
&& !base
&& !index
)
14804 /* Find the length of the displacement constant. */
14807 if (base
&& satisfies_constraint_K (disp
))
14812 /* ebp always wants a displacement. */
14813 else if (base
== hard_frame_pointer_rtx
)
14816 /* An index requires the two-byte modrm form.... */
14818 /* ...like esp, which always wants an index. */
14819 || base
== stack_pointer_rtx
14820 || base
== arg_pointer_rtx
14821 || base
== frame_pointer_rtx
)
/* Scan the cached operands of INSN for a constant and return the byte size
   of its immediate encoding, chosen by the insn's mode attribute; with
   SHORTFORM, a constant satisfying constraint K (signed 8-bit) encodes in
   one byte.  NOTE(review): the per-mode `return` arms were dropped by the
   extraction; left byte-identical.  */
14828 /* Compute default value for "length_immediate" attribute. When SHORTFORM
14829 is set, expect that insn have 8bit immediate alternative. */
14831 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
14835 extract_insn_cached (insn
);
14836 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14837 if (CONSTANT_P (recog_data
.operand
[i
]))
14840 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
14844 switch (get_attr_mode (insn
))
14855 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
14860 fatal_insn ("unknown insn mode", insn
);
/* Default "length_address" attribute: for a LEA, measure the address that
   is the SET_SRC of its pattern; otherwise measure the first MEM operand
   found among the insn's extracted operands (falling through — presumably
   to 0 — when none exists; the fallthrough return was dropped by the
   extraction).  */
14866 /* Compute default value for "length_address" attribute. */
14868 ix86_attr_length_address_default (rtx insn
)
14872 if (get_attr_type (insn
) == TYPE_LEA
)
14874 rtx set
= PATTERN (insn
);
14876 if (GET_CODE (set
) == PARALLEL
)
14877 set
= XVECEXP (set
, 0, 0);
14879 gcc_assert (GET_CODE (set
) == SET
);
14881 return memory_address_length (SET_SRC (set
));
14884 extract_insn_cached (insn
);
14885 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14886 if (MEM_P (recog_data
.operand
[i
]))
14888 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
/* Scheduler hook: issue width per tuning target.  NOTE(review): every
   `switch (ix86_tune)` wrapper line and all `return N;` arms were dropped
   by the extraction — only the case labels survive; the per-CPU values
   must be recovered from upstream i386.c.  */
14894 /* Return the maximum number of instructions a cpu can issue. */
14897 ix86_issue_rate (void)
14901 case PROCESSOR_PENTIUM
:
14905 case PROCESSOR_PENTIUMPRO
:
14906 case PROCESSOR_PENTIUM4
:
14907 case PROCESSOR_ATHLON
:
14909 case PROCESSOR_AMDFAM10
:
14910 case PROCESSOR_NOCONA
:
14911 case PROCESSOR_GENERIC32
:
14912 case PROCESSOR_GENERIC64
:
14915 case PROCESSOR_CORE2
:
/* Return nonzero iff INSN is a flags consumer (setcc/cmov/fcmov/branch)
   that reads the flags set by DEP_INSN and nothing else DEP_INSN sets.
   Handles both a single_set dep and a two-SET PARALLEL dep (set/set2).  */
14923 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14924 by DEP_INSN and nothing set by DEP_INSN. */
14927 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
14931 /* Simplify the test for uninteresting insns. */
14932 if (insn_type
!= TYPE_SETCC
14933 && insn_type
!= TYPE_ICMOV
14934 && insn_type
!= TYPE_FCMOV
14935 && insn_type
!= TYPE_IBR
)
14938 if ((set
= single_set (dep_insn
)) != 0)
14940 set
= SET_DEST (set
);
14943 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
14944 && XVECLEN (PATTERN (dep_insn
), 0) == 2
14945 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
14946 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
14948 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
/* NOTE(review): LIKELY BUG — `set2` below re-reads vector element 0, so it
   duplicates `set` instead of the second SET's destination.  Upstream
   i386.c uses `XVECEXP (PATTERN (dep_insn), 0, 1)` here; with element 0
   the `set2 && reg_overlap_mentioned_p (set2, ...)` guard further down
   tests the wrong register.  Verify against upstream before fixing, since
   the extraction also dropped lines in this region.  */
14949 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
14954 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
14957 /* This test is true if the dependent insn reads the flags but
14958 not any other potentially set register. */
14959 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
14962 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
/* Return nonzero iff INSN addresses memory (or is a LEA computing an
   address) through operands that DEP_INSN modifies — i.e. an address
   generation interlock.  The address checked is the LEA's SET_SRC, or the
   first MEM operand's address otherwise.  */
14968 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14969 address with operands set by DEP_INSN. */
14972 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
14976 if (insn_type
== TYPE_LEA
14979 addr
= PATTERN (insn
);
14981 if (GET_CODE (addr
) == PARALLEL
)
14982 addr
= XVECEXP (addr
, 0, 0);
14984 gcc_assert (GET_CODE (addr
) == SET
);
14986 addr
= SET_SRC (addr
);
14991 extract_insn_cached (insn
);
14992 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
14993 if (MEM_P (recog_data
.operand
[i
]))
14995 addr
= XEXP (recog_data
.operand
[i
], 0);
15002 return modified_in_p (addr
, dep_insn
);
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduling latency COST of the
   dependence LINK between DEP_INSN and INSN for the tuned processor —
   AGI penalties on Pentium, load-latency hiding on PPro/K6/Athlon-family,
   FP-store and int->FP conversion adjustments.  NOTE(review): extraction
   dropped the `switch (ix86_tune)` wrapper, braces, `break`s and the
   actual `cost = ...` / `return cost;` lines (embedded numbering gaps);
   code left byte-identical.  */
15006 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15008 enum attr_type insn_type
, dep_insn_type
;
15009 enum attr_memory memory
;
15011 int dep_insn_code_number
;
15013 /* Anti and output dependencies have zero cost on all CPUs. */
15014 if (REG_NOTE_KIND (link
) != 0)
15017 dep_insn_code_number
= recog_memoized (dep_insn
);
15019 /* If we can't recognize the insns, we can't really do anything. */
15020 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15023 insn_type
= get_attr_type (insn
);
15024 dep_insn_type
= get_attr_type (dep_insn
);
15028 case PROCESSOR_PENTIUM
:
15029 /* Address Generation Interlock adds a cycle of latency. */
15030 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15033 /* ??? Compares pair with jump/setcc. */
15034 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15037 /* Floating point stores require value to be ready one cycle earlier. */
15038 if (insn_type
== TYPE_FMOV
15039 && get_attr_memory (insn
) == MEMORY_STORE
15040 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15044 case PROCESSOR_PENTIUMPRO
:
15045 memory
= get_attr_memory (insn
);
15047 /* INT->FP conversion is expensive. */
15048 if (get_attr_fp_int_src (dep_insn
))
15051 /* There is one cycle extra latency between an FP op and a store. */
15052 if (insn_type
== TYPE_FMOV
15053 && (set
= single_set (dep_insn
)) != NULL_RTX
15054 && (set2
= single_set (insn
)) != NULL_RTX
15055 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15056 && MEM_P (SET_DEST (set2
)))
15059 /* Show ability of reorder buffer to hide latency of load by executing
15060 in parallel with previous instruction in case
15061 previous instruction is not needed to compute the address. */
15062 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15063 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15065 /* Claim moves to take one cycle, as core can issue one load
15066 at time and the next load can start cycle later. */
15067 if (dep_insn_type
== TYPE_IMOV
15068 || dep_insn_type
== TYPE_FMOV
)
15076 memory
= get_attr_memory (insn
);
15078 /* The esp dependency is resolved before the instruction is really
15080 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15081 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15084 /* INT->FP conversion is expensive. */
15085 if (get_attr_fp_int_src (dep_insn
))
15088 /* Show ability of reorder buffer to hide latency of load by executing
15089 in parallel with previous instruction in case
15090 previous instruction is not needed to compute the address. */
15091 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15092 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15094 /* Claim moves to take one cycle, as core can issue one load
15095 at time and the next load can start cycle later. */
15096 if (dep_insn_type
== TYPE_IMOV
15097 || dep_insn_type
== TYPE_FMOV
)
15106 case PROCESSOR_ATHLON
:
15108 case PROCESSOR_AMDFAM10
:
15109 case PROCESSOR_GENERIC32
:
15110 case PROCESSOR_GENERIC64
:
15111 memory
= get_attr_memory (insn
);
15113 /* Show ability of reorder buffer to hide latency of load by executing
15114 in parallel with previous instruction in case
15115 previous instruction is not needed to compute the address. */
15116 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15117 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15119 enum attr_unit unit
= get_attr_unit (insn
);
15122 /* Because of the difference between the length of integer and
15123 floating unit pipeline preparation stages, the memory operands
15124 for floating point are cheaper.
15126 ??? For Athlon it the difference is most probably 2. */
15127 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15130 loadcost
= TARGET_ATHLON
? 2 : 0;
15132 if (cost
>= loadcost
)
/* Scheduler hook: DFA lookahead depth per tuning target; only the Pentium
   and PPro/K6 conditions survive the extraction (the `return` arms are in
   the dropped lines).  */
15145 /* How many alternative schedules to try. This should be as wide as the
15146 scheduling freedom in the DFA, but no wider. Making this value too
15147 large results extra work for the scheduler. */
15150 ia32_multipass_dfa_lookahead (void)
15152 if (ix86_tune
== PROCESSOR_PENTIUM
)
15155 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15156 || ix86_tune
== PROCESSOR_K6
)
/* CONSTANT_ALIGNMENT: boost alignment of constants placed in memory —
   doubles/128-bit-mode REAL_CSTs to 64/128 bits, and long string constants
   (>= 31 bytes, when not optimizing for size) to word alignment.  */
15164 /* Compute the alignment given to a constant that is being placed in memory.
15165 EXP is the constant and ALIGN is the alignment that the object would
15167 The value of this function is used instead of that alignment to align
15171 ix86_constant_alignment (tree exp
, int align
)
15173 if (TREE_CODE (exp
) == REAL_CST
)
15175 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15177 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15180 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15181 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15182 return BITS_PER_WORD
;
/* DATA_ALIGNMENT: raise the alignment of static data — large aggregates up
   to max_align (word-size when optimizing for size, else 256 bits),
   aggregates >= 128 bits to 16 bytes (x86-64 ABI), and element-wise boosts
   for arrays/complex/record/scalar types whose (element) mode is DFmode or
   a 128-bit mode.  NOTE(review): the `return` values for most branches
   were dropped by the extraction; left byte-identical.  */
15187 /* Compute the alignment for a static variable.
15188 TYPE is the data type, and ALIGN is the alignment that
15189 the object would ordinarily have. The value of this function is used
15190 instead of that alignment to align the object. */
15193 ix86_data_alignment (tree type
, int align
)
15195 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
15197 if (AGGREGATE_TYPE_P (type
)
15198 && TYPE_SIZE (type
)
15199 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15200 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15201 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15202 && align
< max_align
)
15205 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15206 to 16byte boundary. */
15209 if (AGGREGATE_TYPE_P (type
)
15210 && TYPE_SIZE (type
)
15211 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15212 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15213 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15217 if (TREE_CODE (type
) == ARRAY_TYPE
)
15219 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15221 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15224 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15227 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15229 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15232 else if ((TREE_CODE (type
) == RECORD_TYPE
15233 || TREE_CODE (type
) == UNION_TYPE
15234 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15235 && TYPE_FIELDS (type
))
15237 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15239 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15242 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15243 || TREE_CODE (type
) == INTEGER_TYPE
)
15245 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15247 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* LOCAL_ALIGNMENT: same shape as ix86_data_alignment but for stack locals;
   the aggregate threshold is 16 bytes (x86-64 ABI) rather than max_align.
   NOTE(review): the `return` values for most branches were dropped by the
   extraction; left byte-identical.  */
15254 /* Compute the alignment for a local variable.
15255 TYPE is the data type, and ALIGN is the alignment that
15256 the object would ordinarily have. The value of this macro is used
15257 instead of that alignment to align the object. */
15260 ix86_local_alignment (tree type
, int align
)
15262 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15263 to 16byte boundary. */
15266 if (AGGREGATE_TYPE_P (type
)
15267 && TYPE_SIZE (type
)
15268 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15269 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15270 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15273 if (TREE_CODE (type
) == ARRAY_TYPE
)
15275 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15277 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15280 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15282 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15284 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15287 else if ((TREE_CODE (type
) == RECORD_TYPE
15288 || TREE_CODE (type
) == UNION_TYPE
15289 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15290 && TYPE_FIELDS (type
))
15292 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15294 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15297 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15298 || TREE_CODE (type
) == INTEGER_TYPE
)
15301 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15303 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* Fill in the variable parts of a trampoline at TRAMP: 32-bit variant
   writes `mov ecx, CXT` (0xb9 imm32) then `jmp rel32` (0xe9 disp) to
   FNADDR; 64-bit variant writes movl/movabs of FNADDR into r11 (0xbb41 /
   0xbb49), movabs of CXT into r10 (0xba49), then `jmp *r11` (0xff49 0xe3
   — REX.B + ff /4).  NOTE(review): extraction dropped the TARGET_64BIT
   branch structure and the `offset += ...` bookkeeping lines; the
   `gcc_assert (offset <= TRAMPOLINE_SIZE)` guard survives.  */
15309 /* Emit RTL insns to initialize the variable parts of a trampoline.
15310 FNADDR is an RTX for the address of the function's pure code.
15311 CXT is an RTX for the static chain value for the function. */
15313 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15317 /* Compute offset from the end of the jmp to the target function. */
15318 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15319 plus_constant (tramp
, 10),
15320 NULL_RTX
, 1, OPTAB_DIRECT
);
15321 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15322 gen_int_mode (0xb9, QImode
));
15323 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15324 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15325 gen_int_mode (0xe9, QImode
));
15326 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15331 /* Try to load address using shorter movl instead of movabs.
15332 We may want to support movq for kernel mode, but kernel does not use
15333 trampolines at the moment. */
15334 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15336 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15337 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15338 gen_int_mode (0xbb41, HImode
));
15339 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15340 gen_lowpart (SImode
, fnaddr
));
15345 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15346 gen_int_mode (0xbb49, HImode
));
15347 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15351 /* Load static chain using movabs to r10. */
15352 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15353 gen_int_mode (0xba49, HImode
));
15354 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15357 /* Jump to the r11 */
15358 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15359 gen_int_mode (0xff49, HImode
));
15360 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15361 gen_int_mode (0xe3, QImode
));
15363 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15366 #ifdef ENABLE_EXECUTE_STACK
15367 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15368 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15372 /* Codes for all the SSE/MMX builtins. */
15375 IX86_BUILTIN_ADDPS
,
15376 IX86_BUILTIN_ADDSS
,
15377 IX86_BUILTIN_DIVPS
,
15378 IX86_BUILTIN_DIVSS
,
15379 IX86_BUILTIN_MULPS
,
15380 IX86_BUILTIN_MULSS
,
15381 IX86_BUILTIN_SUBPS
,
15382 IX86_BUILTIN_SUBSS
,
15384 IX86_BUILTIN_CMPEQPS
,
15385 IX86_BUILTIN_CMPLTPS
,
15386 IX86_BUILTIN_CMPLEPS
,
15387 IX86_BUILTIN_CMPGTPS
,
15388 IX86_BUILTIN_CMPGEPS
,
15389 IX86_BUILTIN_CMPNEQPS
,
15390 IX86_BUILTIN_CMPNLTPS
,
15391 IX86_BUILTIN_CMPNLEPS
,
15392 IX86_BUILTIN_CMPNGTPS
,
15393 IX86_BUILTIN_CMPNGEPS
,
15394 IX86_BUILTIN_CMPORDPS
,
15395 IX86_BUILTIN_CMPUNORDPS
,
15396 IX86_BUILTIN_CMPEQSS
,
15397 IX86_BUILTIN_CMPLTSS
,
15398 IX86_BUILTIN_CMPLESS
,
15399 IX86_BUILTIN_CMPNEQSS
,
15400 IX86_BUILTIN_CMPNLTSS
,
15401 IX86_BUILTIN_CMPNLESS
,
15402 IX86_BUILTIN_CMPNGTSS
,
15403 IX86_BUILTIN_CMPNGESS
,
15404 IX86_BUILTIN_CMPORDSS
,
15405 IX86_BUILTIN_CMPUNORDSS
,
15407 IX86_BUILTIN_COMIEQSS
,
15408 IX86_BUILTIN_COMILTSS
,
15409 IX86_BUILTIN_COMILESS
,
15410 IX86_BUILTIN_COMIGTSS
,
15411 IX86_BUILTIN_COMIGESS
,
15412 IX86_BUILTIN_COMINEQSS
,
15413 IX86_BUILTIN_UCOMIEQSS
,
15414 IX86_BUILTIN_UCOMILTSS
,
15415 IX86_BUILTIN_UCOMILESS
,
15416 IX86_BUILTIN_UCOMIGTSS
,
15417 IX86_BUILTIN_UCOMIGESS
,
15418 IX86_BUILTIN_UCOMINEQSS
,
15420 IX86_BUILTIN_CVTPI2PS
,
15421 IX86_BUILTIN_CVTPS2PI
,
15422 IX86_BUILTIN_CVTSI2SS
,
15423 IX86_BUILTIN_CVTSI642SS
,
15424 IX86_BUILTIN_CVTSS2SI
,
15425 IX86_BUILTIN_CVTSS2SI64
,
15426 IX86_BUILTIN_CVTTPS2PI
,
15427 IX86_BUILTIN_CVTTSS2SI
,
15428 IX86_BUILTIN_CVTTSS2SI64
,
15430 IX86_BUILTIN_MAXPS
,
15431 IX86_BUILTIN_MAXSS
,
15432 IX86_BUILTIN_MINPS
,
15433 IX86_BUILTIN_MINSS
,
15435 IX86_BUILTIN_LOADUPS
,
15436 IX86_BUILTIN_STOREUPS
,
15437 IX86_BUILTIN_MOVSS
,
15439 IX86_BUILTIN_MOVHLPS
,
15440 IX86_BUILTIN_MOVLHPS
,
15441 IX86_BUILTIN_LOADHPS
,
15442 IX86_BUILTIN_LOADLPS
,
15443 IX86_BUILTIN_STOREHPS
,
15444 IX86_BUILTIN_STORELPS
,
15446 IX86_BUILTIN_MASKMOVQ
,
15447 IX86_BUILTIN_MOVMSKPS
,
15448 IX86_BUILTIN_PMOVMSKB
,
15450 IX86_BUILTIN_MOVNTPS
,
15451 IX86_BUILTIN_MOVNTQ
,
15453 IX86_BUILTIN_LOADDQU
,
15454 IX86_BUILTIN_STOREDQU
,
15456 IX86_BUILTIN_PACKSSWB
,
15457 IX86_BUILTIN_PACKSSDW
,
15458 IX86_BUILTIN_PACKUSWB
,
15460 IX86_BUILTIN_PADDB
,
15461 IX86_BUILTIN_PADDW
,
15462 IX86_BUILTIN_PADDD
,
15463 IX86_BUILTIN_PADDQ
,
15464 IX86_BUILTIN_PADDSB
,
15465 IX86_BUILTIN_PADDSW
,
15466 IX86_BUILTIN_PADDUSB
,
15467 IX86_BUILTIN_PADDUSW
,
15468 IX86_BUILTIN_PSUBB
,
15469 IX86_BUILTIN_PSUBW
,
15470 IX86_BUILTIN_PSUBD
,
15471 IX86_BUILTIN_PSUBQ
,
15472 IX86_BUILTIN_PSUBSB
,
15473 IX86_BUILTIN_PSUBSW
,
15474 IX86_BUILTIN_PSUBUSB
,
15475 IX86_BUILTIN_PSUBUSW
,
15478 IX86_BUILTIN_PANDN
,
15482 IX86_BUILTIN_PAVGB
,
15483 IX86_BUILTIN_PAVGW
,
15485 IX86_BUILTIN_PCMPEQB
,
15486 IX86_BUILTIN_PCMPEQW
,
15487 IX86_BUILTIN_PCMPEQD
,
15488 IX86_BUILTIN_PCMPGTB
,
15489 IX86_BUILTIN_PCMPGTW
,
15490 IX86_BUILTIN_PCMPGTD
,
15492 IX86_BUILTIN_PMADDWD
,
15494 IX86_BUILTIN_PMAXSW
,
15495 IX86_BUILTIN_PMAXUB
,
15496 IX86_BUILTIN_PMINSW
,
15497 IX86_BUILTIN_PMINUB
,
15499 IX86_BUILTIN_PMULHUW
,
15500 IX86_BUILTIN_PMULHW
,
15501 IX86_BUILTIN_PMULLW
,
15503 IX86_BUILTIN_PSADBW
,
15504 IX86_BUILTIN_PSHUFW
,
15506 IX86_BUILTIN_PSLLW
,
15507 IX86_BUILTIN_PSLLD
,
15508 IX86_BUILTIN_PSLLQ
,
15509 IX86_BUILTIN_PSRAW
,
15510 IX86_BUILTIN_PSRAD
,
15511 IX86_BUILTIN_PSRLW
,
15512 IX86_BUILTIN_PSRLD
,
15513 IX86_BUILTIN_PSRLQ
,
15514 IX86_BUILTIN_PSLLWI
,
15515 IX86_BUILTIN_PSLLDI
,
15516 IX86_BUILTIN_PSLLQI
,
15517 IX86_BUILTIN_PSRAWI
,
15518 IX86_BUILTIN_PSRADI
,
15519 IX86_BUILTIN_PSRLWI
,
15520 IX86_BUILTIN_PSRLDI
,
15521 IX86_BUILTIN_PSRLQI
,
15523 IX86_BUILTIN_PUNPCKHBW
,
15524 IX86_BUILTIN_PUNPCKHWD
,
15525 IX86_BUILTIN_PUNPCKHDQ
,
15526 IX86_BUILTIN_PUNPCKLBW
,
15527 IX86_BUILTIN_PUNPCKLWD
,
15528 IX86_BUILTIN_PUNPCKLDQ
,
15530 IX86_BUILTIN_SHUFPS
,
15532 IX86_BUILTIN_RCPPS
,
15533 IX86_BUILTIN_RCPSS
,
15534 IX86_BUILTIN_RSQRTPS
,
15535 IX86_BUILTIN_RSQRTSS
,
15536 IX86_BUILTIN_SQRTPS
,
15537 IX86_BUILTIN_SQRTSS
,
15539 IX86_BUILTIN_UNPCKHPS
,
15540 IX86_BUILTIN_UNPCKLPS
,
15542 IX86_BUILTIN_ANDPS
,
15543 IX86_BUILTIN_ANDNPS
,
15545 IX86_BUILTIN_XORPS
,
15548 IX86_BUILTIN_LDMXCSR
,
15549 IX86_BUILTIN_STMXCSR
,
15550 IX86_BUILTIN_SFENCE
,
15552 /* 3DNow! Original */
15553 IX86_BUILTIN_FEMMS
,
15554 IX86_BUILTIN_PAVGUSB
,
15555 IX86_BUILTIN_PF2ID
,
15556 IX86_BUILTIN_PFACC
,
15557 IX86_BUILTIN_PFADD
,
15558 IX86_BUILTIN_PFCMPEQ
,
15559 IX86_BUILTIN_PFCMPGE
,
15560 IX86_BUILTIN_PFCMPGT
,
15561 IX86_BUILTIN_PFMAX
,
15562 IX86_BUILTIN_PFMIN
,
15563 IX86_BUILTIN_PFMUL
,
15564 IX86_BUILTIN_PFRCP
,
15565 IX86_BUILTIN_PFRCPIT1
,
15566 IX86_BUILTIN_PFRCPIT2
,
15567 IX86_BUILTIN_PFRSQIT1
,
15568 IX86_BUILTIN_PFRSQRT
,
15569 IX86_BUILTIN_PFSUB
,
15570 IX86_BUILTIN_PFSUBR
,
15571 IX86_BUILTIN_PI2FD
,
15572 IX86_BUILTIN_PMULHRW
,
15574 /* 3DNow! Athlon Extensions */
15575 IX86_BUILTIN_PF2IW
,
15576 IX86_BUILTIN_PFNACC
,
15577 IX86_BUILTIN_PFPNACC
,
15578 IX86_BUILTIN_PI2FW
,
15579 IX86_BUILTIN_PSWAPDSI
,
15580 IX86_BUILTIN_PSWAPDSF
,
15583 IX86_BUILTIN_ADDPD
,
15584 IX86_BUILTIN_ADDSD
,
15585 IX86_BUILTIN_DIVPD
,
15586 IX86_BUILTIN_DIVSD
,
15587 IX86_BUILTIN_MULPD
,
15588 IX86_BUILTIN_MULSD
,
15589 IX86_BUILTIN_SUBPD
,
15590 IX86_BUILTIN_SUBSD
,
15592 IX86_BUILTIN_CMPEQPD
,
15593 IX86_BUILTIN_CMPLTPD
,
15594 IX86_BUILTIN_CMPLEPD
,
15595 IX86_BUILTIN_CMPGTPD
,
15596 IX86_BUILTIN_CMPGEPD
,
15597 IX86_BUILTIN_CMPNEQPD
,
15598 IX86_BUILTIN_CMPNLTPD
,
15599 IX86_BUILTIN_CMPNLEPD
,
15600 IX86_BUILTIN_CMPNGTPD
,
15601 IX86_BUILTIN_CMPNGEPD
,
15602 IX86_BUILTIN_CMPORDPD
,
15603 IX86_BUILTIN_CMPUNORDPD
,
15604 IX86_BUILTIN_CMPNEPD
,
15605 IX86_BUILTIN_CMPEQSD
,
15606 IX86_BUILTIN_CMPLTSD
,
15607 IX86_BUILTIN_CMPLESD
,
15608 IX86_BUILTIN_CMPNEQSD
,
15609 IX86_BUILTIN_CMPNLTSD
,
15610 IX86_BUILTIN_CMPNLESD
,
15611 IX86_BUILTIN_CMPORDSD
,
15612 IX86_BUILTIN_CMPUNORDSD
,
15613 IX86_BUILTIN_CMPNESD
,
15615 IX86_BUILTIN_COMIEQSD
,
15616 IX86_BUILTIN_COMILTSD
,
15617 IX86_BUILTIN_COMILESD
,
15618 IX86_BUILTIN_COMIGTSD
,
15619 IX86_BUILTIN_COMIGESD
,
15620 IX86_BUILTIN_COMINEQSD
,
15621 IX86_BUILTIN_UCOMIEQSD
,
15622 IX86_BUILTIN_UCOMILTSD
,
15623 IX86_BUILTIN_UCOMILESD
,
15624 IX86_BUILTIN_UCOMIGTSD
,
15625 IX86_BUILTIN_UCOMIGESD
,
15626 IX86_BUILTIN_UCOMINEQSD
,
15628 IX86_BUILTIN_MAXPD
,
15629 IX86_BUILTIN_MAXSD
,
15630 IX86_BUILTIN_MINPD
,
15631 IX86_BUILTIN_MINSD
,
15633 IX86_BUILTIN_ANDPD
,
15634 IX86_BUILTIN_ANDNPD
,
15636 IX86_BUILTIN_XORPD
,
15638 IX86_BUILTIN_SQRTPD
,
15639 IX86_BUILTIN_SQRTSD
,
15641 IX86_BUILTIN_UNPCKHPD
,
15642 IX86_BUILTIN_UNPCKLPD
,
15644 IX86_BUILTIN_SHUFPD
,
15646 IX86_BUILTIN_LOADUPD
,
15647 IX86_BUILTIN_STOREUPD
,
15648 IX86_BUILTIN_MOVSD
,
15650 IX86_BUILTIN_LOADHPD
,
15651 IX86_BUILTIN_LOADLPD
,
15653 IX86_BUILTIN_CVTDQ2PD
,
15654 IX86_BUILTIN_CVTDQ2PS
,
15656 IX86_BUILTIN_CVTPD2DQ
,
15657 IX86_BUILTIN_CVTPD2PI
,
15658 IX86_BUILTIN_CVTPD2PS
,
15659 IX86_BUILTIN_CVTTPD2DQ
,
15660 IX86_BUILTIN_CVTTPD2PI
,
15662 IX86_BUILTIN_CVTPI2PD
,
15663 IX86_BUILTIN_CVTSI2SD
,
15664 IX86_BUILTIN_CVTSI642SD
,
15666 IX86_BUILTIN_CVTSD2SI
,
15667 IX86_BUILTIN_CVTSD2SI64
,
15668 IX86_BUILTIN_CVTSD2SS
,
15669 IX86_BUILTIN_CVTSS2SD
,
15670 IX86_BUILTIN_CVTTSD2SI
,
15671 IX86_BUILTIN_CVTTSD2SI64
,
15673 IX86_BUILTIN_CVTPS2DQ
,
15674 IX86_BUILTIN_CVTPS2PD
,
15675 IX86_BUILTIN_CVTTPS2DQ
,
15677 IX86_BUILTIN_MOVNTI
,
15678 IX86_BUILTIN_MOVNTPD
,
15679 IX86_BUILTIN_MOVNTDQ
,
15682 IX86_BUILTIN_MASKMOVDQU
,
15683 IX86_BUILTIN_MOVMSKPD
,
15684 IX86_BUILTIN_PMOVMSKB128
,
15686 IX86_BUILTIN_PACKSSWB128
,
15687 IX86_BUILTIN_PACKSSDW128
,
15688 IX86_BUILTIN_PACKUSWB128
,
15690 IX86_BUILTIN_PADDB128
,
15691 IX86_BUILTIN_PADDW128
,
15692 IX86_BUILTIN_PADDD128
,
15693 IX86_BUILTIN_PADDQ128
,
15694 IX86_BUILTIN_PADDSB128
,
15695 IX86_BUILTIN_PADDSW128
,
15696 IX86_BUILTIN_PADDUSB128
,
15697 IX86_BUILTIN_PADDUSW128
,
15698 IX86_BUILTIN_PSUBB128
,
15699 IX86_BUILTIN_PSUBW128
,
15700 IX86_BUILTIN_PSUBD128
,
15701 IX86_BUILTIN_PSUBQ128
,
15702 IX86_BUILTIN_PSUBSB128
,
15703 IX86_BUILTIN_PSUBSW128
,
15704 IX86_BUILTIN_PSUBUSB128
,
15705 IX86_BUILTIN_PSUBUSW128
,
15707 IX86_BUILTIN_PAND128
,
15708 IX86_BUILTIN_PANDN128
,
15709 IX86_BUILTIN_POR128
,
15710 IX86_BUILTIN_PXOR128
,
15712 IX86_BUILTIN_PAVGB128
,
15713 IX86_BUILTIN_PAVGW128
,
15715 IX86_BUILTIN_PCMPEQB128
,
15716 IX86_BUILTIN_PCMPEQW128
,
15717 IX86_BUILTIN_PCMPEQD128
,
15718 IX86_BUILTIN_PCMPGTB128
,
15719 IX86_BUILTIN_PCMPGTW128
,
15720 IX86_BUILTIN_PCMPGTD128
,
15722 IX86_BUILTIN_PMADDWD128
,
15724 IX86_BUILTIN_PMAXSW128
,
15725 IX86_BUILTIN_PMAXUB128
,
15726 IX86_BUILTIN_PMINSW128
,
15727 IX86_BUILTIN_PMINUB128
,
15729 IX86_BUILTIN_PMULUDQ
,
15730 IX86_BUILTIN_PMULUDQ128
,
15731 IX86_BUILTIN_PMULHUW128
,
15732 IX86_BUILTIN_PMULHW128
,
15733 IX86_BUILTIN_PMULLW128
,
15735 IX86_BUILTIN_PSADBW128
,
15736 IX86_BUILTIN_PSHUFHW
,
15737 IX86_BUILTIN_PSHUFLW
,
15738 IX86_BUILTIN_PSHUFD
,
15740 IX86_BUILTIN_PSLLW128
,
15741 IX86_BUILTIN_PSLLD128
,
15742 IX86_BUILTIN_PSLLQ128
,
15743 IX86_BUILTIN_PSRAW128
,
15744 IX86_BUILTIN_PSRAD128
,
15745 IX86_BUILTIN_PSRLW128
,
15746 IX86_BUILTIN_PSRLD128
,
15747 IX86_BUILTIN_PSRLQ128
,
15748 IX86_BUILTIN_PSLLDQI128
,
15749 IX86_BUILTIN_PSLLWI128
,
15750 IX86_BUILTIN_PSLLDI128
,
15751 IX86_BUILTIN_PSLLQI128
,
15752 IX86_BUILTIN_PSRAWI128
,
15753 IX86_BUILTIN_PSRADI128
,
15754 IX86_BUILTIN_PSRLDQI128
,
15755 IX86_BUILTIN_PSRLWI128
,
15756 IX86_BUILTIN_PSRLDI128
,
15757 IX86_BUILTIN_PSRLQI128
,
15759 IX86_BUILTIN_PUNPCKHBW128
,
15760 IX86_BUILTIN_PUNPCKHWD128
,
15761 IX86_BUILTIN_PUNPCKHDQ128
,
15762 IX86_BUILTIN_PUNPCKHQDQ128
,
15763 IX86_BUILTIN_PUNPCKLBW128
,
15764 IX86_BUILTIN_PUNPCKLWD128
,
15765 IX86_BUILTIN_PUNPCKLDQ128
,
15766 IX86_BUILTIN_PUNPCKLQDQ128
,
15768 IX86_BUILTIN_CLFLUSH
,
15769 IX86_BUILTIN_MFENCE
,
15770 IX86_BUILTIN_LFENCE
,
15772 /* Prescott New Instructions. */
15773 IX86_BUILTIN_ADDSUBPS
,
15774 IX86_BUILTIN_HADDPS
,
15775 IX86_BUILTIN_HSUBPS
,
15776 IX86_BUILTIN_MOVSHDUP
,
15777 IX86_BUILTIN_MOVSLDUP
,
15778 IX86_BUILTIN_ADDSUBPD
,
15779 IX86_BUILTIN_HADDPD
,
15780 IX86_BUILTIN_HSUBPD
,
15781 IX86_BUILTIN_LDDQU
,
15783 IX86_BUILTIN_MONITOR
,
15784 IX86_BUILTIN_MWAIT
,
15787 IX86_BUILTIN_PHADDW
,
15788 IX86_BUILTIN_PHADDD
,
15789 IX86_BUILTIN_PHADDSW
,
15790 IX86_BUILTIN_PHSUBW
,
15791 IX86_BUILTIN_PHSUBD
,
15792 IX86_BUILTIN_PHSUBSW
,
15793 IX86_BUILTIN_PMADDUBSW
,
15794 IX86_BUILTIN_PMULHRSW
,
15795 IX86_BUILTIN_PSHUFB
,
15796 IX86_BUILTIN_PSIGNB
,
15797 IX86_BUILTIN_PSIGNW
,
15798 IX86_BUILTIN_PSIGND
,
15799 IX86_BUILTIN_PALIGNR
,
15800 IX86_BUILTIN_PABSB
,
15801 IX86_BUILTIN_PABSW
,
15802 IX86_BUILTIN_PABSD
,
15804 IX86_BUILTIN_PHADDW128
,
15805 IX86_BUILTIN_PHADDD128
,
15806 IX86_BUILTIN_PHADDSW128
,
15807 IX86_BUILTIN_PHSUBW128
,
15808 IX86_BUILTIN_PHSUBD128
,
15809 IX86_BUILTIN_PHSUBSW128
,
15810 IX86_BUILTIN_PMADDUBSW128
,
15811 IX86_BUILTIN_PMULHRSW128
,
15812 IX86_BUILTIN_PSHUFB128
,
15813 IX86_BUILTIN_PSIGNB128
,
15814 IX86_BUILTIN_PSIGNW128
,
15815 IX86_BUILTIN_PSIGND128
,
15816 IX86_BUILTIN_PALIGNR128
,
15817 IX86_BUILTIN_PABSB128
,
15818 IX86_BUILTIN_PABSW128
,
15819 IX86_BUILTIN_PABSD128
,
15821 /* AMDFAM10 - SSE4A New Instructions. */
15822 IX86_BUILTIN_MOVNTSD
,
15823 IX86_BUILTIN_MOVNTSS
,
15824 IX86_BUILTIN_EXTRQI
,
15825 IX86_BUILTIN_EXTRQ
,
15826 IX86_BUILTIN_INSERTQI
,
15827 IX86_BUILTIN_INSERTQ
,
15829 IX86_BUILTIN_VEC_INIT_V2SI
,
15830 IX86_BUILTIN_VEC_INIT_V4HI
,
15831 IX86_BUILTIN_VEC_INIT_V8QI
,
15832 IX86_BUILTIN_VEC_EXT_V2DF
,
15833 IX86_BUILTIN_VEC_EXT_V2DI
,
15834 IX86_BUILTIN_VEC_EXT_V4SF
,
15835 IX86_BUILTIN_VEC_EXT_V4SI
,
15836 IX86_BUILTIN_VEC_EXT_V8HI
,
15837 IX86_BUILTIN_VEC_EXT_V2SI
,
15838 IX86_BUILTIN_VEC_EXT_V4HI
,
15839 IX86_BUILTIN_VEC_SET_V8HI
,
15840 IX86_BUILTIN_VEC_SET_V4HI
,
/* Table for the ix86 builtin decls.  */
/* Indexed by IX86_BUILTIN_* code.  Entries are filled in by def_builtin;
   slots stay NULL_TREE for builtins whose ISA mask is not enabled in
   target_flags.  GTY(()) roots the table for the garbage collector.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
15848 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
15849 * if the target_flags include one of MASK. Stores the function decl
15850 * in the ix86_builtins array.
15851 * Returns the function decl or NULL_TREE, if the builtin was not added. */
15854 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
15856 tree decl
= NULL_TREE
;
15858 if (mask
& target_flags
15859 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
15861 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
15863 ix86_builtins
[(int) code
] = decl
;
15869 /* Like def_builtin, but also marks the function decl "const". */
15872 def_builtin_const (int mask
, const char *name
, tree type
,
15873 enum ix86_builtins code
)
15875 tree decl
= def_builtin (mask
, name
, type
, code
);
15877 TREE_READONLY (decl
) = 1;
15881 /* Bits for builtin_description.flag. */
15883 /* Set when we don't support the comparison natively, and should
15884 swap_comparison in order to support it. */
15885 #define BUILTIN_DESC_SWAP_OPERANDS 1
15887 struct builtin_description
15889 const unsigned int mask
;
15890 const enum insn_code icode
;
15891 const char *const name
;
15892 const enum ix86_builtins code
;
15893 const enum rtx_code comparison
;
15894 const unsigned int flag
;
15897 static const struct builtin_description bdesc_comi
[] =
15899 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
15900 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
15901 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
15902 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
15903 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
15904 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
15905 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
15906 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
15907 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
15908 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
15909 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
15910 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
15911 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
15912 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
15913 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
15914 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
15915 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
15916 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
15917 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
15918 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
15919 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
15920 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
15921 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
15922 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
15925 static const struct builtin_description bdesc_2arg
[] =
15928 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
15929 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
15930 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
15931 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
15932 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
15933 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
15934 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
15935 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
15937 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
15938 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
15939 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
15940 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
15941 BUILTIN_DESC_SWAP_OPERANDS
},
15942 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
15943 BUILTIN_DESC_SWAP_OPERANDS
},
15944 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
15945 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
15946 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
15947 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
15948 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
15949 BUILTIN_DESC_SWAP_OPERANDS
},
15950 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
15951 BUILTIN_DESC_SWAP_OPERANDS
},
15952 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
15953 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
15954 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
15955 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
15956 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
15957 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
15958 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
15959 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
15960 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
15961 BUILTIN_DESC_SWAP_OPERANDS
},
15962 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
15963 BUILTIN_DESC_SWAP_OPERANDS
},
15964 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
15966 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
15967 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
15968 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
15969 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
15971 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
15972 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
15973 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
15974 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
15976 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
15977 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
15978 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
15979 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
15980 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
15983 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
15984 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
15985 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
15986 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
15987 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
15988 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
15989 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
15990 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
15992 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
15993 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
15994 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
15995 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
15996 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
15997 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
15998 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
15999 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16001 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16002 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16003 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16005 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16006 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16007 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16008 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16010 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16011 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16013 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16014 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16015 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16016 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16017 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16018 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16020 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16021 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16022 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16023 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16025 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16026 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16027 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16028 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16029 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16030 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16033 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16034 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16035 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16037 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16038 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16039 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16041 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16042 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16043 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16044 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16045 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16046 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16048 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16049 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16050 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16051 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16052 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16053 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16055 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16056 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16057 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16058 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16060 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16061 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16064 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16065 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16066 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16067 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16068 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16069 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16070 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16071 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16073 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16074 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16075 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16076 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16077 BUILTIN_DESC_SWAP_OPERANDS
},
16078 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16079 BUILTIN_DESC_SWAP_OPERANDS
},
16080 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16081 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16082 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16083 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16084 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16085 BUILTIN_DESC_SWAP_OPERANDS
},
16086 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16087 BUILTIN_DESC_SWAP_OPERANDS
},
16088 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16089 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16090 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16091 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16092 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16093 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16094 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16095 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16096 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16098 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16099 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16100 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16101 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16103 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16104 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16105 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16106 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16108 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16109 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16110 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16113 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16114 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16115 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16116 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16117 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16118 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16119 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16120 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16122 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16123 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16124 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16125 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16126 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16127 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16128 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16129 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16131 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16132 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16134 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16135 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16136 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16137 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16139 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16140 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16142 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16143 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16144 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16145 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16146 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16147 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16149 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16150 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16151 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16152 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16154 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16155 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16156 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16157 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16158 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16159 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16160 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16161 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16163 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16164 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16165 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16167 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16168 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16170 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16171 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16173 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16174 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16175 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16177 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16178 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16179 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16181 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16182 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16184 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16186 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16187 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16188 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16189 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16192 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16193 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16194 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16195 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16196 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16197 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16200 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16201 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16202 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16203 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16204 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16205 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16206 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16207 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16208 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16209 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16210 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16211 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16212 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16213 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16214 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16215 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16216 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16217 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16218 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16219 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16220 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16221 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16222 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16223 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
/* Table of single-operand (unary) MMX/SSE builtins.  Each entry gives the
   ISA mask that must be enabled, the insn pattern implementing the builtin,
   an optional explicit name (0 means the name is registered elsewhere /
   derived by the generic loop in ix86_init_mmx_sse_builtins), and the
   IX86_BUILTIN_* code.
   NOTE(review): the array's opening and closing braces are not visible in
   this excerpt — the extraction appears lossy; verify against the full
   file before editing.  */
16226 static const struct builtin_description bdesc_1arg
[] =
/* SSE (some shared with 3DNow!-A) unary operations: movemask, sqrt,
   reciprocal estimates, and scalar/packed float<->int conversions.  */
16228 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16229 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16231 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16232 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16233 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16235 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16236 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
/* 64-bit-only variants additionally require MASK_64BIT.  */
16237 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16238 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16239 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16240 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
/* SSE2 unary operations: 128-bit movemask, sqrt, and the packed
   double-precision conversion family.  */
16242 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16243 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16245 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16247 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16248 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16250 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16251 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16252 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16253 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16254 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16256 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16258 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16259 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16260 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16261 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16263 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16264 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16265 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
/* SSE3 duplication moves.  */
16268 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16269 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
/* SSSE3 packed absolute value; these entries carry an explicit builtin
   name string, unlike the earlier entries which use 0.  */
16272 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16273 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16274 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16275 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16276 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16277 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
/* Target hook entry point that registers the i386 builtin functions;
   the visible body delegates to ix86_init_mmx_sse_builtins.
   NOTE(review): only two fragment lines of this function survive in this
   excerpt (the signature and one call) — the return type, braces, and any
   other body lines were lost by the extraction; consult the full file.  */
16281 ix86_init_builtins (void)
16284 ix86_init_mmx_sse_builtins ();
16287 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16288 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16291 ix86_init_mmx_sse_builtins (void)
16293 const struct builtin_description
* d
;
16296 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16297 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16298 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16299 tree V2DI_type_node
16300 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16301 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16302 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16303 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16304 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16305 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16306 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16308 tree pchar_type_node
= build_pointer_type (char_type_node
);
16309 tree pcchar_type_node
= build_pointer_type (
16310 build_type_variant (char_type_node
, 1, 0));
16311 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16312 tree pcfloat_type_node
= build_pointer_type (
16313 build_type_variant (float_type_node
, 1, 0));
16314 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16315 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16316 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16319 tree int_ftype_v4sf_v4sf
16320 = build_function_type_list (integer_type_node
,
16321 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16322 tree v4si_ftype_v4sf_v4sf
16323 = build_function_type_list (V4SI_type_node
,
16324 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16325 /* MMX/SSE/integer conversions. */
16326 tree int_ftype_v4sf
16327 = build_function_type_list (integer_type_node
,
16328 V4SF_type_node
, NULL_TREE
);
16329 tree int64_ftype_v4sf
16330 = build_function_type_list (long_long_integer_type_node
,
16331 V4SF_type_node
, NULL_TREE
);
16332 tree int_ftype_v8qi
16333 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16334 tree v4sf_ftype_v4sf_int
16335 = build_function_type_list (V4SF_type_node
,
16336 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16337 tree v4sf_ftype_v4sf_int64
16338 = build_function_type_list (V4SF_type_node
,
16339 V4SF_type_node
, long_long_integer_type_node
,
16341 tree v4sf_ftype_v4sf_v2si
16342 = build_function_type_list (V4SF_type_node
,
16343 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16345 /* Miscellaneous. */
16346 tree v8qi_ftype_v4hi_v4hi
16347 = build_function_type_list (V8QI_type_node
,
16348 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16349 tree v4hi_ftype_v2si_v2si
16350 = build_function_type_list (V4HI_type_node
,
16351 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16352 tree v4sf_ftype_v4sf_v4sf_int
16353 = build_function_type_list (V4SF_type_node
,
16354 V4SF_type_node
, V4SF_type_node
,
16355 integer_type_node
, NULL_TREE
);
16356 tree v2si_ftype_v4hi_v4hi
16357 = build_function_type_list (V2SI_type_node
,
16358 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16359 tree v4hi_ftype_v4hi_int
16360 = build_function_type_list (V4HI_type_node
,
16361 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16362 tree v4hi_ftype_v4hi_di
16363 = build_function_type_list (V4HI_type_node
,
16364 V4HI_type_node
, long_long_unsigned_type_node
,
16366 tree v2si_ftype_v2si_di
16367 = build_function_type_list (V2SI_type_node
,
16368 V2SI_type_node
, long_long_unsigned_type_node
,
16370 tree void_ftype_void
16371 = build_function_type (void_type_node
, void_list_node
);
16372 tree void_ftype_unsigned
16373 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16374 tree void_ftype_unsigned_unsigned
16375 = build_function_type_list (void_type_node
, unsigned_type_node
,
16376 unsigned_type_node
, NULL_TREE
);
16377 tree void_ftype_pcvoid_unsigned_unsigned
16378 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16379 unsigned_type_node
, unsigned_type_node
,
16381 tree unsigned_ftype_void
16382 = build_function_type (unsigned_type_node
, void_list_node
);
16383 tree v2si_ftype_v4sf
16384 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16385 /* Loads/stores. */
16386 tree void_ftype_v8qi_v8qi_pchar
16387 = build_function_type_list (void_type_node
,
16388 V8QI_type_node
, V8QI_type_node
,
16389 pchar_type_node
, NULL_TREE
);
16390 tree v4sf_ftype_pcfloat
16391 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16392 /* @@@ the type is bogus */
16393 tree v4sf_ftype_v4sf_pv2si
16394 = build_function_type_list (V4SF_type_node
,
16395 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16396 tree void_ftype_pv2si_v4sf
16397 = build_function_type_list (void_type_node
,
16398 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16399 tree void_ftype_pfloat_v4sf
16400 = build_function_type_list (void_type_node
,
16401 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16402 tree void_ftype_pdi_di
16403 = build_function_type_list (void_type_node
,
16404 pdi_type_node
, long_long_unsigned_type_node
,
16406 tree void_ftype_pv2di_v2di
16407 = build_function_type_list (void_type_node
,
16408 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16409 /* Normal vector unops. */
16410 tree v4sf_ftype_v4sf
16411 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16412 tree v16qi_ftype_v16qi
16413 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16414 tree v8hi_ftype_v8hi
16415 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16416 tree v4si_ftype_v4si
16417 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16418 tree v8qi_ftype_v8qi
16419 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16420 tree v4hi_ftype_v4hi
16421 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16423 /* Normal vector binops. */
16424 tree v4sf_ftype_v4sf_v4sf
16425 = build_function_type_list (V4SF_type_node
,
16426 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16427 tree v8qi_ftype_v8qi_v8qi
16428 = build_function_type_list (V8QI_type_node
,
16429 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16430 tree v4hi_ftype_v4hi_v4hi
16431 = build_function_type_list (V4HI_type_node
,
16432 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16433 tree v2si_ftype_v2si_v2si
16434 = build_function_type_list (V2SI_type_node
,
16435 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16436 tree di_ftype_di_di
16437 = build_function_type_list (long_long_unsigned_type_node
,
16438 long_long_unsigned_type_node
,
16439 long_long_unsigned_type_node
, NULL_TREE
);
16441 tree di_ftype_di_di_int
16442 = build_function_type_list (long_long_unsigned_type_node
,
16443 long_long_unsigned_type_node
,
16444 long_long_unsigned_type_node
,
16445 integer_type_node
, NULL_TREE
);
16447 tree v2si_ftype_v2sf
16448 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16449 tree v2sf_ftype_v2si
16450 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16451 tree v2si_ftype_v2si
16452 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16453 tree v2sf_ftype_v2sf
16454 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16455 tree v2sf_ftype_v2sf_v2sf
16456 = build_function_type_list (V2SF_type_node
,
16457 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16458 tree v2si_ftype_v2sf_v2sf
16459 = build_function_type_list (V2SI_type_node
,
16460 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16461 tree pint_type_node
= build_pointer_type (integer_type_node
);
16462 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16463 tree pcdouble_type_node
= build_pointer_type (
16464 build_type_variant (double_type_node
, 1, 0));
16465 tree int_ftype_v2df_v2df
16466 = build_function_type_list (integer_type_node
,
16467 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16469 tree void_ftype_pcvoid
16470 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16471 tree v4sf_ftype_v4si
16472 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16473 tree v4si_ftype_v4sf
16474 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16475 tree v2df_ftype_v4si
16476 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16477 tree v4si_ftype_v2df
16478 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16479 tree v2si_ftype_v2df
16480 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16481 tree v4sf_ftype_v2df
16482 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16483 tree v2df_ftype_v2si
16484 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16485 tree v2df_ftype_v4sf
16486 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16487 tree int_ftype_v2df
16488 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16489 tree int64_ftype_v2df
16490 = build_function_type_list (long_long_integer_type_node
,
16491 V2DF_type_node
, NULL_TREE
);
16492 tree v2df_ftype_v2df_int
16493 = build_function_type_list (V2DF_type_node
,
16494 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16495 tree v2df_ftype_v2df_int64
16496 = build_function_type_list (V2DF_type_node
,
16497 V2DF_type_node
, long_long_integer_type_node
,
16499 tree v4sf_ftype_v4sf_v2df
16500 = build_function_type_list (V4SF_type_node
,
16501 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16502 tree v2df_ftype_v2df_v4sf
16503 = build_function_type_list (V2DF_type_node
,
16504 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16505 tree v2df_ftype_v2df_v2df_int
16506 = build_function_type_list (V2DF_type_node
,
16507 V2DF_type_node
, V2DF_type_node
,
16510 tree v2df_ftype_v2df_pcdouble
16511 = build_function_type_list (V2DF_type_node
,
16512 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16513 tree void_ftype_pdouble_v2df
16514 = build_function_type_list (void_type_node
,
16515 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16516 tree void_ftype_pint_int
16517 = build_function_type_list (void_type_node
,
16518 pint_type_node
, integer_type_node
, NULL_TREE
);
16519 tree void_ftype_v16qi_v16qi_pchar
16520 = build_function_type_list (void_type_node
,
16521 V16QI_type_node
, V16QI_type_node
,
16522 pchar_type_node
, NULL_TREE
);
16523 tree v2df_ftype_pcdouble
16524 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16525 tree v2df_ftype_v2df_v2df
16526 = build_function_type_list (V2DF_type_node
,
16527 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16528 tree v16qi_ftype_v16qi_v16qi
16529 = build_function_type_list (V16QI_type_node
,
16530 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16531 tree v8hi_ftype_v8hi_v8hi
16532 = build_function_type_list (V8HI_type_node
,
16533 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16534 tree v4si_ftype_v4si_v4si
16535 = build_function_type_list (V4SI_type_node
,
16536 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16537 tree v2di_ftype_v2di_v2di
16538 = build_function_type_list (V2DI_type_node
,
16539 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16540 tree v2di_ftype_v2df_v2df
16541 = build_function_type_list (V2DI_type_node
,
16542 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16543 tree v2df_ftype_v2df
16544 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16545 tree v2di_ftype_v2di_int
16546 = build_function_type_list (V2DI_type_node
,
16547 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16548 tree v2di_ftype_v2di_v2di_int
16549 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16550 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16551 tree v4si_ftype_v4si_int
16552 = build_function_type_list (V4SI_type_node
,
16553 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16554 tree v8hi_ftype_v8hi_int
16555 = build_function_type_list (V8HI_type_node
,
16556 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16557 tree v8hi_ftype_v8hi_v2di
16558 = build_function_type_list (V8HI_type_node
,
16559 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
16560 tree v4si_ftype_v4si_v2di
16561 = build_function_type_list (V4SI_type_node
,
16562 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
16563 tree v4si_ftype_v8hi_v8hi
16564 = build_function_type_list (V4SI_type_node
,
16565 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16566 tree di_ftype_v8qi_v8qi
16567 = build_function_type_list (long_long_unsigned_type_node
,
16568 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16569 tree di_ftype_v2si_v2si
16570 = build_function_type_list (long_long_unsigned_type_node
,
16571 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16572 tree v2di_ftype_v16qi_v16qi
16573 = build_function_type_list (V2DI_type_node
,
16574 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16575 tree v2di_ftype_v4si_v4si
16576 = build_function_type_list (V2DI_type_node
,
16577 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16578 tree int_ftype_v16qi
16579 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16580 tree v16qi_ftype_pcchar
16581 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16582 tree void_ftype_pchar_v16qi
16583 = build_function_type_list (void_type_node
,
16584 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16586 tree v2di_ftype_v2di_unsigned_unsigned
16587 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16588 unsigned_type_node
, unsigned_type_node
,
16590 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16591 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
16592 unsigned_type_node
, unsigned_type_node
,
16594 tree v2di_ftype_v2di_v16qi
16595 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
16599 tree float128_type
;
16602 /* The __float80 type. */
16603 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16604 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16608 /* The __float80 type. */
16609 float80_type
= make_node (REAL_TYPE
);
16610 TYPE_PRECISION (float80_type
) = 80;
16611 layout_type (float80_type
);
16612 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16617 float128_type
= make_node (REAL_TYPE
);
16618 TYPE_PRECISION (float128_type
) = 128;
16619 layout_type (float128_type
);
16620 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16623 /* Add all builtins that are more or less simple operations on two
16625 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16627 /* Use one of the operands; the target can have a different mode for
16628 mask-generating compares. */
16629 enum machine_mode mode
;
16634 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16639 type
= v16qi_ftype_v16qi_v16qi
;
16642 type
= v8hi_ftype_v8hi_v8hi
;
16645 type
= v4si_ftype_v4si_v4si
;
16648 type
= v2di_ftype_v2di_v2di
;
16651 type
= v2df_ftype_v2df_v2df
;
16654 type
= v4sf_ftype_v4sf_v4sf
;
16657 type
= v8qi_ftype_v8qi_v8qi
;
16660 type
= v4hi_ftype_v4hi_v4hi
;
16663 type
= v2si_ftype_v2si_v2si
;
16666 type
= di_ftype_di_di
;
16670 gcc_unreachable ();
16673 /* Override for comparisons. */
16674 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16675 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
16676 type
= v4si_ftype_v4sf_v4sf
;
16678 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16679 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16680 type
= v2di_ftype_v2df_v2df
;
16682 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16685 /* Add all builtins that are more or less simple operations on 1 operand. */
16686 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16688 enum machine_mode mode
;
16693 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16698 type
= v16qi_ftype_v16qi
;
16701 type
= v8hi_ftype_v8hi
;
16704 type
= v4si_ftype_v4si
;
16707 type
= v2df_ftype_v2df
;
16710 type
= v4sf_ftype_v4sf
;
16713 type
= v8qi_ftype_v8qi
;
16716 type
= v4hi_ftype_v4hi
;
16719 type
= v2si_ftype_v2si
;
16726 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16729 /* Add the remaining MMX insns with somewhat more complicated types. */
16730 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
16731 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
16732 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
16733 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
16735 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
16736 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
16737 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
16739 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
16740 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
16742 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
16743 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
16745 /* comi/ucomi insns. */
16746 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16747 if (d
->mask
== MASK_SSE2
)
16748 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
16750 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
16752 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
16753 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
16754 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
16756 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
16757 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
16758 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
16759 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
16760 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
16761 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
16762 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
16763 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
16764 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
16765 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
16766 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
16768 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
16770 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
16771 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
16773 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
16774 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
16775 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
16776 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
16778 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
16779 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
16780 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
16781 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
16783 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
16785 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
16787 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
16788 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
16789 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
16790 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
16791 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
16792 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
16794 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
16796 /* Original 3DNow! */
16797 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
16798 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
16799 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
16800 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
16801 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
16802 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
16803 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
16804 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
16805 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
16806 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
16807 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
16808 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
16809 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
16810 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
16811 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
16812 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
16813 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
16814 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
16815 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
16816 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
16818 /* 3DNow! extension as used in the Athlon CPU. */
16819 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
16820 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
16821 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
16822 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
16823 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
16824 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
16827 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
16829 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
16830 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
16832 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
16833 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
16835 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
16836 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
16837 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
16838 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
16839 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
16841 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
16842 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
16843 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
16844 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
16846 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
16847 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
16849 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
16851 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
16852 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
16854 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
16855 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
16856 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
16857 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
16858 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
16860 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
16862 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
16863 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
16864 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
16865 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
16867 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
16868 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
16869 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
16871 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
16872 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
16873 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
16874 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
16876 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
16877 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
16878 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
16880 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
16881 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
16883 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
16884 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
16886 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
16887 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
16888 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
16890 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
16891 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
16892 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
16894 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
16895 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
16897 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
16898 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
16899 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
16900 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
16902 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
16903 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
16904 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
16905 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
16907 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
16908 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
16910 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
16912 /* Prescott New Instructions. */
16913 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
16914 void_ftype_pcvoid_unsigned_unsigned
,
16915 IX86_BUILTIN_MONITOR
);
16916 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
16917 void_ftype_unsigned_unsigned
,
16918 IX86_BUILTIN_MWAIT
);
16919 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
16921 IX86_BUILTIN_MOVSHDUP
);
16922 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
16924 IX86_BUILTIN_MOVSLDUP
);
16925 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
16926 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
16929 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
16930 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
16931 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
16932 IX86_BUILTIN_PALIGNR
);
16934 /* AMDFAM10 SSE4A New built-ins */
16935 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
16936 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
16937 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
16938 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
16939 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
16940 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
16941 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
16942 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
16943 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
16944 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
16945 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
16946 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
16948 /* Access to the vec_init patterns. */
16949 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
16950 integer_type_node
, NULL_TREE
);
16951 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
16952 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
16954 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
16955 short_integer_type_node
,
16956 short_integer_type_node
,
16957 short_integer_type_node
, NULL_TREE
);
16958 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
16959 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
16961 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
16962 char_type_node
, char_type_node
,
16963 char_type_node
, char_type_node
,
16964 char_type_node
, char_type_node
,
16965 char_type_node
, NULL_TREE
);
16966 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
16967 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
16969 /* Access to the vec_extract patterns. */
16970 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
16971 integer_type_node
, NULL_TREE
);
16972 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
16973 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
16975 ftype
= build_function_type_list (long_long_integer_type_node
,
16976 V2DI_type_node
, integer_type_node
,
16978 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
16979 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
16981 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
16982 integer_type_node
, NULL_TREE
);
16983 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
16984 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
16986 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
16987 integer_type_node
, NULL_TREE
);
16988 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
16989 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
16991 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
16992 integer_type_node
, NULL_TREE
);
16993 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
16994 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
16996 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
16997 integer_type_node
, NULL_TREE
);
16998 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
16999 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17001 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17002 integer_type_node
, NULL_TREE
);
17003 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17004 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17006 /* Access to the vec_set patterns. */
17007 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17009 integer_type_node
, NULL_TREE
);
17010 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17011 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17013 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17015 integer_type_node
, NULL_TREE
);
17016 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17017 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17020 /* Errors in the source file can cause expand_expr to return const0_rtx
17021 where we expect a vector. To avoid crashing, use one of the vector
17022 clear instructions. */
17024 safe_vector_operand (rtx x
, enum machine_mode mode
)
17026 if (x
== const0_rtx
)
17027 x
= CONST0_RTX (mode
);
17031 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17034 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
17037 tree arg0
= TREE_VALUE (arglist
);
17038 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17039 rtx op0
= expand_normal (arg0
);
17040 rtx op1
= expand_normal (arg1
);
17041 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17042 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17043 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17045 if (VECTOR_MODE_P (mode0
))
17046 op0
= safe_vector_operand (op0
, mode0
);
17047 if (VECTOR_MODE_P (mode1
))
17048 op1
= safe_vector_operand (op1
, mode1
);
17050 if (optimize
|| !target
17051 || GET_MODE (target
) != tmode
17052 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17053 target
= gen_reg_rtx (tmode
);
17055 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17057 rtx x
= gen_reg_rtx (V4SImode
);
17058 emit_insn (gen_sse2_loadd (x
, op1
));
17059 op1
= gen_lowpart (TImode
, x
);
17062 /* The insn must want input operands in the same modes as the
17064 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17065 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17067 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17068 op0
= copy_to_mode_reg (mode0
, op0
);
17069 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17070 op1
= copy_to_mode_reg (mode1
, op1
);
17072 /* ??? Using ix86_fixup_binary_operands is problematic when
17073 we've got mismatched modes. Fake it. */
17079 if (tmode
== mode0
&& tmode
== mode1
)
17081 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17085 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17087 op0
= force_reg (mode0
, op0
);
17088 op1
= force_reg (mode1
, op1
);
17089 target
= gen_reg_rtx (tmode
);
17092 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17099 /* Subroutine of ix86_expand_builtin to take care of stores. */
17102 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
17105 tree arg0
= TREE_VALUE (arglist
);
17106 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17107 rtx op0
= expand_normal (arg0
);
17108 rtx op1
= expand_normal (arg1
);
17109 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17110 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17112 if (VECTOR_MODE_P (mode1
))
17113 op1
= safe_vector_operand (op1
, mode1
);
17115 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17116 op1
= copy_to_mode_reg (mode1
, op1
);
17118 pat
= GEN_FCN (icode
) (op0
, op1
);
17124 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17127 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
17128 rtx target
, int do_load
)
17131 tree arg0
= TREE_VALUE (arglist
);
17132 rtx op0
= expand_normal (arg0
);
17133 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17134 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17136 if (optimize
|| !target
17137 || GET_MODE (target
) != tmode
17138 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17139 target
= gen_reg_rtx (tmode
);
17141 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17144 if (VECTOR_MODE_P (mode0
))
17145 op0
= safe_vector_operand (op0
, mode0
);
17147 if ((optimize
&& !register_operand (op0
, mode0
))
17148 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17149 op0
= copy_to_mode_reg (mode0
, op0
);
17152 pat
= GEN_FCN (icode
) (target
, op0
);
17159 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17160 sqrtss, rsqrtss, rcpss. */
17163 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
17166 tree arg0
= TREE_VALUE (arglist
);
17167 rtx op1
, op0
= expand_normal (arg0
);
17168 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17169 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17171 if (optimize
|| !target
17172 || GET_MODE (target
) != tmode
17173 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17174 target
= gen_reg_rtx (tmode
);
17176 if (VECTOR_MODE_P (mode0
))
17177 op0
= safe_vector_operand (op0
, mode0
);
17179 if ((optimize
&& !register_operand (op0
, mode0
))
17180 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17181 op0
= copy_to_mode_reg (mode0
, op0
);
17184 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17185 op1
= copy_to_mode_reg (mode0
, op1
);
17187 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17194 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17197 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
17201 tree arg0
= TREE_VALUE (arglist
);
17202 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17203 rtx op0
= expand_normal (arg0
);
17204 rtx op1
= expand_normal (arg1
);
17206 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17207 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17208 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17209 enum rtx_code comparison
= d
->comparison
;
17211 if (VECTOR_MODE_P (mode0
))
17212 op0
= safe_vector_operand (op0
, mode0
);
17213 if (VECTOR_MODE_P (mode1
))
17214 op1
= safe_vector_operand (op1
, mode1
);
17216 /* Swap operands if we have a comparison that isn't available in
17218 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17220 rtx tmp
= gen_reg_rtx (mode1
);
17221 emit_move_insn (tmp
, op1
);
17226 if (optimize
|| !target
17227 || GET_MODE (target
) != tmode
17228 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17229 target
= gen_reg_rtx (tmode
);
17231 if ((optimize
&& !register_operand (op0
, mode0
))
17232 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17233 op0
= copy_to_mode_reg (mode0
, op0
);
17234 if ((optimize
&& !register_operand (op1
, mode1
))
17235 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17236 op1
= copy_to_mode_reg (mode1
, op1
);
17238 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17239 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17246 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17249 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
17253 tree arg0
= TREE_VALUE (arglist
);
17254 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17255 rtx op0
= expand_normal (arg0
);
17256 rtx op1
= expand_normal (arg1
);
17258 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17259 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17260 enum rtx_code comparison
= d
->comparison
;
17262 if (VECTOR_MODE_P (mode0
))
17263 op0
= safe_vector_operand (op0
, mode0
);
17264 if (VECTOR_MODE_P (mode1
))
17265 op1
= safe_vector_operand (op1
, mode1
);
17267 /* Swap operands if we have a comparison that isn't available in
17269 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17276 target
= gen_reg_rtx (SImode
);
17277 emit_move_insn (target
, const0_rtx
);
17278 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17280 if ((optimize
&& !register_operand (op0
, mode0
))
17281 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17282 op0
= copy_to_mode_reg (mode0
, op0
);
17283 if ((optimize
&& !register_operand (op1
, mode1
))
17284 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17285 op1
= copy_to_mode_reg (mode1
, op1
);
17287 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17288 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17292 emit_insn (gen_rtx_SET (VOIDmode
,
17293 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17294 gen_rtx_fmt_ee (comparison
, QImode
,
17298 return SUBREG_REG (target
);
17301 /* Return the integer constant in ARG. Constrain it to be in the range
17302 of the subparts of VEC_TYPE; issue an error if not. */
17305 get_element_number (tree vec_type
, tree arg
)
17307 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17309 if (!host_integerp (arg
, 1)
17310 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17312 error ("selector must be an integer constant in the range 0..%wi", max
);
17319 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17320 ix86_expand_vector_init. We DO have language-level syntax for this, in
17321 the form of (type){ init-list }. Except that since we can't place emms
17322 instructions from inside the compiler, we can't allow the use of MMX
17323 registers unless the user explicitly asks for it. So we do *not* define
17324 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17325 we have builtins invoked by mmintrin.h that gives us license to emit
17326 these sorts of instructions. */
17329 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
17331 enum machine_mode tmode
= TYPE_MODE (type
);
17332 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17333 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17334 rtvec v
= rtvec_alloc (n_elt
);
17336 gcc_assert (VECTOR_MODE_P (tmode
));
17338 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
17340 rtx x
= expand_normal (TREE_VALUE (arglist
));
17341 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17344 gcc_assert (arglist
== NULL
);
17346 if (!target
|| !register_operand (target
, tmode
))
17347 target
= gen_reg_rtx (tmode
);
17349 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17353 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17354 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17355 had a language-level syntax for referencing vector elements. */
17358 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
17360 enum machine_mode tmode
, mode0
;
17365 arg0
= TREE_VALUE (arglist
);
17366 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17368 op0
= expand_normal (arg0
);
17369 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17371 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17372 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17373 gcc_assert (VECTOR_MODE_P (mode0
));
17375 op0
= force_reg (mode0
, op0
);
17377 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17378 target
= gen_reg_rtx (tmode
);
17380 ix86_expand_vector_extract (true, target
, op0
, elt
);
17385 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17386 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17387 a language-level syntax for referencing vector elements. */
17390 ix86_expand_vec_set_builtin (tree arglist
)
17392 enum machine_mode tmode
, mode1
;
17393 tree arg0
, arg1
, arg2
;
17397 arg0
= TREE_VALUE (arglist
);
17398 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17399 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17401 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17402 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17403 gcc_assert (VECTOR_MODE_P (tmode
));
17405 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17406 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17407 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17409 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17410 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17412 op0
= force_reg (tmode
, op0
);
17413 op1
= force_reg (mode1
, op1
);
17415 ix86_expand_vector_set (true, op0
, op1
, elt
);
17420 /* Expand an expression EXP that calls a built-in function,
17421 with result going to TARGET if that's convenient
17422 (and in mode MODE if that's convenient).
17423 SUBTARGET may be used as the target for computing one of EXP's operands.
17424 IGNORE is nonzero if the value is to be ignored. */
17427 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17428 enum machine_mode mode ATTRIBUTE_UNUSED
,
17429 int ignore ATTRIBUTE_UNUSED
)
17431 const struct builtin_description
*d
;
17433 enum insn_code icode
;
17434 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
17435 tree arglist
= TREE_OPERAND (exp
, 1);
17436 tree arg0
, arg1
, arg2
, arg3
;
17437 rtx op0
, op1
, op2
, op3
, pat
;
17438 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17439 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17443 case IX86_BUILTIN_EMMS
:
17444 emit_insn (gen_mmx_emms ());
17447 case IX86_BUILTIN_SFENCE
:
17448 emit_insn (gen_sse_sfence ());
17451 case IX86_BUILTIN_MASKMOVQ
:
17452 case IX86_BUILTIN_MASKMOVDQU
:
17453 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17454 ? CODE_FOR_mmx_maskmovq
17455 : CODE_FOR_sse2_maskmovdqu
);
17456 /* Note the arg order is different from the operand order. */
17457 arg1
= TREE_VALUE (arglist
);
17458 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
17459 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17460 op0
= expand_normal (arg0
);
17461 op1
= expand_normal (arg1
);
17462 op2
= expand_normal (arg2
);
17463 mode0
= insn_data
[icode
].operand
[0].mode
;
17464 mode1
= insn_data
[icode
].operand
[1].mode
;
17465 mode2
= insn_data
[icode
].operand
[2].mode
;
17467 op0
= force_reg (Pmode
, op0
);
17468 op0
= gen_rtx_MEM (mode1
, op0
);
17470 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17471 op0
= copy_to_mode_reg (mode0
, op0
);
17472 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17473 op1
= copy_to_mode_reg (mode1
, op1
);
17474 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17475 op2
= copy_to_mode_reg (mode2
, op2
);
17476 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17482 case IX86_BUILTIN_SQRTSS
:
17483 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
17484 case IX86_BUILTIN_RSQRTSS
:
17485 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
17486 case IX86_BUILTIN_RCPSS
:
17487 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
17489 case IX86_BUILTIN_LOADUPS
:
17490 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
17492 case IX86_BUILTIN_STOREUPS
:
17493 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
17495 case IX86_BUILTIN_LOADHPS
:
17496 case IX86_BUILTIN_LOADLPS
:
17497 case IX86_BUILTIN_LOADHPD
:
17498 case IX86_BUILTIN_LOADLPD
:
17499 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17500 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17501 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17502 : CODE_FOR_sse2_loadlpd
);
17503 arg0
= TREE_VALUE (arglist
);
17504 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17505 op0
= expand_normal (arg0
);
17506 op1
= expand_normal (arg1
);
17507 tmode
= insn_data
[icode
].operand
[0].mode
;
17508 mode0
= insn_data
[icode
].operand
[1].mode
;
17509 mode1
= insn_data
[icode
].operand
[2].mode
;
17511 op0
= force_reg (mode0
, op0
);
17512 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17513 if (optimize
|| target
== 0
17514 || GET_MODE (target
) != tmode
17515 || !register_operand (target
, tmode
))
17516 target
= gen_reg_rtx (tmode
);
17517 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17523 case IX86_BUILTIN_STOREHPS
:
17524 case IX86_BUILTIN_STORELPS
:
17525 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17526 : CODE_FOR_sse_storelps
);
17527 arg0
= TREE_VALUE (arglist
);
17528 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17529 op0
= expand_normal (arg0
);
17530 op1
= expand_normal (arg1
);
17531 mode0
= insn_data
[icode
].operand
[0].mode
;
17532 mode1
= insn_data
[icode
].operand
[1].mode
;
17534 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17535 op1
= force_reg (mode1
, op1
);
17537 pat
= GEN_FCN (icode
) (op0
, op1
);
17543 case IX86_BUILTIN_MOVNTPS
:
17544 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
17545 case IX86_BUILTIN_MOVNTQ
:
17546 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
17548 case IX86_BUILTIN_LDMXCSR
:
17549 op0
= expand_normal (TREE_VALUE (arglist
));
17550 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17551 emit_move_insn (target
, op0
);
17552 emit_insn (gen_sse_ldmxcsr (target
));
17555 case IX86_BUILTIN_STMXCSR
:
17556 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17557 emit_insn (gen_sse_stmxcsr (target
));
17558 return copy_to_mode_reg (SImode
, target
);
17560 case IX86_BUILTIN_SHUFPS
:
17561 case IX86_BUILTIN_SHUFPD
:
17562 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17563 ? CODE_FOR_sse_shufps
17564 : CODE_FOR_sse2_shufpd
);
17565 arg0
= TREE_VALUE (arglist
);
17566 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17567 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17568 op0
= expand_normal (arg0
);
17569 op1
= expand_normal (arg1
);
17570 op2
= expand_normal (arg2
);
17571 tmode
= insn_data
[icode
].operand
[0].mode
;
17572 mode0
= insn_data
[icode
].operand
[1].mode
;
17573 mode1
= insn_data
[icode
].operand
[2].mode
;
17574 mode2
= insn_data
[icode
].operand
[3].mode
;
17576 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17577 op0
= copy_to_mode_reg (mode0
, op0
);
17578 if ((optimize
&& !register_operand (op1
, mode1
))
17579 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17580 op1
= copy_to_mode_reg (mode1
, op1
);
17581 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17583 /* @@@ better error message */
17584 error ("mask must be an immediate");
17585 return gen_reg_rtx (tmode
);
17587 if (optimize
|| target
== 0
17588 || GET_MODE (target
) != tmode
17589 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17590 target
= gen_reg_rtx (tmode
);
17591 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17597 case IX86_BUILTIN_PSHUFW
:
17598 case IX86_BUILTIN_PSHUFD
:
17599 case IX86_BUILTIN_PSHUFHW
:
17600 case IX86_BUILTIN_PSHUFLW
:
17601 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17602 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17603 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17604 : CODE_FOR_mmx_pshufw
);
17605 arg0
= TREE_VALUE (arglist
);
17606 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17607 op0
= expand_normal (arg0
);
17608 op1
= expand_normal (arg1
);
17609 tmode
= insn_data
[icode
].operand
[0].mode
;
17610 mode1
= insn_data
[icode
].operand
[1].mode
;
17611 mode2
= insn_data
[icode
].operand
[2].mode
;
17613 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17614 op0
= copy_to_mode_reg (mode1
, op0
);
17615 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17617 /* @@@ better error message */
17618 error ("mask must be an immediate");
17622 || GET_MODE (target
) != tmode
17623 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17624 target
= gen_reg_rtx (tmode
);
17625 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17631 case IX86_BUILTIN_PSLLDQI128
:
17632 case IX86_BUILTIN_PSRLDQI128
:
17633 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17634 : CODE_FOR_sse2_lshrti3
);
17635 arg0
= TREE_VALUE (arglist
);
17636 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17637 op0
= expand_normal (arg0
);
17638 op1
= expand_normal (arg1
);
17639 tmode
= insn_data
[icode
].operand
[0].mode
;
17640 mode1
= insn_data
[icode
].operand
[1].mode
;
17641 mode2
= insn_data
[icode
].operand
[2].mode
;
17643 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17645 op0
= copy_to_reg (op0
);
17646 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17648 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17650 error ("shift must be an immediate");
17653 target
= gen_reg_rtx (V2DImode
);
17654 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
17660 case IX86_BUILTIN_FEMMS
:
17661 emit_insn (gen_mmx_femms ());
17664 case IX86_BUILTIN_PAVGUSB
:
17665 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
17667 case IX86_BUILTIN_PF2ID
:
17668 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
17670 case IX86_BUILTIN_PFACC
:
17671 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
17673 case IX86_BUILTIN_PFADD
:
17674 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
17676 case IX86_BUILTIN_PFCMPEQ
:
17677 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
17679 case IX86_BUILTIN_PFCMPGE
:
17680 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
17682 case IX86_BUILTIN_PFCMPGT
:
17683 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
17685 case IX86_BUILTIN_PFMAX
:
17686 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
17688 case IX86_BUILTIN_PFMIN
:
17689 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
17691 case IX86_BUILTIN_PFMUL
:
17692 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
17694 case IX86_BUILTIN_PFRCP
:
17695 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
17697 case IX86_BUILTIN_PFRCPIT1
:
17698 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
17700 case IX86_BUILTIN_PFRCPIT2
:
17701 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
17703 case IX86_BUILTIN_PFRSQIT1
:
17704 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
17706 case IX86_BUILTIN_PFRSQRT
:
17707 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
17709 case IX86_BUILTIN_PFSUB
:
17710 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
17712 case IX86_BUILTIN_PFSUBR
:
17713 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
17715 case IX86_BUILTIN_PI2FD
:
17716 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
17718 case IX86_BUILTIN_PMULHRW
:
17719 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
17721 case IX86_BUILTIN_PF2IW
:
17722 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
17724 case IX86_BUILTIN_PFNACC
:
17725 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
17727 case IX86_BUILTIN_PFPNACC
:
17728 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
17730 case IX86_BUILTIN_PI2FW
:
17731 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
17733 case IX86_BUILTIN_PSWAPDSI
:
17734 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
17736 case IX86_BUILTIN_PSWAPDSF
:
17737 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
17739 case IX86_BUILTIN_SQRTSD
:
17740 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
17741 case IX86_BUILTIN_LOADUPD
:
17742 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
17743 case IX86_BUILTIN_STOREUPD
:
17744 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
17746 case IX86_BUILTIN_MFENCE
:
17747 emit_insn (gen_sse2_mfence ());
17749 case IX86_BUILTIN_LFENCE
:
17750 emit_insn (gen_sse2_lfence ());
17753 case IX86_BUILTIN_CLFLUSH
:
17754 arg0
= TREE_VALUE (arglist
);
17755 op0
= expand_normal (arg0
);
17756 icode
= CODE_FOR_sse2_clflush
;
17757 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
17758 op0
= copy_to_mode_reg (Pmode
, op0
);
17760 emit_insn (gen_sse2_clflush (op0
));
17763 case IX86_BUILTIN_MOVNTPD
:
17764 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
17765 case IX86_BUILTIN_MOVNTDQ
:
17766 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
17767 case IX86_BUILTIN_MOVNTI
:
17768 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
17770 case IX86_BUILTIN_LOADDQU
:
17771 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
17772 case IX86_BUILTIN_STOREDQU
:
17773 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
17775 case IX86_BUILTIN_MONITOR
:
17776 arg0
= TREE_VALUE (arglist
);
17777 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17778 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17779 op0
= expand_normal (arg0
);
17780 op1
= expand_normal (arg1
);
17781 op2
= expand_normal (arg2
);
17783 op0
= copy_to_mode_reg (Pmode
, op0
);
17785 op1
= copy_to_mode_reg (SImode
, op1
);
17787 op2
= copy_to_mode_reg (SImode
, op2
);
17789 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
17791 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
17794 case IX86_BUILTIN_MWAIT
:
17795 arg0
= TREE_VALUE (arglist
);
17796 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17797 op0
= expand_normal (arg0
);
17798 op1
= expand_normal (arg1
);
17800 op0
= copy_to_mode_reg (SImode
, op0
);
17802 op1
= copy_to_mode_reg (SImode
, op1
);
17803 emit_insn (gen_sse3_mwait (op0
, op1
));
17806 case IX86_BUILTIN_LDDQU
:
17807 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
17810 case IX86_BUILTIN_PALIGNR
:
17811 case IX86_BUILTIN_PALIGNR128
:
17812 if (fcode
== IX86_BUILTIN_PALIGNR
)
17814 icode
= CODE_FOR_ssse3_palignrdi
;
17819 icode
= CODE_FOR_ssse3_palignrti
;
17822 arg0
= TREE_VALUE (arglist
);
17823 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17824 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17825 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
17826 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
17827 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
17828 tmode
= insn_data
[icode
].operand
[0].mode
;
17829 mode1
= insn_data
[icode
].operand
[1].mode
;
17830 mode2
= insn_data
[icode
].operand
[2].mode
;
17831 mode3
= insn_data
[icode
].operand
[3].mode
;
17833 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17835 op0
= copy_to_reg (op0
);
17836 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17838 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17840 op1
= copy_to_reg (op1
);
17841 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
17843 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
17845 error ("shift must be an immediate");
17848 target
= gen_reg_rtx (mode
);
17849 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
17856 case IX86_BUILTIN_MOVNTSD
:
17857 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, arglist
);
17859 case IX86_BUILTIN_MOVNTSS
:
17860 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, arglist
);
17862 case IX86_BUILTIN_INSERTQ
:
17863 case IX86_BUILTIN_EXTRQ
:
17864 icode
= (fcode
== IX86_BUILTIN_EXTRQ
17865 ? CODE_FOR_sse4a_extrq
17866 : CODE_FOR_sse4a_insertq
);
17867 arg0
= TREE_VALUE (arglist
);
17868 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17869 op0
= expand_normal (arg0
);
17870 op1
= expand_normal (arg1
);
17871 tmode
= insn_data
[icode
].operand
[0].mode
;
17872 mode1
= insn_data
[icode
].operand
[1].mode
;
17873 mode2
= insn_data
[icode
].operand
[2].mode
;
17874 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17875 op0
= copy_to_mode_reg (mode1
, op0
);
17876 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17877 op1
= copy_to_mode_reg (mode2
, op1
);
17878 if (optimize
|| target
== 0
17879 || GET_MODE (target
) != tmode
17880 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17881 target
= gen_reg_rtx (tmode
);
17882 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17888 case IX86_BUILTIN_EXTRQI
:
17889 icode
= CODE_FOR_sse4a_extrqi
;
17890 arg0
= TREE_VALUE (arglist
);
17891 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17892 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17893 op0
= expand_normal (arg0
);
17894 op1
= expand_normal (arg1
);
17895 op2
= expand_normal (arg2
);
17896 tmode
= insn_data
[icode
].operand
[0].mode
;
17897 mode1
= insn_data
[icode
].operand
[1].mode
;
17898 mode2
= insn_data
[icode
].operand
[2].mode
;
17899 mode3
= insn_data
[icode
].operand
[3].mode
;
17900 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17901 op0
= copy_to_mode_reg (mode1
, op0
);
17902 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17904 error ("index mask must be an immediate");
17905 return gen_reg_rtx (tmode
);
17907 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
17909 error ("length mask must be an immediate");
17910 return gen_reg_rtx (tmode
);
17912 if (optimize
|| target
== 0
17913 || GET_MODE (target
) != tmode
17914 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17915 target
= gen_reg_rtx (tmode
);
17916 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17922 case IX86_BUILTIN_INSERTQI
:
17923 icode
= CODE_FOR_sse4a_insertqi
;
17924 arg0
= TREE_VALUE (arglist
);
17925 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
17926 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
17927 arg3
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
17928 op0
= expand_normal (arg0
);
17929 op1
= expand_normal (arg1
);
17930 op2
= expand_normal (arg2
);
17931 op3
= expand_normal (arg3
);
17932 tmode
= insn_data
[icode
].operand
[0].mode
;
17933 mode1
= insn_data
[icode
].operand
[1].mode
;
17934 mode2
= insn_data
[icode
].operand
[2].mode
;
17935 mode3
= insn_data
[icode
].operand
[3].mode
;
17936 mode4
= insn_data
[icode
].operand
[4].mode
;
17938 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17939 op0
= copy_to_mode_reg (mode1
, op0
);
17941 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17942 op1
= copy_to_mode_reg (mode2
, op1
);
17944 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
17946 error ("index mask must be an immediate");
17947 return gen_reg_rtx (tmode
);
17949 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
17951 error ("length mask must be an immediate");
17952 return gen_reg_rtx (tmode
);
17954 if (optimize
|| target
== 0
17955 || GET_MODE (target
) != tmode
17956 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17957 target
= gen_reg_rtx (tmode
);
17958 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
17964 case IX86_BUILTIN_VEC_INIT_V2SI
:
17965 case IX86_BUILTIN_VEC_INIT_V4HI
:
17966 case IX86_BUILTIN_VEC_INIT_V8QI
:
17967 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
17969 case IX86_BUILTIN_VEC_EXT_V2DF
:
17970 case IX86_BUILTIN_VEC_EXT_V2DI
:
17971 case IX86_BUILTIN_VEC_EXT_V4SF
:
17972 case IX86_BUILTIN_VEC_EXT_V4SI
:
17973 case IX86_BUILTIN_VEC_EXT_V8HI
:
17974 case IX86_BUILTIN_VEC_EXT_V2SI
:
17975 case IX86_BUILTIN_VEC_EXT_V4HI
:
17976 return ix86_expand_vec_ext_builtin (arglist
, target
);
17978 case IX86_BUILTIN_VEC_SET_V8HI
:
17979 case IX86_BUILTIN_VEC_SET_V4HI
:
17980 return ix86_expand_vec_set_builtin (arglist
);
17986 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17987 if (d
->code
== fcode
)
17989 /* Compares are treated specially. */
17990 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17991 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
17992 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17993 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17994 return ix86_expand_sse_compare (d
, arglist
, target
);
17996 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
17999 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18000 if (d
->code
== fcode
)
18001 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
18003 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18004 if (d
->code
== fcode
)
18005 return ix86_expand_sse_comi (d
, arglist
, target
);
18007 gcc_unreachable ();
18010 /* Returns a function decl for a vectorized version of the builtin function
18011 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18012 if it is not available. */
18015 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18018 enum machine_mode in_mode
, out_mode
;
18021 if (TREE_CODE (type_out
) != VECTOR_TYPE
18022 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18025 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18026 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18027 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18028 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18032 case BUILT_IN_SQRT
:
18033 if (out_mode
== DFmode
&& out_n
== 2
18034 && in_mode
== DFmode
&& in_n
== 2)
18035 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18038 case BUILT_IN_SQRTF
:
18039 if (out_mode
== SFmode
&& out_n
== 4
18040 && in_mode
== SFmode
&& in_n
== 4)
18041 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18044 case BUILT_IN_LRINTF
:
18045 if (out_mode
== SImode
&& out_n
== 4
18046 && in_mode
== SFmode
&& in_n
== 4)
18047 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18057 /* Store OPERAND to the memory after reload is completed. This means
18058 that we can't easily use assign_stack_local. */
18060 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18064 gcc_assert (reload_completed
);
18065 if (TARGET_RED_ZONE
)
18067 result
= gen_rtx_MEM (mode
,
18068 gen_rtx_PLUS (Pmode
,
18070 GEN_INT (-RED_ZONE_SIZE
)));
18071 emit_move_insn (result
, operand
);
18073 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18079 operand
= gen_lowpart (DImode
, operand
);
18083 gen_rtx_SET (VOIDmode
,
18084 gen_rtx_MEM (DImode
,
18085 gen_rtx_PRE_DEC (DImode
,
18086 stack_pointer_rtx
)),
18090 gcc_unreachable ();
18092 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18101 split_di (&operand
, 1, operands
, operands
+ 1);
18103 gen_rtx_SET (VOIDmode
,
18104 gen_rtx_MEM (SImode
,
18105 gen_rtx_PRE_DEC (Pmode
,
18106 stack_pointer_rtx
)),
18109 gen_rtx_SET (VOIDmode
,
18110 gen_rtx_MEM (SImode
,
18111 gen_rtx_PRE_DEC (Pmode
,
18112 stack_pointer_rtx
)),
18117 /* Store HImodes as SImodes. */
18118 operand
= gen_lowpart (SImode
, operand
);
18122 gen_rtx_SET (VOIDmode
,
18123 gen_rtx_MEM (GET_MODE (operand
),
18124 gen_rtx_PRE_DEC (SImode
,
18125 stack_pointer_rtx
)),
18129 gcc_unreachable ();
18131 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18136 /* Free operand from the memory. */
18138 ix86_free_from_memory (enum machine_mode mode
)
18140 if (!TARGET_RED_ZONE
)
18144 if (mode
== DImode
|| TARGET_64BIT
)
18148 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18149 to pop or add instruction if registers are available. */
18150 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18151 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18156 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18157 QImode must go into class Q_REGS.
18158 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18159 movdf to do mem-to-mem moves through integer regs. */
18161 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18163 enum machine_mode mode
= GET_MODE (x
);
18165 /* We're only allowed to return a subclass of CLASS. Many of the
18166 following checks fail for NO_REGS, so eliminate that early. */
18167 if (class == NO_REGS
)
18170 /* All classes can load zeros. */
18171 if (x
== CONST0_RTX (mode
))
18174 /* Force constants into memory if we are loading a (nonzero) constant into
18175 an MMX or SSE register. This is because there are no MMX/SSE instructions
18176 to load from a constant. */
18178 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18181 /* Prefer SSE regs only, if we can use them for math. */
18182 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18183 return SSE_CLASS_P (class) ? class : NO_REGS
;
18185 /* Floating-point constants need more complex checks. */
18186 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18188 /* General regs can load everything. */
18189 if (reg_class_subset_p (class, GENERAL_REGS
))
18192 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18193 zero above. We only want to wind up preferring 80387 registers if
18194 we plan on doing computation with them. */
18196 && standard_80387_constant_p (x
))
18198 /* Limit class to non-sse. */
18199 if (class == FLOAT_SSE_REGS
)
18201 if (class == FP_TOP_SSE_REGS
)
18203 if (class == FP_SECOND_SSE_REGS
)
18204 return FP_SECOND_REG
;
18205 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18212 /* Generally when we see PLUS here, it's the function invariant
18213 (plus soft-fp const_int). Which can only be computed into general
18215 if (GET_CODE (x
) == PLUS
)
18216 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18218 /* QImode constants are easy to load, but non-constant QImode data
18219 must go into Q_REGS. */
18220 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18222 if (reg_class_subset_p (class, Q_REGS
))
18224 if (reg_class_subset_p (Q_REGS
, class))
18232 /* Discourage putting floating-point values in SSE registers unless
18233 SSE math is being used, and likewise for the 387 registers. */
18235 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18237 enum machine_mode mode
= GET_MODE (x
);
18239 /* Restrict the output reload class to the register bank that we are doing
18240 math on. If we would like not to return a subset of CLASS, reject this
18241 alternative: if reload cannot do this, it will still use its choice. */
18242 mode
= GET_MODE (x
);
18243 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18244 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18246 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18248 if (class == FP_TOP_SSE_REGS
)
18250 else if (class == FP_SECOND_SSE_REGS
)
18251 return FP_SECOND_REG
;
18253 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18259 /* If we are copying between general and FP registers, we need a memory
18260 location. The same is true for SSE and MMX registers.
18262 The macro can't work reliably when one of the CLASSES is class containing
18263 registers from multiple units (SSE, MMX, integer). We avoid this by never
18264 combining those units in single alternative in the machine description.
18265 Ensure that this constraint holds to avoid unexpected surprises.
18267 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18268 enforce these sanity checks. */
18271 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18272 enum machine_mode mode
, int strict
)
18274 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18275 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18276 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18277 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18278 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18279 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18281 gcc_assert (!strict
);
18285 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18288 /* ??? This is a lie. We do have moves between mmx/general, and for
18289 mmx/sse2. But by saying we need secondary memory we discourage the
18290 register allocator from using the mmx registers unless needed. */
18291 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18294 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18296 /* SSE1 doesn't have any direct moves from other classes. */
18300 /* If the target says that inter-unit moves are more expensive
18301 than moving through memory, then don't generate them. */
18302 if (!TARGET_INTER_UNIT_MOVES
)
18305 /* Between SSE and general, we have moves no larger than word size. */
18306 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18313 /* Return true if the registers in CLASS cannot represent the change from
18314 modes FROM to TO. */
18317 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18318 enum reg_class
class)
18323 /* x87 registers can't do subreg at all, as all values are reformatted
18324 to extended precision. */
18325 if (MAYBE_FLOAT_CLASS_P (class))
18328 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18330 /* Vector registers do not support QI or HImode loads. If we don't
18331 disallow a change to these modes, reload will assume it's ok to
18332 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18333 the vec_dupv4hi pattern. */
18334 if (GET_MODE_SIZE (from
) < 4)
18337 /* Vector registers do not support subreg with nonzero offsets, which
18338 are otherwise valid for integer registers. Since we can't see
18339 whether we have a nonzero offset from here, prohibit all
18340 nonparadoxical subregs changing size. */
18341 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18348 /* Return the cost of moving data from a register in class CLASS1 to
18349 one in class CLASS2.
18351 It is not required that the cost always equal 2 when FROM is the same as TO;
18352 on some machines it is expensive to move between registers if they are not
18353 general registers. */
18356 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18357 enum reg_class class2
)
18359 /* In case we require secondary memory, compute cost of the store followed
18360 by load. In order to avoid bad register allocation choices, we need
18361 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18363 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
18367 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18368 MEMORY_MOVE_COST (mode
, class1
, 1));
18369 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18370 MEMORY_MOVE_COST (mode
, class2
, 1));
18372 /* In case of copying from general_purpose_register we may emit multiple
18373 stores followed by single load causing memory size mismatch stall.
18374 Count this as arbitrarily high cost of 20. */
18375 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18378 /* In the case of FP/MMX moves, the registers actually overlap, and we
18379 have to switch modes in order to treat them differently. */
18380 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18381 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18387 /* Moves between SSE/MMX and integer unit are expensive. */
18388 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18389 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18390 return ix86_cost
->mmxsse_to_integer
;
18391 if (MAYBE_FLOAT_CLASS_P (class1
))
18392 return ix86_cost
->fp_move
;
18393 if (MAYBE_SSE_CLASS_P (class1
))
18394 return ix86_cost
->sse_move
;
18395 if (MAYBE_MMX_CLASS_P (class1
))
18396 return ix86_cost
->mmx_move
;
18400 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18403 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18405 /* Flags and only flags can only hold CCmode values. */
18406 if (CC_REGNO_P (regno
))
18407 return GET_MODE_CLASS (mode
) == MODE_CC
;
18408 if (GET_MODE_CLASS (mode
) == MODE_CC
18409 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18410 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18412 if (FP_REGNO_P (regno
))
18413 return VALID_FP_MODE_P (mode
);
18414 if (SSE_REGNO_P (regno
))
18416 /* We implement the move patterns for all vector modes into and
18417 out of SSE registers, even when no operation instructions
18419 return (VALID_SSE_REG_MODE (mode
)
18420 || VALID_SSE2_REG_MODE (mode
)
18421 || VALID_MMX_REG_MODE (mode
)
18422 || VALID_MMX_REG_MODE_3DNOW (mode
));
18424 if (MMX_REGNO_P (regno
))
18426 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18427 so if the register is available at all, then we can move data of
18428 the given mode into or out of it. */
18429 return (VALID_MMX_REG_MODE (mode
)
18430 || VALID_MMX_REG_MODE_3DNOW (mode
));
18433 if (mode
== QImode
)
18435 /* Take care for QImode values - they can be in non-QI regs,
18436 but then they do cause partial register stalls. */
18437 if (regno
< 4 || TARGET_64BIT
)
18439 if (!TARGET_PARTIAL_REG_STALL
)
18441 return reload_in_progress
|| reload_completed
;
18443 /* We handle both integer and floats in the general purpose registers. */
18444 else if (VALID_INT_MODE_P (mode
))
18446 else if (VALID_FP_MODE_P (mode
))
18448 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18449 on to use that value in smaller contexts, this can easily force a
18450 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18451 supporting DImode, allow it. */
18452 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18458 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18459 tieable integer mode. */
18462 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18471 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18474 return TARGET_64BIT
;
18481 /* Return true if MODE1 is accessible in a register that can hold MODE2
18482 without copying. That is, all register classes that can hold MODE2
18483 can also hold MODE1. */
18486 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18488 if (mode1
== mode2
)
18491 if (ix86_tieable_integer_mode_p (mode1
)
18492 && ix86_tieable_integer_mode_p (mode2
))
18495 /* MODE2 being XFmode implies fp stack or general regs, which means we
18496 can tie any smaller floating point modes to it. Note that we do not
18497 tie this with TFmode. */
18498 if (mode2
== XFmode
)
18499 return mode1
== SFmode
|| mode1
== DFmode
;
18501 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18502 that we can tie it with SFmode. */
18503 if (mode2
== DFmode
)
18504 return mode1
== SFmode
;
18506 /* If MODE2 is only appropriate for an SSE register, then tie with
18507 any other mode acceptable to SSE registers. */
18508 if (GET_MODE_SIZE (mode2
) >= 8
18509 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18510 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
18512 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18513 with any other mode acceptable to MMX registers. */
18514 if (GET_MODE_SIZE (mode2
) == 8
18515 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18516 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
18521 /* Return the cost of moving data of mode M between a
18522 register and memory. A value of 2 is the default; this cost is
18523 relative to those in `REGISTER_MOVE_COST'.
18525 If moving between registers and memory is more expensive than
18526 between two registers, you should define this macro to express the
18529 Model also increased moving costs of QImode registers in non
18533 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
18535 if (FLOAT_CLASS_P (class))
18552 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
18554 if (SSE_CLASS_P (class))
18557 switch (GET_MODE_SIZE (mode
))
18571 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
18573 if (MMX_CLASS_P (class))
18576 switch (GET_MODE_SIZE (mode
))
18587 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
18589 switch (GET_MODE_SIZE (mode
))
18593 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18594 : ix86_cost
->movzbl_load
);
18596 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18597 : ix86_cost
->int_store
[0] + 4);
18600 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18602 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18603 if (mode
== TFmode
)
18605 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18606 * (((int) GET_MODE_SIZE (mode
)
18607 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18611 /* Compute a (partial) cost for rtx X. Return true if the complete
18612 cost has been computed, and false if subexpressions should be
18613 scanned. In either case, *TOTAL contains the cost result. */
18616 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
18618 enum machine_mode mode
= GET_MODE (x
);
18626 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
18628 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
18630 else if (flag_pic
&& SYMBOLIC_CONST (x
)
18632 || (!GET_CODE (x
) != LABEL_REF
18633 && (GET_CODE (x
) != SYMBOL_REF
18634 || !SYMBOL_REF_LOCAL_P (x
)))))
18641 if (mode
== VOIDmode
)
18644 switch (standard_80387_constant_p (x
))
18649 default: /* Other constants */
18654 /* Start with (MEM (SYMBOL_REF)), since that's where
18655 it'll probably end up. Add a penalty for size. */
18656 *total
= (COSTS_N_INSNS (1)
18657 + (flag_pic
!= 0 && !TARGET_64BIT
)
18658 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
18664 /* The zero extensions is often completely free on x86_64, so make
18665 it as cheap as possible. */
18666 if (TARGET_64BIT
&& mode
== DImode
18667 && GET_MODE (XEXP (x
, 0)) == SImode
)
18669 else if (TARGET_ZERO_EXTEND_WITH_AND
)
18670 *total
= ix86_cost
->add
;
18672 *total
= ix86_cost
->movzx
;
18676 *total
= ix86_cost
->movsx
;
18680 if (CONST_INT_P (XEXP (x
, 1))
18681 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
18683 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18686 *total
= ix86_cost
->add
;
18689 if ((value
== 2 || value
== 3)
18690 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
18692 *total
= ix86_cost
->lea
;
18702 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
18704 if (CONST_INT_P (XEXP (x
, 1)))
18706 if (INTVAL (XEXP (x
, 1)) > 32)
18707 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
18709 *total
= ix86_cost
->shift_const
* 2;
18713 if (GET_CODE (XEXP (x
, 1)) == AND
)
18714 *total
= ix86_cost
->shift_var
* 2;
18716 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
18721 if (CONST_INT_P (XEXP (x
, 1)))
18722 *total
= ix86_cost
->shift_const
;
18724 *total
= ix86_cost
->shift_var
;
18729 if (FLOAT_MODE_P (mode
))
18731 *total
= ix86_cost
->fmul
;
18736 rtx op0
= XEXP (x
, 0);
18737 rtx op1
= XEXP (x
, 1);
18739 if (CONST_INT_P (XEXP (x
, 1)))
18741 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18742 for (nbits
= 0; value
!= 0; value
&= value
- 1)
18746 /* This is arbitrary. */
18749 /* Compute costs correctly for widening multiplication. */
18750 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
18751 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
18752 == GET_MODE_SIZE (mode
))
18754 int is_mulwiden
= 0;
18755 enum machine_mode inner_mode
= GET_MODE (op0
);
18757 if (GET_CODE (op0
) == GET_CODE (op1
))
18758 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
18759 else if (CONST_INT_P (op1
))
18761 if (GET_CODE (op0
) == SIGN_EXTEND
)
18762 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
18765 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
18769 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
18772 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
18773 + nbits
* ix86_cost
->mult_bit
18774 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
18783 if (FLOAT_MODE_P (mode
))
18784 *total
= ix86_cost
->fdiv
;
18786 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
18790 if (FLOAT_MODE_P (mode
))
18791 *total
= ix86_cost
->fadd
;
18792 else if (GET_MODE_CLASS (mode
) == MODE_INT
18793 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
18795 if (GET_CODE (XEXP (x
, 0)) == PLUS
18796 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
18797 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
18798 && CONSTANT_P (XEXP (x
, 1)))
18800 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
18801 if (val
== 2 || val
== 4 || val
== 8)
18803 *total
= ix86_cost
->lea
;
18804 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
18805 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
18807 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18811 else if (GET_CODE (XEXP (x
, 0)) == MULT
18812 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
18814 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
18815 if (val
== 2 || val
== 4 || val
== 8)
18817 *total
= ix86_cost
->lea
;
18818 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
18819 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18823 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
18825 *total
= ix86_cost
->lea
;
18826 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
18827 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
18828 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
18835 if (FLOAT_MODE_P (mode
))
18837 *total
= ix86_cost
->fadd
;
18845 if (!TARGET_64BIT
&& mode
== DImode
)
18847 *total
= (ix86_cost
->add
* 2
18848 + (rtx_cost (XEXP (x
, 0), outer_code
)
18849 << (GET_MODE (XEXP (x
, 0)) != DImode
))
18850 + (rtx_cost (XEXP (x
, 1), outer_code
)
18851 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
18857 if (FLOAT_MODE_P (mode
))
18859 *total
= ix86_cost
->fchs
;
18865 if (!TARGET_64BIT
&& mode
== DImode
)
18866 *total
= ix86_cost
->add
* 2;
18868 *total
= ix86_cost
->add
;
18872 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
18873 && XEXP (XEXP (x
, 0), 1) == const1_rtx
18874 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
18875 && XEXP (x
, 1) == const0_rtx
)
18877 /* This kind of construct is implemented using test[bwl].
18878 Treat it as if we had an AND. */
18879 *total
= (ix86_cost
->add
18880 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
18881 + rtx_cost (const1_rtx
, outer_code
));
18887 if (!TARGET_SSE_MATH
18889 || (mode
== DFmode
&& !TARGET_SSE2
))
18894 if (FLOAT_MODE_P (mode
))
18895 *total
= ix86_cost
->fabs
;
18899 if (FLOAT_MODE_P (mode
))
18900 *total
= ix86_cost
->fsqrt
;
18904 if (XINT (x
, 1) == UNSPEC_TP
)
18915 static int current_machopic_label_num
;
18917 /* Given a symbol name and its associated stub, write out the
18918 definition of the stub. */
18921 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
18923 unsigned int length
;
18924 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
18925 int label
= ++current_machopic_label_num
;
18927 /* For 64-bit we shouldn't get here. */
18928 gcc_assert (!TARGET_64BIT
);
18930 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18931 symb
= (*targetm
.strip_name_encoding
) (symb
);
18933 length
= strlen (stub
);
18934 binder_name
= alloca (length
+ 32);
18935 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
18937 length
= strlen (symb
);
18938 symbol_name
= alloca (length
+ 32);
18939 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
18941 sprintf (lazy_ptr_name
, "L%d$lz", label
);
18944 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
18946 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
18948 fprintf (file
, "%s:\n", stub
);
18949 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
18953 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
18954 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
18955 fprintf (file
, "\tjmp\t*%%edx\n");
18958 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
18960 fprintf (file
, "%s:\n", binder_name
);
18964 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
18965 fprintf (file
, "\tpushl\t%%eax\n");
18968 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
18970 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
18972 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
18973 fprintf (file
, "%s:\n", lazy_ptr_name
);
18974 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
18975 fprintf (file
, "\t.long %s\n", binder_name
);
18979 darwin_x86_file_end (void)
18981 darwin_file_end ();
18984 #endif /* TARGET_MACHO */
18986 /* Order the registers for register allocator. */
18989 x86_order_regs_for_local_alloc (void)
18994 /* First allocate the local general purpose registers. */
18995 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
18996 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
18997 reg_alloc_order
[pos
++] = i
;
18999 /* Global general purpose registers. */
19000 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19001 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19002 reg_alloc_order
[pos
++] = i
;
19004 /* x87 registers come first in case we are doing FP math
19006 if (!TARGET_SSE_MATH
)
19007 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19008 reg_alloc_order
[pos
++] = i
;
19010 /* SSE registers. */
19011 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19012 reg_alloc_order
[pos
++] = i
;
19013 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19014 reg_alloc_order
[pos
++] = i
;
19016 /* x87 registers. */
19017 if (TARGET_SSE_MATH
)
19018 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19019 reg_alloc_order
[pos
++] = i
;
19021 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19022 reg_alloc_order
[pos
++] = i
;
19024 /* Initialize the rest of array as we do not allocate some registers
19026 while (pos
< FIRST_PSEUDO_REGISTER
)
19027 reg_alloc_order
[pos
++] = 0;
19030 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19031 struct attribute_spec.handler. */
19033 ix86_handle_struct_attribute (tree
*node
, tree name
,
19034 tree args ATTRIBUTE_UNUSED
,
19035 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19038 if (DECL_P (*node
))
19040 if (TREE_CODE (*node
) == TYPE_DECL
)
19041 type
= &TREE_TYPE (*node
);
19046 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19047 || TREE_CODE (*type
) == UNION_TYPE
)))
19049 warning (OPT_Wattributes
, "%qs attribute ignored",
19050 IDENTIFIER_POINTER (name
));
19051 *no_add_attrs
= true;
19054 else if ((is_attribute_p ("ms_struct", name
)
19055 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19056 || ((is_attribute_p ("gcc_struct", name
)
19057 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19059 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19060 IDENTIFIER_POINTER (name
));
19061 *no_add_attrs
= true;
19068 ix86_ms_bitfield_layout_p (tree record_type
)
19070 return (TARGET_MS_BITFIELD_LAYOUT
&&
19071 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19072 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19075 /* Returns an expression indicating where the this parameter is
19076 located on entry to the FUNCTION. */
19079 x86_this_parameter (tree function
)
19081 tree type
= TREE_TYPE (function
);
19085 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19086 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19089 if (ix86_function_regparm (type
, function
) > 0)
19093 parm
= TYPE_ARG_TYPES (type
);
19094 /* Figure out whether or not the function has a variable number of
19096 for (; parm
; parm
= TREE_CHAIN (parm
))
19097 if (TREE_VALUE (parm
) == void_type_node
)
19099 /* If not, the this parameter is in the first argument. */
19103 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19105 return gen_rtx_REG (SImode
, regno
);
19109 if (aggregate_value_p (TREE_TYPE (type
), type
))
19110 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19112 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19115 /* Determine whether x86_output_mi_thunk can succeed. */
19118 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19119 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19120 HOST_WIDE_INT vcall_offset
, tree function
)
19122 /* 64-bit can handle anything. */
19126 /* For 32-bit, everything's fine if we have one free register. */
19127 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19130 /* Need a free register for vcall_offset. */
19134 /* Need a free register for GOT references. */
19135 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19138 /* Otherwise ok. */
19142 /* Output the assembler code for a thunk function. THUNK_DECL is the
19143 declaration for the thunk function itself, FUNCTION is the decl for
19144 the target function. DELTA is an immediate constant offset to be
19145 added to THIS. If VCALL_OFFSET is nonzero, the word at
19146 *(*this + vcall_offset) should be added to THIS. */
19149 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19150 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19151 HOST_WIDE_INT vcall_offset
, tree function
)
19154 rtx
this = x86_this_parameter (function
);
19157 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19158 pull it in now and let DELTA benefit. */
19161 else if (vcall_offset
)
19163 /* Put the this parameter into %eax. */
19165 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19166 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19169 this_reg
= NULL_RTX
;
19171 /* Adjust the this parameter by a fixed constant. */
19174 xops
[0] = GEN_INT (delta
);
19175 xops
[1] = this_reg
? this_reg
: this;
19178 if (!x86_64_general_operand (xops
[0], DImode
))
19180 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19182 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19186 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19189 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19192 /* Adjust the this parameter by a value stored in the vtable. */
19196 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19199 int tmp_regno
= 2 /* ECX */;
19200 if (lookup_attribute ("fastcall",
19201 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19202 tmp_regno
= 0 /* EAX */;
19203 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19206 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19209 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19211 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19213 /* Adjust the this parameter. */
19214 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19215 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19217 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19218 xops
[0] = GEN_INT (vcall_offset
);
19220 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19221 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19223 xops
[1] = this_reg
;
19225 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19227 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19230 /* If necessary, drop THIS back to its stack slot. */
19231 if (this_reg
&& this_reg
!= this)
19233 xops
[0] = this_reg
;
19235 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19238 xops
[0] = XEXP (DECL_RTL (function
), 0);
19241 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19242 output_asm_insn ("jmp\t%P0", xops
);
19245 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19246 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19247 tmp
= gen_rtx_MEM (QImode
, tmp
);
19249 output_asm_insn ("jmp\t%A0", xops
);
19254 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19255 output_asm_insn ("jmp\t%P0", xops
);
19260 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19261 tmp
= (gen_rtx_SYMBOL_REF
19263 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19264 tmp
= gen_rtx_MEM (QImode
, tmp
);
19266 output_asm_insn ("jmp\t%0", xops
);
19269 #endif /* TARGET_MACHO */
19271 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19272 output_set_got (tmp
, NULL_RTX
);
19275 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19276 output_asm_insn ("jmp\t{*}%1", xops
);
19282 x86_file_start (void)
19284 default_file_start ();
19286 darwin_file_start ();
19288 if (X86_FILE_START_VERSION_DIRECTIVE
)
19289 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19290 if (X86_FILE_START_FLTUSED
)
19291 fputs ("\t.global\t__fltused\n", asm_out_file
);
19292 if (ix86_asm_dialect
== ASM_INTEL
)
19293 fputs ("\t.intel_syntax\n", asm_out_file
);
19297 x86_field_alignment (tree field
, int computed
)
19299 enum machine_mode mode
;
19300 tree type
= TREE_TYPE (field
);
19302 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19304 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19305 ? get_inner_array_type (type
) : type
);
19306 if (mode
== DFmode
|| mode
== DCmode
19307 || GET_MODE_CLASS (mode
) == MODE_INT
19308 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19309 return MIN (32, computed
);
19313 /* Output assembler code to FILE to increment profiler label # LABELNO
19314 for profiling a function entry. */
19316 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19321 #ifndef NO_PROFILE_COUNTERS
19322 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19324 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19328 #ifndef NO_PROFILE_COUNTERS
19329 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19331 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19335 #ifndef NO_PROFILE_COUNTERS
19336 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19337 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19339 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19343 #ifndef NO_PROFILE_COUNTERS
19344 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19345 PROFILE_COUNT_REGISTER
);
19347 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19351 /* We don't have exact information about the insn sizes, but we may assume
19352 quite safely that we are informed about all 1 byte insns and memory
19353 address sizes. This is enough to eliminate unnecessary padding in
19357 min_insn_size (rtx insn
)
19361 if (!INSN_P (insn
) || !active_insn_p (insn
))
19364 /* Discard alignments we've emit and jump instructions. */
19365 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19366 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19369 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19370 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19373 /* Important case - calls are always 5 bytes.
19374 It is common to have many calls in the row. */
19376 && symbolic_reference_mentioned_p (PATTERN (insn
))
19377 && !SIBLING_CALL_P (insn
))
19379 if (get_attr_length (insn
) <= 1)
19382 /* For normal instructions we may rely on the sizes of addresses
19383 and the presence of symbol to require 4 bytes of encoding.
19384 This is not the case for jumps where references are PC relative. */
19385 if (!JUMP_P (insn
))
19387 l
= get_attr_length_address (insn
);
19388 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19397 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19401 ix86_avoid_jump_misspredicts (void)
19403 rtx insn
, start
= get_insns ();
19404 int nbytes
= 0, njumps
= 0;
19407 /* Look for all minimal intervals of instructions containing 4 jumps.
19408 The intervals are bounded by START and INSN. NBYTES is the total
19409 size of instructions in the interval including INSN and not including
19410 START. When the NBYTES is smaller than 16 bytes, it is possible
19411 that the end of START and INSN ends up in the same 16byte page.
19413 The smallest offset in the page INSN can start is the case where START
19414 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19415 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19417 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19420 nbytes
+= min_insn_size (insn
);
19422 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19423 INSN_UID (insn
), min_insn_size (insn
));
19425 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19426 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19434 start
= NEXT_INSN (start
);
19435 if ((JUMP_P (start
)
19436 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19437 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19439 njumps
--, isjump
= 1;
19442 nbytes
-= min_insn_size (start
);
19444 gcc_assert (njumps
>= 0);
19446 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19447 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19449 if (njumps
== 3 && isjump
&& nbytes
< 16)
19451 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19454 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19455 INSN_UID (insn
), padsize
);
19456 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19461 /* AMD Athlon works faster
19462 when RET is not destination of conditional jump or directly preceded
19463 by other jump instruction. We avoid the penalty by inserting NOP just
19464 before the RET instructions in such cases. */
19466 ix86_pad_returns (void)
19471 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19473 basic_block bb
= e
->src
;
19474 rtx ret
= BB_END (bb
);
19476 bool replace
= false;
19478 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19479 || !maybe_hot_bb_p (bb
))
19481 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19482 if (active_insn_p (prev
) || LABEL_P (prev
))
19484 if (prev
&& LABEL_P (prev
))
19489 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19490 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19491 && !(e
->flags
& EDGE_FALLTHRU
))
19496 prev
= prev_active_insn (ret
);
19498 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19501 /* Empty functions get branch mispredict even when the jump destination
19502 is not visible to us. */
19503 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19508 emit_insn_before (gen_return_internal_long (), ret
);
19514 /* Implement machine specific optimizations. We implement padding of returns
19515 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19519 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19520 ix86_pad_returns ();
19521 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19522 ix86_avoid_jump_misspredicts ();
19525 /* Return nonzero when QImode register that must be represented via REX prefix
19528 x86_extended_QIreg_mentioned_p (rtx insn
)
19531 extract_insn_cached (insn
);
19532 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19533 if (REG_P (recog_data
.operand
[i
])
19534 && REGNO (recog_data
.operand
[i
]) >= 4)
19539 /* Return nonzero when P points to register encoded via REX prefix.
19540 Called via for_each_rtx. */
19542 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19544 unsigned int regno
;
19547 regno
= REGNO (*p
);
19548 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19551 /* Return true when INSN mentions register that must be encoded using REX
19554 x86_extended_reg_mentioned_p (rtx insn
)
19556 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19559 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19560 optabs would emit if we didn't have TFmode patterns. */
19563 x86_emit_floatuns (rtx operands
[2])
19565 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19566 enum machine_mode mode
, inmode
;
19568 inmode
= GET_MODE (operands
[1]);
19569 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19572 in
= force_reg (inmode
, operands
[1]);
19573 mode
= GET_MODE (out
);
19574 neglab
= gen_label_rtx ();
19575 donelab
= gen_label_rtx ();
19576 i1
= gen_reg_rtx (Pmode
);
19577 f0
= gen_reg_rtx (mode
);
19579 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
19581 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
19582 emit_jump_insn (gen_jump (donelab
));
19585 emit_label (neglab
);
19587 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
19588 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
19589 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19590 expand_float (f0
, i0
, 0);
19591 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19593 emit_label (donelab
);
19596 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19597 with all elements equal to VAR. Return true if successful. */
19600 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19601 rtx target
, rtx val
)
19603 enum machine_mode smode
, wsmode
, wvmode
;
19618 val
= force_reg (GET_MODE_INNER (mode
), val
);
19619 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19620 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19626 if (TARGET_SSE
|| TARGET_3DNOW_A
)
19628 val
= gen_lowpart (SImode
, val
);
19629 x
= gen_rtx_TRUNCATE (HImode
, val
);
19630 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
19631 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19653 /* Extend HImode to SImode using a paradoxical SUBREG. */
19654 tmp1
= gen_reg_rtx (SImode
);
19655 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19656 /* Insert the SImode value as low element of V4SImode vector. */
19657 tmp2
= gen_reg_rtx (V4SImode
);
19658 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19659 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19660 CONST0_RTX (V4SImode
),
19662 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19663 /* Cast the V4SImode vector back to a V8HImode vector. */
19664 tmp1
= gen_reg_rtx (V8HImode
);
19665 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
19666 /* Duplicate the low short through the whole low SImode word. */
19667 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
19668 /* Cast the V8HImode vector back to a V4SImode vector. */
19669 tmp2
= gen_reg_rtx (V4SImode
);
19670 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19671 /* Replicate the low element of the V4SImode vector. */
19672 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19673 /* Cast the V2SImode back to V8HImode, and store in target. */
19674 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
19685 /* Extend QImode to SImode using a paradoxical SUBREG. */
19686 tmp1
= gen_reg_rtx (SImode
);
19687 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19688 /* Insert the SImode value as low element of V4SImode vector. */
19689 tmp2
= gen_reg_rtx (V4SImode
);
19690 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19691 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19692 CONST0_RTX (V4SImode
),
19694 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19695 /* Cast the V4SImode vector back to a V16QImode vector. */
19696 tmp1
= gen_reg_rtx (V16QImode
);
19697 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
19698 /* Duplicate the low byte through the whole low SImode word. */
19699 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19700 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19701 /* Cast the V16QImode vector back to a V4SImode vector. */
19702 tmp2
= gen_reg_rtx (V4SImode
);
19703 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19704 /* Replicate the low element of the V4SImode vector. */
19705 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19706 /* Cast the V2SImode back to V16QImode, and store in target. */
19707 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
19715 /* Replicate the value once into the next wider mode and recurse. */
19716 val
= convert_modes (wsmode
, smode
, val
, true);
19717 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
19718 GEN_INT (GET_MODE_BITSIZE (smode
)),
19719 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19720 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
19722 x
= gen_reg_rtx (wvmode
);
19723 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
19724 gcc_unreachable ();
19725 emit_move_insn (target
, gen_lowpart (mode
, x
));
19733 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19734 whose ONE_VAR element is VAR, and other elements are zero. Return true
19738 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
19739 rtx target
, rtx var
, int one_var
)
19741 enum machine_mode vsimode
;
19757 var
= force_reg (GET_MODE_INNER (mode
), var
);
19758 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
19759 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19764 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
19765 new_target
= gen_reg_rtx (mode
);
19767 new_target
= target
;
19768 var
= force_reg (GET_MODE_INNER (mode
), var
);
19769 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
19770 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
19771 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
19774 /* We need to shuffle the value to the correct position, so
19775 create a new pseudo to store the intermediate result. */
19777 /* With SSE2, we can use the integer shuffle insns. */
19778 if (mode
!= V4SFmode
&& TARGET_SSE2
)
19780 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
19782 GEN_INT (one_var
== 1 ? 0 : 1),
19783 GEN_INT (one_var
== 2 ? 0 : 1),
19784 GEN_INT (one_var
== 3 ? 0 : 1)));
19785 if (target
!= new_target
)
19786 emit_move_insn (target
, new_target
);
19790 /* Otherwise convert the intermediate result to V4SFmode and
19791 use the SSE1 shuffle instructions. */
19792 if (mode
!= V4SFmode
)
19794 tmp
= gen_reg_rtx (V4SFmode
);
19795 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
19800 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
19802 GEN_INT (one_var
== 1 ? 0 : 1),
19803 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
19804 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
19806 if (mode
!= V4SFmode
)
19807 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
19808 else if (tmp
!= target
)
19809 emit_move_insn (target
, tmp
);
19811 else if (target
!= new_target
)
19812 emit_move_insn (target
, new_target
);
19817 vsimode
= V4SImode
;
19823 vsimode
= V2SImode
;
19829 /* Zero extend the variable element to SImode and recurse. */
19830 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
19832 x
= gen_reg_rtx (vsimode
);
19833 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
19835 gcc_unreachable ();
19837 emit_move_insn (target
, gen_lowpart (mode
, x
));
19845 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19846 consisting of the values in VALS. It is known that all elements
19847 except ONE_VAR are constants. Return true if successful. */
19850 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
19851 rtx target
, rtx vals
, int one_var
)
19853 rtx var
= XVECEXP (vals
, 0, one_var
);
19854 enum machine_mode wmode
;
19857 const_vec
= copy_rtx (vals
);
19858 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
19859 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
19867 /* For the two element vectors, it's just as easy to use
19868 the general case. */
19884 /* There's no way to set one QImode entry easily. Combine
19885 the variable value with its adjacent constant value, and
19886 promote to an HImode set. */
19887 x
= XVECEXP (vals
, 0, one_var
^ 1);
19890 var
= convert_modes (HImode
, QImode
, var
, true);
19891 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
19892 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
19893 x
= GEN_INT (INTVAL (x
) & 0xff);
19897 var
= convert_modes (HImode
, QImode
, var
, true);
19898 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
19900 if (x
!= const0_rtx
)
19901 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
19902 1, OPTAB_LIB_WIDEN
);
19904 x
= gen_reg_rtx (wmode
);
19905 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
19906 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
19908 emit_move_insn (target
, gen_lowpart (mode
, x
));
19915 emit_move_insn (target
, const_vec
);
19916 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
19920 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19921 all values variable, and none identical. */
19924 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
19925 rtx target
, rtx vals
)
19927 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
19928 rtx op0
= NULL
, op1
= NULL
;
19929 bool use_vec_concat
= false;
19935 if (!mmx_ok
&& !TARGET_SSE
)
19941 /* For the two element vectors, we always implement VEC_CONCAT. */
19942 op0
= XVECEXP (vals
, 0, 0);
19943 op1
= XVECEXP (vals
, 0, 1);
19944 use_vec_concat
= true;
19948 half_mode
= V2SFmode
;
19951 half_mode
= V2SImode
;
19957 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19958 Recurse to load the two halves. */
19960 op0
= gen_reg_rtx (half_mode
);
19961 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
19962 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
19964 op1
= gen_reg_rtx (half_mode
);
19965 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
19966 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
19968 use_vec_concat
= true;
19979 gcc_unreachable ();
19982 if (use_vec_concat
)
19984 if (!register_operand (op0
, half_mode
))
19985 op0
= force_reg (half_mode
, op0
);
19986 if (!register_operand (op1
, half_mode
))
19987 op1
= force_reg (half_mode
, op1
);
19989 emit_insn (gen_rtx_SET (VOIDmode
, target
,
19990 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
19994 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
19995 enum machine_mode inner_mode
;
19996 rtx words
[4], shift
;
19998 inner_mode
= GET_MODE_INNER (mode
);
19999 n_elts
= GET_MODE_NUNITS (mode
);
20000 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20001 n_elt_per_word
= n_elts
/ n_words
;
20002 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20004 for (i
= 0; i
< n_words
; ++i
)
20006 rtx word
= NULL_RTX
;
20008 for (j
= 0; j
< n_elt_per_word
; ++j
)
20010 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20011 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20017 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20018 word
, 1, OPTAB_LIB_WIDEN
);
20019 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20020 word
, 1, OPTAB_LIB_WIDEN
);
20028 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20029 else if (n_words
== 2)
20031 rtx tmp
= gen_reg_rtx (mode
);
20032 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20033 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20034 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20035 emit_move_insn (target
, tmp
);
20037 else if (n_words
== 4)
20039 rtx tmp
= gen_reg_rtx (V4SImode
);
20040 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20041 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20042 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20045 gcc_unreachable ();
20049 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20050 instructions unless MMX_OK is true. */
20053 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20055 enum machine_mode mode
= GET_MODE (target
);
20056 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20057 int n_elts
= GET_MODE_NUNITS (mode
);
20058 int n_var
= 0, one_var
= -1;
20059 bool all_same
= true, all_const_zero
= true;
20063 for (i
= 0; i
< n_elts
; ++i
)
20065 x
= XVECEXP (vals
, 0, i
);
20066 if (!CONSTANT_P (x
))
20067 n_var
++, one_var
= i
;
20068 else if (x
!= CONST0_RTX (inner_mode
))
20069 all_const_zero
= false;
20070 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20074 /* Constants are best loaded from the constant pool. */
20077 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20081 /* If all values are identical, broadcast the value. */
20083 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20084 XVECEXP (vals
, 0, 0)))
20087 /* Values where only one field is non-constant are best loaded from
20088 the pool and overwritten via move later. */
20092 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20093 XVECEXP (vals
, 0, one_var
),
20097 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20101 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20105 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20107 enum machine_mode mode
= GET_MODE (target
);
20108 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20109 bool use_vec_merge
= false;
20118 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20119 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20121 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20123 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20124 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20134 /* For the two element vectors, we implement a VEC_CONCAT with
20135 the extraction of the other element. */
20137 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20138 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20141 op0
= val
, op1
= tmp
;
20143 op0
= tmp
, op1
= val
;
20145 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20146 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20154 use_vec_merge
= true;
20158 /* tmp = target = A B C D */
20159 tmp
= copy_to_reg (target
);
20160 /* target = A A B B */
20161 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20162 /* target = X A B B */
20163 ix86_expand_vector_set (false, target
, val
, 0);
20164 /* target = A X C D */
20165 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20166 GEN_INT (1), GEN_INT (0),
20167 GEN_INT (2+4), GEN_INT (3+4)));
20171 /* tmp = target = A B C D */
20172 tmp
= copy_to_reg (target
);
20173 /* tmp = X B C D */
20174 ix86_expand_vector_set (false, tmp
, val
, 0);
20175 /* target = A B X D */
20176 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20177 GEN_INT (0), GEN_INT (1),
20178 GEN_INT (0+4), GEN_INT (3+4)));
20182 /* tmp = target = A B C D */
20183 tmp
= copy_to_reg (target
);
20184 /* tmp = X B C D */
20185 ix86_expand_vector_set (false, tmp
, val
, 0);
20186 /* target = A B X D */
20187 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20188 GEN_INT (0), GEN_INT (1),
20189 GEN_INT (2+4), GEN_INT (0+4)));
20193 gcc_unreachable ();
20198 /* Element 0 handled by vec_merge below. */
20201 use_vec_merge
= true;
20207 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20208 store into element 0, then shuffle them back. */
20212 order
[0] = GEN_INT (elt
);
20213 order
[1] = const1_rtx
;
20214 order
[2] = const2_rtx
;
20215 order
[3] = GEN_INT (3);
20216 order
[elt
] = const0_rtx
;
20218 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20219 order
[1], order
[2], order
[3]));
20221 ix86_expand_vector_set (false, target
, val
, 0);
20223 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20224 order
[1], order
[2], order
[3]));
20228 /* For SSE1, we have to reuse the V4SF code. */
20229 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20230 gen_lowpart (SFmode
, val
), elt
);
20235 use_vec_merge
= TARGET_SSE2
;
20238 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20249 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20250 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20251 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20255 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20257 emit_move_insn (mem
, target
);
20259 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20260 emit_move_insn (tmp
, val
);
20262 emit_move_insn (target
, mem
);
20267 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20269 enum machine_mode mode
= GET_MODE (vec
);
20270 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20271 bool use_vec_extr
= false;
20284 use_vec_extr
= true;
20296 tmp
= gen_reg_rtx (mode
);
20297 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20298 GEN_INT (elt
), GEN_INT (elt
),
20299 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20303 tmp
= gen_reg_rtx (mode
);
20304 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20308 gcc_unreachable ();
20311 use_vec_extr
= true;
20326 tmp
= gen_reg_rtx (mode
);
20327 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20328 GEN_INT (elt
), GEN_INT (elt
),
20329 GEN_INT (elt
), GEN_INT (elt
)));
20333 tmp
= gen_reg_rtx (mode
);
20334 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20338 gcc_unreachable ();
20341 use_vec_extr
= true;
20346 /* For SSE1, we have to reuse the V4SF code. */
20347 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20348 gen_lowpart (V4SFmode
, vec
), elt
);
20354 use_vec_extr
= TARGET_SSE2
;
20357 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20362 /* ??? Could extract the appropriate HImode element and shift. */
20369 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20370 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20372 /* Let the rtl optimizers know about the zero extension performed. */
20373 if (inner_mode
== HImode
)
20375 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20376 target
= gen_lowpart (SImode
, target
);
20379 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20383 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20385 emit_move_insn (mem
, vec
);
20387 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20388 emit_move_insn (target
, tmp
);
20392 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20393 pattern to reduce; DEST is the destination; IN is the input vector. */
20396 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20398 rtx tmp1
, tmp2
, tmp3
;
20400 tmp1
= gen_reg_rtx (V4SFmode
);
20401 tmp2
= gen_reg_rtx (V4SFmode
);
20402 tmp3
= gen_reg_rtx (V4SFmode
);
20404 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20405 emit_insn (fn (tmp2
, tmp1
, in
));
20407 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20408 GEN_INT (1), GEN_INT (1),
20409 GEN_INT (1+4), GEN_INT (1+4)));
20410 emit_insn (fn (dest
, tmp2
, tmp3
));
20413 /* Target hook for scalar_mode_supported_p. */
20415 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20417 if (DECIMAL_FLOAT_MODE_P (mode
))
20420 return default_scalar_mode_supported_p (mode
);
20423 /* Implements target hook vector_mode_supported_p. */
20425 ix86_vector_mode_supported_p (enum machine_mode mode
)
20427 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20429 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20431 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20433 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20438 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20440 We do this in the new i386 backend to maintain source compatibility
20441 with the old cc0-based compiler. */
20444 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20445 tree inputs ATTRIBUTE_UNUSED
,
20448 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20450 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20455 /* Return true if this goes in small data/bss. */
20458 ix86_in_large_data_p (tree exp
)
20460 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20463 /* Functions are never large data. */
20464 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20467 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20469 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20470 if (strcmp (section
, ".ldata") == 0
20471 || strcmp (section
, ".lbss") == 0)
20477 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20479 /* If this is an incomplete type with size 0, then we can't put it
20480 in data because it might be too big when completed. */
20481 if (!size
|| size
> ix86_section_threshold
)
20488 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20490 default_encode_section_info (decl
, rtl
, first
);
20492 if (TREE_CODE (decl
) == VAR_DECL
20493 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20494 && ix86_in_large_data_p (decl
))
20495 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20498 /* Worker function for REVERSE_CONDITION. */
20501 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20503 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20504 ? reverse_condition (code
)
20505 : reverse_condition_maybe_unordered (code
));
20508 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20512 output_387_reg_move (rtx insn
, rtx
*operands
)
20514 if (REG_P (operands
[1])
20515 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20517 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20518 return output_387_ffreep (operands
, 0);
20519 return "fstp\t%y0";
20521 if (STACK_TOP_P (operands
[0]))
20522 return "fld%z1\t%y1";
20526 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20527 FP status register is set. */
20530 ix86_emit_fp_unordered_jump (rtx label
)
20532 rtx reg
= gen_reg_rtx (HImode
);
20535 emit_insn (gen_x86_fnstsw_1 (reg
));
20537 if (TARGET_USE_SAHF
)
20539 emit_insn (gen_x86_sahf_1 (reg
));
20541 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20542 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
20546 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20548 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20549 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20552 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20553 gen_rtx_LABEL_REF (VOIDmode
, label
),
20555 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20556 emit_jump_insn (temp
);
20559 /* Output code to perform a log1p XFmode calculation. */
20561 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20563 rtx label1
= gen_label_rtx ();
20564 rtx label2
= gen_label_rtx ();
20566 rtx tmp
= gen_reg_rtx (XFmode
);
20567 rtx tmp2
= gen_reg_rtx (XFmode
);
20569 emit_insn (gen_absxf2 (tmp
, op1
));
20570 emit_insn (gen_cmpxf (tmp
,
20571 CONST_DOUBLE_FROM_REAL_VALUE (
20572 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20574 emit_jump_insn (gen_bge (label1
));
20576 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20577 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20578 emit_jump (label2
);
20580 emit_label (label1
);
20581 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20582 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20583 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20584 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20586 emit_label (label2
);
20589 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20592 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20595 /* With Binutils 2.15, the "@unwind" marker must be specified on
20596 every occurrence of the ".eh_frame" section, not just the first
20599 && strcmp (name
, ".eh_frame") == 0)
20601 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20602 flags
& SECTION_WRITE
? "aw" : "a");
20605 default_elf_asm_named_section (name
, flags
, decl
);
20608 /* Return the mangling of TYPE if it is an extended fundamental type. */
20610 static const char *
20611 ix86_mangle_fundamental_type (tree type
)
20613 switch (TYPE_MODE (type
))
20616 /* __float128 is "g". */
20619 /* "long double" or __float80 is "e". */
20626 /* For 32-bit code we can save PIC register setup by using
20627 __stack_chk_fail_local hidden function instead of calling
20628 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20629 register, so it is better to call __stack_chk_fail directly. */
20632 ix86_stack_protect_fail (void)
20634 return TARGET_64BIT
20635 ? default_external_stack_protect_fail ()
20636 : default_hidden_stack_protect_fail ();
20639 /* Select a format to encode pointers in exception handling data. CODE
20640 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20641 true if the symbol may be affected by dynamic relocations.
20643 ??? All x86 object file formats are capable of representing this.
20644 After all, the relocation needed is the same as for the call insn.
20645 Whether or not a particular assembler allows us to enter such, I
20646 guess we'll have to see. */
20648 asm_preferred_eh_data_format (int code
, int global
)
20652 int type
= DW_EH_PE_sdata8
;
20654 || ix86_cmodel
== CM_SMALL_PIC
20655 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
20656 type
= DW_EH_PE_sdata4
;
20657 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
20659 if (ix86_cmodel
== CM_SMALL
20660 || (ix86_cmodel
== CM_MEDIUM
&& code
))
20661 return DW_EH_PE_udata4
;
20662 return DW_EH_PE_absptr
;
20665 /* Expand copysign from SIGN to the positive value ABS_VALUE
20666 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20669 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
20671 enum machine_mode mode
= GET_MODE (sign
);
20672 rtx sgn
= gen_reg_rtx (mode
);
20673 if (mask
== NULL_RTX
)
20675 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
20676 if (!VECTOR_MODE_P (mode
))
20678 /* We need to generate a scalar mode mask in this case. */
20679 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20680 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20681 mask
= gen_reg_rtx (mode
);
20682 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20686 mask
= gen_rtx_NOT (mode
, mask
);
20687 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
20688 gen_rtx_AND (mode
, mask
, sign
)));
20689 emit_insn (gen_rtx_SET (VOIDmode
, result
,
20690 gen_rtx_IOR (mode
, abs_value
, sgn
)));
20693 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20694 mask for masking out the sign-bit is stored in *SMASK, if that is
20697 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
20699 enum machine_mode mode
= GET_MODE (op0
);
20702 xa
= gen_reg_rtx (mode
);
20703 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
20704 if (!VECTOR_MODE_P (mode
))
20706 /* We need to generate a scalar mode mask in this case. */
20707 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20708 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20709 mask
= gen_reg_rtx (mode
);
20710 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20712 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
20713 gen_rtx_AND (mode
, op0
, mask
)));
20721 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20722 swapping the operands if SWAP_OPERANDS is true. The expanded
20723 code is a forward jump to a newly created label in case the
20724 comparison is true. The generated label rtx is returned. */
20726 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
20727 bool swap_operands
)
20738 label
= gen_label_rtx ();
20739 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
20740 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20741 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
20742 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
20743 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
20744 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
20745 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
20746 JUMP_LABEL (tmp
) = label
;
20751 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
20752 using comparison code CODE. Operands are swapped for the comparison if
20753 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
20755 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
20756 bool swap_operands
)
20758 enum machine_mode mode
= GET_MODE (op0
);
20759 rtx mask
= gen_reg_rtx (mode
);
20768 if (mode
== DFmode
)
20769 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
20770 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
20772 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
20773 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
20778 /* Generate and return a rtx of mode MODE for 2**n where n is the number
20779 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
20781 ix86_gen_TWO52 (enum machine_mode mode
)
20783 REAL_VALUE_TYPE TWO52r
;
20786 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
20787 TWO52
= const_double_from_real_value (TWO52r
, mode
);
20788 TWO52
= force_reg (mode
, TWO52
);
20793 /* Expand SSE sequence for computing lround from OP1 storing
20796 ix86_expand_lround (rtx op0
, rtx op1
)
20798 /* C code for the stuff we're doing below:
20799 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
20802 enum machine_mode mode
= GET_MODE (op1
);
20803 const struct real_format
*fmt
;
20804 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
20807 /* load nextafter (0.5, 0.0) */
20808 fmt
= REAL_MODE_FORMAT (mode
);
20809 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
20810 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
20812 /* adj = copysign (0.5, op1) */
20813 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
20814 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
20816 /* adj = op1 + adj */
20817 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
20819 /* op0 = (imode)adj */
20820 expand_fix (op0
, adj
, 0);
20823 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
20826 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
20828 /* C code for the stuff we're doing below (for do_floor):
20830 xi -= (double)xi > op1 ? 1 : 0;
20833 enum machine_mode fmode
= GET_MODE (op1
);
20834 enum machine_mode imode
= GET_MODE (op0
);
20835 rtx ireg
, freg
, label
, tmp
;
20837 /* reg = (long)op1 */
20838 ireg
= gen_reg_rtx (imode
);
20839 expand_fix (ireg
, op1
, 0);
20841 /* freg = (double)reg */
20842 freg
= gen_reg_rtx (fmode
);
20843 expand_float (freg
, ireg
, 0);
20845 /* ireg = (freg > op1) ? ireg - 1 : ireg */
20846 label
= ix86_expand_sse_compare_and_jump (UNLE
,
20847 freg
, op1
, !do_floor
);
20848 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
20849 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
20850 emit_move_insn (ireg
, tmp
);
20852 emit_label (label
);
20853 LABEL_NUSES (label
) = 1;
20855 emit_move_insn (op0
, ireg
);
20858 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
20859 result in OPERAND0. */
20861 ix86_expand_rint (rtx operand0
, rtx operand1
)
20863 /* C code for the stuff we're doing below:
20864 xa = fabs (operand1);
20865 if (!isless (xa, 2**52))
20867 xa = xa + 2**52 - 2**52;
20868 return copysign (xa, operand1);
20870 enum machine_mode mode
= GET_MODE (operand0
);
20871 rtx res
, xa
, label
, TWO52
, mask
;
20873 res
= gen_reg_rtx (mode
);
20874 emit_move_insn (res
, operand1
);
20876 /* xa = abs (operand1) */
20877 xa
= ix86_expand_sse_fabs (res
, &mask
);
20879 /* if (!isless (xa, TWO52)) goto label; */
20880 TWO52
= ix86_gen_TWO52 (mode
);
20881 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20883 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20884 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
20886 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
20888 emit_label (label
);
20889 LABEL_NUSES (label
) = 1;
20891 emit_move_insn (operand0
, res
);
20894 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20897 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
20899 /* C code for the stuff we expand below.
20900 double xa = fabs (x), x2;
20901 if (!isless (xa, TWO52))
20903 xa = xa + TWO52 - TWO52;
20904 x2 = copysign (xa, x);
20913 enum machine_mode mode
= GET_MODE (operand0
);
20914 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
20916 TWO52
= ix86_gen_TWO52 (mode
);
20918 /* Temporary for holding the result, initialized to the input
20919 operand to ease control flow. */
20920 res
= gen_reg_rtx (mode
);
20921 emit_move_insn (res
, operand1
);
20923 /* xa = abs (operand1) */
20924 xa
= ix86_expand_sse_fabs (res
, &mask
);
20926 /* if (!isless (xa, TWO52)) goto label; */
20927 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20929 /* xa = xa + TWO52 - TWO52; */
20930 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
20931 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
20933 /* xa = copysign (xa, operand1) */
20934 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
20936 /* generate 1.0 or -1.0 */
20937 one
= force_reg (mode
,
20938 const_double_from_real_value (do_floor
20939 ? dconst1
: dconstm1
, mode
));
20941 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20942 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
20943 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
20944 gen_rtx_AND (mode
, one
, tmp
)));
20945 /* We always need to subtract here to preserve signed zero. */
20946 tmp
= expand_simple_binop (mode
, MINUS
,
20947 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
20948 emit_move_insn (res
, tmp
);
20950 emit_label (label
);
20951 LABEL_NUSES (label
) = 1;
20953 emit_move_insn (operand0
, res
);
20956 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20959 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
20961 /* C code for the stuff we expand below.
20962 double xa = fabs (x), x2;
20963 if (!isless (xa, TWO52))
20965 x2 = (double)(long)x;
20972 if (HONOR_SIGNED_ZEROS (mode))
20973 return copysign (x2, x);
20976 enum machine_mode mode
= GET_MODE (operand0
);
20977 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
20979 TWO52
= ix86_gen_TWO52 (mode
);
20981 /* Temporary for holding the result, initialized to the input
20982 operand to ease control flow. */
20983 res
= gen_reg_rtx (mode
);
20984 emit_move_insn (res
, operand1
);
20986 /* xa = abs (operand1) */
20987 xa
= ix86_expand_sse_fabs (res
, &mask
);
20989 /* if (!isless (xa, TWO52)) goto label; */
20990 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20992 /* xa = (double)(long)x */
20993 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
20994 expand_fix (xi
, res
, 0);
20995 expand_float (xa
, xi
, 0);
20998 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21000 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21001 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21002 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21003 gen_rtx_AND (mode
, one
, tmp
)));
21004 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21005 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21006 emit_move_insn (res
, tmp
);
21008 if (HONOR_SIGNED_ZEROS (mode
))
21009 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21011 emit_label (label
);
21012 LABEL_NUSES (label
) = 1;
21014 emit_move_insn (operand0
, res
);
21017 /* Expand SSE sequence for computing round from OPERAND1 storing
21018 into OPERAND0. Sequence that works without relying on DImode truncation
21019 via cvttsd2siq that is only available on 64bit targets. */
21021 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21023 /* C code for the stuff we expand below.
21024 double xa = fabs (x), xa2, x2;
21025 if (!isless (xa, TWO52))
21027 Using the absolute value and copying back sign makes
21028 -0.0 -> -0.0 correct.
21029 xa2 = xa + TWO52 - TWO52;
21034 else if (dxa > 0.5)
21036 x2 = copysign (xa2, x);
21039 enum machine_mode mode
= GET_MODE (operand0
);
21040 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21042 TWO52
= ix86_gen_TWO52 (mode
);
21044 /* Temporary for holding the result, initialized to the input
21045 operand to ease control flow. */
21046 res
= gen_reg_rtx (mode
);
21047 emit_move_insn (res
, operand1
);
21049 /* xa = abs (operand1) */
21050 xa
= ix86_expand_sse_fabs (res
, &mask
);
21052 /* if (!isless (xa, TWO52)) goto label; */
21053 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21055 /* xa2 = xa + TWO52 - TWO52; */
21056 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21057 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21059 /* dxa = xa2 - xa; */
21060 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21062 /* generate 0.5, 1.0 and -0.5 */
21063 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21064 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21065 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21069 tmp
= gen_reg_rtx (mode
);
21070 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21071 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21072 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21073 gen_rtx_AND (mode
, one
, tmp
)));
21074 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21075 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21076 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21077 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21078 gen_rtx_AND (mode
, one
, tmp
)));
21079 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21081 /* res = copysign (xa2, operand1) */
21082 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21084 emit_label (label
);
21085 LABEL_NUSES (label
) = 1;
21087 emit_move_insn (operand0
, res
);
21090 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21093 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21095 /* C code for SSE variant we expand below.
21096 double xa = fabs (x), x2;
21097 if (!isless (xa, TWO52))
21099 x2 = (double)(long)x;
21100 if (HONOR_SIGNED_ZEROS (mode))
21101 return copysign (x2, x);
21104 enum machine_mode mode
= GET_MODE (operand0
);
21105 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21107 TWO52
= ix86_gen_TWO52 (mode
);
21109 /* Temporary for holding the result, initialized to the input
21110 operand to ease control flow. */
21111 res
= gen_reg_rtx (mode
);
21112 emit_move_insn (res
, operand1
);
21114 /* xa = abs (operand1) */
21115 xa
= ix86_expand_sse_fabs (res
, &mask
);
21117 /* if (!isless (xa, TWO52)) goto label; */
21118 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21120 /* x = (double)(long)x */
21121 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21122 expand_fix (xi
, res
, 0);
21123 expand_float (res
, xi
, 0);
21125 if (HONOR_SIGNED_ZEROS (mode
))
21126 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21128 emit_label (label
);
21129 LABEL_NUSES (label
) = 1;
21131 emit_move_insn (operand0
, res
);
21134 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21137 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21139 enum machine_mode mode
= GET_MODE (operand0
);
21140 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21142 /* C code for SSE variant we expand below.
21143 double xa = fabs (x), x2;
21144 if (!isless (xa, TWO52))
21146 xa2 = xa + TWO52 - TWO52;
21150 x2 = copysign (xa2, x);
21154 TWO52
= ix86_gen_TWO52 (mode
);
21156 /* Temporary for holding the result, initialized to the input
21157 operand to ease control flow. */
21158 res
= gen_reg_rtx (mode
);
21159 emit_move_insn (res
, operand1
);
21161 /* xa = abs (operand1) */
21162 xa
= ix86_expand_sse_fabs (res
, &smask
);
21164 /* if (!isless (xa, TWO52)) goto label; */
21165 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21167 /* res = xa + TWO52 - TWO52; */
21168 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21169 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21170 emit_move_insn (res
, tmp
);
21173 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21175 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21176 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21177 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21178 gen_rtx_AND (mode
, mask
, one
)));
21179 tmp
= expand_simple_binop (mode
, MINUS
,
21180 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21181 emit_move_insn (res
, tmp
);
21183 /* res = copysign (res, operand1) */
21184 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21186 emit_label (label
);
21187 LABEL_NUSES (label
) = 1;
21189 emit_move_insn (operand0
, res
);
21192 /* Expand SSE sequence for computing round from OPERAND1 storing
21195 ix86_expand_round (rtx operand0
, rtx operand1
)
21197 /* C code for the stuff we're doing below:
21198 double xa = fabs (x);
21199 if (!isless (xa, TWO52))
21201 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21202 return copysign (xa, x);
21204 enum machine_mode mode
= GET_MODE (operand0
);
21205 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21206 const struct real_format
*fmt
;
21207 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21209 /* Temporary for holding the result, initialized to the input
21210 operand to ease control flow. */
21211 res
= gen_reg_rtx (mode
);
21212 emit_move_insn (res
, operand1
);
21214 TWO52
= ix86_gen_TWO52 (mode
);
21215 xa
= ix86_expand_sse_fabs (res
, &mask
);
21216 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21218 /* load nextafter (0.5, 0.0) */
21219 fmt
= REAL_MODE_FORMAT (mode
);
21220 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21221 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21223 /* xa = xa + 0.5 */
21224 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21225 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21227 /* xa = (double)(int64_t)xa */
21228 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21229 expand_fix (xi
, xa
, 0);
21230 expand_float (xa
, xi
, 0);
21232 /* res = copysign (xa, operand1) */
21233 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21235 emit_label (label
);
21236 LABEL_NUSES (label
) = 1;
21238 emit_move_insn (operand0
, res
);
21241 #include "gt-i386.h"