/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit; -1 (all bits set) means "no limit".  A target
   configuration may pre-define its own value before this point, hence the
   #ifndef guard.  (The closing #endif was lost in extraction; restored.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The cost
   tables below have five entries -- QI, HI, SI, DI and "other" -- so any
   mode that is none of the first four maps to index 4.  (The final
   alternative and closing parenthesis were lost in extraction; restored.)  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so a cost expressed in bytes stays comparable to a cost in insns.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
/* Processor costs (relative to an add) */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table consulted by the rtx-cost machinery.  Starts out pointing at
   pentium_cost; presumably re-pointed by override_options () according to
   -mtune via processor_target_table below -- TODO confirm.  */
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
982 /* Processor feature/optimization bitmasks. */
/* Each m_* mask holds one bit keyed by its PROCESSOR_* enum value; the
   compound masks below OR together related CPU families so the tuning
   tables can name whole families at once.  */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
/* AMD / Geode family masks.  */
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling for Generic64 seems like good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro base chips. */
1013 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1017 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1022 /* X86_TUNE_USE_BIT_TEST */
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1030 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation result. But after P4 was made, no performance benefit
1034 was observed with branch hints. It also increases the code size.
1035 As a result, icc never generates branch hints. */
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1043 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1048 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on Generic32 compilation setting as well. However
1052 in current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and is in conflict with partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2
| m_GENERIC
,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386
| m_486
| m_K6_GEODE
,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1069 /* X86_TUNE_USE_MOV0 */
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1084 /* X86_TUNE_READ_MODIFY */
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1089 | m_GENERIC
/* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT
| m_486
| m_386
),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386
| m_PENT4
| m_NOCONA
,
1097 /* X86_TUNE_QIMODE_MATH */
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1117 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1124 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1129 | m_GENERIC
| m_GEODE
),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4 based chips that treat 128bit
1136 SSE registers as single units versus K8 based chips that divide SSE
1137 registers to two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 shows that disabling this option on P4 brings over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10
,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO
| m_PENT4
| m_NOCONA
,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1169 /* X86_TUNE_SHIFT1 */
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10
,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10
,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1203 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1204 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1207 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1208 vector path on AMD machines. */
1209 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1211 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1213 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1215 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1219 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1220 but one byte longer. */
1223 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1224 operand that cannot be represented using a modRM byte. The XOR
1225 replacement is long decoded, so this split helps here as well. */
1229 /* Feature tests against the various architecture variations. */
1230 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1231 /* X86_ARCH_CMOVE */
1232 m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
,
1234 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1237 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1240 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1243 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Mask of CPUs for which accumulating outgoing arguments in the prologue
   (rather than pushing them at each call) is the preferred default.
   NOTE(review): presumably tested against the tune mask in
   override_options -- confirm.  */
1247 static const unsigned int x86_accumulate_outgoing_args
1248 = m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
/* Mask of architectures on which the 80387's "fancy" math instructions
   are assumed always usable.  NOTE(review): the site that tests this mask
   is not visible in this chunk -- confirm its exact effect there.  */
1250 static const unsigned int x86_arch_always_fancy_math_387
1251 = m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1252 | m_NOCONA
| m_CORE2
| m_GENERIC
;
/* String-operation algorithm forced by -mstringop-strategy; no_stringop
   means "choose per the active cost table".  Set from
   ix86_stringop_string in override_options () below.  */
1254 static enum stringop_alg stringop_alg
= no_stringop
;
1256 /* In case the average insn count for single function invocation is
1257 lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
1259 #define FAST_PROLOGUE_INSN_COUNT 20
1261 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* NOTE(review): the *_REGISTER_NAMES initializer macros are presumably
   defined in i386.h -- confirm.  */
1262 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1263 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1264 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1266 /* Array of the smallest class containing reg number REGNO, indexed by
1267 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1269 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1271 /* ax, dx, cx, bx */
1272 AREG
, DREG
, CREG
, BREG
,
1273 /* si, di, bp, sp */
1274 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1276 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1277 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1280 /* flags, fpsr, fpcr, frame */
1281 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1282 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1284 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1286 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1287 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1288 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1292 /* The "default" register map used in 32bit mode. */
1294 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1296 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1297 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1298 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1299 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1300 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1302 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1305 static int const x86_64_int_parameter_registers
[6] =
1307 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1308 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1311 static int const x86_64_int_return_registers
[4] =
1313 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1316 /* The "default" register map used in 64bit mode. */
1317 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1319 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1320 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1321 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1322 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1323 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1324 8,9,10,11,12,13,14,15, /* extended integer registers */
1325 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1328 /* Define the register numbers to be used in Dwarf debugging information.
1329 The SVR4 reference port C compiler uses the following register numbers
1330 in its Dwarf output code:
1331 0 for %eax (gcc regno = 0)
1332 1 for %ecx (gcc regno = 2)
1333 2 for %edx (gcc regno = 1)
1334 3 for %ebx (gcc regno = 3)
1335 4 for %esp (gcc regno = 7)
1336 5 for %ebp (gcc regno = 6)
1337 6 for %esi (gcc regno = 4)
1338 7 for %edi (gcc regno = 5)
1339 The following three DWARF register numbers are never generated by
1340 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1341 believes these numbers have these meanings.
1342 8 for %eip (no gcc equivalent)
1343 9 for %eflags (gcc regno = 17)
1344 10 for %trapno (no gcc equivalent)
1345 It is not at all clear how we should number the FP stack registers
1346 for the x86 architecture. If the version of SDB on x86/svr4 were
1347 a bit less brain dead with respect to floating-point then we would
1348 have a precedent to follow with respect to DWARF register numbers
1349 for x86 FP registers, but the SDB on x86/svr4 is so completely
1350 broken with respect to FP registers that it is hardly worth thinking
1351 of it as something to strive for compatibility with.
1352 The version of x86/svr4 SDB I have at the moment does (partially)
1353 seem to believe that DWARF register number 11 is associated with
1354 the x86 register %st(0), but that's about all. Higher DWARF
1355 register numbers don't seem to be associated with anything in
1356 particular, and even for DWARF regno 11, SDB only seems to under-
1357 stand that it should say that a variable lives in %st(0) (when
1358 asked via an `=' command) if we said it was in DWARF regno 11,
1359 but SDB still prints garbage when asked for the value of the
1360 variable in question (via a `/' command).
1361 (Also note that the labels SDB prints for various FP stack regs
1362 when doing an `x' command are all wrong.)
1363 Note that these problems generally don't affect the native SVR4
1364 C compiler because it doesn't allow the use of -O with -g and
1365 because when it is *not* optimizing, it allocates a memory
1366 location for each floating-point variable, and the memory
1367 location is what gets described in the DWARF AT_location
1368 attribute for the variable in question.
1369 Regardless of the severe mental illness of the x86/svr4 SDB, we
1370 do something sensible here and we use the following DWARF
1371 register numbers. Note that these are all stack-top-relative
1373 11 for %st(0) (gcc regno = 8)
1374 12 for %st(1) (gcc regno = 9)
1375 13 for %st(2) (gcc regno = 10)
1376 14 for %st(3) (gcc regno = 11)
1377 15 for %st(4) (gcc regno = 12)
1378 16 for %st(5) (gcc regno = 13)
1379 17 for %st(6) (gcc regno = 14)
1380 18 for %st(7) (gcc regno = 15)
1382 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1384 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1385 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1386 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1387 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1388 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1389 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1390 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1393 /* Test and compare insns in i386.md store the information needed to
1394 generate branch and scc insns here. */
/* Operands of the pending comparison.  */
1396 rtx ix86_compare_op0
= NULL_RTX
;
1397 rtx ix86_compare_op1
= NULL_RTX
;
/* NOTE(review): presumably a flags rtx for a comparison already emitted
   as a side effect, taking precedence over op0/op1 -- confirm against
   the i386.md expanders.  */
1398 rtx ix86_compare_emitted
= NULL_RTX
;
1400 /* Size of the register save area. */
/* One word per integer argument register plus 16 bytes per SSE argument
   register.  */
1401 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1403 /* Define the structure for the machine field in struct function. */
1405 struct stack_local_entry
GTY(())
1407 unsigned short mode
;
1410 struct stack_local_entry
*next
;
1413 /* Structure describing stack frame layout.
1414 Stack grows downward:
1420 saved frame pointer if frame_pointer_needed
1421 <- HARD_FRAME_POINTER
1426 [va_arg registers] (
1427 > to_allocate <- FRAME_POINTER
1437 HOST_WIDE_INT frame
;
1439 int outgoing_arguments_size
;
1442 HOST_WIDE_INT to_allocate
;
1443 /* The offsets relative to ARG_POINTER. */
1444 HOST_WIDE_INT frame_pointer_offset
;
1445 HOST_WIDE_INT hard_frame_pointer_offset
;
1446 HOST_WIDE_INT stack_pointer_offset
;
1448 /* When save_regs_using_mov is set, emit prologue using
1449 move instead of push instructions. */
1450 bool save_regs_using_mov
;
1453 /* Code model option (-mcmodel=). */
1454 enum cmodel ix86_cmodel
;
/* Assembler syntax to emit (-masm=); AT&T by default.  */
1456 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* TLS access dialect (-mtls-dialect=); GNU by default.  */
1458 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1460 /* Which unit we are generating floating point math for. */
1461 enum fpmath_unit ix86_fpmath
;
1463 /* Which cpu are we scheduling for. */
1464 enum processor_type ix86_tune
;
1466 /* Which instruction set architecture to use. */
1467 enum processor_type ix86_arch
;
1469 /* true if sse prefetch instruction is not NOOP. */
1470 int x86_prefetch_sse
;
1472 /* ix86_regparm_string as a number */
1473 static int ix86_regparm
;
1475 /* -mstackrealign option */
1476 extern int ix86_force_align_arg_pointer
;
/* NOTE(review): presumably the attribute name giving the per-function
   equivalent of -mstackrealign -- confirm where it is matched.  */
1477 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1479 /* Preferred alignment for stack boundary in bits. */
1480 unsigned int ix86_preferred_stack_boundary
;
1482 /* Values 1-5: see jump.c */
1483 int ix86_branch_cost
;
1485 /* Variables which are this size or smaller are put in the data/bss
1486 or ldata/lbss sections. */
1488 int ix86_section_threshold
= 65536;
1490 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1491 char internal_label_prefix
[16];
/* NOTE(review): presumably the cached strlen of internal_label_prefix --
   confirm where it is initialized.  */
1492 int internal_label_prefix_len
;
1494 /* Register class used for passing given 64bit part of the argument.
1495 These represent classes as documented by the PS ABI, with the exception
1496 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1497 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1499 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1500 whenever possible (upper half does contain padding). */
1501 enum x86_64_reg_class
1504 X86_64_INTEGER_CLASS
,
1505 X86_64_INTEGERSI_CLASS
,
1512 X86_64_COMPLEX_X87_CLASS
,
1515 static const char * const x86_64_reg_class_name
[] =
1517 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1518 "sseup", "x87", "x87up", "cplx87", "no"
1521 #define MAX_CLASSES 4
/* NOTE(review): MAX_CLASSES is presumably the maximum number of
   eightbyte classes one argument can occupy in the x86-64 parameter
   classification -- confirm against the classify_argument code.  */
1523 /* Table of constants used by fldpi, fldln2, etc.... */
/* Presumably filled lazily; ext_80387_constants_init records whether
   the table has been computed yet -- confirm at the fill site.  */
1524 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1525 static bool ext_80387_constants_init
= 0;
/* Forward declarations for helpers defined later in this file.  */
1528 static struct machine_function
* ix86_init_machine_status (void);
1529 static rtx
ix86_function_value (tree
, tree
, bool);
1530 static int ix86_function_regparm (tree
, tree
);
1531 static void ix86_compute_frame_layout (struct ix86_frame
*);
1532 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1536 /* The svr4 ABI for the i386 says that records and unions are returned
1538 #ifndef DEFAULT_PCC_STRUCT_RETURN
1539 #define DEFAULT_PCC_STRUCT_RETURN 1
1542 /* Implement TARGET_HANDLE_OPTION. */
1545 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1552 target_flags
&= ~MASK_3DNOW_A
;
1553 target_flags_explicit
|= MASK_3DNOW_A
;
1560 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1561 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1568 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1569 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1576 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1577 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1584 target_flags
&= ~MASK_SSE4A
;
1585 target_flags_explicit
|= MASK_SSE4A
;
1594 /* Sometimes certain combinations of command options do not make
1595 sense on a particular target machine. You can define a macro
1596 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1597 defined, is executed once just after all the command options have
1600 Don't use this macro to turn on various extra optimizations for
1601 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1604 override_options (void)
1607 int ix86_tune_defaulted
= 0;
1608 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1610 /* Comes from final.c -- no real reason to change it. */
1611 #define MAX_CODE_ALIGN 16
1615 const struct processor_costs
*cost
; /* Processor costs */
1616 const int target_enable
; /* Target flags to enable. */
1617 const int target_disable
; /* Target flags to disable. */
1618 const int align_loop
; /* Default alignments. */
1619 const int align_loop_max_skip
;
1620 const int align_jump
;
1621 const int align_jump_max_skip
;
1622 const int align_func
;
1624 const processor_target_table
[PROCESSOR_max
] =
1626 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1627 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1628 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1629 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1630 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1631 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1632 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1633 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1634 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1635 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1636 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1637 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1638 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1639 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1642 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1645 const char *const name
; /* processor name or nickname. */
1646 const enum processor_type processor
;
1647 const enum pta_flags
1653 PTA_PREFETCH_SSE
= 1 << 4,
1655 PTA_3DNOW_A
= 1 << 6,
1659 PTA_POPCNT
= 1 << 10,
1661 PTA_SSE4A
= 1 << 12,
1662 PTA_NO_SAHF
= 1 << 13
1665 const processor_alias_table
[] =
1667 {"i386", PROCESSOR_I386
, 0},
1668 {"i486", PROCESSOR_I486
, 0},
1669 {"i586", PROCESSOR_PENTIUM
, 0},
1670 {"pentium", PROCESSOR_PENTIUM
, 0},
1671 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1672 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1673 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1674 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1675 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1676 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1677 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1678 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1679 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1680 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1681 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1682 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1683 | PTA_MMX
| PTA_PREFETCH_SSE
},
1684 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1685 | PTA_MMX
| PTA_PREFETCH_SSE
},
1686 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1687 | PTA_MMX
| PTA_PREFETCH_SSE
},
1688 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1689 | PTA_MMX
| PTA_PREFETCH_SSE
1690 | PTA_CX16
| PTA_NO_SAHF
},
1691 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1692 | PTA_64BIT
| PTA_MMX
1693 | PTA_PREFETCH_SSE
| PTA_CX16
},
1694 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1696 {"k6", PROCESSOR_K6
, PTA_MMX
},
1697 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1698 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1699 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1701 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1702 | PTA_3DNOW
| PTA_3DNOW_A
},
1703 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1704 | PTA_3DNOW_A
| PTA_SSE
},
1705 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1706 | PTA_3DNOW_A
| PTA_SSE
},
1707 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1708 | PTA_3DNOW_A
| PTA_SSE
},
1709 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1710 | PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
1711 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1712 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
1714 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1715 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1716 | PTA_SSE2
| PTA_NO_SAHF
},
1717 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1718 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1719 | PTA_SSE2
| PTA_NO_SAHF
},
1720 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1721 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1722 | PTA_SSE2
| PTA_NO_SAHF
},
1723 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1724 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1725 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1726 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1727 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1728 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1731 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1733 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1734 SUBTARGET_OVERRIDE_OPTIONS
;
1737 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1738 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1741 /* -fPIC is the default for x86_64. */
1742 if (TARGET_MACHO
&& TARGET_64BIT
)
1745 /* Set the default values for switches whose default depends on TARGET_64BIT
1746 in case they weren't overwritten by command line options. */
1749 /* Mach-O doesn't support omitting the frame pointer for now. */
1750 if (flag_omit_frame_pointer
== 2)
1751 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1752 if (flag_asynchronous_unwind_tables
== 2)
1753 flag_asynchronous_unwind_tables
= 1;
1754 if (flag_pcc_struct_return
== 2)
1755 flag_pcc_struct_return
= 0;
1759 if (flag_omit_frame_pointer
== 2)
1760 flag_omit_frame_pointer
= 0;
1761 if (flag_asynchronous_unwind_tables
== 2)
1762 flag_asynchronous_unwind_tables
= 0;
1763 if (flag_pcc_struct_return
== 2)
1764 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1767 /* Need to check -mtune=generic first. */
1768 if (ix86_tune_string
)
1770 if (!strcmp (ix86_tune_string
, "generic")
1771 || !strcmp (ix86_tune_string
, "i686")
1772 /* As special support for cross compilers we read -mtune=native
1773 as -mtune=generic. With native compilers we won't see the
1774 -mtune=native, as it was changed by the driver. */
1775 || !strcmp (ix86_tune_string
, "native"))
1778 ix86_tune_string
= "generic64";
1780 ix86_tune_string
= "generic32";
1782 else if (!strncmp (ix86_tune_string
, "generic", 7))
1783 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1787 if (ix86_arch_string
)
1788 ix86_tune_string
= ix86_arch_string
;
1789 if (!ix86_tune_string
)
1791 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1792 ix86_tune_defaulted
= 1;
1795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1796 need to use a sensible tune option. */
1797 if (!strcmp (ix86_tune_string
, "generic")
1798 || !strcmp (ix86_tune_string
, "x86-64")
1799 || !strcmp (ix86_tune_string
, "i686"))
1802 ix86_tune_string
= "generic64";
1804 ix86_tune_string
= "generic32";
1807 if (ix86_stringop_string
)
1809 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1810 stringop_alg
= rep_prefix_1_byte
;
1811 else if (!strcmp (ix86_stringop_string
, "libcall"))
1812 stringop_alg
= libcall
;
1813 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
1814 stringop_alg
= rep_prefix_4_byte
;
1815 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
1816 stringop_alg
= rep_prefix_8_byte
;
1817 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
1818 stringop_alg
= loop_1_byte
;
1819 else if (!strcmp (ix86_stringop_string
, "loop"))
1820 stringop_alg
= loop
;
1821 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
1822 stringop_alg
= unrolled_loop
;
1824 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
1826 if (!strcmp (ix86_tune_string
, "x86-64"))
1827 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1828 "-mtune=generic instead as appropriate.");
1830 if (!ix86_arch_string
)
1831 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1832 if (!strcmp (ix86_arch_string
, "generic"))
1833 error ("generic CPU can be used only for -mtune= switch");
1834 if (!strncmp (ix86_arch_string
, "generic", 7))
1835 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1837 if (ix86_cmodel_string
!= 0)
1839 if (!strcmp (ix86_cmodel_string
, "small"))
1840 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1841 else if (!strcmp (ix86_cmodel_string
, "medium"))
1842 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1843 else if (!strcmp (ix86_cmodel_string
, "large"))
1844 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
1846 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
1847 else if (!strcmp (ix86_cmodel_string
, "32"))
1848 ix86_cmodel
= CM_32
;
1849 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1850 ix86_cmodel
= CM_KERNEL
;
1852 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1856 ix86_cmodel
= CM_32
;
1858 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1860 if (ix86_asm_string
!= 0)
1863 && !strcmp (ix86_asm_string
, "intel"))
1864 ix86_asm_dialect
= ASM_INTEL
;
1865 else if (!strcmp (ix86_asm_string
, "att"))
1866 ix86_asm_dialect
= ASM_ATT
;
1868 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1870 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1871 error ("code model %qs not supported in the %s bit mode",
1872 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1873 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1874 sorry ("%i-bit mode not compiled in",
1875 (target_flags
& MASK_64BIT
) ? 64 : 32);
1877 for (i
= 0; i
< pta_size
; i
++)
1878 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1880 ix86_arch
= processor_alias_table
[i
].processor
;
1881 /* Default cpu tuning to the architecture. */
1882 ix86_tune
= ix86_arch
;
1883 if (processor_alias_table
[i
].flags
& PTA_MMX
1884 && !(target_flags_explicit
& MASK_MMX
))
1885 target_flags
|= MASK_MMX
;
1886 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1887 && !(target_flags_explicit
& MASK_3DNOW
))
1888 target_flags
|= MASK_3DNOW
;
1889 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1890 && !(target_flags_explicit
& MASK_3DNOW_A
))
1891 target_flags
|= MASK_3DNOW_A
;
1892 if (processor_alias_table
[i
].flags
& PTA_SSE
1893 && !(target_flags_explicit
& MASK_SSE
))
1894 target_flags
|= MASK_SSE
;
1895 if (processor_alias_table
[i
].flags
& PTA_SSE2
1896 && !(target_flags_explicit
& MASK_SSE2
))
1897 target_flags
|= MASK_SSE2
;
1898 if (processor_alias_table
[i
].flags
& PTA_SSE3
1899 && !(target_flags_explicit
& MASK_SSE3
))
1900 target_flags
|= MASK_SSE3
;
1901 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1902 && !(target_flags_explicit
& MASK_SSSE3
))
1903 target_flags
|= MASK_SSSE3
;
1904 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1905 x86_prefetch_sse
= true;
1906 if (processor_alias_table
[i
].flags
& PTA_CX16
)
1907 x86_cmpxchg16b
= true;
1908 if (processor_alias_table
[i
].flags
& PTA_POPCNT
1909 && !(target_flags_explicit
& MASK_POPCNT
))
1910 target_flags
|= MASK_POPCNT
;
1911 if (processor_alias_table
[i
].flags
& PTA_ABM
1912 && !(target_flags_explicit
& MASK_ABM
))
1913 target_flags
|= MASK_ABM
;
1914 if (processor_alias_table
[i
].flags
& PTA_SSE4A
1915 && !(target_flags_explicit
& MASK_SSE4A
))
1916 target_flags
|= MASK_SSE4A
;
1917 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
1919 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1920 error ("CPU you selected does not support x86-64 "
1926 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1928 ix86_arch_mask
= 1u << ix86_arch
;
1929 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
1930 ix86_arch_features
[i
] &= ix86_arch_mask
;
1932 for (i
= 0; i
< pta_size
; i
++)
1933 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1935 ix86_tune
= processor_alias_table
[i
].processor
;
1936 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1938 if (ix86_tune_defaulted
)
1940 ix86_tune_string
= "x86-64";
1941 for (i
= 0; i
< pta_size
; i
++)
1942 if (! strcmp (ix86_tune_string
,
1943 processor_alias_table
[i
].name
))
1945 ix86_tune
= processor_alias_table
[i
].processor
;
1948 error ("CPU you selected does not support x86-64 "
1951 /* Intel CPUs have always interpreted SSE prefetch instructions as
1952 NOPs; so, we can enable SSE prefetch instructions even when
1953 -mtune (rather than -march) points us to a processor that has them.
1954 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1955 higher processors. */
1956 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1957 x86_prefetch_sse
= true;
1961 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1963 ix86_tune_mask
= 1u << ix86_tune
;
1964 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
1965 ix86_tune_features
[i
] &= ix86_tune_mask
;
1968 ix86_cost
= &size_cost
;
1970 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1971 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1972 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1974 /* Arrange to set up i386_stack_locals for all functions. */
1975 init_machine_status
= ix86_init_machine_status
;
1977 /* Validate -mregparm= value. */
1978 if (ix86_regparm_string
)
1980 i
= atoi (ix86_regparm_string
);
1981 if (i
< 0 || i
> REGPARM_MAX
)
1982 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1988 ix86_regparm
= REGPARM_MAX
;
1990 /* If the user has provided any of the -malign-* options,
1991 warn and use that value only if -falign-* is not set.
1992 Remove this code in GCC 3.2 or later. */
1993 if (ix86_align_loops_string
)
1995 warning (0, "-malign-loops is obsolete, use -falign-loops");
1996 if (align_loops
== 0)
1998 i
= atoi (ix86_align_loops_string
);
1999 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2000 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2002 align_loops
= 1 << i
;
2006 if (ix86_align_jumps_string
)
2008 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2009 if (align_jumps
== 0)
2011 i
= atoi (ix86_align_jumps_string
);
2012 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2013 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2015 align_jumps
= 1 << i
;
2019 if (ix86_align_funcs_string
)
2021 warning (0, "-malign-functions is obsolete, use -falign-functions");
2022 if (align_functions
== 0)
2024 i
= atoi (ix86_align_funcs_string
);
2025 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2026 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2028 align_functions
= 1 << i
;
2032 /* Default align_* from the processor table. */
2033 if (align_loops
== 0)
2035 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2036 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2038 if (align_jumps
== 0)
2040 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2041 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2043 if (align_functions
== 0)
2045 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2048 /* Validate -mbranch-cost= value, or provide default. */
2049 ix86_branch_cost
= ix86_cost
->branch_cost
;
2050 if (ix86_branch_cost_string
)
2052 i
= atoi (ix86_branch_cost_string
);
2054 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2056 ix86_branch_cost
= i
;
2058 if (ix86_section_threshold_string
)
2060 i
= atoi (ix86_section_threshold_string
);
2062 error ("-mlarge-data-threshold=%d is negative", i
);
2064 ix86_section_threshold
= i
;
2067 if (ix86_tls_dialect_string
)
2069 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2070 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2071 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2072 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2073 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2074 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2076 error ("bad value (%s) for -mtls-dialect= switch",
2077 ix86_tls_dialect_string
);
2080 /* Keep nonleaf frame pointers. */
2081 if (flag_omit_frame_pointer
)
2082 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2083 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2084 flag_omit_frame_pointer
= 1;
2086 /* If we're doing fast math, we don't care about comparison order
2087 wrt NaNs. This lets us use a shorter comparison sequence. */
2088 if (flag_finite_math_only
)
2089 target_flags
&= ~MASK_IEEE_FP
;
2091 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2092 since the insns won't need emulation. */
2093 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2094 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2096 /* Likewise, if the target doesn't have a 387, or we've specified
2097 software floating point, don't use 387 inline intrinsics. */
2099 target_flags
|= MASK_NO_FANCY_MATH_387
;
2101 /* Turn on SSE3 builtins for -mssse3. */
2103 target_flags
|= MASK_SSE3
;
2105 /* Turn on SSE3 builtins for -msse4a. */
2107 target_flags
|= MASK_SSE3
;
2109 /* Turn on SSE2 builtins for -msse3. */
2111 target_flags
|= MASK_SSE2
;
2113 /* Turn on SSE builtins for -msse2. */
2115 target_flags
|= MASK_SSE
;
2117 /* Turn on MMX builtins for -msse. */
2120 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2121 x86_prefetch_sse
= true;
2124 /* Turn on MMX builtins for 3Dnow. */
2126 target_flags
|= MASK_MMX
;
2128 /* Turn on POPCNT builtins for -mabm. */
2130 target_flags
|= MASK_POPCNT
;
2134 if (TARGET_ALIGN_DOUBLE
)
2135 error ("-malign-double makes no sense in the 64bit mode");
2137 error ("-mrtd calling convention not supported in the 64bit mode");
2139 /* Enable by default the SSE and MMX builtins. Do allow the user to
2140 explicitly disable any of these. In particular, disabling SSE and
2141 MMX for kernel code is extremely useful. */
2143 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2144 & ~target_flags_explicit
);
2148 /* i386 ABI does not specify red zone. It still makes sense to use it
2149 when the programmer takes care to keep the stack from being destroyed. */
2150 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2151 target_flags
|= MASK_NO_RED_ZONE
;
2154 /* Validate -mpreferred-stack-boundary= value, or provide default.
2155 The default of 128 bits is for Pentium III's SSE __m128. We can't
2156 change it because of optimize_size. Otherwise, we can't mix object
2157 files compiled with -Os and -On. */
2158 ix86_preferred_stack_boundary
= 128;
2159 if (ix86_preferred_stack_boundary_string
)
2161 i
= atoi (ix86_preferred_stack_boundary_string
);
2162 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2163 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2164 TARGET_64BIT
? 4 : 2);
2166 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2169 /* Accept -msseregparm only if at least SSE support is enabled. */
2170 if (TARGET_SSEREGPARM
2172 error ("-msseregparm used without SSE enabled");
2174 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2175 if (ix86_fpmath_string
!= 0)
2177 if (! strcmp (ix86_fpmath_string
, "387"))
2178 ix86_fpmath
= FPMATH_387
;
2179 else if (! strcmp (ix86_fpmath_string
, "sse"))
2183 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2184 ix86_fpmath
= FPMATH_387
;
2187 ix86_fpmath
= FPMATH_SSE
;
2189 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2190 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2194 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2195 ix86_fpmath
= FPMATH_387
;
2197 else if (!TARGET_80387
)
2199 warning (0, "387 instruction set disabled, using SSE arithmetics");
2200 ix86_fpmath
= FPMATH_SSE
;
2203 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2206 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2209 /* If the i387 is disabled, then do not return values in it. */
2211 target_flags
&= ~MASK_FLOAT_RETURNS
;
2213 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2214 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2216 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2218 /* ??? Unwind info is not correct around the CFG unless either a frame
2219 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2220 unwind info generation to be aware of the CFG and propagating states
2222 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2223 || flag_exceptions
|| flag_non_call_exceptions
)
2224 && flag_omit_frame_pointer
2225 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2227 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2228 warning (0, "unwind tables currently require either a frame pointer "
2229 "or -maccumulate-outgoing-args for correctness");
2230 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2233 /* For sane SSE instruction set generation we need fcomi instruction.
2234 It is safe to enable all CMOVE instructions. */
2238 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2241 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2242 p
= strchr (internal_label_prefix
, 'X');
2243 internal_label_prefix_len
= p
- internal_label_prefix
;
2247 /* When scheduling description is not available, disable scheduler pass
2248 so it won't slow down the compilation and make x87 code slower. */
2249 if (!TARGET_SCHEDULE
)
2250 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2252 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2253 set_param_value ("simultaneous-prefetches",
2254 ix86_cost
->simultaneous_prefetches
);
2255 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2256 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2259 /* Return true if this goes in large data/bss. */
/* NOTE(review): this chunk appears mangled by extraction -- statements are
   split across lines and several original lines (braces, return statements)
   are missing.  Code text below is left byte-identical; only comments were
   added.  */
2262 ix86_in_large_data_p (tree exp
)
/* Large-data placement only applies to the medium code models; presumably
   other models fall through to ordinary sections -- the return is among the
   missing lines, so confirm against the original file.  */
2264 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2267 /* Functions are never large data. */
2268 if (TREE_CODE (exp
) == FUNCTION_DECL
)
/* An explicit ".ldata"/".lbss" section name forces large-data treatment.  */
2271 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2273 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2274 if (strcmp (section
, ".ldata") == 0
2275 || strcmp (section
, ".lbss") == 0)
/* Otherwise decide by object size against ix86_section_threshold
   (-mlarge-data-threshold).  */
2281 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2283 /* If this is an incomplete type with size 0, then we can't put it
2284 in data because it might be too big when completed. */
2285 if (!size
|| size
> ix86_section_threshold
)
2292 /* Switch to the appropriate section for output of DECL.
2293 DECL is either a `VAR_DECL' node or a constant of some sort.
2294 RELOC indicates whether forming the initial value of DECL requires
2295 link-time relocations. */
/* NOTE(review): mangled extraction -- case `break's, the switch braces and
   some default handling appear to be missing lines.  Code text is left
   byte-identical; only comments added.  */
2297 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2301 x86_64_elf_select_section (tree decl
, int reloc
,
2302 unsigned HOST_WIDE_INT align
)
/* Medium-model large data gets routed into the ".l*" section family.  */
2304 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2305 && ix86_in_large_data_p (decl
))
2307 const char *sname
= NULL
;
2308 unsigned int flags
= SECTION_WRITE
;
2309 switch (categorize_decl_for_section (decl
, reloc
))
2314 case SECCAT_DATA_REL
:
2315 sname
= ".ldata.rel";
2317 case SECCAT_DATA_REL_LOCAL
:
2318 sname
= ".ldata.rel.local";
2320 case SECCAT_DATA_REL_RO
:
2321 sname
= ".ldata.rel.ro";
2323 case SECCAT_DATA_REL_RO_LOCAL
:
2324 sname
= ".ldata.rel.ro.local";
/* BSS-like categories additionally mark the section SECTION_BSS.  */
2328 flags
|= SECTION_BSS
;
2331 case SECCAT_RODATA_MERGE_STR
:
2332 case SECCAT_RODATA_MERGE_STR_INIT
:
2333 case SECCAT_RODATA_MERGE_CONST
:
2337 case SECCAT_SRODATA
:
2344 /* We don't split these for medium model. Place them into
2345 default sections and hope for best. */
2350 /* We might get called with string constants, but get_named_section
2351 doesn't like them as they are not DECLs. Also, we need to set
2352 flags in that case. */
2354 return get_section (sname
, flags
, NULL
);
2355 return get_named_section (decl
, sname
, reloc
);
/* Everything else: defer to the generic ELF section selector.  */
2358 return default_elf_select_section (decl
, reloc
, align
);
2361 /* Build up a unique section name, expressed as a
2362 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2363 RELOC indicates whether the initial value of EXP requires
2364 link-time relocations. */
/* NOTE(review): mangled extraction -- missing case breaks/braces and the
   declarations of `plen', `nlen', `name' and `string' are among the absent
   lines.  Code text left byte-identical; comments only.  */
2366 static void ATTRIBUTE_UNUSED
2367 x86_64_elf_unique_section (tree decl
, int reloc
)
/* Only medium-model large data gets a ".l*"-prefixed unique section.  */
2369 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2370 && ix86_in_large_data_p (decl
))
2372 const char *prefix
= NULL
;
2373 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2374 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2376 switch (categorize_decl_for_section (decl
, reloc
))
2379 case SECCAT_DATA_REL
:
2380 case SECCAT_DATA_REL_LOCAL
:
2381 case SECCAT_DATA_REL_RO
:
2382 case SECCAT_DATA_REL_RO_LOCAL
:
2383 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2386 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2389 case SECCAT_RODATA_MERGE_STR
:
2390 case SECCAT_RODATA_MERGE_STR_INIT
:
2391 case SECCAT_RODATA_MERGE_CONST
:
2392 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2394 case SECCAT_SRODATA
:
2401 /* We don't split these for medium model. Place them into
2402 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer and
   record it as the decl's section name.  */
2410 plen
= strlen (prefix
);
2412 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2413 name
= targetm
.strip_name_encoding (name
);
2414 nlen
= strlen (name
);
2416 string
= alloca (nlen
+ plen
+ 1);
2417 memcpy (string
, prefix
, plen
);
2418 memcpy (string
+ plen
, name
, nlen
+ 1);
2420 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
/* Non-large data: fall back to the generic unique-section logic.  */
2424 default_unique_section (decl
, reloc
);
2427 #ifdef COMMON_ASM_OP
2428 /* This says how to output assembler code to declare an
2429 uninitialized external linkage data object.
2431 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): mangled extraction -- the return type, the `else' before
   the COMMON_ASM_OP branch, and the `align' parameter declaration are
   among the missing lines.  Code text left byte-identical.  */
2434 x86_elf_aligned_common (FILE *file
,
2435 const char *name
, unsigned HOST_WIDE_INT size
,
/* Large medium-model objects are emitted with .largecomm instead of the
   target's usual COMMON_ASM_OP.  */
2438 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2439 && size
> (unsigned int)ix86_section_threshold
)
2440 fprintf (file
, ".largecomm\t");
2442 fprintf (file
, "%s", COMMON_ASM_OP
);
2443 assemble_name (file
, name
);
/* Emit ",size,alignment-in-bytes" after the symbol name.  */
2444 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2445 size
, align
/ BITS_PER_UNIT
);
2449 /* Utility function for targets to use in implementing
2450 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): mangled extraction -- return type, `else' before the
   bss_section branch, and the `#else' of the ASM_DECLARE_OBJECT_NAME
   conditional appear to be missing lines.  Code text left byte-identical.  */
2453 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2454 const char *name
, unsigned HOST_WIDE_INT size
,
/* Large medium-model objects go into .lbss; everything else into the
   ordinary bss section.  */
2457 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2458 && size
> (unsigned int)ix86_section_threshold
)
2459 switch_to_section (get_named_section (decl
, ".lbss", 0));
2461 switch_to_section (bss_section
);
2462 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2463 #ifdef ASM_DECLARE_OBJECT_NAME
2464 last_assemble_variable_decl
= decl
;
2465 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2467 /* Standard thing is just output label for the object. */
2468 ASM_OUTPUT_LABEL (file
, name
);
2469 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve the object's storage; at least one byte even for size 0.  */
2470 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* Set default optimization-dependent flags; called once per -O level.
   NOTE(review): mangled extraction -- the return type, the `if (level > 1)'
   style guards and the TARGET_MACHO conditional appear to be missing lines.
   Code text left byte-identical.  */
2474 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2476 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2477 make the problem with not enough registers even worse. */
2478 #ifdef INSN_SCHEDULING
2480 flag_schedule_insns
= 0;
2484 /* The Darwin libraries never set errno, so we might as well
2485 avoid calling them when that's the only reason we would. */
2486 flag_errno_math
= 0;
2488 /* The default values of these switches depend on the TARGET_64BIT
2489 that is not known at this moment. Mark these values with 2 and
2490 let the user override these. In case there is no command line option
2491 specifying them, we will set the defaults in override_options. */
2493 flag_omit_frame_pointer
= 2;
2494 flag_pcc_struct_return
= 2;
2495 flag_asynchronous_unwind_tables
= 2;
2496 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2497 SUBTARGET_OPTIMIZATION_OPTIONS
;
2501 /* Decide whether we can make a sibling call to a function. DECL is the
2502 declaration of the function being targeted by the call and EXP is the
2503 CALL_EXPR representing the call. */
/* NOTE(review): mangled extraction -- the return type, local declarations
   (`func', `a', `b', `type') and the `return false/true' lines are among
   the missing lines.  Code text left byte-identical; comments only.  */
2506 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2511 /* If we are generating position-independent code, we cannot sibcall
2512 optimize any indirect call, or a direct call to a global function,
2513 as the PLT requires %ebx be live. */
2514 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
/* Strip the pointer type off the callee expression to reach the
   function type.  */
2521 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2522 if (POINTER_TYPE_P (func
))
2523 func
= TREE_TYPE (func
);
2526 /* Check that the return value locations are the same. Like
2527 if we are returning floats on the 80387 register stack, we cannot
2528 make a sibcall from a function that doesn't return a float to a
2529 function that does or, conversely, from a function that does return
2530 a float to a function that doesn't; the necessary stack adjustment
2531 would not be executed. This is also the place we notice
2532 differences in the return value ABI. Note that it is ok for one
2533 of the functions to have void return type as long as the return
2534 value of the other is passed in a register. */
2535 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2536 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2538 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2540 if (!rtx_equal_p (a
, b
))
2543 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2545 else if (!rtx_equal_p (a
, b
))
2548 /* If this call is indirect, we'll need to be able to use a call-clobbered
2549 register for the address of the target function. Make sure that all
2550 such registers are not used for passing parameters. */
2551 if (!decl
&& !TARGET_64BIT
)
2555 /* We're looking at the CALL_EXPR, we need the type of the function. */
2556 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2557 type
= TREE_TYPE (type
); /* pointer type */
2558 type
= TREE_TYPE (type
); /* function type */
2560 if (ix86_function_regparm (type
, NULL
) >= 3)
2562 /* ??? Need to count the actual number of registers to be used,
2563 not the possible number of registers. Fix later. */
2568 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2569 /* Dllimport'd functions are also called indirectly. */
2570 if (decl
&& DECL_DLLIMPORT_P (decl
)
2571 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2575 /* If we forced aligned the stack, then sibcalling would unalign the
2576 stack, which may break the called function. */
2577 if (cfun
->machine
->force_align_arg_pointer
)
2580 /* Otherwise okay. That also includes certain types of indirect calls. */
2584 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2585 calling convention attributes;
2586 arguments as in struct attribute_spec.handler. */
/* NOTE(review): mangled extraction -- the return type, the `args' and
   `no_add_attrs' parameters, several `return NULL_TREE' lines and the
   TARGET_64BIT branch are among the missing lines.  Code text left
   byte-identical; comments only.  */
2589 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2591 int flags ATTRIBUTE_UNUSED
,
/* These attributes only make sense on function types/decls; warn and
   drop the attribute otherwise.  */
2594 if (TREE_CODE (*node
) != FUNCTION_TYPE
2595 && TREE_CODE (*node
) != METHOD_TYPE
2596 && TREE_CODE (*node
) != FIELD_DECL
2597 && TREE_CODE (*node
) != TYPE_DECL
)
2599 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2600 IDENTIFIER_POINTER (name
));
2601 *no_add_attrs
= true;
2605 /* Can combine regparm with all attributes but fastcall. */
2606 if (is_attribute_p ("regparm", name
))
2610 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2612 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: must be an integer constant in
   [0, REGPARM_MAX].  */
2615 cst
= TREE_VALUE (args
);
2616 if (TREE_CODE (cst
) != INTEGER_CST
)
2618 warning (OPT_Wattributes
,
2619 "%qs attribute requires an integer constant argument",
2620 IDENTIFIER_POINTER (name
));
2621 *no_add_attrs
= true;
2623 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2625 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2626 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2627 *no_add_attrs
= true;
/* Force-align-arg-pointer functions reserve one register for the
   prologue, so they cannot use the full regparm count.  */
2631 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2632 TYPE_ATTRIBUTES (*node
))
2633 && compare_tree_int (cst
, REGPARM_MAX
-1))
2635 error ("%s functions limited to %d register parameters",
2636 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
/* Presumably the TARGET_64BIT path: these attributes are ignored in
   64-bit mode -- the guarding condition is a missing line; confirm.  */
2644 warning (OPT_Wattributes
, "%qs attribute ignored",
2645 IDENTIFIER_POINTER (name
));
2646 *no_add_attrs
= true;
2650 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2651 if (is_attribute_p ("fastcall", name
))
2653 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2655 error ("fastcall and cdecl attributes are not compatible");
2657 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2659 error ("fastcall and stdcall attributes are not compatible");
2661 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2663 error ("fastcall and regparm attributes are not compatible");
2667 /* Can combine stdcall with fastcall (redundant), regparm and
2669 else if (is_attribute_p ("stdcall", name
))
2671 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2673 error ("stdcall and cdecl attributes are not compatible");
2675 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2677 error ("stdcall and fastcall attributes are not compatible");
2681 /* Can combine cdecl with regparm and sseregparm. */
2682 else if (is_attribute_p ("cdecl", name
))
2684 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2686 error ("stdcall and cdecl attributes are not compatible");
2688 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2690 error ("fastcall and cdecl attributes are not compatible");
2694 /* Can combine sseregparm with all attributes. */
2699 /* Return 0 if the attributes for two types are incompatible, 1 if they
2700 are compatible, and 2 if they are nearly compatible (which causes a
2701 warning to be generated). */
/* NOTE(review): mangled extraction -- the return type and the `return 0' /
   `return 1' lines are among the missing lines.  Code text left
   byte-identical; comments only.  */
2704 ix86_comp_type_attributes (tree type1
, tree type2
)
2706 /* Check for mismatch of non-default calling convention. */
2707 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
/* Only function types carry calling-convention attributes worth
   comparing.  */
2709 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2712 /* Check for mismatched fastcall/regparm types. */
2713 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2714 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2715 || (ix86_function_regparm (type1
, NULL
)
2716 != ix86_function_regparm (type2
, NULL
)))
2719 /* Check for mismatched sseregparm types. */
2720 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2721 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2724 /* Check for mismatched return types (cdecl vs stdcall). */
2725 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2726 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2732 /* Return the regparm value for a function with the indicated TYPE and DECL.
2733 DECL may be NULL when calling function indirectly
2734 or considering a libcall. */
/* NOTE(review): mangled extraction -- the return type, the declarations of
   `attr' and `f', several guard conditions and the final `return regparm'
   are among the missing lines.  Code text left byte-identical; comments
   only.  */
2737 ix86_function_regparm (tree type
, tree decl
)
2740 int regparm
= ix86_regparm
;
/* An explicit regparm attribute on the type wins outright.  */
2745 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2747 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2749 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2752 /* Use register calling convention for local functions when possible. */
2753 if (decl
&& flag_unit_at_a_time
&& !profile_flag
)
2755 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2758 int local_regparm
, globals
= 0, regno
;
2761 /* Make sure no regparm register is taken by a
2762 global register variable. */
2763 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2764 if (global_regs
[local_regparm
])
2767 /* We can't use regparm(3) for nested functions as these use
2768 static chain pointer in third argument. */
2769 if (local_regparm
== 3
2770 && decl_function_context (decl
)
2771 && !DECL_NO_STATIC_CHAIN (decl
))
2774 /* If the function realigns its stackpointer, the prologue will
2775 clobber %ecx. If we've already generated code for the callee,
2776 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2777 scanning the attributes for the self-realigning property. */
2778 f
= DECL_STRUCT_FUNCTION (decl
);
2779 if (local_regparm
== 3
2780 && (f
? !!f
->machine
->force_align_arg_pointer
2781 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
2782 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2785 /* Each global register variable increases register pressure,
2786 so the more global reg vars there are, the smaller regparm
2787 optimization use, unless requested by the user explicitly. */
2788 for (regno
= 0; regno
< 6; regno
++)
2789 if (global_regs
[regno
])
2792 = globals
< local_regparm
? local_regparm
- globals
: 0;
/* Never lower an already-larger default; only raise it.  */
2794 if (local_regparm
> regparm
)
2795 regparm
= local_regparm
;
2802 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2803 DFmode (2) arguments in SSE registers for a function with the
2804 indicated TYPE and DECL. DECL may be NULL when calling function
2805 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): mangled extraction -- the return type, the TARGET_SSE
   checks guarding the errors, and the `return 0' fall-through are among
   the missing lines.  Code text left byte-identical; comments only.  */
2808 ix86_function_sseregparm (tree type
, tree decl
)
/* This path is only meaningful for 32-bit code.  */
2810 gcc_assert (!TARGET_64BIT
);
2812 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2813 by the sseregparm attribute. */
2814 if (TARGET_SSEREGPARM
2815 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
/* Diagnose sseregparm without SSE; the %qD form is used when a decl is
   available, the %qT form otherwise.  */
2820 error ("Calling %qD with attribute sseregparm without "
2821 "SSE/SSE2 enabled", decl
);
2823 error ("Calling %qT with attribute sseregparm without "
2824 "SSE/SSE2 enabled", type
);
2831 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2832 (and DFmode for SSE2) arguments in SSE registers. */
2833 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2835 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2837 return TARGET_SSE2
? 2 : 1;
2843 /* Return true if EAX is live at the start of the function. Used by
2844 ix86_expand_prologue to determine if we need special help before
2845 calling allocate_stack_worker. */
/* NOTE(review): this chunk is a damaged extraction -- the return-type
   line, braces and blank lines of this function were dropped, and the
   single statement below is split across several physical lines.  */
2848 ix86_eax_live_at_start_p (void)
2850 /* Cheat. Don't bother working forward from ix86_function_regparm
2851 to the function type to whether an actual argument is located in
2852 eax. Instead just look at cfg info, which is still close enough
2853 to correct at this point. This gives false positives for broken
2854 functions that might use uninitialized data that happens to be
2855 allocated in eax, but who cares? */
/* Tests hard register 0 in the entry block's live-at-end set; per this
   function's stated purpose that register is %eax -- TODO confirm
   against the i386 register ordering.  */
2856 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2859 /* Return true if TYPE has a variable argument list. */
/* NOTE(review): extraction dropped the return-type line, braces, the
   declaration of T and both return statements of this function.  */
2862 type_has_variadic_args_p (tree type
)
2866 for (t
= TYPE_ARG_TYPES (type
); t
; t
= TREE_CHAIN (t
))
/* A prototyped, non-variadic parameter list is terminated by
   void_list_node; presumably hitting it returns false and falling off
   the end of the chain returns true -- confirm against full source.  */
2867 if (t
== void_list_node
)
2872 /* Value is the number of bytes of arguments automatically
2873 popped when returning from a subroutine call.
2874 FUNDECL is the declaration node of the function (as a tree),
2875 FUNTYPE is the data type of the function (as a tree),
2876 or for a library call it is an identifier node for the subroutine name.
2877 SIZE is the number of bytes of arguments passed on the stack.
2879 On the 80386, the RTD insn may be used to pop them if the number
2880 of args is fixed, but if the number is variable then the caller
2881 must pop them all. RTD can't be used for library calls now
2882 because the library is compiled with the Unix compiler.
2883 Use of RTD is a selectable option, since it is incompatible with
2884 standard Unix calling sequences. If the option is not selected,
2885 the caller must always pop the args.
2887 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): damaged extraction -- return type, braces and several
   `return` statements (e.g. the `return size;` paths) were dropped.  */
2890 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2894 /* None of the 64-bit ABIs pop arguments. */
/* RTD only applies to real function decls, never to the bare
   IDENTIFIER_NODE used for library calls.  */
2898 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2900 /* Cdecl functions override -mrtd, and never pop the stack. */
2901 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
2903 /* Stdcall and fastcall functions will pop the stack if not
2905 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2906 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
2909 if (rtd
&& ! type_has_variadic_args_p (funtype
))
2913 /* Lose any fake structure return argument if it is passed on the stack. */
2914 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2915 && !KEEP_AGGREGATE_RETURN_POINTER
)
2917 int nregs
= ix86_function_regparm (funtype
, fundecl
);
/* The hidden struct-return pointer occupies one word; the callee pops
   it, hence a pointer-sized pop count.  */
2919 return GET_MODE_SIZE (Pmode
);
2925 /* Argument support functions. */
2927 /* Return true when register may be used to pass function parameters. */
2929 ix86_function_arg_regno_p (int regno
)
2936 return (regno
< REGPARM_MAX
2937 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
2939 return (regno
< REGPARM_MAX
2940 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2941 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2942 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2943 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2948 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
2953 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2954 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2958 /* RAX is used as hidden argument to va_arg functions. */
2962 for (i
= 0; i
< REGPARM_MAX
; i
++)
2963 if (regno
== x86_64_int_parameter_registers
[i
])
2968 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): extraction dropped the return type, braces and the
   early `return true;` taken when the generic predicate fires.  */
2971 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
/* First defer to the generic variable-size / padding check.  */
2973 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2976 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2977 The layout_type routine is crafty and tries to trick us into passing
2978 currently unsupported vector types on the stack by using TImode. */
2979 return (!TARGET_64BIT
&& mode
== TImode
2980 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2983 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2984 for a call to a function whose data type is FNTYPE.
2985 For a library call, FNTYPE is 0. */
2988 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2989 tree fntype
, /* tree ptr for function decl */
2990 rtx libname
, /* SYMBOL_REF of library name or 0 */
2993 memset (cum
, 0, sizeof (*cum
));
2995 /* Set up the number of registers to use for passing arguments. */
2996 cum
->nregs
= ix86_regparm
;
2998 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3000 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3001 cum
->warn_sse
= true;
3002 cum
->warn_mmx
= true;
3003 cum
->maybe_vaarg
= (fntype
? type_has_variadic_args_p (fntype
) : !libname
);
3007 /* If there are variable arguments, then we won't pass anything
3008 in registers in 32-bit mode. */
3009 if (cum
->maybe_vaarg
)
3019 /* Use ecx and edx registers if function has fastcall attribute,
3020 else look for regparm information. */
3023 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3029 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3032 /* Set up the number of SSE registers used for passing SFmode
3033 and DFmode arguments. Warn for mismatching ABI. */
3034 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3038 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3039 But in the case of vector types, it is some vector mode.
3041 When we have only some of our vector isa extensions enabled, then there
3042 are some modes for which vector_mode_supported_p is false. For these
3043 modes, the generic vector support in gcc will choose some non-vector mode
3044 in order to implement the type. By computing the natural mode, we'll
3045 select the proper ABI location for the operand and not depend on whatever
3046 the middle-end decides to do with these vector types. */
3048 static enum machine_mode
3049 type_natural_mode (tree type
)
3051 enum machine_mode mode
= TYPE_MODE (type
);
3053 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3055 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3056 if ((size
== 8 || size
== 16)
3057 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3058 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3060 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3062 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3063 mode
= MIN_MODE_VECTOR_FLOAT
;
3065 mode
= MIN_MODE_VECTOR_INT
;
3067 /* Get the mode which has this inner mode and number of units. */
3068 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3069 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3070 && GET_MODE_INNER (mode
) == innermode
)
3080 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3081 this may not agree with the mode that the type system has chosen for the
3082 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3083 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): extraction dropped the return type, the REGNO parameter
   line, braces, the `else` keyword and the final `return tmp;`.  */
3086 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3091 if (orig_mode
!= BLKmode
)
/* Non-BLKmode: a plain hard register in the type system's mode.  */
3092 tmp
= gen_rtx_REG (orig_mode
, regno
);
/* BLKmode: wrap a REG in MODE inside a one-element PARALLEL at
   offset 0 so the middle end knows how the value is laid out.  */
3095 tmp
= gen_rtx_REG (mode
, regno
);
3096 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3097 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3103 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3104 of this code is to classify each 8bytes of incoming argument by the register
3105 class and assign registers accordingly. */
3107 /* Return the union class of CLASS1 and CLASS2.
3108 See the x86-64 PS ABI for details. */
/* NOTE(review): damaged extraction -- braces and the `return` statements
   for rules #1 and #2 were dropped by the line filter.  */
3110 static enum x86_64_reg_class
3111 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3113 /* Rule #1: If both classes are equal, this is the resulting class. */
3114 if (class1
== class2
)
3117 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3119 if (class1
== X86_64_NO_CLASS
)
3121 if (class2
== X86_64_NO_CLASS
)
3124 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3125 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3126 return X86_64_MEMORY_CLASS
;
3128 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF merge keeps the 32-bit INTEGERSI class so the
   low half stays a 32-bit integer slot.  */
3129 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3130 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3131 return X86_64_INTEGERSI_CLASS
;
3132 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3133 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3134 return X86_64_INTEGER_CLASS
;
3136 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3138 if (class1
== X86_64_X87_CLASS
3139 || class1
== X86_64_X87UP_CLASS
3140 || class1
== X86_64_COMPLEX_X87_CLASS
3141 || class2
== X86_64_X87_CLASS
3142 || class2
== X86_64_X87UP_CLASS
3143 || class2
== X86_64_COMPLEX_X87_CLASS
)
3144 return X86_64_MEMORY_CLASS
;
3146 /* Rule #6: Otherwise class SSE is used. */
3147 return X86_64_SSE_CLASS
;
3150 /* Classify the argument of type TYPE and mode MODE.
3151 CLASSES will be filled by the register class used to pass each word
3152 of the operand. The number of words is returned. In case the parameter
3153 should be passed in memory, 0 is returned. As a special case for zero
3154 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3156 BIT_OFFSET is used internally for handling records and specifies offset
3157 of the offset in bits modulo 256 to avoid overflow cases.
3159 See the x86-64 PS ABI for details.
3163 classify_argument (enum machine_mode mode
, tree type
,
3164 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3166 HOST_WIDE_INT bytes
=
3167 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3168 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3170 /* Variable sized entities are always passed/returned in memory. */
3174 if (mode
!= VOIDmode
3175 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3178 if (type
&& AGGREGATE_TYPE_P (type
))
3182 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3184 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3188 for (i
= 0; i
< words
; i
++)
3189 classes
[i
] = X86_64_NO_CLASS
;
3191 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3192 signalize memory class, so handle it as special case. */
3195 classes
[0] = X86_64_NO_CLASS
;
3199 /* Classify each field of record and merge classes. */
3200 switch (TREE_CODE (type
))
3203 /* And now merge the fields of structure. */
3204 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3206 if (TREE_CODE (field
) == FIELD_DECL
)
3210 if (TREE_TYPE (field
) == error_mark_node
)
3213 /* Bitfields are always classified as integer. Handle them
3214 early, since later code would consider them to be
3215 misaligned integers. */
3216 if (DECL_BIT_FIELD (field
))
3218 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3219 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3220 + tree_low_cst (DECL_SIZE (field
), 0)
3223 merge_classes (X86_64_INTEGER_CLASS
,
3228 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3229 TREE_TYPE (field
), subclasses
,
3230 (int_bit_position (field
)
3231 + bit_offset
) % 256);
3234 for (i
= 0; i
< num
; i
++)
3237 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3239 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3247 /* Arrays are handled as small records. */
3250 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3251 TREE_TYPE (type
), subclasses
, bit_offset
);
3255 /* The partial classes are now full classes. */
3256 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3257 subclasses
[0] = X86_64_SSE_CLASS
;
3258 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3259 subclasses
[0] = X86_64_INTEGER_CLASS
;
3261 for (i
= 0; i
< words
; i
++)
3262 classes
[i
] = subclasses
[i
% num
];
3267 case QUAL_UNION_TYPE
:
3268 /* Unions are similar to RECORD_TYPE but offset is always 0.
3270 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3272 if (TREE_CODE (field
) == FIELD_DECL
)
3276 if (TREE_TYPE (field
) == error_mark_node
)
3279 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3280 TREE_TYPE (field
), subclasses
,
3284 for (i
= 0; i
< num
; i
++)
3285 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3294 /* Final merger cleanup. */
3295 for (i
= 0; i
< words
; i
++)
3297 /* If one class is MEMORY, everything should be passed in
3299 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3302 /* The X86_64_SSEUP_CLASS should be always preceded by
3303 X86_64_SSE_CLASS. */
3304 if (classes
[i
] == X86_64_SSEUP_CLASS
3305 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3306 classes
[i
] = X86_64_SSE_CLASS
;
3308 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3309 if (classes
[i
] == X86_64_X87UP_CLASS
3310 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3311 classes
[i
] = X86_64_SSE_CLASS
;
3316 /* Compute alignment needed. We align all types to natural boundaries with
3317 exception of XFmode that is aligned to 64bits. */
3318 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3320 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3323 mode_alignment
= 128;
3324 else if (mode
== XCmode
)
3325 mode_alignment
= 256;
3326 if (COMPLEX_MODE_P (mode
))
3327 mode_alignment
/= 2;
3328 /* Misaligned fields are always returned in memory. */
3329 if (bit_offset
% mode_alignment
)
3333 /* for V1xx modes, just use the base mode */
3334 if (VECTOR_MODE_P (mode
)
3335 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3336 mode
= GET_MODE_INNER (mode
);
3338 /* Classification of atomic types. */
3343 classes
[0] = X86_64_SSE_CLASS
;
3346 classes
[0] = X86_64_SSE_CLASS
;
3347 classes
[1] = X86_64_SSEUP_CLASS
;
3356 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3357 classes
[0] = X86_64_INTEGERSI_CLASS
;
3359 classes
[0] = X86_64_INTEGER_CLASS
;
3363 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3368 if (!(bit_offset
% 64))
3369 classes
[0] = X86_64_SSESF_CLASS
;
3371 classes
[0] = X86_64_SSE_CLASS
;
3374 classes
[0] = X86_64_SSEDF_CLASS
;
3377 classes
[0] = X86_64_X87_CLASS
;
3378 classes
[1] = X86_64_X87UP_CLASS
;
3381 classes
[0] = X86_64_SSE_CLASS
;
3382 classes
[1] = X86_64_SSEUP_CLASS
;
3385 classes
[0] = X86_64_SSE_CLASS
;
3388 classes
[0] = X86_64_SSEDF_CLASS
;
3389 classes
[1] = X86_64_SSEDF_CLASS
;
3392 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3395 /* This modes is larger than 16 bytes. */
3403 classes
[0] = X86_64_SSE_CLASS
;
3404 classes
[1] = X86_64_SSEUP_CLASS
;
3410 classes
[0] = X86_64_SSE_CLASS
;
3416 gcc_assert (VECTOR_MODE_P (mode
));
3421 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3423 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3424 classes
[0] = X86_64_INTEGERSI_CLASS
;
3426 classes
[0] = X86_64_INTEGER_CLASS
;
3427 classes
[1] = X86_64_INTEGER_CLASS
;
3428 return 1 + (bytes
> 8);
3432 /* Examine the argument and return set number of register required in each
3433 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): damaged extraction -- the return type, braces, the
   zeroing of *INT_NREGS / *SSE_NREGS, the increments inside each case,
   `break` statements and the final return were all dropped; presumably
   INTEGER classes bump *INT_NREGS and SSE classes bump *SSE_NREGS --
   confirm against the full source.  */
3435 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3436 int *int_nregs
, int *sse_nregs
)
3438 enum x86_64_reg_class
class[MAX_CLASSES
];
3439 int n
= classify_argument (mode
, type
, class, 0);
/* Walk the classified eightbytes from last to first.  */
3445 for (n
--; n
>= 0; n
--)
3448 case X86_64_INTEGER_CLASS
:
3449 case X86_64_INTEGERSI_CLASS
:
3452 case X86_64_SSE_CLASS
:
3453 case X86_64_SSESF_CLASS
:
3454 case X86_64_SSEDF_CLASS
:
3457 case X86_64_NO_CLASS
:
3458 case X86_64_SSEUP_CLASS
:
3460 case X86_64_X87_CLASS
:
3461 case X86_64_X87UP_CLASS
:
/* COMPLEX_X87 occupies the x87 stack when returned; as an argument it
   forces memory passing (0).  */
3465 case X86_64_COMPLEX_X87_CLASS
:
3466 return in_return
? 2 : 0;
3467 case X86_64_MEMORY_CLASS
:
3473 /* Construct container for the argument used by GCC interface. See
3474 FUNCTION_ARG for the detailed description. */
3477 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3478 tree type
, int in_return
, int nintregs
, int nsseregs
,
3479 const int *intreg
, int sse_regno
)
3481 /* The following variables hold the static issued_error state. */
3482 static bool issued_sse_arg_error
;
3483 static bool issued_sse_ret_error
;
3484 static bool issued_x87_ret_error
;
3486 enum machine_mode tmpmode
;
3488 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3489 enum x86_64_reg_class
class[MAX_CLASSES
];
3493 int needed_sseregs
, needed_intregs
;
3494 rtx exp
[MAX_CLASSES
];
3497 n
= classify_argument (mode
, type
, class, 0);
3500 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3503 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3506 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3507 some less clueful developer tries to use floating-point anyway. */
3508 if (needed_sseregs
&& !TARGET_SSE
)
3512 if (!issued_sse_ret_error
)
3514 error ("SSE register return with SSE disabled");
3515 issued_sse_ret_error
= true;
3518 else if (!issued_sse_arg_error
)
3520 error ("SSE register argument with SSE disabled");
3521 issued_sse_arg_error
= true;
3526 /* Likewise, error if the ABI requires us to return values in the
3527 x87 registers and the user specified -mno-80387. */
3528 if (!TARGET_80387
&& in_return
)
3529 for (i
= 0; i
< n
; i
++)
3530 if (class[i
] == X86_64_X87_CLASS
3531 || class[i
] == X86_64_X87UP_CLASS
3532 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3534 if (!issued_x87_ret_error
)
3536 error ("x87 register return with x87 disabled");
3537 issued_x87_ret_error
= true;
3542 /* First construct simple cases. Avoid SCmode, since we want to use
3543 single register to pass this type. */
3544 if (n
== 1 && mode
!= SCmode
)
3547 case X86_64_INTEGER_CLASS
:
3548 case X86_64_INTEGERSI_CLASS
:
3549 return gen_rtx_REG (mode
, intreg
[0]);
3550 case X86_64_SSE_CLASS
:
3551 case X86_64_SSESF_CLASS
:
3552 case X86_64_SSEDF_CLASS
:
3553 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3554 case X86_64_X87_CLASS
:
3555 case X86_64_COMPLEX_X87_CLASS
:
3556 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3557 case X86_64_NO_CLASS
:
3558 /* Zero sized array, struct or class. */
3563 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3565 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3568 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3569 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3570 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3571 && class[1] == X86_64_INTEGER_CLASS
3572 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3573 && intreg
[0] + 1 == intreg
[1])
3574 return gen_rtx_REG (mode
, intreg
[0]);
3576 /* Otherwise figure out the entries of the PARALLEL. */
3577 for (i
= 0; i
< n
; i
++)
3581 case X86_64_NO_CLASS
:
3583 case X86_64_INTEGER_CLASS
:
3584 case X86_64_INTEGERSI_CLASS
:
3585 /* Merge TImodes on aligned occasions here too. */
3586 if (i
* 8 + 8 > bytes
)
3587 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3588 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3592 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3593 if (tmpmode
== BLKmode
)
3595 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3596 gen_rtx_REG (tmpmode
, *intreg
),
3600 case X86_64_SSESF_CLASS
:
3601 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3602 gen_rtx_REG (SFmode
,
3603 SSE_REGNO (sse_regno
)),
3607 case X86_64_SSEDF_CLASS
:
3608 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3609 gen_rtx_REG (DFmode
,
3610 SSE_REGNO (sse_regno
)),
3614 case X86_64_SSE_CLASS
:
3615 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3619 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3620 gen_rtx_REG (tmpmode
,
3621 SSE_REGNO (sse_regno
)),
3623 if (tmpmode
== TImode
)
3632 /* Empty aligned struct, union or class. */
3636 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3637 for (i
= 0; i
< nexps
; i
++)
3638 XVECEXP (ret
, 0, i
) = exp
[i
];
3642 /* Update the data in CUM to advance over an argument of mode MODE
3643 and data type TYPE. (TYPE is null for libcalls where that information
3644 may not be available.) */
3647 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3648 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3664 cum
->words
+= words
;
3665 cum
->nregs
-= words
;
3666 cum
->regno
+= words
;
3668 if (cum
->nregs
<= 0)
3676 if (cum
->float_in_sse
< 2)
3679 if (cum
->float_in_sse
< 1)
3690 if (!type
|| !AGGREGATE_TYPE_P (type
))
3692 cum
->sse_words
+= words
;
3693 cum
->sse_nregs
-= 1;
3694 cum
->sse_regno
+= 1;
3695 if (cum
->sse_nregs
<= 0)
3707 if (!type
|| !AGGREGATE_TYPE_P (type
))
3709 cum
->mmx_words
+= words
;
3710 cum
->mmx_nregs
-= 1;
3711 cum
->mmx_regno
+= 1;
3712 if (cum
->mmx_nregs
<= 0)
/* Advance CUM past one 64-bit-ABI argument of MODE/TYPE occupying WORDS
   stack words when it does not fit in registers.
   NOTE(review): damaged extraction -- the `static void` line, braces and
   an `else` branch structure were dropped.  */
3723 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3724 tree type
, HOST_WIDE_INT words
)
3726 int int_nregs
, sse_nregs
;
/* Argument could not be classified into registers: it goes on the
   stack, so only the word counter moves.  */
3728 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3729 cum
->words
+= words
;
/* Enough free GPRs and SSE regs remain: consume them.  */
3730 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3732 cum
->nregs
-= int_nregs
;
3733 cum
->sse_nregs
-= sse_nregs
;
3734 cum
->regno
+= int_nregs
;
3735 cum
->sse_regno
+= sse_nregs
;
/* Otherwise the argument spills to the stack in full.  */
3738 cum
->words
+= words
;
/* Target hook body: update CUM to step over one argument of MODE/TYPE.
   NAMED is unused on this target.
   NOTE(review): damaged extraction -- the return-type line, braces,
   `else`, and the guard around type_natural_mode were dropped.  */
3742 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3743 tree type
, int named ATTRIBUTE_UNUSED
)
3745 HOST_WIDE_INT bytes
, words
;
/* BLKmode carries no size; measure the tree type instead.  */
3747 if (mode
== BLKmode
)
3748 bytes
= int_size_in_bytes (type
);
3750 bytes
= GET_MODE_SIZE (mode
);
/* Round the byte size up to whole stack words.  */
3751 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
/* Re-derive the ABI-relevant mode for vector types (see
   type_natural_mode) before dispatching per ABI.  */
3754 mode
= type_natural_mode (type
);
3757 function_arg_advance_64 (cum
, mode
, type
, words
);
3759 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
3762 /* Define where to put the arguments to a function.
3763 Value is zero to push the argument on the stack,
3764 or a hard register in which to store the argument.
3766 MODE is the argument's machine mode.
3767 TYPE is the data type of the argument (as a tree).
3768 This is null for libcalls where that information may
3770 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3771 the preceding args and about the function being called.
3772 NAMED is nonzero if this argument is a named parameter
3773 (otherwise it is an extra parameter matching an ellipsis). */
3776 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3777 enum machine_mode orig_mode
, tree type
,
3778 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3780 static bool warnedsse
, warnedmmx
;
3782 /* Avoid the AL settings for the Unix64 ABI. */
3783 if (mode
== VOIDmode
)
3799 if (words
<= cum
->nregs
)
3801 int regno
= cum
->regno
;
3803 /* Fastcall allocates the first two DWORD (SImode) or
3804 smaller arguments to ECX and EDX. */
3807 if (mode
== BLKmode
|| mode
== DImode
)
3810 /* ECX not EAX is the first allocated register. */
3814 return gen_rtx_REG (mode
, regno
);
3819 if (cum
->float_in_sse
< 2)
3822 if (cum
->float_in_sse
< 1)
3832 if (!type
|| !AGGREGATE_TYPE_P (type
))
3834 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3837 warning (0, "SSE vector argument without SSE enabled "
3841 return gen_reg_or_parallel (mode
, orig_mode
,
3842 cum
->sse_regno
+ FIRST_SSE_REG
);
3850 if (!type
|| !AGGREGATE_TYPE_P (type
))
3852 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3855 warning (0, "MMX vector argument without MMX enabled "
3859 return gen_reg_or_parallel (mode
, orig_mode
,
3860 cum
->mmx_regno
+ FIRST_MMX_REG
);
3869 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3870 enum machine_mode orig_mode
, tree type
)
3872 /* Handle a hidden AL argument containing number of registers
3873 for varargs x86-64 functions. */
3874 if (mode
== VOIDmode
)
3875 return GEN_INT (cum
->maybe_vaarg
3876 ? (cum
->sse_nregs
< 0
3881 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3883 &x86_64_int_parameter_registers
[cum
->regno
],
/* Target FUNCTION_ARG worker: return the rtx (register or 0/stack) in
   which an argument of mode OMODE and TYPE is passed, dispatching to
   the 32- or 64-bit ABI helper.
   NOTE(review): damaged extraction -- the return-type line, braces,
   `else` and the TARGET_64BIT test were dropped.  */
3888 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
3889 tree type
, int named ATTRIBUTE_UNUSED
)
3891 enum machine_mode mode
= omode
;
3892 HOST_WIDE_INT bytes
, words
;
3894 if (mode
== BLKmode
)
3895 bytes
= int_size_in_bytes (type
);
3897 bytes
= GET_MODE_SIZE (mode
);
3898 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3900 /* To simplify the code below, represent vector types with a vector mode
3901 even if MMX/SSE are not active. */
3902 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3903 mode
= type_natural_mode (type
);
/* The helpers receive both the natural mode and the original OMODE so
   they can build a PARALLEL when the two disagree.  */
3906 return function_arg_64 (cum
, mode
, omode
, type
);
3908 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
3911 /* A C expression that indicates when an argument must be passed by
3912 reference. If nonzero for an argument, a copy of that argument is
3913 made in memory and a pointer to the argument is passed instead of
3914 the argument itself. The pointer is passed in whatever way is
3915 appropriate for passing a pointer to that type. */
/* NOTE(review): damaged extraction -- return type, braces, the
   `return true;` and the trailing `return false;` were dropped.  */
3918 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3919 enum machine_mode mode ATTRIBUTE_UNUSED
,
3920 tree type
, bool named ATTRIBUTE_UNUSED
)
/* int_size_in_bytes == -1 marks a variable-sized type; on 64-bit such
   types are passed by reference.  */
3922 if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
3928 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3929 ABI. Only called if TARGET_SSE. */
3931 contains_128bit_aligned_vector_p (tree type
)
3933 enum machine_mode mode
= TYPE_MODE (type
);
3934 if (SSE_REG_MODE_P (mode
)
3935 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3937 if (TYPE_ALIGN (type
) < 128)
3940 if (AGGREGATE_TYPE_P (type
))
3942 /* Walk the aggregates recursively. */
3943 switch (TREE_CODE (type
))
3947 case QUAL_UNION_TYPE
:
3951 /* Walk all the structure fields. */
3952 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3954 if (TREE_CODE (field
) == FIELD_DECL
3955 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
3962 /* Just for use if some languages passes arrays by value. */
3963 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
3974 /* Gives the alignment boundary, in bits, of an argument with the
3975 specified mode and type. */
/* NOTE(review): damaged extraction -- return type, braces, the
   TARGET_64BIT / TARGET_SSE conditional structure, `else` branches and
   the final `return align;` were dropped, so the exact nesting of the
   PARM_BOUNDARY fallbacks below cannot be recovered from this view.  */
3978 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
/* Prefer the type's alignment when a type is available, otherwise the
   mode's natural alignment.  */
3982 align
= TYPE_ALIGN (type
);
3984 align
= GET_MODE_ALIGNMENT (mode
);
3985 if (align
< PARM_BOUNDARY
)
3986 align
= PARM_BOUNDARY
;
3989 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3990 make an exception for SSE modes since these require 128bit
3993 The handling here differs from field_alignment. ICC aligns MMX
3994 arguments to 4 byte boundaries, while structure fields are aligned
3995 to 8 byte boundaries. */
3997 align
= PARM_BOUNDARY
;
4000 if (!SSE_REG_MODE_P (mode
))
4001 align
= PARM_BOUNDARY
;
4005 if (!contains_128bit_aligned_vector_p (type
))
4006 align
= PARM_BOUNDARY
;
4014 /* Return true if N is a possible register number of function value. */
4017 ix86_function_value_regno_p (int regno
)
4024 case FIRST_FLOAT_REG
:
4025 return TARGET_FLOAT_RETURNS_IN_80387
;
4031 if (TARGET_MACHO
|| TARGET_64BIT
)
4039 /* Define how to find the value returned by a function.
4040 VALTYPE is the data type of the value (as a tree).
4041 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4042 otherwise, FUNC is 0. */
/* NOTE(review): damaged extraction -- the return-type line, braces, the
   declaration of REGNO and the plain `%eax` assignments (the regno = 0
   arms for decimal-float and non-FP values) were dropped.  */
4045 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4046 tree fntype
, tree fn
)
4050 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4051 we normally prevent this case when mmx is not available. However
4052 some ABIs may require the result to be returned like DImode. */
4053 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4054 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4056 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4057 we prevent this case when sse is not available. However some ABIs
4058 may require the result to be returned like integer TImode. */
4059 else if (mode
== TImode
4060 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4061 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4063 /* Decimal floating point values can go in %eax, unlike other float modes. */
4064 else if (DECIMAL_FLOAT_MODE_P (mode
))
4067 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4068 else if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4071 /* Floating point return values in %st(0), except for local functions when
4072 SSE math is enabled or for functions with sseregparm attribute. */
4075 regno
= FIRST_FLOAT_REG
;
/* Override %st(0) with %xmm0 when the sseregparm level says this
   float width is returned in SSE registers.  */
4077 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4079 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4080 if ((sse_level
>= 1 && mode
== SFmode
)
4081 || (sse_level
== 2 && mode
== DFmode
))
4082 regno
= FIRST_SSE_REG
;
4086 return gen_rtx_REG (orig_mode
, regno
);
4090 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4095 /* Handle libcalls, which don't provide a type node. */
4096 if (valtype
== NULL
)
4108 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4111 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4115 return gen_rtx_REG (mode
, 0);
4119 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4120 REGPARM_MAX
, SSE_REGPARM_MAX
,
4121 x86_64_int_return_registers
, 0);
4123 /* For zero sized structures, construct_container returns NULL, but we
4124 need to keep rest of compiler happy by returning meaningful value. */
4126 ret
= gen_rtx_REG (orig_mode
, 0);
/* Common worker for ix86_function_value / ix86_libcall_value:
   FNTYPE_OR_DECL may be a FUNCTION_DECL, a function type, or NULL for a
   libcall; split it into FN (decl or NULL) and FNTYPE before
   dispatching per ABI.
   NOTE(review): damaged extraction -- the return-type line, braces, the
   declarations of FN/FNTYPE, the `fn = NULL;` default and the
   TARGET_64BIT test were dropped.  */
4132 ix86_function_value_1 (tree valtype
, tree fntype_or_decl
,
4133 enum machine_mode orig_mode
, enum machine_mode mode
)
4138 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4139 fn
= fntype_or_decl
;
/* With a decl in hand its TREE_TYPE is the function type; otherwise
   FNTYPE_OR_DECL already was the type (or NULL).  */
4140 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4143 return function_value_64 (orig_mode
, mode
, valtype
);
4145 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
/* Target FUNCTION_VALUE hook: rtx locating the return value of a
   function returning VALTYPE.  OUTGOING is unused on this target.
   NOTE(review): damaged extraction -- the return-type line and braces
   were dropped.  */
4149 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4150 bool outgoing ATTRIBUTE_UNUSED
)
4152 enum machine_mode mode
, orig_mode
;
/* ORIG_MODE is what the type system chose; MODE is the ABI "natural"
   mode (differs for vector types, see type_natural_mode).  */
4154 orig_mode
= TYPE_MODE (valtype
);
4155 mode
= type_natural_mode (valtype
);
4156 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
/* LIBCALL_VALUE worker: libcalls supply no tree type or decl, so pass
   NULLs and use MODE for both the original and natural mode.
   NOTE(review): damaged extraction -- return type and braces dropped.  */
4160 ix86_libcall_value (enum machine_mode mode
)
4162 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
)
;
4165 /* Return true iff type is returned in memory. */
4168 return_in_memory_32 (tree type
, enum machine_mode mode
)
4172 if (mode
== BLKmode
)
4175 size
= int_size_in_bytes (type
);
4177 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4180 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4182 /* User-created vectors small enough to fit in EAX. */
4186 /* MMX/3dNow values are returned in MM0,
4187 except when it doesn't exits. */
4189 return (TARGET_MMX
? 0 : 1);
4191 /* SSE values are returned in XMM0, except when it doesn't exist. */
4193 return (TARGET_SSE
? 0 : 1);
/* 64-bit ABI: a value is returned in memory exactly when
   examine_argument cannot classify it into registers (in_return = 1).
   NOTE(review): damaged extraction -- the return-type line and braces
   were dropped.  */
4208 return_in_memory_64 (tree type
, enum machine_mode mode
)
4210 int needed_intregs
, needed_sseregs
;
4211 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
/* RETURN_IN_MEMORY target hook: dispatch to the per-ABI helper using
   the type's natural (vector-aware) mode.
   NOTE(review): damaged extraction -- return type, braces, `else` and
   the TARGET_64BIT test were dropped.  */
4215 ix86_return_in_memory (tree type
)
4217 enum machine_mode mode
= type_natural_mode (type
);
4220 return return_in_memory_64 (type
, mode
);
4222 return return_in_memory_32 (type
, mode
);
4225 /* When returning SSE vector types, we have a choice of either
4226 (1) being abi incompatible with a -march switch, or
4227 (2) generating an error.
4228 Given no good solution, I think the safest thing is one warning.
4229 The user won't be able to use -Werror, but....
4231 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4232 called in response to actually generating a caller or callee that
4233 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4234 via aggregate_value_p for general type probing from tree-ssa. */
4237 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4239 static bool warnedsse
, warnedmmx
;
4241 if (!TARGET_64BIT
&& type
)
4243 /* Look at the return type of the function, not the function type. */
4244 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4246 if (!TARGET_SSE
&& !warnedsse
)
4249 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4252 warning (0, "SSE vector return without SSE enabled "
4257 if (!TARGET_MMX
&& !warnedmmx
)
4259 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4262 warning (0, "MMX vector return without MMX enabled "
4272 /* Create the va_list data type. */
4275 ix86_build_builtin_va_list (void)
4277 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4279 /* For i386 we use plain pointer to argument area. */
4281 return build_pointer_type (char_type_node
);
4283 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4284 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4286 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4287 unsigned_type_node
);
4288 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4289 unsigned_type_node
);
4290 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4292 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4295 va_list_gpr_counter_field
= f_gpr
;
4296 va_list_fpr_counter_field
= f_fpr
;
4298 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4299 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4300 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4301 DECL_FIELD_CONTEXT (f_sav
) = record
;
4303 TREE_CHAIN (record
) = type_decl
;
4304 TYPE_NAME (record
) = type_decl
;
4305 TYPE_FIELDS (record
) = f_gpr
;
4306 TREE_CHAIN (f_gpr
) = f_fpr
;
4307 TREE_CHAIN (f_fpr
) = f_ovf
;
4308 TREE_CHAIN (f_ovf
) = f_sav
;
4310 layout_type (record
);
4312 /* The correct type is an array type of one element. */
4313 return build_array_type (record
, build_index_type (size_zero_node
));
4316 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4319 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4329 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4332 /* Indicate to allocate space on the stack for varargs save area. */
4333 ix86_save_varrargs_registers
= 1;
4334 cfun
->stack_alignment_needed
= 128;
4336 save_area
= frame_pointer_rtx
;
4337 set
= get_varargs_alias_set ();
4339 for (i
= cum
->regno
;
4341 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4344 mem
= gen_rtx_MEM (Pmode
,
4345 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4346 MEM_NOTRAP_P (mem
) = 1;
4347 set_mem_alias_set (mem
, set
);
4348 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4349 x86_64_int_parameter_registers
[i
]));
4352 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4354 /* Now emit code to save SSE registers. The AX parameter contains number
4355 of SSE parameter registers used to call this function. We use
4356 sse_prologue_save insn template that produces computed jump across
4357 SSE saves. We need some preparation work to get this working. */
4359 label
= gen_label_rtx ();
4360 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4362 /* Compute address to jump to :
4363 label - 5*eax + nnamed_sse_arguments*5 */
4364 tmp_reg
= gen_reg_rtx (Pmode
);
4365 nsse_reg
= gen_reg_rtx (Pmode
);
4366 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4367 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4368 gen_rtx_MULT (Pmode
, nsse_reg
,
4373 gen_rtx_CONST (DImode
,
4374 gen_rtx_PLUS (DImode
,
4376 GEN_INT (cum
->sse_regno
* 4))));
4378 emit_move_insn (nsse_reg
, label_ref
);
4379 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4381 /* Compute address of memory block we save into. We always use pointer
4382 pointing 127 bytes after first byte to store - this is needed to keep
4383 instruction size limited by 4 bytes. */
4384 tmp_reg
= gen_reg_rtx (Pmode
);
4385 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4386 plus_constant (save_area
,
4387 8 * REGPARM_MAX
+ 127)));
4388 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4389 MEM_NOTRAP_P (mem
) = 1;
4390 set_mem_alias_set (mem
, set
);
4391 set_mem_align (mem
, BITS_PER_WORD
);
4393 /* And finally do the dirty job! */
4394 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4395 GEN_INT (cum
->sse_regno
), label
));
4400 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4401 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4404 CUMULATIVE_ARGS next_cum
;
4408 /* This argument doesn't appear to be used anymore. Which is good,
4409 because the old code here didn't suppress rtl generation. */
4410 gcc_assert (!no_rtl
);
4415 fntype
= TREE_TYPE (current_function_decl
);
4416 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4417 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4418 != void_type_node
));
4420 /* For varargs, we do not want to skip the dummy va_dcl argument.
4421 For stdargs, we do want to skip the last named argument. */
4424 function_arg_advance (&next_cum
, mode
, type
, 1);
4426 setup_incoming_varargs_64 (&next_cum
);
4429 /* Implement va_start. */
4432 ix86_va_start (tree valist
, rtx nextarg
)
4434 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4435 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4436 tree gpr
, fpr
, ovf
, sav
, t
;
4439 /* Only 64bit target needs something special. */
4442 std_expand_builtin_va_start (valist
, nextarg
);
4446 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4447 f_fpr
= TREE_CHAIN (f_gpr
);
4448 f_ovf
= TREE_CHAIN (f_fpr
);
4449 f_sav
= TREE_CHAIN (f_ovf
);
4451 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4452 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4453 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4454 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4455 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4457 /* Count number of gp and fp argument registers used. */
4458 words
= current_function_args_info
.words
;
4459 n_gpr
= current_function_args_info
.regno
;
4460 n_fpr
= current_function_args_info
.sse_regno
;
4462 if (cfun
->va_list_gpr_size
)
4464 type
= TREE_TYPE (gpr
);
4465 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4466 build_int_cst (type
, n_gpr
* 8));
4467 TREE_SIDE_EFFECTS (t
) = 1;
4468 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4471 if (cfun
->va_list_fpr_size
)
4473 type
= TREE_TYPE (fpr
);
4474 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4475 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4476 TREE_SIDE_EFFECTS (t
) = 1;
4477 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4480 /* Find the overflow area. */
4481 type
= TREE_TYPE (ovf
);
4482 t
= make_tree (type
, virtual_incoming_args_rtx
);
4484 t
= build2 (PLUS_EXPR
, type
, t
,
4485 build_int_cst (type
, words
* UNITS_PER_WORD
));
4486 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4487 TREE_SIDE_EFFECTS (t
) = 1;
4488 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4490 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4492 /* Find the register save area.
4493 Prologue of the function save it right above stack frame. */
4494 type
= TREE_TYPE (sav
);
4495 t
= make_tree (type
, frame_pointer_rtx
);
4496 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4497 TREE_SIDE_EFFECTS (t
) = 1;
4498 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4502 /* Implement va_arg. */
4505 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4507 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4508 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4509 tree gpr
, fpr
, ovf
, sav
, t
;
4511 tree lab_false
, lab_over
= NULL_TREE
;
4516 enum machine_mode nat_mode
;
4518 /* Only 64bit target needs something special. */
4520 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4522 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4523 f_fpr
= TREE_CHAIN (f_gpr
);
4524 f_ovf
= TREE_CHAIN (f_fpr
);
4525 f_sav
= TREE_CHAIN (f_ovf
);
4527 valist
= build_va_arg_indirect_ref (valist
);
4528 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4529 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4530 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4531 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4533 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4535 type
= build_pointer_type (type
);
4536 size
= int_size_in_bytes (type
);
4537 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4539 nat_mode
= type_natural_mode (type
);
4540 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4541 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4543 /* Pull the value out of the saved registers. */
4545 addr
= create_tmp_var (ptr_type_node
, "addr");
4546 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4550 int needed_intregs
, needed_sseregs
;
4552 tree int_addr
, sse_addr
;
4554 lab_false
= create_artificial_label ();
4555 lab_over
= create_artificial_label ();
4557 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4559 need_temp
= (!REG_P (container
)
4560 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4561 || TYPE_ALIGN (type
) > 128));
4563 /* In case we are passing structure, verify that it is consecutive block
4564 on the register save area. If not we need to do moves. */
4565 if (!need_temp
&& !REG_P (container
))
4567 /* Verify that all registers are strictly consecutive */
4568 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4572 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4574 rtx slot
= XVECEXP (container
, 0, i
);
4575 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4576 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4584 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4586 rtx slot
= XVECEXP (container
, 0, i
);
4587 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4588 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4600 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4601 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4602 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4603 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4606 /* First ensure that we fit completely in registers. */
4609 t
= build_int_cst (TREE_TYPE (gpr
),
4610 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4611 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4612 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4613 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4614 gimplify_and_add (t
, pre_p
);
4618 t
= build_int_cst (TREE_TYPE (fpr
),
4619 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4621 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4622 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4623 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4624 gimplify_and_add (t
, pre_p
);
4627 /* Compute index to start of area used for integer regs. */
4630 /* int_addr = gpr + sav; */
4631 t
= fold_convert (ptr_type_node
, gpr
);
4632 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4633 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4634 gimplify_and_add (t
, pre_p
);
4638 /* sse_addr = fpr + sav; */
4639 t
= fold_convert (ptr_type_node
, fpr
);
4640 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4641 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4642 gimplify_and_add (t
, pre_p
);
4647 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4650 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4651 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4652 gimplify_and_add (t
, pre_p
);
4654 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4656 rtx slot
= XVECEXP (container
, 0, i
);
4657 rtx reg
= XEXP (slot
, 0);
4658 enum machine_mode mode
= GET_MODE (reg
);
4659 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4660 tree addr_type
= build_pointer_type (piece_type
);
4663 tree dest_addr
, dest
;
4665 if (SSE_REGNO_P (REGNO (reg
)))
4667 src_addr
= sse_addr
;
4668 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4672 src_addr
= int_addr
;
4673 src_offset
= REGNO (reg
) * 8;
4675 src_addr
= fold_convert (addr_type
, src_addr
);
4676 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4677 size_int (src_offset
));
4678 src
= build_va_arg_indirect_ref (src_addr
);
4680 dest_addr
= fold_convert (addr_type
, addr
);
4681 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4682 size_int (INTVAL (XEXP (slot
, 1))));
4683 dest
= build_va_arg_indirect_ref (dest_addr
);
4685 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4686 gimplify_and_add (t
, pre_p
);
4692 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4693 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4694 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4695 gimplify_and_add (t
, pre_p
);
4699 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4700 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4701 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4702 gimplify_and_add (t
, pre_p
);
4705 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4706 gimplify_and_add (t
, pre_p
);
4708 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4709 append_to_statement_list (t
, pre_p
);
4712 /* ... otherwise out of the overflow area. */
4714 /* Care for on-stack alignment if needed. */
4715 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4716 || integer_zerop (TYPE_SIZE (type
)))
4720 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4721 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4722 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4723 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4724 build_int_cst (TREE_TYPE (t
), -align
));
4726 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4728 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4729 gimplify_and_add (t2
, pre_p
);
4731 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4732 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4733 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4734 gimplify_and_add (t
, pre_p
);
4738 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4739 append_to_statement_list (t
, pre_p
);
4742 ptrtype
= build_pointer_type (type
);
4743 addr
= fold_convert (ptrtype
, addr
);
4746 addr
= build_va_arg_indirect_ref (addr
);
4747 return build_va_arg_indirect_ref (addr
);
4750 /* Return nonzero if OPNUM's MEM should be matched
4751 in movabs* patterns. */
4754 ix86_check_movabs (rtx insn
, int opnum
)
4758 set
= PATTERN (insn
);
4759 if (GET_CODE (set
) == PARALLEL
)
4760 set
= XVECEXP (set
, 0, 0);
4761 gcc_assert (GET_CODE (set
) == SET
);
4762 mem
= XEXP (set
, opnum
);
4763 while (GET_CODE (mem
) == SUBREG
)
4764 mem
= SUBREG_REG (mem
);
4765 gcc_assert (MEM_P (mem
));
4766 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4769 /* Initialize the table of extra 80387 mathematical constants. */
4772 init_ext_80387_constants (void)
4774 static const char * cst
[5] =
4776 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4777 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4778 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4779 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4780 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4784 for (i
= 0; i
< 5; i
++)
4786 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4787 /* Ensure each constant is rounded to XFmode precision. */
4788 real_convert (&ext_80387_constants_table
[i
],
4789 XFmode
, &ext_80387_constants_table
[i
]);
4792 ext_80387_constants_init
= 1;
4795 /* Return true if the constant is something that can be loaded with
4796 a special instruction. */
4799 standard_80387_constant_p (rtx x
)
4803 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4806 if (x
== CONST0_RTX (GET_MODE (x
)))
4808 if (x
== CONST1_RTX (GET_MODE (x
)))
4811 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4813 /* For XFmode constants, try to find a special 80387 instruction when
4814 optimizing for size or on those CPUs that benefit from them. */
4815 if (GET_MODE (x
) == XFmode
4816 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
4820 if (! ext_80387_constants_init
)
4821 init_ext_80387_constants ();
4823 for (i
= 0; i
< 5; i
++)
4824 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4828 /* Load of the constant -0.0 or -1.0 will be split as
4829 fldz;fchs or fld1;fchs sequence. */
4830 if (real_isnegzero (&r
))
4832 if (real_identical (&r
, &dconstm1
))
4838 /* Return the opcode of the special instruction to be used to load
4842 standard_80387_constant_opcode (rtx x
)
4844 switch (standard_80387_constant_p (x
))
4868 /* Return the CONST_DOUBLE representing the 80387 constant that is
4869 loaded by the specified special instruction. The argument IDX
4870 matches the return value from standard_80387_constant_p. */
4873 standard_80387_constant_rtx (int idx
)
4877 if (! ext_80387_constants_init
)
4878 init_ext_80387_constants ();
4894 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4898 /* Return 1 if mode is a valid mode for sse. */
4900 standard_sse_mode_p (enum machine_mode mode
)
4917 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4920 standard_sse_constant_p (rtx x
)
4922 enum machine_mode mode
= GET_MODE (x
);
4924 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
4926 if (vector_all_ones_operand (x
, mode
)
4927 && standard_sse_mode_p (mode
))
4928 return TARGET_SSE2
? 2 : -1;
4933 /* Return the opcode of the special instruction to be used to load
4937 standard_sse_constant_opcode (rtx insn
, rtx x
)
4939 switch (standard_sse_constant_p (x
))
4942 if (get_attr_mode (insn
) == MODE_V4SF
)
4943 return "xorps\t%0, %0";
4944 else if (get_attr_mode (insn
) == MODE_V2DF
)
4945 return "xorpd\t%0, %0";
4947 return "pxor\t%0, %0";
4949 return "pcmpeqd\t%0, %0";
4954 /* Returns 1 if OP contains a symbol reference */
4957 symbolic_reference_mentioned_p (rtx op
)
4962 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4965 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4966 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4972 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4973 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4977 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4984 /* Return 1 if it is appropriate to emit `ret' instructions in the
4985 body of a function. Do this only if the epilogue is simple, needing a
4986 couple of insns. Prior to reloading, we can't tell how many registers
4987 must be saved, so return 0 then. Return 0 if there is no frame
4988 marker to de-allocate. */
4991 ix86_can_use_return_insn_p (void)
4993 struct ix86_frame frame
;
4995 if (! reload_completed
|| frame_pointer_needed
)
4998 /* Don't allow more than 32 pop, since that's all we can do
4999 with one instruction. */
5000 if (current_function_pops_args
5001 && current_function_args_size
>= 32768)
5004 ix86_compute_frame_layout (&frame
);
5005 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5008 /* Value should be nonzero if functions must have frame pointers.
5009 Zero means the frame pointer need not be set up (and parms may
5010 be accessed via the stack pointer) in functions that seem suitable. */
5013 ix86_frame_pointer_required (void)
5015 /* If we accessed previous frames, then the generated code expects
5016 to be able to access the saved ebp value in our frame. */
5017 if (cfun
->machine
->accesses_prev_frame
)
5020 /* Several x86 os'es need a frame pointer for other reasons,
5021 usually pertaining to setjmp. */
5022 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5025 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5026 the frame pointer by default. Turn it back on now if we've not
5027 got a leaf function. */
5028 if (TARGET_OMIT_LEAF_FRAME_POINTER
5029 && (!current_function_is_leaf
5030 || ix86_current_function_calls_tls_descriptor
))
5033 if (current_function_profile
)
5039 /* Record that the current function accesses previous call frames. */
5042 ix86_setup_frame_addresses (void)
5044 cfun
->machine
->accesses_prev_frame
= 1;
5047 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5048 # define USE_HIDDEN_LINKONCE 1
5050 # define USE_HIDDEN_LINKONCE 0
5053 static int pic_labels_used
;
5055 /* Fills in the label name that should be used for a pc thunk for
5056 the given register. */
5059 get_pc_thunk_name (char name
[32], unsigned int regno
)
5061 gcc_assert (!TARGET_64BIT
);
5063 if (USE_HIDDEN_LINKONCE
)
5064 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5066 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5070 /* This function generates code for -fpic that loads %ebx with
5071 the return address of the caller and then returns. */
5074 ix86_file_end (void)
5079 for (regno
= 0; regno
< 8; ++regno
)
5083 if (! ((pic_labels_used
>> regno
) & 1))
5086 get_pc_thunk_name (name
, regno
);
5091 switch_to_section (darwin_sections
[text_coal_section
]);
5092 fputs ("\t.weak_definition\t", asm_out_file
);
5093 assemble_name (asm_out_file
, name
);
5094 fputs ("\n\t.private_extern\t", asm_out_file
);
5095 assemble_name (asm_out_file
, name
);
5096 fputs ("\n", asm_out_file
);
5097 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5101 if (USE_HIDDEN_LINKONCE
)
5105 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5107 TREE_PUBLIC (decl
) = 1;
5108 TREE_STATIC (decl
) = 1;
5109 DECL_ONE_ONLY (decl
) = 1;
5111 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5112 switch_to_section (get_named_section (decl
, NULL
, 0));
5114 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5115 fputs ("\t.hidden\t", asm_out_file
);
5116 assemble_name (asm_out_file
, name
);
5117 fputc ('\n', asm_out_file
);
5118 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5122 switch_to_section (text_section
);
5123 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5126 xops
[0] = gen_rtx_REG (SImode
, regno
);
5127 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5128 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5129 output_asm_insn ("ret", xops
);
5132 if (NEED_INDICATE_EXEC_STACK
)
5133 file_end_indicate_exec_stack ();
5136 /* Emit code for the SET_GOT patterns. */
5139 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5145 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5147 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5148 xops
[2] = gen_rtx_MEM (Pmode
,
5149 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5150 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5152 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5153 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5154 an unadorned address. */
5155 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5156 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5157 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5161 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5163 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5165 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5168 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5170 output_asm_insn ("call\t%a2", xops
);
5173 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5174 is what will be referenced by the Mach-O PIC subsystem. */
5176 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5179 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5180 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5183 output_asm_insn ("pop{l}\t%0", xops
);
5188 get_pc_thunk_name (name
, REGNO (dest
));
5189 pic_labels_used
|= 1 << REGNO (dest
);
5191 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5192 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5193 output_asm_insn ("call\t%X2", xops
);
5194 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5195 is what will be referenced by the Mach-O PIC subsystem. */
5198 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5200 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5201 CODE_LABEL_NUMBER (label
));
5208 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5209 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5211 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5216 /* Generate an "push" pattern for input ARG. */
5221 return gen_rtx_SET (VOIDmode
,
5223 gen_rtx_PRE_DEC (Pmode
,
5224 stack_pointer_rtx
)),
5228 /* Return >= 0 if there is an unused call-clobbered register available
5229 for the entire function. */
5232 ix86_select_alt_pic_regnum (void)
5234 if (current_function_is_leaf
&& !current_function_profile
5235 && !ix86_current_function_calls_tls_descriptor
)
5238 for (i
= 2; i
>= 0; --i
)
5239 if (!regs_ever_live
[i
])
5243 return INVALID_REGNUM
;
5246 /* Return 1 if we need to save REGNO. */
5248 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5250 if (pic_offset_table_rtx
5251 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5252 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5253 || current_function_profile
5254 || current_function_calls_eh_return
5255 || current_function_uses_const_pool
))
5257 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5262 if (current_function_calls_eh_return
&& maybe_eh_return
)
5267 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5268 if (test
== INVALID_REGNUM
)
5275 if (cfun
->machine
->force_align_arg_pointer
5276 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5279 return (regs_ever_live
[regno
]
5280 && !call_used_regs
[regno
]
5281 && !fixed_regs
[regno
]
5282 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5285 /* Return number of registers to be saved on the stack. */
5288 ix86_nsaved_regs (void)
5293 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5294 if (ix86_save_reg (regno
, true))
5299 /* Return the offset between two registers, one to be eliminated, and the other
5300 its replacement, at the start of a routine. */
5303 ix86_initial_elimination_offset (int from
, int to
)
5305 struct ix86_frame frame
;
5306 ix86_compute_frame_layout (&frame
);
5308 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5309 return frame
.hard_frame_pointer_offset
;
5310 else if (from
== FRAME_POINTER_REGNUM
5311 && to
== HARD_FRAME_POINTER_REGNUM
)
5312 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5315 gcc_assert (to
== STACK_POINTER_REGNUM
);
5317 if (from
== ARG_POINTER_REGNUM
)
5318 return frame
.stack_pointer_offset
;
5320 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5321 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5325 /* Fill structure ix86_frame about frame of currently computed function. */
5328 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5330 HOST_WIDE_INT total_size
;
5331 unsigned int stack_alignment_needed
;
5332 HOST_WIDE_INT offset
;
5333 unsigned int preferred_alignment
;
5334 HOST_WIDE_INT size
= get_frame_size ();
5336 frame
->nregs
= ix86_nsaved_regs ();
5339 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5340 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5342 /* During reload iteration the amount of registers saved can change.
5343 Recompute the value as needed. Do not recompute when amount of registers
5344 didn't change as reload does multiple calls to the function and does not
5345 expect the decision to change within single iteration. */
5347 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5349 int count
= frame
->nregs
;
5351 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5352 /* The fast prologue uses move instead of push to save registers. This
5353 is significantly longer, but also executes faster as modern hardware
5354 can execute the moves in parallel, but can't do that for push/pop.
5356 Be careful about choosing what prologue to emit: When function takes
5357 many instructions to execute we may use slow version as well as in
5358 case function is known to be outside hot spot (this is known with
5359 feedback only). Weight the size of function by number of registers
5360 to save as it is cheap to use one or two push instructions but very
5361 slow to use many of them. */
5363 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5364 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5365 || (flag_branch_probabilities
5366 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5367 cfun
->machine
->use_fast_prologue_epilogue
= false;
5369 cfun
->machine
->use_fast_prologue_epilogue
5370 = !expensive_function_p (count
);
5372 if (TARGET_PROLOGUE_USING_MOVE
5373 && cfun
->machine
->use_fast_prologue_epilogue
)
5374 frame
->save_regs_using_mov
= true;
5376 frame
->save_regs_using_mov
= false;
5379 /* Skip return address and saved base pointer. */
5380 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5382 frame
->hard_frame_pointer_offset
= offset
;
5384 /* Do some sanity checking of stack_alignment_needed and
5385 preferred_alignment, since i386 port is the only using those features
5386 that may break easily. */
5388 gcc_assert (!size
|| stack_alignment_needed
);
5389 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5390 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5391 gcc_assert (stack_alignment_needed
5392 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5394 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5395 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5397 /* Register save area */
5398 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5401 if (ix86_save_varrargs_registers
)
5403 offset
+= X86_64_VARARGS_SIZE
;
5404 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5407 frame
->va_arg_size
= 0;
5409 /* Align start of frame for local function. */
5410 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5411 & -stack_alignment_needed
) - offset
;
5413 offset
+= frame
->padding1
;
5415 /* Frame pointer points here. */
5416 frame
->frame_pointer_offset
= offset
;
5420 /* Add outgoing arguments area. Can be skipped if we eliminated
5421 all the function calls as dead code.
5422 Skipping is however impossible when function calls alloca. Alloca
5423 expander assumes that last current_function_outgoing_args_size
5424 of stack frame are unused. */
5425 if (ACCUMULATE_OUTGOING_ARGS
5426 && (!current_function_is_leaf
|| current_function_calls_alloca
5427 || ix86_current_function_calls_tls_descriptor
))
5429 offset
+= current_function_outgoing_args_size
;
5430 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5433 frame
->outgoing_arguments_size
= 0;
5435 /* Align stack boundary. Only needed if we're calling another function
5437 if (!current_function_is_leaf
|| current_function_calls_alloca
5438 || ix86_current_function_calls_tls_descriptor
)
5439 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5440 & -preferred_alignment
) - offset
;
5442 frame
->padding2
= 0;
5444 offset
+= frame
->padding2
;
5446 /* We've reached end of stack frame. */
5447 frame
->stack_pointer_offset
= offset
;
5449 /* Size prologue needs to allocate. */
5450 frame
->to_allocate
=
5451 (size
+ frame
->padding1
+ frame
->padding2
5452 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5454 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5455 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5456 frame
->save_regs_using_mov
= false;
5458 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5459 && current_function_is_leaf
5460 && !ix86_current_function_calls_tls_descriptor
)
5462 frame
->red_zone_size
= frame
->to_allocate
;
5463 if (frame
->save_regs_using_mov
)
5464 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5465 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5466 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5469 frame
->red_zone_size
= 0;
5470 frame
->to_allocate
-= frame
->red_zone_size
;
5471 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5473 fprintf (stderr
, "\n");
5474 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5475 fprintf (stderr
, "size: %ld\n", (long)size
);
5476 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5477 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5478 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5479 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5480 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5481 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5482 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5483 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5484 (long)frame
->hard_frame_pointer_offset
);
5485 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5486 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5487 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5488 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5492 /* Emit code to save registers in the prologue. */
5495 ix86_emit_save_regs (void)
5500 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5501 if (ix86_save_reg (regno
, true))
5503 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5504 RTX_FRAME_RELATED_P (insn
) = 1;
5508 /* Emit code to save registers using MOV insns. First register
5509 is restored from POINTER + OFFSET. */
5511 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5516 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5517 if (ix86_save_reg (regno
, true))
5519 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5521 gen_rtx_REG (Pmode
, regno
));
5522 RTX_FRAME_RELATED_P (insn
) = 1;
5523 offset
+= UNITS_PER_WORD
;
5527 /* Expand prologue or epilogue stack adjustment.
5528 The pattern exist to put a dependency on all ebp-based memory accesses.
5529 STYLE should be negative if instructions should be marked as frame related,
5530 zero if %r11 register is live and cannot be freely used and positive
5534 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5539 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5540 else if (x86_64_immediate_operand (offset
, DImode
))
5541 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5545 /* r11 is used by indirect sibcall return as well, set before the
5546 epilogue and used after the epilogue. ATM indirect sibcall
5547 shouldn't be used together with huge frame sizes in one
5548 function because of the frame_size check in sibcall.c. */
5550 r11
= gen_rtx_REG (DImode
, R11_REG
);
5551 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5553 RTX_FRAME_RELATED_P (insn
) = 1;
5554 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5558 RTX_FRAME_RELATED_P (insn
) = 1;
5561 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5564 ix86_internal_arg_pointer (void)
5566 bool has_force_align_arg_pointer
=
5567 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5568 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5569 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5570 && DECL_NAME (current_function_decl
)
5571 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5572 && DECL_FILE_SCOPE_P (current_function_decl
))
5573 || ix86_force_align_arg_pointer
5574 || has_force_align_arg_pointer
)
5576 /* Nested functions can't realign the stack due to a register
5578 if (DECL_CONTEXT (current_function_decl
)
5579 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5581 if (ix86_force_align_arg_pointer
)
5582 warning (0, "-mstackrealign ignored for nested functions");
5583 if (has_force_align_arg_pointer
)
5584 error ("%s not supported for nested functions",
5585 ix86_force_align_arg_pointer_string
);
5586 return virtual_incoming_args_rtx
;
5588 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5589 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5592 return virtual_incoming_args_rtx
;
5595 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5596 This is called from dwarf2out.c to emit call frame instructions
5597 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5599 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5601 rtx unspec
= SET_SRC (pattern
);
5602 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5606 case UNSPEC_REG_SAVE
:
5607 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5608 SET_DEST (pattern
));
5610 case UNSPEC_DEF_CFA
:
5611 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5612 INTVAL (XVECEXP (unspec
, 0, 0)));
5619 /* Expand the prologue into a bunch of separate insns. */
5622 ix86_expand_prologue (void)
5626 struct ix86_frame frame
;
5627 HOST_WIDE_INT allocate
;
5629 ix86_compute_frame_layout (&frame
);
5631 if (cfun
->machine
->force_align_arg_pointer
)
5635 /* Grab the argument pointer. */
5636 x
= plus_constant (stack_pointer_rtx
, 4);
5637 y
= cfun
->machine
->force_align_arg_pointer
;
5638 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5639 RTX_FRAME_RELATED_P (insn
) = 1;
5641 /* The unwind info consists of two parts: install the fafp as the cfa,
5642 and record the fafp as the "save register" of the stack pointer.
5643 The later is there in order that the unwinder can see where it
5644 should restore the stack pointer across the and insn. */
5645 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5646 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5647 RTX_FRAME_RELATED_P (x
) = 1;
5648 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5650 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5651 RTX_FRAME_RELATED_P (y
) = 1;
5652 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5653 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5654 REG_NOTES (insn
) = x
;
5656 /* Align the stack. */
5657 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5660 /* And here we cheat like madmen with the unwind info. We force the
5661 cfa register back to sp+4, which is exactly what it was at the
5662 start of the function. Re-pushing the return address results in
5663 the return at the same spot relative to the cfa, and thus is
5664 correct wrt the unwind info. */
5665 x
= cfun
->machine
->force_align_arg_pointer
;
5666 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5667 insn
= emit_insn (gen_push (x
));
5668 RTX_FRAME_RELATED_P (insn
) = 1;
5671 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5672 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5673 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5674 REG_NOTES (insn
) = x
;
5677 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5678 slower on all targets. Also sdb doesn't like it. */
5680 if (frame_pointer_needed
)
5682 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5683 RTX_FRAME_RELATED_P (insn
) = 1;
5685 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5686 RTX_FRAME_RELATED_P (insn
) = 1;
5689 allocate
= frame
.to_allocate
;
5691 if (!frame
.save_regs_using_mov
)
5692 ix86_emit_save_regs ();
5694 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5696 /* When using red zone we may start register saving before allocating
5697 the stack frame saving one cycle of the prologue. */
5698 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5699 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5700 : stack_pointer_rtx
,
5701 -frame
.nregs
* UNITS_PER_WORD
);
5705 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5706 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5707 GEN_INT (-allocate
), -1);
5710 /* Only valid for Win32. */
5711 rtx eax
= gen_rtx_REG (SImode
, 0);
5712 bool eax_live
= ix86_eax_live_at_start_p ();
5715 gcc_assert (!TARGET_64BIT
);
5719 emit_insn (gen_push (eax
));
5723 emit_move_insn (eax
, GEN_INT (allocate
));
5725 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5726 RTX_FRAME_RELATED_P (insn
) = 1;
5727 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5728 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5729 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5730 t
, REG_NOTES (insn
));
5734 if (frame_pointer_needed
)
5735 t
= plus_constant (hard_frame_pointer_rtx
,
5738 - frame
.nregs
* UNITS_PER_WORD
);
5740 t
= plus_constant (stack_pointer_rtx
, allocate
);
5741 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5745 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5747 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5748 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5750 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5751 -frame
.nregs
* UNITS_PER_WORD
);
5754 pic_reg_used
= false;
5755 if (pic_offset_table_rtx
5756 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5757 || current_function_profile
))
5759 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5761 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5762 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5764 pic_reg_used
= true;
5771 if (ix86_cmodel
== CM_LARGE_PIC
)
5773 rtx tmp_reg
= gen_rtx_REG (DImode
,
5774 FIRST_REX_INT_REG
+ 3 /* R11 */);
5775 rtx label
= gen_label_rtx ();
5777 LABEL_PRESERVE_P (label
) = 1;
5778 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
5779 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
5780 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5781 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
5782 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5783 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
5784 pic_offset_table_rtx
, tmp_reg
));
5787 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5790 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5792 /* Even with accurate pre-reload life analysis, we can wind up
5793 deleting all references to the pic register after reload.
5794 Consider if cross-jumping unifies two sides of a branch
5795 controlled by a comparison vs the only read from a global.
5796 In which case, allow the set_got to be deleted, though we're
5797 too late to do anything about the ebx save in the prologue. */
5798 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5801 /* Prevent function calls from be scheduled before the call to mcount.
5802 In the pic_reg_used case, make sure that the got load isn't deleted. */
5803 if (current_function_profile
)
5804 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5807 /* Emit code to restore saved registers using MOV insns. First register
5808 is restored from POINTER + OFFSET. */
5810 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5811 int maybe_eh_return
)
5814 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5816 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5817 if (ix86_save_reg (regno
, maybe_eh_return
))
5819 /* Ensure that adjust_address won't be forced to produce pointer
5820 out of range allowed by x86-64 instruction set. */
5821 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5825 r11
= gen_rtx_REG (DImode
, R11_REG
);
5826 emit_move_insn (r11
, GEN_INT (offset
));
5827 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5828 base_address
= gen_rtx_MEM (Pmode
, r11
);
5831 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5832 adjust_address (base_address
, Pmode
, offset
));
5833 offset
+= UNITS_PER_WORD
;
5837 /* Restore function stack, frame, and registers. */
5840 ix86_expand_epilogue (int style
)
5843 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5844 struct ix86_frame frame
;
5845 HOST_WIDE_INT offset
;
5847 ix86_compute_frame_layout (&frame
);
5849 /* Calculate start of saved registers relative to ebp. Special care
5850 must be taken for the normal return case of a function using
5851 eh_return: the eax and edx registers are marked as saved, but not
5852 restored along this path. */
5853 offset
= frame
.nregs
;
5854 if (current_function_calls_eh_return
&& style
!= 2)
5856 offset
*= -UNITS_PER_WORD
;
5858 /* If we're only restoring one register and sp is not valid then
5859 using a move instruction to restore the register since it's
5860 less work than reloading sp and popping the register.
5862 The default code result in stack adjustment using add/lea instruction,
5863 while this code results in LEAVE instruction (or discrete equivalent),
5864 so it is profitable in some other cases as well. Especially when there
5865 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5866 and there is exactly one register to pop. This heuristic may need some
5867 tuning in future. */
5868 if ((!sp_valid
&& frame
.nregs
<= 1)
5869 || (TARGET_EPILOGUE_USING_MOVE
5870 && cfun
->machine
->use_fast_prologue_epilogue
5871 && (frame
.nregs
> 1 || frame
.to_allocate
))
5872 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5873 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5874 && cfun
->machine
->use_fast_prologue_epilogue
5875 && frame
.nregs
== 1)
5876 || current_function_calls_eh_return
)
5878 /* Restore registers. We can use ebp or esp to address the memory
5879 locations. If both are available, default to ebp, since offsets
5880 are known to be small. Only exception is esp pointing directly to the
5881 end of block of saved registers, where we may simplify addressing
5884 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5885 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5886 frame
.to_allocate
, style
== 2);
5888 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5889 offset
, style
== 2);
5891 /* eh_return epilogues need %ecx added to the stack pointer. */
5894 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5896 if (frame_pointer_needed
)
5898 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5899 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5900 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5902 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5903 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5905 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5910 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5911 tmp
= plus_constant (tmp
, (frame
.to_allocate
5912 + frame
.nregs
* UNITS_PER_WORD
));
5913 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5916 else if (!frame_pointer_needed
)
5917 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5918 GEN_INT (frame
.to_allocate
5919 + frame
.nregs
* UNITS_PER_WORD
),
5921 /* If not an i386, mov & pop is faster than "leave". */
5922 else if (TARGET_USE_LEAVE
|| optimize_size
5923 || !cfun
->machine
->use_fast_prologue_epilogue
)
5924 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5927 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5928 hard_frame_pointer_rtx
,
5931 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5933 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5938 /* First step is to deallocate the stack frame so that we can
5939 pop the registers. */
5942 gcc_assert (frame_pointer_needed
);
5943 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5944 hard_frame_pointer_rtx
,
5945 GEN_INT (offset
), style
);
5947 else if (frame
.to_allocate
)
5948 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5949 GEN_INT (frame
.to_allocate
), style
);
5951 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5952 if (ix86_save_reg (regno
, false))
5955 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5957 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5959 if (frame_pointer_needed
)
5961 /* Leave results in shorter dependency chains on CPUs that are
5962 able to grok it fast. */
5963 if (TARGET_USE_LEAVE
)
5964 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5965 else if (TARGET_64BIT
)
5966 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5968 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5972 if (cfun
->machine
->force_align_arg_pointer
)
5974 emit_insn (gen_addsi3 (stack_pointer_rtx
,
5975 cfun
->machine
->force_align_arg_pointer
,
5979 /* Sibcall epilogues don't want a return instruction. */
5983 if (current_function_pops_args
&& current_function_args_size
)
5985 rtx popc
= GEN_INT (current_function_pops_args
);
5987 /* i386 can only pop 64K bytes. If asked to pop more, pop
5988 return address, do explicit add, and jump indirectly to the
5991 if (current_function_pops_args
>= 65536)
5993 rtx ecx
= gen_rtx_REG (SImode
, 2);
5995 /* There is no "pascal" calling convention in 64bit ABI. */
5996 gcc_assert (!TARGET_64BIT
);
5998 emit_insn (gen_popsi1 (ecx
));
5999 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6000 emit_jump_insn (gen_return_indirect_internal (ecx
));
6003 emit_jump_insn (gen_return_pop_internal (popc
));
6006 emit_jump_insn (gen_return_internal ());
6009 /* Reset from the function's potential modifications. */
6012 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6013 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6015 if (pic_offset_table_rtx
)
6016 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6018 /* Mach-O doesn't support labels at the end of objects, so if
6019 it looks like we might want one, insert a NOP. */
6021 rtx insn
= get_last_insn ();
6024 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6025 insn
= PREV_INSN (insn
);
6029 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6030 fputs ("\tnop\n", file
);
6036 /* Extract the parts of an RTL expression that is a valid memory address
6037 for an instruction. Return 0 if the structure of the address is
6038 grossly off. Return -1 if the address contains ASHIFT, so it is not
6039 strictly valid, but still used for computing length of lea instruction. */
6042 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6044 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6045 rtx base_reg
, index_reg
;
6046 HOST_WIDE_INT scale
= 1;
6047 rtx scale_rtx
= NULL_RTX
;
6049 enum ix86_address_seg seg
= SEG_DEFAULT
;
6051 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6053 else if (GET_CODE (addr
) == PLUS
)
6063 addends
[n
++] = XEXP (op
, 1);
6066 while (GET_CODE (op
) == PLUS
);
6071 for (i
= n
; i
>= 0; --i
)
6074 switch (GET_CODE (op
))
6079 index
= XEXP (op
, 0);
6080 scale_rtx
= XEXP (op
, 1);
6084 if (XINT (op
, 1) == UNSPEC_TP
6085 && TARGET_TLS_DIRECT_SEG_REFS
6086 && seg
== SEG_DEFAULT
)
6087 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6116 else if (GET_CODE (addr
) == MULT
)
6118 index
= XEXP (addr
, 0); /* index*scale */
6119 scale_rtx
= XEXP (addr
, 1);
6121 else if (GET_CODE (addr
) == ASHIFT
)
6125 /* We're called for lea too, which implements ashift on occasion. */
6126 index
= XEXP (addr
, 0);
6127 tmp
= XEXP (addr
, 1);
6128 if (!CONST_INT_P (tmp
))
6130 scale
= INTVAL (tmp
);
6131 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6137 disp
= addr
; /* displacement */
6139 /* Extract the integral value of scale. */
6142 if (!CONST_INT_P (scale_rtx
))
6144 scale
= INTVAL (scale_rtx
);
6147 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6148 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6150 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6151 if (base_reg
&& index_reg
&& scale
== 1
6152 && (index_reg
== arg_pointer_rtx
6153 || index_reg
== frame_pointer_rtx
6154 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6157 tmp
= base
, base
= index
, index
= tmp
;
6158 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6161 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6162 if ((base_reg
== hard_frame_pointer_rtx
6163 || base_reg
== frame_pointer_rtx
6164 || base_reg
== arg_pointer_rtx
) && !disp
)
6167 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6168 Avoid this by transforming to [%esi+0]. */
6169 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6170 && base_reg
&& !index_reg
&& !disp
6172 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6175 /* Special case: encode reg+reg instead of reg*2. */
6176 if (!base
&& index
&& scale
&& scale
== 2)
6177 base
= index
, base_reg
= index_reg
, scale
= 1;
6179 /* Special case: scaling cannot be encoded without base or displacement. */
6180 if (!base
&& !disp
&& index
&& scale
!= 1)
6192 /* Return cost of the memory address x.
6193 For i386, it is better to use a complex address than let gcc copy
6194 the address into a reg and make a new pseudo. But not if the address
6195 requires to two regs - that would mean more pseudos with longer
6198 ix86_address_cost (rtx x
)
6200 struct ix86_address parts
;
6202 int ok
= ix86_decompose_address (x
, &parts
);
6206 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6207 parts
.base
= SUBREG_REG (parts
.base
);
6208 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6209 parts
.index
= SUBREG_REG (parts
.index
);
6211 /* More complex memory references are better. */
6212 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6214 if (parts
.seg
!= SEG_DEFAULT
)
6217 /* Attempt to minimize number of registers in the address. */
6219 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6221 && (!REG_P (parts
.index
)
6222 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6226 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6228 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6229 && parts
.base
!= parts
.index
)
6232 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6233 since it's predecode logic can't detect the length of instructions
6234 and it degenerates to vector decoded. Increase cost of such
6235 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6236 to split such addresses or even refuse such addresses at all.
6238 Following addressing modes are affected:
6243 The first and last case may be avoidable by explicitly coding the zero in
6244 memory address, but I don't have AMD-K6 machine handy to check this
6248 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6249 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6250 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6256 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6257 this is used for to form addresses to local data when -fPIC is in
6261 darwin_local_data_pic (rtx disp
)
6263 if (GET_CODE (disp
) == MINUS
)
6265 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6266 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6267 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6269 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6270 if (! strcmp (sym_name
, "<pic base>"))
6278 /* Determine if a given RTX is a valid constant. We already know this
6279 satisfies CONSTANT_P. */
6282 legitimate_constant_p (rtx x
)
6284 switch (GET_CODE (x
))
6289 if (GET_CODE (x
) == PLUS
)
6291 if (!CONST_INT_P (XEXP (x
, 1)))
6296 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6299 /* Only some unspecs are valid as "constants". */
6300 if (GET_CODE (x
) == UNSPEC
)
6301 switch (XINT (x
, 1))
6306 return TARGET_64BIT
;
6309 x
= XVECEXP (x
, 0, 0);
6310 return (GET_CODE (x
) == SYMBOL_REF
6311 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6313 x
= XVECEXP (x
, 0, 0);
6314 return (GET_CODE (x
) == SYMBOL_REF
6315 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6320 /* We must have drilled down to a symbol. */
6321 if (GET_CODE (x
) == LABEL_REF
)
6323 if (GET_CODE (x
) != SYMBOL_REF
)
6328 /* TLS symbols are never valid. */
6329 if (SYMBOL_REF_TLS_MODEL (x
))
6334 if (GET_MODE (x
) == TImode
6335 && x
!= CONST0_RTX (TImode
)
6341 if (x
== CONST0_RTX (GET_MODE (x
)))
6349 /* Otherwise we handle everything else in the move patterns. */
6353 /* Determine if it's legal to put X into the constant pool. This
6354 is not possible for the address of thread-local symbols, which
6355 is checked above. */
6358 ix86_cannot_force_const_mem (rtx x
)
6360 /* We can always put integral constants and vectors in memory. */
6361 switch (GET_CODE (x
))
6371 return !legitimate_constant_p (x
);
6374 /* Determine if a given RTX is a valid constant address. */
6377 constant_address_p (rtx x
)
6379 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6382 /* Nonzero if the constant value X is a legitimate general operand
6383 when generating PIC code. It is given that flag_pic is on and
6384 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6387 legitimate_pic_operand_p (rtx x
)
6391 switch (GET_CODE (x
))
6394 inner
= XEXP (x
, 0);
6395 if (GET_CODE (inner
) == PLUS
6396 && CONST_INT_P (XEXP (inner
, 1)))
6397 inner
= XEXP (inner
, 0);
6399 /* Only some unspecs are valid as "constants". */
6400 if (GET_CODE (inner
) == UNSPEC
)
6401 switch (XINT (inner
, 1))
6406 return TARGET_64BIT
;
6408 x
= XVECEXP (inner
, 0, 0);
6409 return (GET_CODE (x
) == SYMBOL_REF
6410 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6418 return legitimate_pic_address_disp_p (x
);
6425 /* Determine if a given CONST RTX is a valid memory displacement
6429 legitimate_pic_address_disp_p (rtx disp
)
6433 /* In 64bit mode we can allow direct addresses of symbols and labels
6434 when they are not dynamic symbols. */
6437 rtx op0
= disp
, op1
;
6439 switch (GET_CODE (disp
))
6445 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6447 op0
= XEXP (XEXP (disp
, 0), 0);
6448 op1
= XEXP (XEXP (disp
, 0), 1);
6449 if (!CONST_INT_P (op1
)
6450 || INTVAL (op1
) >= 16*1024*1024
6451 || INTVAL (op1
) < -16*1024*1024)
6453 if (GET_CODE (op0
) == LABEL_REF
)
6455 if (GET_CODE (op0
) != SYMBOL_REF
)
6460 /* TLS references should always be enclosed in UNSPEC. */
6461 if (SYMBOL_REF_TLS_MODEL (op0
))
6463 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6464 && ix86_cmodel
!= CM_LARGE_PIC
)
6472 if (GET_CODE (disp
) != CONST
)
6474 disp
= XEXP (disp
, 0);
6478 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6479 of GOT tables. We should not need these anyway. */
6480 if (GET_CODE (disp
) != UNSPEC
6481 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6482 && XINT (disp
, 1) != UNSPEC_GOTOFF
6483 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6486 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6487 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6493 if (GET_CODE (disp
) == PLUS
)
6495 if (!CONST_INT_P (XEXP (disp
, 1)))
6497 disp
= XEXP (disp
, 0);
6501 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6504 if (GET_CODE (disp
) != UNSPEC
)
6507 switch (XINT (disp
, 1))
6512 /* We need to check for both symbols and labels because VxWorks loads
6513 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6515 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6516 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6518 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6519 While ABI specify also 32bit relocation but we don't produce it in
6520 small PIC model at all. */
6521 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6522 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6524 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6526 case UNSPEC_GOTTPOFF
:
6527 case UNSPEC_GOTNTPOFF
:
6528 case UNSPEC_INDNTPOFF
:
6531 disp
= XVECEXP (disp
, 0, 0);
6532 return (GET_CODE (disp
) == SYMBOL_REF
6533 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6535 disp
= XVECEXP (disp
, 0, 0);
6536 return (GET_CODE (disp
) == SYMBOL_REF
6537 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6539 disp
= XVECEXP (disp
, 0, 0);
6540 return (GET_CODE (disp
) == SYMBOL_REF
6541 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6547 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6548 memory address for an instruction. The MODE argument is the machine mode
6549 for the MEM expression that wants to use this address.
6551 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6552 convert common non-canonical forms to canonical form so that they will
6556 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
6557 rtx addr
, int strict
)
6559 struct ix86_address parts
;
6560 rtx base
, index
, disp
;
6561 HOST_WIDE_INT scale
;
6562 const char *reason
= NULL
;
6563 rtx reason_rtx
= NULL_RTX
;
6565 if (ix86_decompose_address (addr
, &parts
) <= 0)
6567 reason
= "decomposition failed";
6572 index
= parts
.index
;
6574 scale
= parts
.scale
;
6576 /* Validate base register.
6578 Don't allow SUBREG's that span more than a word here. It can lead to spill
6579 failures when the base is one word out of a two word structure, which is
6580 represented internally as a DImode int. */
6589 else if (GET_CODE (base
) == SUBREG
6590 && REG_P (SUBREG_REG (base
))
6591 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6593 reg
= SUBREG_REG (base
);
6596 reason
= "base is not a register";
6600 if (GET_MODE (base
) != Pmode
)
6602 reason
= "base is not in Pmode";
6606 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6607 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6609 reason
= "base is not valid";
6614 /* Validate index register.
6616 Don't allow SUBREG's that span more than a word here -- same as above. */
6625 else if (GET_CODE (index
) == SUBREG
6626 && REG_P (SUBREG_REG (index
))
6627 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6629 reg
= SUBREG_REG (index
);
6632 reason
= "index is not a register";
6636 if (GET_MODE (index
) != Pmode
)
6638 reason
= "index is not in Pmode";
6642 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6643 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6645 reason
= "index is not valid";
6650 /* Validate scale factor. */
6653 reason_rtx
= GEN_INT (scale
);
6656 reason
= "scale without index";
6660 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6662 reason
= "scale is not a valid multiplier";
6667 /* Validate displacement. */
6672 if (GET_CODE (disp
) == CONST
6673 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6674 switch (XINT (XEXP (disp
, 0), 1))
6676 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6677 used. While ABI specify also 32bit relocations, we don't produce
6678 them at all and use IP relative instead. */
6681 gcc_assert (flag_pic
);
6683 goto is_legitimate_pic
;
6684 reason
= "64bit address unspec";
6687 case UNSPEC_GOTPCREL
:
6688 gcc_assert (flag_pic
);
6689 goto is_legitimate_pic
;
6691 case UNSPEC_GOTTPOFF
:
6692 case UNSPEC_GOTNTPOFF
:
6693 case UNSPEC_INDNTPOFF
:
6699 reason
= "invalid address unspec";
6703 else if (SYMBOLIC_CONST (disp
)
6707 && MACHOPIC_INDIRECT
6708 && !machopic_operand_p (disp
)
6714 if (TARGET_64BIT
&& (index
|| base
))
6716 /* foo@dtpoff(%rX) is ok. */
6717 if (GET_CODE (disp
) != CONST
6718 || GET_CODE (XEXP (disp
, 0)) != PLUS
6719 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6720 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6721 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6722 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6724 reason
= "non-constant pic memory reference";
6728 else if (! legitimate_pic_address_disp_p (disp
))
6730 reason
= "displacement is an invalid pic construct";
6734 /* This code used to verify that a symbolic pic displacement
6735 includes the pic_offset_table_rtx register.
6737 While this is good idea, unfortunately these constructs may
6738 be created by "adds using lea" optimization for incorrect
6747 This code is nonsensical, but results in addressing
6748 GOT table with pic_offset_table_rtx base. We can't
6749 just refuse it easily, since it gets matched by
6750 "addsi3" pattern, that later gets split to lea in the
6751 case output register differs from input. While this
6752 can be handled by separate addsi pattern for this case
6753 that never results in lea, this seems to be easier and
6754 correct fix for crash to disable this test. */
6756 else if (GET_CODE (disp
) != LABEL_REF
6757 && !CONST_INT_P (disp
)
6758 && (GET_CODE (disp
) != CONST
6759 || !legitimate_constant_p (disp
))
6760 && (GET_CODE (disp
) != SYMBOL_REF
6761 || !legitimate_constant_p (disp
)))
6763 reason
= "displacement is not constant";
6766 else if (TARGET_64BIT
6767 && !x86_64_immediate_operand (disp
, VOIDmode
))
6769 reason
= "displacement is out of range";
6774 /* Everything looks valid. */
6781 /* Return a unique alias set for the GOT. */
6783 static HOST_WIDE_INT
6784 ix86_GOT_alias_set (void)
6786 static HOST_WIDE_INT set
= -1;
6788 set
= new_alias_set ();
6792 /* Return a legitimate reference for ORIG (an address) using the
6793 register REG. If REG is 0, a new pseudo is generated.
6795 There are two types of references that must be handled:
6797 1. Global data references must load the address from the GOT, via
6798 the PIC reg. An insn is emitted to do this load, and the reg is
6801 2. Static data references, constant pool addresses, and code labels
6802 compute the address as an offset from the GOT, whose base is in
6803 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6804 differentiate them from global data objects. The returned
6805 address is the PIC reg + an unspec constant.
6807 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6808 reg also appears in the address. */
6811 legitimize_pic_address (rtx orig
, rtx reg
)
6818 if (TARGET_MACHO
&& !TARGET_64BIT
)
6821 reg
= gen_reg_rtx (Pmode
);
6822 /* Use the generic Mach-O PIC machinery. */
6823 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6827 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6829 else if (TARGET_64BIT
6830 && ix86_cmodel
!= CM_SMALL_PIC
6831 && gotoff_operand (addr
, Pmode
))
6834 /* This symbol may be referenced via a displacement from the PIC
6835 base address (@GOTOFF). */
6837 if (reload_in_progress
)
6838 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6839 if (GET_CODE (addr
) == CONST
)
6840 addr
= XEXP (addr
, 0);
6841 if (GET_CODE (addr
) == PLUS
)
6843 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6844 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6847 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6848 new = gen_rtx_CONST (Pmode
, new);
6850 tmpreg
= gen_reg_rtx (Pmode
);
6853 emit_move_insn (tmpreg
, new);
6857 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6858 tmpreg
, 1, OPTAB_DIRECT
);
6861 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6863 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
6865 /* This symbol may be referenced via a displacement from the PIC
6866 base address (@GOTOFF). */
6868 if (reload_in_progress
)
6869 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6870 if (GET_CODE (addr
) == CONST
)
6871 addr
= XEXP (addr
, 0);
6872 if (GET_CODE (addr
) == PLUS
)
6874 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6875 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6878 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6879 new = gen_rtx_CONST (Pmode
, new);
6880 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6884 emit_move_insn (reg
, new);
6888 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
6889 /* We can't use @GOTOFF for text labels on VxWorks;
6890 see gotoff_operand. */
6891 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
6893 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
6895 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6896 new = gen_rtx_CONST (Pmode
, new);
6897 new = gen_const_mem (Pmode
, new);
6898 set_mem_alias_set (new, ix86_GOT_alias_set ());
6901 reg
= gen_reg_rtx (Pmode
);
6902 /* Use directly gen_movsi, otherwise the address is loaded
6903 into register for CSE. We don't want to CSE this addresses,
6904 instead we CSE addresses from the GOT table, so skip this. */
6905 emit_insn (gen_movsi (reg
, new));
6910 /* This symbol must be referenced via a load from the
6911 Global Offset Table (@GOT). */
6913 if (reload_in_progress
)
6914 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6915 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6916 new = gen_rtx_CONST (Pmode
, new);
6918 new = force_reg (Pmode
, new);
6919 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6920 new = gen_const_mem (Pmode
, new);
6921 set_mem_alias_set (new, ix86_GOT_alias_set ());
6924 reg
= gen_reg_rtx (Pmode
);
6925 emit_move_insn (reg
, new);
6931 if (CONST_INT_P (addr
)
6932 && !x86_64_immediate_operand (addr
, VOIDmode
))
6936 emit_move_insn (reg
, addr
);
6940 new = force_reg (Pmode
, addr
);
6942 else if (GET_CODE (addr
) == CONST
)
6944 addr
= XEXP (addr
, 0);
6946 /* We must match stuff we generate before. Assume the only
6947 unspecs that can get here are ours. Not that we could do
6948 anything with them anyway.... */
6949 if (GET_CODE (addr
) == UNSPEC
6950 || (GET_CODE (addr
) == PLUS
6951 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6953 gcc_assert (GET_CODE (addr
) == PLUS
);
6955 if (GET_CODE (addr
) == PLUS
)
6957 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6959 /* Check first to see if this is a constant offset from a @GOTOFF
6960 symbol reference. */
6961 if (gotoff_operand (op0
, Pmode
)
6962 && CONST_INT_P (op1
))
6966 if (reload_in_progress
)
6967 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6968 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6970 new = gen_rtx_PLUS (Pmode
, new, op1
);
6971 new = gen_rtx_CONST (Pmode
, new);
6972 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6976 emit_move_insn (reg
, new);
6982 if (INTVAL (op1
) < -16*1024*1024
6983 || INTVAL (op1
) >= 16*1024*1024)
6985 if (!x86_64_immediate_operand (op1
, Pmode
))
6986 op1
= force_reg (Pmode
, op1
);
6987 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6993 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6994 new = legitimize_pic_address (XEXP (addr
, 1),
6995 base
== reg
? NULL_RTX
: reg
);
6997 if (CONST_INT_P (new))
6998 new = plus_constant (base
, INTVAL (new));
7001 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7003 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7004 new = XEXP (new, 1);
7006 new = gen_rtx_PLUS (Pmode
, base
, new);
7014 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7017 get_thread_pointer (int to_reg
)
7021 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7025 reg
= gen_reg_rtx (Pmode
);
7026 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7027 insn
= emit_insn (insn
);
7032 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7033 false if we expect this to be used for a memory address and true if
7034 we expect to load the address into a register. */
7037 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7039 rtx dest
, base
, off
, pic
, tp
;
7044 case TLS_MODEL_GLOBAL_DYNAMIC
:
7045 dest
= gen_reg_rtx (Pmode
);
7046 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7048 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7050 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7053 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7054 insns
= get_insns ();
7057 emit_libcall_block (insns
, dest
, rax
, x
);
7059 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7060 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7062 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7064 if (TARGET_GNU2_TLS
)
7066 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7068 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7072 case TLS_MODEL_LOCAL_DYNAMIC
:
7073 base
= gen_reg_rtx (Pmode
);
7074 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7076 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7078 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7081 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7082 insns
= get_insns ();
7085 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7086 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7087 emit_libcall_block (insns
, base
, rax
, note
);
7089 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7090 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7092 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7094 if (TARGET_GNU2_TLS
)
7096 rtx x
= ix86_tls_module_base ();
7098 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7099 gen_rtx_MINUS (Pmode
, x
, tp
));
7102 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7103 off
= gen_rtx_CONST (Pmode
, off
);
7105 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7107 if (TARGET_GNU2_TLS
)
7109 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7111 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7116 case TLS_MODEL_INITIAL_EXEC
:
7120 type
= UNSPEC_GOTNTPOFF
;
7124 if (reload_in_progress
)
7125 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7126 pic
= pic_offset_table_rtx
;
7127 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7129 else if (!TARGET_ANY_GNU_TLS
)
7131 pic
= gen_reg_rtx (Pmode
);
7132 emit_insn (gen_set_got (pic
));
7133 type
= UNSPEC_GOTTPOFF
;
7138 type
= UNSPEC_INDNTPOFF
;
7141 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7142 off
= gen_rtx_CONST (Pmode
, off
);
7144 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7145 off
= gen_const_mem (Pmode
, off
);
7146 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7148 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7150 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7151 off
= force_reg (Pmode
, off
);
7152 return gen_rtx_PLUS (Pmode
, base
, off
);
7156 base
= get_thread_pointer (true);
7157 dest
= gen_reg_rtx (Pmode
);
7158 emit_insn (gen_subsi3 (dest
, base
, off
));
7162 case TLS_MODEL_LOCAL_EXEC
:
7163 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7164 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7165 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7166 off
= gen_rtx_CONST (Pmode
, off
);
7168 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7170 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7171 return gen_rtx_PLUS (Pmode
, base
, off
);
7175 base
= get_thread_pointer (true);
7176 dest
= gen_reg_rtx (Pmode
);
7177 emit_insn (gen_subsi3 (dest
, base
, off
));
7188 /* Try machine-dependent ways of modifying an illegitimate address
7189 to be legitimate. If we find one, return the new, valid address.
7190 This macro is used in only one place: `memory_address' in explow.c.
7192 OLDX is the address as it was before break_out_memory_refs was called.
7193 In some cases it is useful to look at this to decide what needs to be done.
7195 MODE and WIN are passed so that this macro can use
7196 GO_IF_LEGITIMATE_ADDRESS.
7198 It is always safe for this macro to do nothing. It exists to recognize
7199 opportunities to optimize the output.
7201 For the 80386, we handle X+REG by loading X into a register R and
7202 using R+REG. R will go in a general reg and indexing will be used.
7203 However, if REG is a broken-out memory address or multiplication,
7204 nothing needs to be done because REG can certainly go in a general reg.
7206 When -fpic is used, special handling is needed for symbolic references.
7207 See comments by legitimize_pic_address in i386.c for details. */
7210 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7215 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7217 return legitimize_tls_address (x
, log
, false);
7218 if (GET_CODE (x
) == CONST
7219 && GET_CODE (XEXP (x
, 0)) == PLUS
7220 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7221 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7223 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7224 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7227 if (flag_pic
&& SYMBOLIC_CONST (x
))
7228 return legitimize_pic_address (x
, 0);
7230 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7231 if (GET_CODE (x
) == ASHIFT
7232 && CONST_INT_P (XEXP (x
, 1))
7233 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7236 log
= INTVAL (XEXP (x
, 1));
7237 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7238 GEN_INT (1 << log
));
7241 if (GET_CODE (x
) == PLUS
)
7243 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7245 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7246 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7247 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7250 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7251 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7252 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7253 GEN_INT (1 << log
));
7256 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7257 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7258 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7261 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7262 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7263 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7264 GEN_INT (1 << log
));
7267 /* Put multiply first if it isn't already. */
7268 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7270 rtx tmp
= XEXP (x
, 0);
7271 XEXP (x
, 0) = XEXP (x
, 1);
7276 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7277 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7278 created by virtual register instantiation, register elimination, and
7279 similar optimizations. */
7280 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7283 x
= gen_rtx_PLUS (Pmode
,
7284 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7285 XEXP (XEXP (x
, 1), 0)),
7286 XEXP (XEXP (x
, 1), 1));
7290 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7291 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7292 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7293 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7294 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7295 && CONSTANT_P (XEXP (x
, 1)))
7298 rtx other
= NULL_RTX
;
7300 if (CONST_INT_P (XEXP (x
, 1)))
7302 constant
= XEXP (x
, 1);
7303 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7305 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7307 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7308 other
= XEXP (x
, 1);
7316 x
= gen_rtx_PLUS (Pmode
,
7317 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7318 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7319 plus_constant (other
, INTVAL (constant
)));
7323 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7326 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7329 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7332 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7335 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7339 && REG_P (XEXP (x
, 1))
7340 && REG_P (XEXP (x
, 0)))
7343 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7346 x
= legitimize_pic_address (x
, 0);
7349 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7352 if (REG_P (XEXP (x
, 0)))
7354 rtx temp
= gen_reg_rtx (Pmode
);
7355 rtx val
= force_operand (XEXP (x
, 1), temp
);
7357 emit_move_insn (temp
, val
);
7363 else if (REG_P (XEXP (x
, 1)))
7365 rtx temp
= gen_reg_rtx (Pmode
);
7366 rtx val
= force_operand (XEXP (x
, 0), temp
);
7368 emit_move_insn (temp
, val
);
7378 /* Print an integer constant expression in assembler syntax. Addition
7379 and subtraction are the only arithmetic that may appear in these
7380 expressions. FILE is the stdio stream to write to, X is the rtx, and
7381 CODE is the operand print code from the output string. */
7384 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7388 switch (GET_CODE (x
))
7391 gcc_assert (flag_pic
);
7396 if (! TARGET_MACHO
|| TARGET_64BIT
)
7397 output_addr_const (file
, x
);
7400 const char *name
= XSTR (x
, 0);
7402 /* Mark the decl as referenced so that cgraph will output the function. */
7403 if (SYMBOL_REF_DECL (x
))
7404 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7407 if (MACHOPIC_INDIRECT
7408 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7409 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7411 assemble_name (file
, name
);
7413 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7414 fputs ("@PLT", file
);
7421 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7422 assemble_name (asm_out_file
, buf
);
7426 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7430 /* This used to output parentheses around the expression,
7431 but that does not work on the 386 (either ATT or BSD assembler). */
7432 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7436 if (GET_MODE (x
) == VOIDmode
)
7438 /* We can use %d if the number is <32 bits and positive. */
7439 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7440 fprintf (file
, "0x%lx%08lx",
7441 (unsigned long) CONST_DOUBLE_HIGH (x
),
7442 (unsigned long) CONST_DOUBLE_LOW (x
));
7444 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7447 /* We can't handle floating point constants;
7448 PRINT_OPERAND must handle them. */
7449 output_operand_lossage ("floating constant misused");
7453 /* Some assemblers need integer constants to appear first. */
7454 if (CONST_INT_P (XEXP (x
, 0)))
7456 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7458 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7462 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7463 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7465 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7471 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7472 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7474 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7476 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7480 gcc_assert (XVECLEN (x
, 0) == 1);
7481 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7482 switch (XINT (x
, 1))
7485 fputs ("@GOT", file
);
7488 fputs ("@GOTOFF", file
);
7491 fputs ("@PLTOFF", file
);
7493 case UNSPEC_GOTPCREL
:
7494 fputs ("@GOTPCREL(%rip)", file
);
7496 case UNSPEC_GOTTPOFF
:
7497 /* FIXME: This might be @TPOFF in Sun ld too. */
7498 fputs ("@GOTTPOFF", file
);
7501 fputs ("@TPOFF", file
);
7505 fputs ("@TPOFF", file
);
7507 fputs ("@NTPOFF", file
);
7510 fputs ("@DTPOFF", file
);
7512 case UNSPEC_GOTNTPOFF
:
7514 fputs ("@GOTTPOFF(%rip)", file
);
7516 fputs ("@GOTNTPOFF", file
);
7518 case UNSPEC_INDNTPOFF
:
7519 fputs ("@INDNTPOFF", file
);
7522 output_operand_lossage ("invalid UNSPEC as operand");
7528 output_operand_lossage ("invalid expression as operand");
7532 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7533 We need to emit DTP-relative relocations. */
7535 static void ATTRIBUTE_UNUSED
7536 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7538 fputs (ASM_LONG
, file
);
7539 output_addr_const (file
, x
);
7540 fputs ("@DTPOFF", file
);
7546 fputs (", 0", file
);
7553 /* In the name of slightly smaller debug output, and to cater to
7554 general assembler lossage, recognize PIC+GOTOFF and turn it back
7555 into a direct symbol reference.
7557 On Darwin, this is necessary to avoid a crash, because Darwin
7558 has a different PIC label for each routine but the DWARF debugging
7559 information is not associated with any particular routine, so it's
7560 necessary to remove references to the PIC label from RTL stored by
7561 the DWARF output code. */
7564 ix86_delegitimize_address (rtx orig_x
)
7567 /* reg_addend is NULL or a multiple of some register. */
7568 rtx reg_addend
= NULL_RTX
;
7569 /* const_addend is NULL or a const_int. */
7570 rtx const_addend
= NULL_RTX
;
7571 /* This is the result, or NULL. */
7572 rtx result
= NULL_RTX
;
7579 if (GET_CODE (x
) != CONST
7580 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7581 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7584 return XVECEXP (XEXP (x
, 0), 0, 0);
7587 if (GET_CODE (x
) != PLUS
7588 || GET_CODE (XEXP (x
, 1)) != CONST
)
7591 if (REG_P (XEXP (x
, 0))
7592 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7593 /* %ebx + GOT/GOTOFF */
7595 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7597 /* %ebx + %reg * scale + GOT/GOTOFF */
7598 reg_addend
= XEXP (x
, 0);
7599 if (REG_P (XEXP (reg_addend
, 0))
7600 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7601 reg_addend
= XEXP (reg_addend
, 1);
7602 else if (REG_P (XEXP (reg_addend
, 1))
7603 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7604 reg_addend
= XEXP (reg_addend
, 0);
7607 if (!REG_P (reg_addend
)
7608 && GET_CODE (reg_addend
) != MULT
7609 && GET_CODE (reg_addend
) != ASHIFT
)
7615 x
= XEXP (XEXP (x
, 1), 0);
7616 if (GET_CODE (x
) == PLUS
7617 && CONST_INT_P (XEXP (x
, 1)))
7619 const_addend
= XEXP (x
, 1);
7623 if (GET_CODE (x
) == UNSPEC
7624 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7625 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7626 result
= XVECEXP (x
, 0, 0);
7628 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7630 result
= XEXP (x
, 0);
7636 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7638 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7642 /* If X is a machine specific address (i.e. a symbol or label being
7643 referenced as a displacement from the GOT implemented using an
7644 UNSPEC), then return the base term. Otherwise return X. */
7647 ix86_find_base_term (rtx x
)
7653 if (GET_CODE (x
) != CONST
)
7656 if (GET_CODE (term
) == PLUS
7657 && (CONST_INT_P (XEXP (term
, 1))
7658 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
7659 term
= XEXP (term
, 0);
7660 if (GET_CODE (term
) != UNSPEC
7661 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
7664 term
= XVECEXP (term
, 0, 0);
7666 if (GET_CODE (term
) != SYMBOL_REF
7667 && GET_CODE (term
) != LABEL_REF
)
7673 term
= ix86_delegitimize_address (x
);
7675 if (GET_CODE (term
) != SYMBOL_REF
7676 && GET_CODE (term
) != LABEL_REF
)
7683 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7688 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7690 enum rtx_code second_code
, bypass_code
;
7691 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7692 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7693 code
= ix86_fp_compare_code_to_integer (code
);
7697 code
= reverse_condition (code
);
7708 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7712 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7713 Those same assemblers have the same but opposite lossage on cmov. */
7714 gcc_assert (mode
== CCmode
);
7715 suffix
= fp
? "nbe" : "a";
7735 gcc_assert (mode
== CCmode
);
7757 gcc_assert (mode
== CCmode
);
7758 suffix
= fp
? "nb" : "ae";
7761 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7765 gcc_assert (mode
== CCmode
);
7769 suffix
= fp
? "u" : "p";
7772 suffix
= fp
? "nu" : "np";
7777 fputs (suffix
, file
);
7780 /* Print the name of register X to FILE based on its machine mode and number.
7781 If CODE is 'w', pretend the mode is HImode.
7782 If CODE is 'b', pretend the mode is QImode.
7783 If CODE is 'k', pretend the mode is SImode.
7784 If CODE is 'q', pretend the mode is DImode.
7785 If CODE is 'h', pretend the reg is the 'high' byte register.
7786 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7789 print_reg (rtx x
, int code
, FILE *file
)
7791 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7792 && REGNO (x
) != FRAME_POINTER_REGNUM
7793 && REGNO (x
) != FLAGS_REG
7794 && REGNO (x
) != FPSR_REG
7795 && REGNO (x
) != FPCR_REG
);
7797 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7800 if (code
== 'w' || MMX_REG_P (x
))
7802 else if (code
== 'b')
7804 else if (code
== 'k')
7806 else if (code
== 'q')
7808 else if (code
== 'y')
7810 else if (code
== 'h')
7813 code
= GET_MODE_SIZE (GET_MODE (x
));
7815 /* Irritatingly, AMD extended registers use different naming convention
7816 from the normal registers. */
7817 if (REX_INT_REG_P (x
))
7819 gcc_assert (TARGET_64BIT
);
7823 error ("extended registers have no high halves");
7826 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7829 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7832 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7835 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7838 error ("unsupported operand size for extended register");
7846 if (STACK_TOP_P (x
))
7848 fputs ("st(0)", file
);
7855 if (! ANY_FP_REG_P (x
))
7856 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7861 fputs (hi_reg_name
[REGNO (x
)], file
);
7864 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7866 fputs (qi_reg_name
[REGNO (x
)], file
);
7869 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7871 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7878 /* Locate some local-dynamic symbol still in use by this function
7879 so that we can print its name in some tls_local_dynamic_base
7883 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7887 if (GET_CODE (x
) == SYMBOL_REF
7888 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
7890 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7898 get_some_local_dynamic_name (void)
7902 if (cfun
->machine
->some_ld_name
)
7903 return cfun
->machine
->some_ld_name
;
7905 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7907 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7908 return cfun
->machine
->some_ld_name
;
7914 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7915 C -- print opcode suffix for set/cmov insn.
7916 c -- like C, but print reversed condition
7917 F,f -- likewise, but for floating-point.
7918 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7920 R -- print the prefix for register names.
7921 z -- print the opcode suffix for the size of the current operand.
7922 * -- print a star (in certain assembler syntax)
7923 A -- print an absolute memory reference.
7924 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7925 s -- print a shift double count, followed by the assemblers argument
7927 b -- print the QImode name of the register for the indicated operand.
7928 %b0 would print %al if operands[0] is reg 0.
7929 w -- likewise, print the HImode name of the register.
7930 k -- likewise, print the SImode name of the register.
7931 q -- likewise, print the DImode name of the register.
7932 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7933 y -- print "st(0)" instead of "st" as a register.
7934 D -- print condition for SSE cmp instruction.
7935 P -- if PIC, print an @PLT suffix.
7936 X -- don't print any sort of PIC '@' suffix for a symbol.
7937 & -- print some in-use local-dynamic symbol name.
7938 H -- print a memory address offset by 8; used for sse high-parts
7942 print_operand (FILE *file
, rtx x
, int code
)
7949 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7954 assemble_name (file
, get_some_local_dynamic_name ());
7958 switch (ASSEMBLER_DIALECT
)
7965 /* Intel syntax. For absolute addresses, registers should not
7966 be surrounded by braces. */
7970 PRINT_OPERAND (file
, x
, 0);
7980 PRINT_OPERAND (file
, x
, 0);
7985 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7990 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7995 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8000 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8005 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8010 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8015 /* 387 opcodes don't get size suffixes if the operands are
8017 if (STACK_REG_P (x
))
8020 /* Likewise if using Intel opcodes. */
8021 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8024 /* This is the size of op from size of operand. */
8025 switch (GET_MODE_SIZE (GET_MODE (x
)))
8032 #ifdef HAVE_GAS_FILDS_FISTS
8038 if (GET_MODE (x
) == SFmode
)
8053 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8055 #ifdef GAS_MNEMONICS
8081 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8083 PRINT_OPERAND (file
, x
, 0);
8089 /* Little bit of braindamage here. The SSE compare instructions
8090 does use completely different names for the comparisons that the
8091 fp conditional moves. */
8092 switch (GET_CODE (x
))
8107 fputs ("unord", file
);
8111 fputs ("neq", file
);
8115 fputs ("nlt", file
);
8119 fputs ("nle", file
);
8122 fputs ("ord", file
);
8129 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8130 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8132 switch (GET_MODE (x
))
8134 case HImode
: putc ('w', file
); break;
8136 case SFmode
: putc ('l', file
); break;
8138 case DFmode
: putc ('q', file
); break;
8139 default: gcc_unreachable ();
8146 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8149 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8150 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8153 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8156 /* Like above, but reverse condition */
8158 /* Check to see if argument to %c is really a constant
8159 and not a condition code which needs to be reversed. */
8160 if (!COMPARISON_P (x
))
8162 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8165 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8168 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8169 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8172 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8176 /* It doesn't actually matter what mode we use here, as we're
8177 only going to use this for printing. */
8178 x
= adjust_address_nv (x
, DImode
, 8);
8185 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8188 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8191 int pred_val
= INTVAL (XEXP (x
, 0));
8193 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8194 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8196 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8197 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8199 /* Emit hints only in the case default branch prediction
8200 heuristics would fail. */
8201 if (taken
!= cputaken
)
8203 /* We use 3e (DS) prefix for taken branches and
8204 2e (CS) prefix for not taken branches. */
8206 fputs ("ds ; ", file
);
8208 fputs ("cs ; ", file
);
8215 output_operand_lossage ("invalid operand code '%c'", code
);
8220 print_reg (x
, code
, file
);
8224 /* No `byte ptr' prefix for call instructions. */
8225 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8228 switch (GET_MODE_SIZE (GET_MODE (x
)))
8230 case 1: size
= "BYTE"; break;
8231 case 2: size
= "WORD"; break;
8232 case 4: size
= "DWORD"; break;
8233 case 8: size
= "QWORD"; break;
8234 case 12: size
= "XWORD"; break;
8235 case 16: size
= "XMMWORD"; break;
8240 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8243 else if (code
== 'w')
8245 else if (code
== 'k')
8249 fputs (" PTR ", file
);
8253 /* Avoid (%rip) for call operands. */
8254 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8255 && !CONST_INT_P (x
))
8256 output_addr_const (file
, x
);
8257 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8258 output_operand_lossage ("invalid constraints for operand");
8263 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8268 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8269 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8271 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8273 fprintf (file
, "0x%08lx", l
);
8276 /* These float cases don't actually occur as immediate operands. */
8277 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8281 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8282 fprintf (file
, "%s", dstr
);
8285 else if (GET_CODE (x
) == CONST_DOUBLE
8286 && GET_MODE (x
) == XFmode
)
8290 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8291 fprintf (file
, "%s", dstr
);
8296 /* We have patterns that allow zero sets of memory, for instance.
8297 In 64-bit mode, we should probably support all 8-byte vectors,
8298 since we can in fact encode that into an immediate. */
8299 if (GET_CODE (x
) == CONST_VECTOR
)
8301 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8307 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8309 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8312 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8313 || GET_CODE (x
) == LABEL_REF
)
8315 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8318 fputs ("OFFSET FLAT:", file
);
8321 if (CONST_INT_P (x
))
8322 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8324 output_pic_addr_const (file
, x
, code
);
8326 output_addr_const (file
, x
);
/* NOTE(review): this chunk is a lossy extraction — statements are split
   across physical lines and many original lines (braces, assignments of
   base/disp, control flow) are missing.  Code below is kept byte-identical.
   Purpose (from visible fragments): decompose ADDR via
   ix86_decompose_address into base/index/disp/scale/seg, then print it in
   AT&T or Intel syntax, using (%rip) addressing for 64-bit where the
   displacement is a non-TLS symbol or label.  */
8330 /* Print a memory operand whose address is ADDR. */
8333 print_operand_address (FILE *file
, rtx addr
)
8335 struct ix86_address parts
;
8336 rtx base
, index
, disp
;
8338 int ok
= ix86_decompose_address (addr
, &parts
);
8343 index
= parts
.index
;
8345 scale
= parts
.scale
;
8353 if (USER_LABEL_PREFIX
[0] == 0)
8355 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8361 if (!base
&& !index
)
8363 /* Displacement only requires special attention. */
8365 if (CONST_INT_P (disp
))
8367 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8369 if (USER_LABEL_PREFIX
[0] == 0)
8371 fputs ("ds:", file
);
8373 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8376 output_pic_addr_const (file
, disp
, 0);
8378 output_addr_const (file
, disp
);
8380 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8383 if (GET_CODE (disp
) == CONST
8384 && GET_CODE (XEXP (disp
, 0)) == PLUS
8385 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8386 disp
= XEXP (XEXP (disp
, 0), 0);
8387 if (GET_CODE (disp
) == LABEL_REF
8388 || (GET_CODE (disp
) == SYMBOL_REF
8389 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8390 fputs ("(%rip)", file
);
8395 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8400 output_pic_addr_const (file
, disp
, 0);
8401 else if (GET_CODE (disp
) == LABEL_REF
)
8402 output_asm_label (disp
);
8404 output_addr_const (file
, disp
);
8409 print_reg (base
, 0, file
);
8413 print_reg (index
, 0, file
);
8415 fprintf (file
, ",%d", scale
);
8421 rtx offset
= NULL_RTX
;
8425 /* Pull out the offset of a symbol; print any symbol itself. */
8426 if (GET_CODE (disp
) == CONST
8427 && GET_CODE (XEXP (disp
, 0)) == PLUS
8428 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8430 offset
= XEXP (XEXP (disp
, 0), 1);
8431 disp
= gen_rtx_CONST (VOIDmode
,
8432 XEXP (XEXP (disp
, 0), 0));
8436 output_pic_addr_const (file
, disp
, 0);
8437 else if (GET_CODE (disp
) == LABEL_REF
)
8438 output_asm_label (disp
);
8439 else if (CONST_INT_P (disp
))
8442 output_addr_const (file
, disp
);
8448 print_reg (base
, 0, file
);
8451 if (INTVAL (offset
) >= 0)
8453 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8457 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8464 print_reg (index
, 0, file
);
8466 fprintf (file
, "*%d", scale
);
/* NOTE(review): lossy extraction — case labels and returns are partly
   missing; code kept byte-identical.  Prints TLS relocation suffixes
   (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @GOTNTPOFF, @INDNTPOFF) for
   UNSPEC addresses; presumably returns false for non-UNSPEC X — TODO
   confirm against the full source.  */
8474 output_addr_const_extra (FILE *file
, rtx x
)
8478 if (GET_CODE (x
) != UNSPEC
)
8481 op
= XVECEXP (x
, 0, 0);
8482 switch (XINT (x
, 1))
8484 case UNSPEC_GOTTPOFF
:
8485 output_addr_const (file
, op
);
8486 /* FIXME: This might be @TPOFF in Sun ld. */
8487 fputs ("@GOTTPOFF", file
);
8490 output_addr_const (file
, op
);
8491 fputs ("@TPOFF", file
);
8494 output_addr_const (file
, op
);
8496 fputs ("@TPOFF", file
);
8498 fputs ("@NTPOFF", file
);
8501 output_addr_const (file
, op
);
8502 fputs ("@DTPOFF", file
);
8504 case UNSPEC_GOTNTPOFF
:
8505 output_addr_const (file
, op
);
8507 fputs ("@GOTTPOFF(%rip)", file
);
8509 fputs ("@GOTNTPOFF", file
);
8511 case UNSPEC_INDNTPOFF
:
8512 output_addr_const (file
, op
);
8513 fputs ("@INDNTPOFF", file
);
/* NOTE(review): lossy extraction — the loop header and MEM/volatile test
   are missing; code kept byte-identical.  Splits each DImode operand into
   two SImode halves: adjust_address at offsets 0/4 for (volatile) MEMs,
   simplify_gen_subreg otherwise (treating VOIDmode constants as DImode).  */
8523 /* Split one or more DImode RTL references into pairs of SImode
8524 references. The RTL can be REG, offsettable MEM, integer constant, or
8525 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8526 split and "num" is its length. lo_half and hi_half are output arrays
8527 that parallel "operands". */
8530 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8534 rtx op
= operands
[num
];
8536 /* simplify_subreg refuse to split volatile memory addresses,
8537 but we still have to handle it. */
8540 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8541 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8545 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8546 GET_MODE (op
) == VOIDmode
8547 ? DImode
: GET_MODE (op
), 0);
8548 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8549 GET_MODE (op
) == VOIDmode
8550 ? DImode
: GET_MODE (op
), 4);
/* NOTE(review): lossy extraction; code kept byte-identical.  TImode
   analogue of split_di: halves are DImode at byte offsets 0 and 8.  */
8554 /* Split one or more TImode RTL references into pairs of DImode
8555 references. The RTL can be REG, offsettable MEM, integer constant, or
8556 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8557 split and "num" is its length. lo_half and hi_half are output arrays
8558 that parallel "operands". */
8561 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8565 rtx op
= operands
[num
];
8567 /* simplify_subreg refuse to split volatile memory addresses, but we
8568 still have to handle it. */
8571 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8572 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8576 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8577 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
/* NOTE(review): lossy extraction — case labels, strcpy of the opcode into
   buf, and the final returns are missing; code kept byte-identical.
   Selects an x87 (or SSE scalar) mnemonic template for a binary FP op,
   choosing popping ("p") and reversed ("r") forms based on which operand
   is at the top of the stack and which registers die, and working around
   the SysV/AT&T fsub{r}/fdiv{r} operand-direction confusion (see the
   SYSV386_COMPAT comment below).  */
8582 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8583 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8584 is the expression of the binary operation. The output may either be
8585 emitted here, or returned to the caller, like all output_* functions.
8587 There is no guarantee that the operands are the same mode, as they
8588 might be within FLOAT or FLOAT_EXTEND expressions. */
8590 #ifndef SYSV386_COMPAT
8591 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8592 wants to fix the assemblers because that causes incompatibility
8593 with gcc. No-one wants to fix gcc because that causes
8594 incompatibility with assemblers... You can use the option of
8595 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8596 #define SYSV386_COMPAT 1
8600 output_387_binary_op (rtx insn
, rtx
*operands
)
8602 static char buf
[30];
8605 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8607 #ifdef ENABLE_CHECKING
8608 /* Even if we do not want to check the inputs, this documents input
8609 constraints. Which helps in understanding the following code. */
8610 if (STACK_REG_P (operands
[0])
8611 && ((REG_P (operands
[1])
8612 && REGNO (operands
[0]) == REGNO (operands
[1])
8613 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8614 || (REG_P (operands
[2])
8615 && REGNO (operands
[0]) == REGNO (operands
[2])
8616 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8617 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8620 gcc_assert (is_sse
);
8623 switch (GET_CODE (operands
[3]))
8626 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8627 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8635 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8636 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8644 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8645 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8653 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8654 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8668 if (GET_MODE (operands
[0]) == SFmode
)
8669 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8671 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8676 switch (GET_CODE (operands
[3]))
8680 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8682 rtx temp
= operands
[2];
8683 operands
[2] = operands
[1];
8687 /* know operands[0] == operands[1]. */
8689 if (MEM_P (operands
[2]))
8695 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8697 if (STACK_TOP_P (operands
[0]))
8698 /* How is it that we are storing to a dead operand[2]?
8699 Well, presumably operands[1] is dead too. We can't
8700 store the result to st(0) as st(0) gets popped on this
8701 instruction. Instead store to operands[2] (which I
8702 think has to be st(1)). st(1) will be popped later.
8703 gcc <= 2.8.1 didn't have this check and generated
8704 assembly code that the Unixware assembler rejected. */
8705 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8707 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8711 if (STACK_TOP_P (operands
[0]))
8712 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8714 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8719 if (MEM_P (operands
[1]))
8725 if (MEM_P (operands
[2]))
8731 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8734 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8735 derived assemblers, confusingly reverse the direction of
8736 the operation for fsub{r} and fdiv{r} when the
8737 destination register is not st(0). The Intel assembler
8738 doesn't have this brain damage. Read !SYSV386_COMPAT to
8739 figure out what the hardware really does. */
8740 if (STACK_TOP_P (operands
[0]))
8741 p
= "{p\t%0, %2|rp\t%2, %0}";
8743 p
= "{rp\t%2, %0|p\t%0, %2}";
8745 if (STACK_TOP_P (operands
[0]))
8746 /* As above for fmul/fadd, we can't store to st(0). */
8747 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8749 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8754 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8757 if (STACK_TOP_P (operands
[0]))
8758 p
= "{rp\t%0, %1|p\t%1, %0}";
8760 p
= "{p\t%1, %0|rp\t%0, %1}";
8762 if (STACK_TOP_P (operands
[0]))
8763 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8765 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8770 if (STACK_TOP_P (operands
[0]))
8772 if (STACK_TOP_P (operands
[1]))
8773 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8775 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8778 else if (STACK_TOP_P (operands
[1]))
8781 p
= "{\t%1, %0|r\t%0, %1}";
8783 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8789 p
= "{r\t%2, %0|\t%0, %2}";
8791 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* NOTE(review): lossy extraction — the CALL_P test and per-mode return
   values are missing; code kept byte-identical.  Reports the i387 control
   word mode an insn requires, for the optimize_mode_switching pass:
   UNINITIALIZED after calls/asm, else the insn's i387_cw attribute
   (TRUNC/FLOOR/CEIL/MASK_PM, presumably gated on the matching
   flag_trapping_math-style conditions in the missing lines — TODO
   confirm).  */
8804 /* Return needed mode for entity in optimize_mode_switching pass. */
8807 ix86_mode_needed (int entity
, rtx insn
)
8809 enum attr_i387_cw mode
;
8811 /* The mode UNINITIALIZED is used to store control word after a
8812 function call or ASM pattern. The mode ANY specify that function
8813 has no requirements on the control word and make no changes in the
8814 bits we are interested in. */
8817 || (NONJUMP_INSN_P (insn
)
8818 && (asm_noperands (PATTERN (insn
)) >= 0
8819 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8820 return I387_CW_UNINITIALIZED
;
8822 if (recog_memoized (insn
) < 0)
8825 mode
= get_attr_i387_cw (insn
);
8830 if (mode
== I387_CW_TRUNC
)
8835 if (mode
== I387_CW_FLOOR
)
8840 if (mode
== I387_CW_CEIL
)
8845 if (mode
== I387_CW_MASK_PM
)
/* NOTE(review): lossy extraction — the switch headers and the
   fast-path/slow-path branch structure are partly missing; code kept
   byte-identical.  Saves the current x87 control word (fnstcw) to a stack
   slot, derives a modified copy with the rounding-control bits set for the
   requested MODE (0x0c00 = truncate, 0x0400 = floor, 0x0800 = ceil,
   0x0020 = mask precision exception), and stores it to the per-mode stack
   slot.  The movsi_insv_1 path is the variant used when the iorhi3/andhi3
   sequence would cause partial-register stalls (gated on TARGET_64BIT /
   TARGET_PARTIAL_REG_STALL / optimize_size above).  */
8856 /* Output code to initialize control word copies used by trunc?f?i and
8857 rounding patterns. CURRENT_MODE is set to current control word,
8858 while NEW_MODE is set to new control word. */
8861 emit_i387_cw_initialization (int mode
)
8863 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8868 rtx reg
= gen_reg_rtx (HImode
);
8870 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8871 emit_move_insn (reg
, copy_rtx (stored_mode
));
8873 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
8878 /* round toward zero (truncate) */
8879 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
8880 slot
= SLOT_CW_TRUNC
;
8884 /* round down toward -oo */
8885 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8886 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
8887 slot
= SLOT_CW_FLOOR
;
8891 /* round up toward +oo */
8892 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8893 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
8894 slot
= SLOT_CW_CEIL
;
8897 case I387_CW_MASK_PM
:
8898 /* mask precision exception for nearbyint() */
8899 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8900 slot
= SLOT_CW_MASK_PM
;
8912 /* round toward zero (truncate) */
8913 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8914 slot
= SLOT_CW_TRUNC
;
8918 /* round down toward -oo */
8919 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
8920 slot
= SLOT_CW_FLOOR
;
8924 /* round up toward +oo */
8925 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
8926 slot
= SLOT_CW_CEIL
;
8929 case I387_CW_MASK_PM
:
8930 /* mask precision exception for nearbyint() */
8931 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8932 slot
= SLOT_CW_MASK_PM
;
8940 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
8942 new_mode
= assign_386_stack_local (HImode
, slot
);
8943 emit_move_insn (new_mode
, reg
);
/* NOTE(review): lossy extraction — the fisttp/else branch structure is
   partly missing; code kept byte-identical.  Emits the float->int
   conversion: reloads the value (fld) when a popping form is needed but
   the stack top does not die, then fisttp (SSE3) or fistp/fist bracketed
   by fldcw control-word swaps when a non-default rounding mode is in
   effect (operand 3 = new CW, operand 2 = saved CW).  */
8946 /* Output code for INSN to convert a float to a signed int. OPERANDS
8947 are the insn operands. The output may be [HSD]Imode and the input
8948 operand may be [SDX]Fmode. */
8951 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
8953 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8954 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8955 int round_mode
= get_attr_i387_cw (insn
);
8957 /* Jump through a hoop or two for DImode, since the hardware has no
8958 non-popping instruction. We used to do this a different way, but
8959 that was somewhat fragile and broke with post-reload splitters. */
8960 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
8961 output_asm_insn ("fld\t%y1", operands
);
8963 gcc_assert (STACK_TOP_P (operands
[1]));
8964 gcc_assert (MEM_P (operands
[0]));
8967 output_asm_insn ("fisttp%z0\t%0", operands
);
8970 if (round_mode
!= I387_CW_ANY
)
8971 output_asm_insn ("fldcw\t%3", operands
);
8972 if (stack_top_dies
|| dimode_p
)
8973 output_asm_insn ("fistp%z0\t%0", operands
);
8975 output_asm_insn ("fist%z0\t%0", operands
);
8976 if (round_mode
!= I387_CW_ANY
)
8977 output_asm_insn ("fldcw\t%2", operands
);
/* NOTE(review): lossy extraction — the #else and closing of the
   HAVE_AS_IX86_FFREEP conditional are missing; code kept byte-identical.
   Returns the template for popping an x87 stack slot: "ffreep" when the
   assembler supports it, a hand-encoded .word (0xdf 0xc0+reg, patched
   into retval) otherwise, and plain "fstp" when ffreep is not wanted.
   Note retval is static — the returned string is overwritten on the next
   call.  */
8983 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8984 have the values zero or one, indicates the ffreep insn's operand
8985 from the OPERANDS array. */
8988 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
8990 if (TARGET_USE_FFREEP
)
8991 #if HAVE_AS_IX86_FFREEP
8992 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
8995 static char retval
[] = ".word\t0xc_df";
8996 int regno
= REGNO (operands
[opno
]);
8998 gcc_assert (FP_REGNO_P (regno
));
9000 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9005 return opno
? "fstp\t%y1" : "fstp\t%y0";
/* NOTE(review): lossy extraction — several branch headers and the final
   return of alt[mask] are missing; code kept byte-identical.  Chooses the
   FP-compare template: SSE comiss/ucomiss/comisd/ucomisd when operands
   are SSE registers; for x87, ftst against zero, fcompp/fucompp (or
   fcomip + ffreep) when both stack operands die, otherwise an entry from
   the alt[] table indexed by
   eflags_p<<3 | int-operand<<2 | unordered_p<<1 | stack_top_dies.  */
9009 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9010 should be used. UNORDERED_P is true when fucom should be used. */
9013 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9016 rtx cmp_op0
, cmp_op1
;
9017 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9021 cmp_op0
= operands
[0];
9022 cmp_op1
= operands
[1];
9026 cmp_op0
= operands
[1];
9027 cmp_op1
= operands
[2];
9032 if (GET_MODE (operands
[0]) == SFmode
)
9034 return "ucomiss\t{%1, %0|%0, %1}";
9036 return "comiss\t{%1, %0|%0, %1}";
9039 return "ucomisd\t{%1, %0|%0, %1}";
9041 return "comisd\t{%1, %0|%0, %1}";
9044 gcc_assert (STACK_TOP_P (cmp_op0
));
9046 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9048 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9052 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9053 return output_387_ffreep (operands
, 1);
9056 return "ftst\n\tfnstsw\t%0";
9059 if (STACK_REG_P (cmp_op1
)
9061 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9062 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9064 /* If both the top of the 387 stack dies, and the other operand
9065 is also a stack register that dies, then this must be a
9066 `fcompp' float compare */
9070 /* There is no double popping fcomi variant. Fortunately,
9071 eflags is immune from the fstp's cc clobbering. */
9073 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9075 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9076 return output_387_ffreep (operands
, 0);
9081 return "fucompp\n\tfnstsw\t%0";
9083 return "fcompp\n\tfnstsw\t%0";
9088 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9090 static const char * const alt
[16] =
9092 "fcom%z2\t%y2\n\tfnstsw\t%0",
9093 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9094 "fucom%z2\t%y2\n\tfnstsw\t%0",
9095 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9097 "ficom%z2\t%y2\n\tfnstsw\t%0",
9098 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9102 "fcomi\t{%y1, %0|%0, %y1}",
9103 "fcomip\t{%y1, %0|%0, %y1}",
9104 "fucomi\t{%y1, %0|%0, %y1}",
9105 "fucomip\t{%y1, %0|%0, %y1}",
9116 mask
= eflags_p
<< 3;
9117 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9118 mask
|= unordered_p
<< 1;
9119 mask
|= stack_top_dies
;
9121 gcc_assert (mask
< 16);
/* NOTE(review): lossy extraction — the TARGET_64BIT branch header is
   missing; code kept byte-identical.  Emits one absolute jump-table
   entry: ".long"/".quad" followed by the local label LPREFIX<value>.  */
9130 ix86_output_addr_vec_elt (FILE *file
, int value
)
9132 const char *directive
= ASM_LONG
;
9136 directive
= ASM_QUAD
;
9138 gcc_assert (!TARGET_64BIT
);
9141 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
/* NOTE(review): lossy extraction; code kept byte-identical.  Emits one
   relative jump-table entry, choosing between a plain label difference
   (64-bit / VxWorks), @GOTOFF when the assembler supports it in data,
   a Mach-O function-base-relative form, and a GOT-relative fallback.  */
9145 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9147 const char *directive
= ASM_LONG
;
9150 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9151 directive
= ASM_QUAD
;
9153 gcc_assert (!TARGET_64BIT
);
9155 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9156 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9157 fprintf (file
, "%s%s%d-%s%d\n",
9158 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9159 else if (HAVE_AS_GOTOFF_IN_DATA
)
9160 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9162 else if (TARGET_MACHO
)
9164 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9165 machopic_output_function_base_name (file
);
9166 fprintf(file
, "\n");
9170 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9171 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
/* NOTE(review): lossy extraction — the closing emit_insn (tmp) is
   missing; code kept byte-identical.  Post-reload: zeroes DEST, widening
   sub-SImode registers to SImode to avoid the 16-bit prefix, and wraps
   the xor form in a PARALLEL with a flags clobber (hard reg 17) to match
   the movsi_xor/movdi_xor_rex64 patterns.  */
9174 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9178 ix86_expand_clear (rtx dest
)
9182 /* We play register width games, which are only valid after reload. */
9183 gcc_assert (reload_completed
);
9185 /* Avoid HImode and its attendant prefix byte. */
9186 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9187 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9189 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9191 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9192 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9194 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9195 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
/* NOTE(review): lossy extraction — the NULL return is missing; code kept
   byte-identical.  Delegitimizes X's address and returns the constant
   pool entry if it resolves to a constant-pool SYMBOL_REF.  */
9201 /* X is an unchanging MEM. If it is a constant pool reference, return
9202 the constant pool rtx, else NULL. */
9205 maybe_get_pool_constant (rtx x
)
9207 x
= ix86_delegitimize_address (XEXP (x
, 0));
9209 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9210 return get_pool_constant (x
);
/* NOTE(review): lossy extraction — op0/op1 initialization and several
   branch headers are missing; code kept byte-identical.  Expands a
   scalar move: legitimizes TLS symbols (plain or symbol+addend CONST),
   handles PIC/Mach-O symbolic operands, forces non-pushable immediates
   and large 64-bit constants into registers, and routes FP constants
   through the constant pool before emitting the final SET.  */
9216 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9218 int strict
= (reload_in_progress
|| reload_completed
);
9220 enum tls_model model
;
9225 if (GET_CODE (op1
) == SYMBOL_REF
)
9227 model
= SYMBOL_REF_TLS_MODEL (op1
);
9230 op1
= legitimize_tls_address (op1
, model
, true);
9231 op1
= force_operand (op1
, op0
);
9236 else if (GET_CODE (op1
) == CONST
9237 && GET_CODE (XEXP (op1
, 0)) == PLUS
9238 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9240 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9243 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9244 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9245 op1
= force_operand (op1
, NULL
);
9246 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9247 op0
, 1, OPTAB_DIRECT
);
9253 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9255 if (TARGET_MACHO
&& !TARGET_64BIT
)
9260 rtx temp
= ((reload_in_progress
9261 || ((op0
&& REG_P (op0
))
9263 ? op0
: gen_reg_rtx (Pmode
));
9264 op1
= machopic_indirect_data_reference (op1
, temp
);
9265 op1
= machopic_legitimize_pic_address (op1
, mode
,
9266 temp
== op1
? 0 : temp
);
9268 else if (MACHOPIC_INDIRECT
)
9269 op1
= machopic_indirect_data_reference (op1
, 0);
9277 op1
= force_reg (Pmode
, op1
);
9278 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9280 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9281 op1
= legitimize_pic_address (op1
, reg
);
9290 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9291 || !push_operand (op0
, mode
))
9293 op1
= force_reg (mode
, op1
);
9295 if (push_operand (op0
, mode
)
9296 && ! general_no_elim_operand (op1
, mode
))
9297 op1
= copy_to_mode_reg (mode
, op1
);
9299 /* Force large constants in 64bit compilation into register
9300 to get them CSEed. */
9301 if (TARGET_64BIT
&& mode
== DImode
9302 && immediate_operand (op1
, mode
)
9303 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9304 && !register_operand (op0
, mode
)
9305 && optimize
&& !reload_completed
&& !reload_in_progress
)
9306 op1
= copy_to_mode_reg (mode
, op1
);
9308 if (FLOAT_MODE_P (mode
))
9310 /* If we are loading a floating point constant to a register,
9311 force the value to memory now, since we'll get better code
9312 out the back end. */
9316 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9318 op1
= validize_mem (force_const_mem (mode
, op1
));
9319 if (!register_operand (op0
, mode
))
9321 rtx temp
= gen_reg_rtx (mode
);
9322 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9323 emit_move_insn (op0
, temp
);
9330 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* NOTE(review): lossy extraction — the CONSTANT_P test on op1 and part of
   the mem-to-mem guard are missing; code kept byte-identical.  Expands a
   vector move: forces non-trivial vector constants into the constant
   pool (standard_sse_constant_p <= 0), forces memory sources into a
   register when neither operand is a register, then emits the SET.  */
9334 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9336 rtx op0
= operands
[0], op1
= operands
[1];
9338 /* Force constants other than zero into memory. We do not know how
9339 the instructions used to build constants modify the upper 64 bits
9340 of the register, once we have that information we may be able
9341 to handle some of them more efficiently. */
9342 if ((reload_in_progress
| reload_completed
) == 0
9343 && register_operand (op0
, mode
)
9345 && standard_sse_constant_p (op1
) <= 0)
9346 op1
= validize_mem (force_const_mem (mode
, op1
));
9348 /* Make operand1 a register if it isn't already. */
9350 && !register_operand (op0
, mode
)
9351 && !register_operand (op1
, mode
))
9353 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9357 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* NOTE(review): lossy extraction — op0/op1/m/zero declarations, the
   MEM_P (op1) branch header, and several else arms are missing; code
   kept byte-identical.  Expands misaligned SSE loads/stores: movups as
   the size-optimized fallback, movdqu for integer vectors, movupd or a
   loadlpd/loadhpd half-load pair for V2DF (depending on
   TARGET_SSE_UNALIGNED_MOVE_OPTIMAL / TARGET_SSE_SPLIT_REGS), a
   loadlps/loadhps pair for SF vectors, and the mirrored store sequences
   when op0 is memory.  */
9360 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9361 straight to ix86_expand_vector_move. */
9362 /* Code generation for scalar reg-reg moves of single and double precision data:
9363 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9367 if (x86_sse_partial_reg_dependency == true)
9372 Code generation for scalar loads of double precision data:
9373 if (x86_sse_split_regs == true)
9374 movlpd mem, reg (gas syntax)
9378 Code generation for unaligned packed loads of single precision data
9379 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9380 if (x86_sse_unaligned_move_optimal)
9383 if (x86_sse_partial_reg_dependency == true)
9395 Code generation for unaligned packed loads of double precision data
9396 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9397 if (x86_sse_unaligned_move_optimal)
9400 if (x86_sse_split_regs == true)
9413 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9422 /* If we're optimizing for size, movups is the smallest. */
9425 op0
= gen_lowpart (V4SFmode
, op0
);
9426 op1
= gen_lowpart (V4SFmode
, op1
);
9427 emit_insn (gen_sse_movups (op0
, op1
));
9431 /* ??? If we have typed data, then it would appear that using
9432 movdqu is the only way to get unaligned data loaded with
9434 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9436 op0
= gen_lowpart (V16QImode
, op0
);
9437 op1
= gen_lowpart (V16QImode
, op1
);
9438 emit_insn (gen_sse2_movdqu (op0
, op1
));
9442 if (TARGET_SSE2
&& mode
== V2DFmode
)
9446 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9448 op0
= gen_lowpart (V2DFmode
, op0
);
9449 op1
= gen_lowpart (V2DFmode
, op1
);
9450 emit_insn (gen_sse2_movupd (op0
, op1
));
9454 /* When SSE registers are split into halves, we can avoid
9455 writing to the top half twice. */
9456 if (TARGET_SSE_SPLIT_REGS
)
9458 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9463 /* ??? Not sure about the best option for the Intel chips.
9464 The following would seem to satisfy; the register is
9465 entirely cleared, breaking the dependency chain. We
9466 then store to the upper half, with a dependency depth
9467 of one. A rumor has it that Intel recommends two movsd
9468 followed by an unpacklpd, but this is unconfirmed. And
9469 given that the dependency depth of the unpacklpd would
9470 still be one, I'm not sure why this would be better. */
9471 zero
= CONST0_RTX (V2DFmode
);
9474 m
= adjust_address (op1
, DFmode
, 0);
9475 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9476 m
= adjust_address (op1
, DFmode
, 8);
9477 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9481 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9483 op0
= gen_lowpart (V4SFmode
, op0
);
9484 op1
= gen_lowpart (V4SFmode
, op1
);
9485 emit_insn (gen_sse_movups (op0
, op1
));
9489 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9490 emit_move_insn (op0
, CONST0_RTX (mode
));
9492 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9494 if (mode
!= V4SFmode
)
9495 op0
= gen_lowpart (V4SFmode
, op0
);
9496 m
= adjust_address (op1
, V2SFmode
, 0);
9497 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9498 m
= adjust_address (op1
, V2SFmode
, 8);
9499 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9502 else if (MEM_P (op0
))
9504 /* If we're optimizing for size, movups is the smallest. */
9507 op0
= gen_lowpart (V4SFmode
, op0
);
9508 op1
= gen_lowpart (V4SFmode
, op1
);
9509 emit_insn (gen_sse_movups (op0
, op1
));
9513 /* ??? Similar to above, only less clear because of quote
9514 typeless stores unquote. */
9515 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9516 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9518 op0
= gen_lowpart (V16QImode
, op0
);
9519 op1
= gen_lowpart (V16QImode
, op1
);
9520 emit_insn (gen_sse2_movdqu (op0
, op1
));
9524 if (TARGET_SSE2
&& mode
== V2DFmode
)
9526 m
= adjust_address (op0
, DFmode
, 0);
9527 emit_insn (gen_sse2_storelpd (m
, op1
));
9528 m
= adjust_address (op0
, DFmode
, 8);
9529 emit_insn (gen_sse2_storehpd (m
, op1
));
9533 if (mode
!= V4SFmode
)
9534 op1
= gen_lowpart (V4SFmode
, op1
);
9535 m
= adjust_address (op0
, V2SFmode
, 0);
9536 emit_insn (gen_sse_storelps (m
, op1
));
9537 m
= adjust_address (op0
, V2SFmode
, 8);
9538 emit_insn (gen_sse_storehps (m
, op1
));
/* NOTE(review): lossy extraction — tmp's declaration is missing; code
   kept byte-identical.  Open-codes a push: decrement the stack pointer
   by the mode size, then store X to the new stack top.  */
9545 /* Expand a push in MODE. This is some mode for which we do not support
9546 proper push instructions, at least from the registers that we expect
9547 the value to live in. */
9550 ix86_expand_push (enum machine_mode mode
, rtx x
)
9554 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9555 GEN_INT (-GET_MODE_SIZE (mode
)),
9556 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9557 if (tmp
!= stack_pointer_rtx
)
9558 emit_move_insn (stack_pointer_rtx
, tmp
);
9560 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9561 emit_move_insn (tmp
, x
);
/* NOTE(review): lossy extraction — the return statements for each case
   are missing; code kept byte-identical.  Decides whether src1/src2 of a
   commutative operator should be swapped, in priority order: src1 should
   match dst, immediates come second, memory comes second.  */
9564 /* Helper function of ix86_fixup_binary_operands to canonicalize
9565 operand order. Returns true if the operands should be swapped. */
9568 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9571 rtx dst
= operands
[0];
9572 rtx src1
= operands
[1];
9573 rtx src2
= operands
[2];
9575 /* If the operation is not commutative, we can't do anything. */
9576 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9579 /* Highest priority is that src1 should match dst. */
9580 if (rtx_equal_p (dst
, src1
))
9582 if (rtx_equal_p (dst
, src2
))
9585 /* Next highest priority is that immediate constants come second. */
9586 if (immediate_operand (src2
, mode
))
9588 if (immediate_operand (src1
, mode
))
9591 /* Lowest priority is that memory references should come second. */
/* NOTE(review): lossy extraction — the swap itself, operands[]
   write-back and the return of dst are missing; code kept byte-identical.
   Canonicalizes a 3-operand binary op so it satisfies
   ix86_binary_operator_ok: swap commutative operands, keep at most one
   memory source, redirect a non-matching memory destination to a fresh
   register, and force a constant or non-matching memory src1 into a
   register.  */
9601 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9602 destination to use for the operation. If different from the true
9603 destination in operands[0], a copy operation will be required. */
9606 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9609 rtx dst
= operands
[0];
9610 rtx src1
= operands
[1];
9611 rtx src2
= operands
[2];
9613 /* Canonicalize operand order. */
9614 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9621 /* Both source operands cannot be in memory. */
9622 if (MEM_P (src1
) && MEM_P (src2
))
9624 /* Optimization: Only read from memory once. */
9625 if (rtx_equal_p (src1
, src2
))
9627 src2
= force_reg (mode
, src2
);
9631 src2
= force_reg (mode
, src2
);
9634 /* If the destination is memory, and we do not have matching source
9635 operands, do things in registers. */
9636 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9637 dst
= gen_reg_rtx (mode
);
9639 /* Source 1 cannot be a constant. */
9640 if (CONSTANT_P (src1
))
9641 src1
= force_reg (mode
, src1
);
9643 /* Source 1 cannot be a non-matching memory. */
9644 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9645 src1
= force_reg (mode
, src1
);
/* NOTE(review): lossy extraction — the rest of the header comment and
   the function's opening are missing; code kept byte-identical.  Wrapper
   around ix86_fixup_binary_operands that asserts no destination copy was
   needed (dst must remain operands[0]).  */
9652 /* Similarly, but assume that the destination has already been
9656 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9657 enum machine_mode mode
, rtx operands
[])
9659 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9660 gcc_assert (dst
== operands
[0]);
/* NOTE(review): lossy extraction — src1/src2 assignments and the
   non-clobber emit path are missing; code kept byte-identical.  Expands
   dst = src1 CODE src2: fixes up operands, then emits the SET — wrapped
   in a PARALLEL with a FLAGS_REG clobber except during reload (where
   only PLUS, which has a no-clobber lea form, is permitted) — and copies
   to the real destination if fixup redirected it.  */
9663 /* Attempt to expand a binary operator. Make the expansion closer to the
9664 actual machine, then just general_operand, which will allow 3 separate
9665 memory references (one output, two input) in a single insn. */
9668 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9671 rtx src1
, src2
, dst
, op
, clob
;
9673 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9677 /* Emit the instruction. */
9679 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9680 if (reload_in_progress
)
9682 /* Reload doesn't know about the flags register, and doesn't know that
9683 it doesn't want to clobber it. We can only do this with PLUS. */
9684 gcc_assert (code
== PLUS
);
9689 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9690 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9693 /* Fix up the destination if needed. */
9694 if (dst
!= operands
[0])
9695 emit_move_insn (operands
[0], dst
);
/* NOTE(review): lossy extraction — the TRUE/FALSE returns and the local
   swap after ix86_swap_binary_operands_p are missing; code kept
   byte-identical.  Predicate mirroring ix86_fixup_binary_operands:
   rejects two memory sources, a memory destination without a matching
   src1, a constant src1, and a non-matching memory src1.  */
9698 /* Return TRUE or FALSE depending on whether the binary operator meets the
9699 appropriate constraints. */
9702 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9705 rtx dst
= operands
[0];
9706 rtx src1
= operands
[1];
9707 rtx src2
= operands
[2];
9709 /* Both source operands cannot be in memory. */
9710 if (MEM_P (src1
) && MEM_P (src2
))
9713 /* Canonicalize operand order for commutative operators. */
9714 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9721 /* If the destination is memory, we must have a matching source operand. */
9722 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9725 /* Source 1 cannot be a constant. */
9726 if (CONSTANT_P (src1
))
9729 /* Source 1 cannot be a non-matching memory. */
9730 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9736 /* Attempt to expand a unary operator. Make the expansion closer to the
9737 actual machine, then just general_operand, which will allow 2 separate
9738 memory references (one output, one input) in a single insn. */
9741 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9744 int matching_memory
;
9745 rtx src
, dst
, op
, clob
;
9750 /* If the destination is memory, and we do not have matching source
9751 operands, do things in registers. */
9752 matching_memory
= 0;
9755 if (rtx_equal_p (dst
, src
))
9756 matching_memory
= 1;
9758 dst
= gen_reg_rtx (mode
);
9761 /* When source operand is memory, destination must match. */
9762 if (MEM_P (src
) && !matching_memory
)
9763 src
= force_reg (mode
, src
);
9765 /* Emit the instruction. */
9767 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9768 if (reload_in_progress
|| code
== NOT
)
9770 /* Reload doesn't know about the flags register, and doesn't know that
9771 it doesn't want to clobber it. */
9772 gcc_assert (code
== NOT
);
9777 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9778 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9781 /* Fix up the destination if needed. */
9782 if (dst
!= operands
[0])
9783 emit_move_insn (operands
[0], dst
);
9786 /* Return TRUE or FALSE depending on whether the unary operator meets the
9787 appropriate constraints. */
9790 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9791 enum machine_mode mode ATTRIBUTE_UNUSED
,
9792 rtx operands
[2] ATTRIBUTE_UNUSED
)
9794 /* If one of operands is memory, source and destination must match. */
9795 if ((MEM_P (operands
[0])
9796 || MEM_P (operands
[1]))
9797 && ! rtx_equal_p (operands
[0], operands
[1]))
9802 /* Post-reload splitter for converting an SF or DFmode value in an
9803 SSE register into an unsigned SImode. */
9806 ix86_split_convert_uns_si_sse (rtx operands
[])
9808 enum machine_mode vecmode
;
9809 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
9811 large
= operands
[1];
9812 zero_or_two31
= operands
[2];
9813 input
= operands
[3];
9814 two31
= operands
[4];
9815 vecmode
= GET_MODE (large
);
9816 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
9818 /* Load up the value into the low element. We must ensure that the other
9819 elements are valid floats -- zero is the easiest such value. */
9822 if (vecmode
== V4SFmode
)
9823 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
9825 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
9829 input
= gen_rtx_REG (vecmode
, REGNO (input
));
9830 emit_move_insn (value
, CONST0_RTX (vecmode
));
9831 if (vecmode
== V4SFmode
)
9832 emit_insn (gen_sse_movss (value
, value
, input
));
9834 emit_insn (gen_sse2_movsd (value
, value
, input
));
9837 emit_move_insn (large
, two31
);
9838 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
9840 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
9841 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
9843 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
9844 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
9846 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
9847 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
9849 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
9850 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
9852 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
9853 if (vecmode
== V4SFmode
)
9854 emit_insn (gen_sse2_cvttps2dq (x
, value
));
9856 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
9859 emit_insn (gen_xorv4si3 (value
, value
, large
));
9862 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9863 Expects the 64-bit DImode to be supplied in a pair of integral
9864 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9865 -mfpmath=sse, !optimize_size only. */
9868 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
9870 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
9871 rtx int_xmm
, fp_xmm
;
9872 rtx biases
, exponents
;
9875 int_xmm
= gen_reg_rtx (V4SImode
);
9876 if (TARGET_INTER_UNIT_MOVES
)
9877 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
9878 else if (TARGET_SSE_SPLIT_REGS
)
9880 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
9881 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
9885 x
= gen_reg_rtx (V2DImode
);
9886 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
9887 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
9890 x
= gen_rtx_CONST_VECTOR (V4SImode
,
9891 gen_rtvec (4, GEN_INT (0x43300000UL
),
9892 GEN_INT (0x45300000UL
),
9893 const0_rtx
, const0_rtx
));
9894 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
9896 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9897 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
9899 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9900 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9901 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9902 (0x1.0p84 + double(fp_value_hi_xmm)).
9903 Note these exponents differ by 32. */
9905 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
9907 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9908 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9909 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
9910 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
9911 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
9912 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
9913 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
9914 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
9915 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
9917 /* Add the upper and lower DFmode values together. */
9919 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
9922 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
9923 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
9924 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
9927 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
9930 /* Convert an unsigned SImode value into a DFmode. Only currently used
9931 for SSE, but applicable anywhere. */
9934 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
9936 REAL_VALUE_TYPE TWO31r
;
9939 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
9940 NULL
, 1, OPTAB_DIRECT
);
9942 fp
= gen_reg_rtx (DFmode
);
9943 emit_insn (gen_floatsidf2 (fp
, x
));
9945 real_ldexp (&TWO31r
, &dconst1
, 31);
9946 x
= const_double_from_real_value (TWO31r
, DFmode
);
9948 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
9950 emit_move_insn (target
, x
);
9953 /* Convert a signed DImode value into a DFmode. Only used for SSE in
9954 32-bit mode; otherwise we have a direct convert instruction. */
9957 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
9959 REAL_VALUE_TYPE TWO32r
;
9960 rtx fp_lo
, fp_hi
, x
;
9962 fp_lo
= gen_reg_rtx (DFmode
);
9963 fp_hi
= gen_reg_rtx (DFmode
);
9965 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
9967 real_ldexp (&TWO32r
, &dconst1
, 32);
9968 x
= const_double_from_real_value (TWO32r
, DFmode
);
9969 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
9971 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
9973 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
9976 emit_move_insn (target
, x
);
9979 /* Convert an unsigned SImode value into a SFmode, using only SSE.
9980 For x86_32, -mfpmath=sse, !optimize_size only. */
9982 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
9984 REAL_VALUE_TYPE ONE16r
;
9985 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
9987 real_ldexp (&ONE16r
, &dconst1
, 16);
9988 x
= const_double_from_real_value (ONE16r
, SFmode
);
9989 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
9990 NULL
, 0, OPTAB_DIRECT
);
9991 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
9992 NULL
, 0, OPTAB_DIRECT
);
9993 fp_hi
= gen_reg_rtx (SFmode
);
9994 fp_lo
= gen_reg_rtx (SFmode
);
9995 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
9996 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
9997 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
9999 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10001 if (!rtx_equal_p (target
, fp_hi
))
10002 emit_move_insn (target
, fp_hi
);
10005 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10006 then replicate the value for all elements of the vector
10010 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10017 v
= gen_rtvec (4, value
, value
, value
, value
);
10019 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10020 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10021 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10025 v
= gen_rtvec (2, value
, value
);
10027 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10028 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10031 gcc_unreachable ();
10035 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10036 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10037 true, then replicate the mask for all elements of the vector register.
10038 If INVERT is true, then create a mask excluding the sign bit. */
10041 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10043 enum machine_mode vec_mode
;
10044 HOST_WIDE_INT hi
, lo
;
10049 /* Find the sign bit, sign extended to 2*HWI. */
10050 if (mode
== SFmode
)
10051 lo
= 0x80000000, hi
= lo
< 0;
10052 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10053 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10055 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10058 lo
= ~lo
, hi
= ~hi
;
10060 /* Force this value into the low part of a fp vector constant. */
10061 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10062 mask
= gen_lowpart (mode
, mask
);
10064 v
= ix86_build_const_vector (mode
, vect
, mask
);
10065 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10066 return force_reg (vec_mode
, v
);
10069 /* Generate code for floating point ABS or NEG. */
10072 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10075 rtx mask
, set
, use
, clob
, dst
, src
;
10076 bool matching_memory
;
10077 bool use_sse
= false;
10078 bool vector_mode
= VECTOR_MODE_P (mode
);
10079 enum machine_mode elt_mode
= mode
;
10083 elt_mode
= GET_MODE_INNER (mode
);
10086 else if (TARGET_SSE_MATH
)
10087 use_sse
= SSE_FLOAT_MODE_P (mode
);
10089 /* NEG and ABS performed with SSE use bitwise mask operations.
10090 Create the appropriate mask now. */
10092 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10099 /* If the destination is memory, and we don't have matching source
10100 operands or we're using the x87, do things in registers. */
10101 matching_memory
= false;
10104 if (use_sse
&& rtx_equal_p (dst
, src
))
10105 matching_memory
= true;
10107 dst
= gen_reg_rtx (mode
);
10109 if (MEM_P (src
) && !matching_memory
)
10110 src
= force_reg (mode
, src
);
10114 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10115 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10120 set
= gen_rtx_fmt_e (code
, mode
, src
);
10121 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10124 use
= gen_rtx_USE (VOIDmode
, mask
);
10125 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10126 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10127 gen_rtvec (3, set
, use
, clob
)));
10133 if (dst
!= operands
[0])
10134 emit_move_insn (operands
[0], dst
);
10137 /* Expand a copysign operation. Special case operand 0 being a constant. */
10140 ix86_expand_copysign (rtx operands
[])
10142 enum machine_mode mode
, vmode
;
10143 rtx dest
, op0
, op1
, mask
, nmask
;
10145 dest
= operands
[0];
10149 mode
= GET_MODE (dest
);
10150 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10152 if (GET_CODE (op0
) == CONST_DOUBLE
)
10156 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10157 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10159 if (op0
== CONST0_RTX (mode
))
10160 op0
= CONST0_RTX (vmode
);
10163 if (mode
== SFmode
)
10164 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10165 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10167 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10168 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10171 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10173 if (mode
== SFmode
)
10174 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10176 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10180 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10181 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10183 if (mode
== SFmode
)
10184 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10186 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10190 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10191 be a constant, and so has already been expanded into a vector constant. */
10194 ix86_split_copysign_const (rtx operands
[])
10196 enum machine_mode mode
, vmode
;
10197 rtx dest
, op0
, op1
, mask
, x
;
10199 dest
= operands
[0];
10202 mask
= operands
[3];
10204 mode
= GET_MODE (dest
);
10205 vmode
= GET_MODE (mask
);
10207 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10208 x
= gen_rtx_AND (vmode
, dest
, mask
);
10209 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10211 if (op0
!= CONST0_RTX (vmode
))
10213 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10214 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10218 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10219 so we have to do two masks. */
10222 ix86_split_copysign_var (rtx operands
[])
10224 enum machine_mode mode
, vmode
;
10225 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10227 dest
= operands
[0];
10228 scratch
= operands
[1];
10231 nmask
= operands
[4];
10232 mask
= operands
[5];
10234 mode
= GET_MODE (dest
);
10235 vmode
= GET_MODE (mask
);
10237 if (rtx_equal_p (op0
, op1
))
10239 /* Shouldn't happen often (it's useless, obviously), but when it does
10240 we'd generate incorrect code if we continue below. */
10241 emit_move_insn (dest
, op0
);
10245 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10247 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10249 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10250 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10253 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10254 x
= gen_rtx_NOT (vmode
, dest
);
10255 x
= gen_rtx_AND (vmode
, x
, op0
);
10256 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10260 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10262 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10264 else /* alternative 2,4 */
10266 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10267 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10268 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10270 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10272 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10274 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10275 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10277 else /* alternative 3,4 */
10279 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10281 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10282 x
= gen_rtx_AND (vmode
, dest
, op0
);
10284 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10287 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10288 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10291 /* Return TRUE or FALSE depending on whether the first SET in INSN
10292 has source and destination with matching CC modes, and that the
10293 CC mode is at least as constrained as REQ_MODE. */
10296 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10299 enum machine_mode set_mode
;
10301 set
= PATTERN (insn
);
10302 if (GET_CODE (set
) == PARALLEL
)
10303 set
= XVECEXP (set
, 0, 0);
10304 gcc_assert (GET_CODE (set
) == SET
);
10305 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10307 set_mode
= GET_MODE (SET_DEST (set
));
10311 if (req_mode
!= CCNOmode
10312 && (req_mode
!= CCmode
10313 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10317 if (req_mode
== CCGCmode
)
10321 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10325 if (req_mode
== CCZmode
)
10332 gcc_unreachable ();
10335 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10338 /* Generate insn patterns to do an integer compare of OPERANDS. */
10341 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10343 enum machine_mode cmpmode
;
10346 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10347 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10349 /* This is very simple, but making the interface the same as in the
10350 FP case makes the rest of the code easier. */
10351 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10352 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10354 /* Return the test that should be put into the flags user, i.e.
10355 the bcc, scc, or cmov instruction. */
10356 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10359 /* Figure out whether to use ordered or unordered fp comparisons.
10360 Return the appropriate mode to use. */
10363 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10365 /* ??? In order to make all comparisons reversible, we do all comparisons
10366 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10367 all forms trapping and nontrapping comparisons, we can make inequality
10368 comparisons trapping again, since it results in better code when using
10369 FCOM based compares. */
10370 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10374 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10376 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10377 return ix86_fp_compare_mode (code
);
10380 /* Only zero flag is needed. */
10381 case EQ
: /* ZF=0 */
10382 case NE
: /* ZF!=0 */
10384 /* Codes needing carry flag. */
10385 case GEU
: /* CF=0 */
10386 case GTU
: /* CF=0 & ZF=0 */
10387 case LTU
: /* CF=1 */
10388 case LEU
: /* CF=1 | ZF=1 */
10390 /* Codes possibly doable only with sign flag when
10391 comparing against zero. */
10392 case GE
: /* SF=OF or SF=0 */
10393 case LT
: /* SF<>OF or SF=1 */
10394 if (op1
== const0_rtx
)
10397 /* For other cases Carry flag is not required. */
10399 /* Codes doable only with sign flag when comparing
10400 against zero, but we miss jump instruction for it
10401 so we need to use relational tests against overflow
10402 that thus needs to be zero. */
10403 case GT
: /* ZF=0 & SF=OF */
10404 case LE
: /* ZF=1 | SF<>OF */
10405 if (op1
== const0_rtx
)
10409 /* strcmp pattern do (use flags) and combine may ask us for proper
10414 gcc_unreachable ();
10418 /* Return the fixed registers used for condition codes. */
10421 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10428 /* If two condition code modes are compatible, return a condition code
10429 mode which is compatible with both. Otherwise, return
10432 static enum machine_mode
10433 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10438 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10441 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10442 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10448 gcc_unreachable ();
10470 /* These are only compatible with themselves, which we already
10476 /* Split comparison code CODE into comparisons we can do using branch
10477 instructions. BYPASS_CODE is comparison code for branch that will
10478 branch around FIRST_CODE and SECOND_CODE. If some of branches
10479 is not required, set value to UNKNOWN.
10480 We never require more than two branches. */
10483 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10484 enum rtx_code
*first_code
,
10485 enum rtx_code
*second_code
)
10487 *first_code
= code
;
10488 *bypass_code
= UNKNOWN
;
10489 *second_code
= UNKNOWN
;
10491 /* The fcomi comparison sets flags as follows:
10501 case GT
: /* GTU - CF=0 & ZF=0 */
10502 case GE
: /* GEU - CF=0 */
10503 case ORDERED
: /* PF=0 */
10504 case UNORDERED
: /* PF=1 */
10505 case UNEQ
: /* EQ - ZF=1 */
10506 case UNLT
: /* LTU - CF=1 */
10507 case UNLE
: /* LEU - CF=1 | ZF=1 */
10508 case LTGT
: /* EQ - ZF=0 */
10510 case LT
: /* LTU - CF=1 - fails on unordered */
10511 *first_code
= UNLT
;
10512 *bypass_code
= UNORDERED
;
10514 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10515 *first_code
= UNLE
;
10516 *bypass_code
= UNORDERED
;
10518 case EQ
: /* EQ - ZF=1 - fails on unordered */
10519 *first_code
= UNEQ
;
10520 *bypass_code
= UNORDERED
;
10522 case NE
: /* NE - ZF=0 - fails on unordered */
10523 *first_code
= LTGT
;
10524 *second_code
= UNORDERED
;
10526 case UNGE
: /* GEU - CF=0 - fails on unordered */
10528 *second_code
= UNORDERED
;
10530 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10532 *second_code
= UNORDERED
;
10535 gcc_unreachable ();
10537 if (!TARGET_IEEE_FP
)
10539 *second_code
= UNKNOWN
;
10540 *bypass_code
= UNKNOWN
;
10544 /* Return cost of comparison done fcom + arithmetics operations on AX.
10545 All following functions do use number of instructions as a cost metrics.
10546 In future this should be tweaked to compute bytes for optimize_size and
10547 take into account performance of various instructions on various CPUs. */
10549 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10551 if (!TARGET_IEEE_FP
)
10553 /* The cost of code output by ix86_expand_fp_compare. */
10577 gcc_unreachable ();
10581 /* Return cost of comparison done using fcomi operation.
10582 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10584 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10586 enum rtx_code bypass_code
, first_code
, second_code
;
10587 /* Return arbitrarily high cost when instruction is not supported - this
10588 prevents gcc from using it. */
10591 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10592 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10595 /* Return cost of comparison done using sahf operation.
10596 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10598 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10600 enum rtx_code bypass_code
, first_code
, second_code
;
10601 /* Return arbitrarily high cost when instruction is not preferred - this
10602 avoids gcc from using it. */
10603 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
10605 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10606 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10609 /* Compute cost of the comparison done using any method.
10610 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10612 ix86_fp_comparison_cost (enum rtx_code code
)
10614 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10617 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10618 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10620 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10621 if (min
> sahf_cost
)
10623 if (min
> fcomi_cost
)
10628 /* Return true if we should use an FCOMI instruction for this
10632 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10634 enum rtx_code swapped_code
= swap_condition (code
);
10636 return ((ix86_fp_comparison_cost (code
)
10637 == ix86_fp_comparison_fcomi_cost (code
))
10638 || (ix86_fp_comparison_cost (swapped_code
)
10639 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10642 /* Swap, force into registers, or otherwise massage the two operands
10643 to a fp comparison. The operands are updated in place; the new
10644 comparison code is returned. */
10646 static enum rtx_code
10647 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10649 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10650 rtx op0
= *pop0
, op1
= *pop1
;
10651 enum machine_mode op_mode
= GET_MODE (op0
);
10652 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10654 /* All of the unordered compare instructions only work on registers.
10655 The same is true of the fcomi compare instructions. The XFmode
10656 compare instructions require registers except when comparing
10657 against zero or when converting operand 1 from fixed point to
10661 && (fpcmp_mode
== CCFPUmode
10662 || (op_mode
== XFmode
10663 && ! (standard_80387_constant_p (op0
) == 1
10664 || standard_80387_constant_p (op1
) == 1)
10665 && GET_CODE (op1
) != FLOAT
)
10666 || ix86_use_fcomi_compare (code
)))
10668 op0
= force_reg (op_mode
, op0
);
10669 op1
= force_reg (op_mode
, op1
);
10673 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10674 things around if they appear profitable, otherwise force op0
10675 into a register. */
10677 if (standard_80387_constant_p (op0
) == 0
10679 && ! (standard_80387_constant_p (op1
) == 0
10683 tmp
= op0
, op0
= op1
, op1
= tmp
;
10684 code
= swap_condition (code
);
10688 op0
= force_reg (op_mode
, op0
);
10690 if (CONSTANT_P (op1
))
10692 int tmp
= standard_80387_constant_p (op1
);
10694 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10698 op1
= force_reg (op_mode
, op1
);
10701 op1
= force_reg (op_mode
, op1
);
10705 /* Try to rearrange the comparison to make it cheaper. */
10706 if (ix86_fp_comparison_cost (code
)
10707 > ix86_fp_comparison_cost (swap_condition (code
))
10708 && (REG_P (op1
) || !no_new_pseudos
))
10711 tmp
= op0
, op0
= op1
, op1
= tmp
;
10712 code
= swap_condition (code
);
10714 op0
= force_reg (op_mode
, op0
);
10722 /* Convert comparison codes we use to represent FP comparison to integer
10723 code that will result in proper branch. Return UNKNOWN if no such code
10727 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10756 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10759 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10760 rtx
*second_test
, rtx
*bypass_test
)
10762 enum machine_mode fpcmp_mode
, intcmp_mode
;
10764 int cost
= ix86_fp_comparison_cost (code
);
10765 enum rtx_code bypass_code
, first_code
, second_code
;
10767 fpcmp_mode
= ix86_fp_compare_mode (code
);
10768 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10771 *second_test
= NULL_RTX
;
10773 *bypass_test
= NULL_RTX
;
10775 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10777 /* Do fcomi/sahf based test when profitable. */
10778 if ((TARGET_CMOVE
|| TARGET_SAHF
)
10779 && (bypass_code
== UNKNOWN
|| bypass_test
)
10780 && (second_code
== UNKNOWN
|| second_test
)
10781 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10785 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10786 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10792 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10793 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10795 scratch
= gen_reg_rtx (HImode
);
10796 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10797 emit_insn (gen_x86_sahf_1 (scratch
));
10800 /* The FP codes work out to act like unsigned. */
10801 intcmp_mode
= fpcmp_mode
;
10803 if (bypass_code
!= UNKNOWN
)
10804 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10805 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10807 if (second_code
!= UNKNOWN
)
10808 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10809 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10814 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10815 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10816 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10818 scratch
= gen_reg_rtx (HImode
);
10819 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10821 /* In the unordered case, we have to check C2 for NaN's, which
10822 doesn't happen to work out to anything nice combination-wise.
10823 So do some bit twiddling on the value we've got in AH to come
10824 up with an appropriate set of condition codes. */
10826 intcmp_mode
= CCNOmode
;
10831 if (code
== GT
|| !TARGET_IEEE_FP
)
10833 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10838 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10839 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10840 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10841 intcmp_mode
= CCmode
;
10847 if (code
== LT
&& TARGET_IEEE_FP
)
10849 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10850 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10851 intcmp_mode
= CCmode
;
10856 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10862 if (code
== GE
|| !TARGET_IEEE_FP
)
10864 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10869 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10870 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10877 if (code
== LE
&& TARGET_IEEE_FP
)
10879 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10880 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10881 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10882 intcmp_mode
= CCmode
;
10887 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10893 if (code
== EQ
&& TARGET_IEEE_FP
)
10895 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10896 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10897 intcmp_mode
= CCmode
;
10902 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10909 if (code
== NE
&& TARGET_IEEE_FP
)
10911 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10912 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10918 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10924 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10928 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10933 gcc_unreachable ();
10937 /* Return the test that should be put into the flags user, i.e.
10938 the bcc, scc, or cmov instruction. */
10939 return gen_rtx_fmt_ee (code
, VOIDmode
,
10940 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10945 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10948 op0
= ix86_compare_op0
;
10949 op1
= ix86_compare_op1
;
10952 *second_test
= NULL_RTX
;
10954 *bypass_test
= NULL_RTX
;
10956 if (ix86_compare_emitted
)
10958 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10959 ix86_compare_emitted
= NULL_RTX
;
10961 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10962 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10963 second_test
, bypass_test
);
10965 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10970 /* Return true if the CODE will result in nontrivial jump sequence. */
10972 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10974 enum rtx_code bypass_code
, first_code
, second_code
;
10977 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10978 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10982 ix86_expand_branch (enum rtx_code code
, rtx label
)
10986 /* If we have emitted a compare insn, go straight to simple.
10987 ix86_expand_compare won't emit anything if ix86_compare_emitted
10989 if (ix86_compare_emitted
)
10992 switch (GET_MODE (ix86_compare_op0
))
10998 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10999 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11000 gen_rtx_LABEL_REF (VOIDmode
, label
),
11002 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11011 enum rtx_code bypass_code
, first_code
, second_code
;
11013 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11014 &ix86_compare_op1
);
11016 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11018 /* Check whether we will use the natural sequence with one jump. If
11019 so, we can expand jump early. Otherwise delay expansion by
11020 creating compound insn to not confuse optimizers. */
11021 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11024 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11025 gen_rtx_LABEL_REF (VOIDmode
, label
),
11026 pc_rtx
, NULL_RTX
, NULL_RTX
);
11030 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11031 ix86_compare_op0
, ix86_compare_op1
);
11032 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11033 gen_rtx_LABEL_REF (VOIDmode
, label
),
11035 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11037 use_fcomi
= ix86_use_fcomi_compare (code
);
11038 vec
= rtvec_alloc (3 + !use_fcomi
);
11039 RTVEC_ELT (vec
, 0) = tmp
;
11041 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11043 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11046 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11048 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11057 /* Expand DImode branch into multiple compare+branch. */
11059 rtx lo
[2], hi
[2], label2
;
11060 enum rtx_code code1
, code2
, code3
;
11061 enum machine_mode submode
;
11063 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11065 tmp
= ix86_compare_op0
;
11066 ix86_compare_op0
= ix86_compare_op1
;
11067 ix86_compare_op1
= tmp
;
11068 code
= swap_condition (code
);
11070 if (GET_MODE (ix86_compare_op0
) == DImode
)
11072 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11073 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11078 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11079 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11083 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11084 avoid two branches. This costs one extra insn, so disable when
11085 optimizing for size. */
11087 if ((code
== EQ
|| code
== NE
)
11089 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11094 if (hi
[1] != const0_rtx
)
11095 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11096 NULL_RTX
, 0, OPTAB_WIDEN
);
11099 if (lo
[1] != const0_rtx
)
11100 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11101 NULL_RTX
, 0, OPTAB_WIDEN
);
11103 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11104 NULL_RTX
, 0, OPTAB_WIDEN
);
11106 ix86_compare_op0
= tmp
;
11107 ix86_compare_op1
= const0_rtx
;
11108 ix86_expand_branch (code
, label
);
11112 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11113 op1 is a constant and the low word is zero, then we can just
11114 examine the high word. */
11116 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11119 case LT
: case LTU
: case GE
: case GEU
:
11120 ix86_compare_op0
= hi
[0];
11121 ix86_compare_op1
= hi
[1];
11122 ix86_expand_branch (code
, label
);
11128 /* Otherwise, we need two or three jumps. */
11130 label2
= gen_label_rtx ();
11133 code2
= swap_condition (code
);
11134 code3
= unsigned_condition (code
);
11138 case LT
: case GT
: case LTU
: case GTU
:
11141 case LE
: code1
= LT
; code2
= GT
; break;
11142 case GE
: code1
= GT
; code2
= LT
; break;
11143 case LEU
: code1
= LTU
; code2
= GTU
; break;
11144 case GEU
: code1
= GTU
; code2
= LTU
; break;
11146 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11147 case NE
: code2
= UNKNOWN
; break;
11150 gcc_unreachable ();
11155 * if (hi(a) < hi(b)) goto true;
11156 * if (hi(a) > hi(b)) goto false;
11157 * if (lo(a) < lo(b)) goto true;
11161 ix86_compare_op0
= hi
[0];
11162 ix86_compare_op1
= hi
[1];
11164 if (code1
!= UNKNOWN
)
11165 ix86_expand_branch (code1
, label
);
11166 if (code2
!= UNKNOWN
)
11167 ix86_expand_branch (code2
, label2
);
11169 ix86_compare_op0
= lo
[0];
11170 ix86_compare_op1
= lo
[1];
11171 ix86_expand_branch (code3
, label
);
11173 if (code2
!= UNKNOWN
)
11174 emit_label (label2
);
11179 gcc_unreachable ();
11183 /* Split branch based on floating point condition. */
11185 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11186 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11188 rtx second
, bypass
;
11189 rtx label
= NULL_RTX
;
11191 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11194 if (target2
!= pc_rtx
)
11197 code
= reverse_condition_maybe_unordered (code
);
11202 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11203 tmp
, &second
, &bypass
);
11205 /* Remove pushed operand from stack. */
11207 ix86_free_from_memory (GET_MODE (pushed
));
11209 if (split_branch_probability
>= 0)
11211 /* Distribute the probabilities across the jumps.
11212 Assume the BYPASS and SECOND to be always test
11214 probability
= split_branch_probability
;
11216 /* Value of 1 is low enough to make no need for probability
11217 to be updated. Later we may run some experiments and see
11218 if unordered values are more frequent in practice. */
11220 bypass_probability
= 1;
11222 second_probability
= 1;
11224 if (bypass
!= NULL_RTX
)
11226 label
= gen_label_rtx ();
11227 i
= emit_jump_insn (gen_rtx_SET
11229 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11231 gen_rtx_LABEL_REF (VOIDmode
,
11234 if (bypass_probability
>= 0)
11236 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11237 GEN_INT (bypass_probability
),
11240 i
= emit_jump_insn (gen_rtx_SET
11242 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11243 condition
, target1
, target2
)));
11244 if (probability
>= 0)
11246 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11247 GEN_INT (probability
),
11249 if (second
!= NULL_RTX
)
11251 i
= emit_jump_insn (gen_rtx_SET
11253 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11255 if (second_probability
>= 0)
11257 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11258 GEN_INT (second_probability
),
11261 if (label
!= NULL_RTX
)
11262 emit_label (label
);
11266 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11268 rtx ret
, tmp
, tmpreg
, equiv
;
11269 rtx second_test
, bypass_test
;
11271 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11272 return 0; /* FAIL */
11274 gcc_assert (GET_MODE (dest
) == QImode
);
11276 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11277 PUT_MODE (ret
, QImode
);
11282 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11283 if (bypass_test
|| second_test
)
11285 rtx test
= second_test
;
11287 rtx tmp2
= gen_reg_rtx (QImode
);
11290 gcc_assert (!second_test
);
11291 test
= bypass_test
;
11293 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11295 PUT_MODE (test
, QImode
);
11296 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11299 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11301 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11304 /* Attach a REG_EQUAL note describing the comparison result. */
11305 if (ix86_compare_op0
&& ix86_compare_op1
)
11307 equiv
= simplify_gen_relational (code
, QImode
,
11308 GET_MODE (ix86_compare_op0
),
11309 ix86_compare_op0
, ix86_compare_op1
);
11310 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11313 return 1; /* DONE */
11316 /* Expand comparison setting or clearing carry flag. Return true when
11317 successful and set pop for the operation. */
11319 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11321 enum machine_mode mode
=
11322 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11324 /* Do not handle DImode compares that go through special path. Also we can't
11325 deal with FP compares yet. This is possible to add. */
11326 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11328 if (FLOAT_MODE_P (mode
))
11330 rtx second_test
= NULL
, bypass_test
= NULL
;
11331 rtx compare_op
, compare_seq
;
11333 /* Shortcut: following common codes never translate into carry flag compares. */
11334 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11335 || code
== ORDERED
|| code
== UNORDERED
)
11338 /* These comparisons require zero flag; swap operands so they won't. */
11339 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11340 && !TARGET_IEEE_FP
)
11345 code
= swap_condition (code
);
11348 /* Try to expand the comparison and verify that we end up with carry flag
11349 based comparison. This is fails to be true only when we decide to expand
11350 comparison using arithmetic that is not too common scenario. */
11352 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11353 &second_test
, &bypass_test
);
11354 compare_seq
= get_insns ();
11357 if (second_test
|| bypass_test
)
11359 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11360 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11361 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11363 code
= GET_CODE (compare_op
);
11364 if (code
!= LTU
&& code
!= GEU
)
11366 emit_insn (compare_seq
);
11370 if (!INTEGRAL_MODE_P (mode
))
11378 /* Convert a==0 into (unsigned)a<1. */
11381 if (op1
!= const0_rtx
)
11384 code
= (code
== EQ
? LTU
: GEU
);
11387 /* Convert a>b into b<a or a>=b-1. */
11390 if (CONST_INT_P (op1
))
11392 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11393 /* Bail out on overflow. We still can swap operands but that
11394 would force loading of the constant into register. */
11395 if (op1
== const0_rtx
11396 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11398 code
= (code
== GTU
? GEU
: LTU
);
11405 code
= (code
== GTU
? LTU
: GEU
);
11409 /* Convert a>=0 into (unsigned)a<0x80000000. */
11412 if (mode
== DImode
|| op1
!= const0_rtx
)
11414 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11415 code
= (code
== LT
? GEU
: LTU
);
11419 if (mode
== DImode
|| op1
!= constm1_rtx
)
11421 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11422 code
= (code
== LE
? GEU
: LTU
);
11428 /* Swapping operands may cause constant to appear as first operand. */
11429 if (!nonimmediate_operand (op0
, VOIDmode
))
11431 if (no_new_pseudos
)
11433 op0
= force_reg (mode
, op0
);
11435 ix86_compare_op0
= op0
;
11436 ix86_compare_op1
= op1
;
11437 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11438 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11443 ix86_expand_int_movcc (rtx operands
[])
11445 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11446 rtx compare_seq
, compare_op
;
11447 rtx second_test
, bypass_test
;
11448 enum machine_mode mode
= GET_MODE (operands
[0]);
11449 bool sign_bit_compare_p
= false;;
11452 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11453 compare_seq
= get_insns ();
11456 compare_code
= GET_CODE (compare_op
);
11458 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11459 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11460 sign_bit_compare_p
= true;
11462 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11463 HImode insns, we'd be swallowed in word prefix ops. */
11465 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11466 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11467 && CONST_INT_P (operands
[2])
11468 && CONST_INT_P (operands
[3]))
11470 rtx out
= operands
[0];
11471 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11472 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11473 HOST_WIDE_INT diff
;
11476 /* Sign bit compares are better done using shifts than we do by using
11478 if (sign_bit_compare_p
11479 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11480 ix86_compare_op1
, &compare_op
))
11482 /* Detect overlap between destination and compare sources. */
11485 if (!sign_bit_compare_p
)
11487 bool fpcmp
= false;
11489 compare_code
= GET_CODE (compare_op
);
11491 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11492 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11495 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11498 /* To simplify rest of code, restrict to the GEU case. */
11499 if (compare_code
== LTU
)
11501 HOST_WIDE_INT tmp
= ct
;
11504 compare_code
= reverse_condition (compare_code
);
11505 code
= reverse_condition (code
);
11510 PUT_CODE (compare_op
,
11511 reverse_condition_maybe_unordered
11512 (GET_CODE (compare_op
)));
11514 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11518 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11519 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11520 tmp
= gen_reg_rtx (mode
);
11522 if (mode
== DImode
)
11523 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11525 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11529 if (code
== GT
|| code
== GE
)
11530 code
= reverse_condition (code
);
11533 HOST_WIDE_INT tmp
= ct
;
11538 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11539 ix86_compare_op1
, VOIDmode
, 0, -1);
11552 tmp
= expand_simple_binop (mode
, PLUS
,
11554 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11565 tmp
= expand_simple_binop (mode
, IOR
,
11567 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11569 else if (diff
== -1 && ct
)
11579 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11581 tmp
= expand_simple_binop (mode
, PLUS
,
11582 copy_rtx (tmp
), GEN_INT (cf
),
11583 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11591 * andl cf - ct, dest
11601 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11604 tmp
= expand_simple_binop (mode
, AND
,
11606 gen_int_mode (cf
- ct
, mode
),
11607 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11609 tmp
= expand_simple_binop (mode
, PLUS
,
11610 copy_rtx (tmp
), GEN_INT (ct
),
11611 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11614 if (!rtx_equal_p (tmp
, out
))
11615 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11617 return 1; /* DONE */
11623 tmp
= ct
, ct
= cf
, cf
= tmp
;
11625 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11627 /* We may be reversing unordered compare to normal compare, that
11628 is not valid in general (we may convert non-trapping condition
11629 to trapping one), however on i386 we currently emit all
11630 comparisons unordered. */
11631 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11632 code
= reverse_condition_maybe_unordered (code
);
11636 compare_code
= reverse_condition (compare_code
);
11637 code
= reverse_condition (code
);
11641 compare_code
= UNKNOWN
;
11642 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11643 && CONST_INT_P (ix86_compare_op1
))
11645 if (ix86_compare_op1
== const0_rtx
11646 && (code
== LT
|| code
== GE
))
11647 compare_code
= code
;
11648 else if (ix86_compare_op1
== constm1_rtx
)
11652 else if (code
== GT
)
11657 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11658 if (compare_code
!= UNKNOWN
11659 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11660 && (cf
== -1 || ct
== -1))
11662 /* If lea code below could be used, only optimize
11663 if it results in a 2 insn sequence. */
11665 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11666 || diff
== 3 || diff
== 5 || diff
== 9)
11667 || (compare_code
== LT
&& ct
== -1)
11668 || (compare_code
== GE
&& cf
== -1))
11671 * notl op1 (if necessary)
11679 code
= reverse_condition (code
);
11682 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11683 ix86_compare_op1
, VOIDmode
, 0, -1);
11685 out
= expand_simple_binop (mode
, IOR
,
11687 out
, 1, OPTAB_DIRECT
);
11688 if (out
!= operands
[0])
11689 emit_move_insn (operands
[0], out
);
11691 return 1; /* DONE */
11696 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11697 || diff
== 3 || diff
== 5 || diff
== 9)
11698 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11700 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11706 * lea cf(dest*(ct-cf)),dest
11710 * This also catches the degenerate setcc-only case.
11716 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11717 ix86_compare_op1
, VOIDmode
, 0, 1);
11720 /* On x86_64 the lea instruction operates on Pmode, so we need
11721 to get arithmetics done in proper mode to match. */
11723 tmp
= copy_rtx (out
);
11727 out1
= copy_rtx (out
);
11728 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11732 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11738 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11741 if (!rtx_equal_p (tmp
, out
))
11744 out
= force_operand (tmp
, copy_rtx (out
));
11746 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11748 if (!rtx_equal_p (out
, operands
[0]))
11749 emit_move_insn (operands
[0], copy_rtx (out
));
11751 return 1; /* DONE */
11755 * General case: Jumpful:
11756 * xorl dest,dest cmpl op1, op2
11757 * cmpl op1, op2 movl ct, dest
11758 * setcc dest jcc 1f
11759 * decl dest movl cf, dest
11760 * andl (cf-ct),dest 1:
11763 * Size 20. Size 14.
11765 * This is reasonably steep, but branch mispredict costs are
11766 * high on modern cpus, so consider failing only if optimizing
11770 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11771 && BRANCH_COST
>= 2)
11777 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11778 /* We may be reversing unordered compare to normal compare,
11779 that is not valid in general (we may convert non-trapping
11780 condition to trapping one), however on i386 we currently
11781 emit all comparisons unordered. */
11782 code
= reverse_condition_maybe_unordered (code
);
11785 code
= reverse_condition (code
);
11786 if (compare_code
!= UNKNOWN
)
11787 compare_code
= reverse_condition (compare_code
);
11791 if (compare_code
!= UNKNOWN
)
11793 /* notl op1 (if needed)
11798 For x < 0 (resp. x <= -1) there will be no notl,
11799 so if possible swap the constants to get rid of the
11801 True/false will be -1/0 while code below (store flag
11802 followed by decrement) is 0/-1, so the constants need
11803 to be exchanged once more. */
11805 if (compare_code
== GE
|| !cf
)
11807 code
= reverse_condition (code
);
11812 HOST_WIDE_INT tmp
= cf
;
11817 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11818 ix86_compare_op1
, VOIDmode
, 0, -1);
11822 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11823 ix86_compare_op1
, VOIDmode
, 0, 1);
11825 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11826 copy_rtx (out
), 1, OPTAB_DIRECT
);
11829 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11830 gen_int_mode (cf
- ct
, mode
),
11831 copy_rtx (out
), 1, OPTAB_DIRECT
);
11833 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11834 copy_rtx (out
), 1, OPTAB_DIRECT
);
11835 if (!rtx_equal_p (out
, operands
[0]))
11836 emit_move_insn (operands
[0], copy_rtx (out
));
11838 return 1; /* DONE */
11842 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11844 /* Try a few things more with specific constants and a variable. */
11847 rtx var
, orig_out
, out
, tmp
;
11849 if (BRANCH_COST
<= 2)
11850 return 0; /* FAIL */
11852 /* If one of the two operands is an interesting constant, load a
11853 constant with the above and mask it in with a logical operation. */
11855 if (CONST_INT_P (operands
[2]))
11858 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11859 operands
[3] = constm1_rtx
, op
= and_optab
;
11860 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11861 operands
[3] = const0_rtx
, op
= ior_optab
;
11863 return 0; /* FAIL */
11865 else if (CONST_INT_P (operands
[3]))
11868 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11869 operands
[2] = constm1_rtx
, op
= and_optab
;
11870 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11871 operands
[2] = const0_rtx
, op
= ior_optab
;
11873 return 0; /* FAIL */
11876 return 0; /* FAIL */
11878 orig_out
= operands
[0];
11879 tmp
= gen_reg_rtx (mode
);
11882 /* Recurse to get the constant loaded. */
11883 if (ix86_expand_int_movcc (operands
) == 0)
11884 return 0; /* FAIL */
11886 /* Mask in the interesting variable. */
11887 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11889 if (!rtx_equal_p (out
, orig_out
))
11890 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11892 return 1; /* DONE */
11896 * For comparison with above,
11906 if (! nonimmediate_operand (operands
[2], mode
))
11907 operands
[2] = force_reg (mode
, operands
[2]);
11908 if (! nonimmediate_operand (operands
[3], mode
))
11909 operands
[3] = force_reg (mode
, operands
[3]);
11911 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11913 rtx tmp
= gen_reg_rtx (mode
);
11914 emit_move_insn (tmp
, operands
[3]);
11917 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11919 rtx tmp
= gen_reg_rtx (mode
);
11920 emit_move_insn (tmp
, operands
[2]);
11924 if (! register_operand (operands
[2], VOIDmode
)
11926 || ! register_operand (operands
[3], VOIDmode
)))
11927 operands
[2] = force_reg (mode
, operands
[2]);
11930 && ! register_operand (operands
[3], VOIDmode
))
11931 operands
[3] = force_reg (mode
, operands
[3]);
11933 emit_insn (compare_seq
);
11934 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11935 gen_rtx_IF_THEN_ELSE (mode
,
11936 compare_op
, operands
[2],
11939 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11940 gen_rtx_IF_THEN_ELSE (mode
,
11942 copy_rtx (operands
[3]),
11943 copy_rtx (operands
[0]))));
11945 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11946 gen_rtx_IF_THEN_ELSE (mode
,
11948 copy_rtx (operands
[2]),
11949 copy_rtx (operands
[0]))));
11951 return 1; /* DONE */
11954 /* Swap, force into registers, or otherwise massage the two operands
11955 to an sse comparison with a mask result. Thus we differ a bit from
11956 ix86_prepare_fp_compare_args which expects to produce a flags result.
11958 The DEST operand exists to help determine whether to commute commutative
11959 operators. The POP0/POP1 operands are updated in place. The new
11960 comparison code is returned, or UNKNOWN if not implementable. */
11962 static enum rtx_code
11963 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11964 rtx
*pop0
, rtx
*pop1
)
11972 /* We have no LTGT as an operator. We could implement it with
11973 NE & ORDERED, but this requires an extra temporary. It's
11974 not clear that it's worth it. */
11981 /* These are supported directly. */
11988 /* For commutative operators, try to canonicalize the destination
11989 operand to be first in the comparison - this helps reload to
11990 avoid extra moves. */
11991 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11999 /* These are not supported directly. Swap the comparison operands
12000 to transform into something that is supported. */
12004 code
= swap_condition (code
);
12008 gcc_unreachable ();
12014 /* Detect conditional moves that exactly match min/max operational
12015 semantics. Note that this is IEEE safe, as long as we don't
12016 interchange the operands.
12018 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12019 and TRUE if the operation is successful and instructions are emitted. */
12022 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12023 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12025 enum machine_mode mode
;
12031 else if (code
== UNGE
)
12034 if_true
= if_false
;
12040 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12042 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12047 mode
= GET_MODE (dest
);
12049 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12050 but MODE may be a vector mode and thus not appropriate. */
12051 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12053 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12056 if_true
= force_reg (mode
, if_true
);
12057 v
= gen_rtvec (2, if_true
, if_false
);
12058 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12062 code
= is_min
? SMIN
: SMAX
;
12063 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12066 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12070 /* Expand an sse vector comparison. Return the register with the result. */
12073 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12074 rtx op_true
, rtx op_false
)
12076 enum machine_mode mode
= GET_MODE (dest
);
12079 cmp_op0
= force_reg (mode
, cmp_op0
);
12080 if (!nonimmediate_operand (cmp_op1
, mode
))
12081 cmp_op1
= force_reg (mode
, cmp_op1
);
12084 || reg_overlap_mentioned_p (dest
, op_true
)
12085 || reg_overlap_mentioned_p (dest
, op_false
))
12086 dest
= gen_reg_rtx (mode
);
12088 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12089 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12094 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12095 operations. This is used for both scalar and vector conditional moves. */
12098 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12100 enum machine_mode mode
= GET_MODE (dest
);
12103 if (op_false
== CONST0_RTX (mode
))
12105 op_true
= force_reg (mode
, op_true
);
12106 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12107 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12109 else if (op_true
== CONST0_RTX (mode
))
12111 op_false
= force_reg (mode
, op_false
);
12112 x
= gen_rtx_NOT (mode
, cmp
);
12113 x
= gen_rtx_AND (mode
, x
, op_false
);
12114 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12118 op_true
= force_reg (mode
, op_true
);
12119 op_false
= force_reg (mode
, op_false
);
12121 t2
= gen_reg_rtx (mode
);
12123 t3
= gen_reg_rtx (mode
);
12127 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12128 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12130 x
= gen_rtx_NOT (mode
, cmp
);
12131 x
= gen_rtx_AND (mode
, x
, op_false
);
12132 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12134 x
= gen_rtx_IOR (mode
, t3
, t2
);
12135 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12139 /* Expand a floating-point conditional move. Return true if successful. */
12142 ix86_expand_fp_movcc (rtx operands
[])
12144 enum machine_mode mode
= GET_MODE (operands
[0]);
12145 enum rtx_code code
= GET_CODE (operands
[1]);
12146 rtx tmp
, compare_op
, second_test
, bypass_test
;
12148 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12150 enum machine_mode cmode
;
12152 /* Since we've no cmove for sse registers, don't force bad register
12153 allocation just to gain access to it. Deny movcc when the
12154 comparison mode doesn't match the move mode. */
12155 cmode
= GET_MODE (ix86_compare_op0
);
12156 if (cmode
== VOIDmode
)
12157 cmode
= GET_MODE (ix86_compare_op1
);
12161 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12163 &ix86_compare_op1
);
12164 if (code
== UNKNOWN
)
12167 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12168 ix86_compare_op1
, operands
[2],
12172 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12173 ix86_compare_op1
, operands
[2], operands
[3]);
12174 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12178 /* The floating point conditional move instructions don't directly
12179 support conditions resulting from a signed integer comparison. */
12181 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12183 /* The floating point conditional move instructions don't directly
12184 support signed integer comparisons. */
12186 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12188 gcc_assert (!second_test
&& !bypass_test
);
12189 tmp
= gen_reg_rtx (QImode
);
12190 ix86_expand_setcc (code
, tmp
);
12192 ix86_compare_op0
= tmp
;
12193 ix86_compare_op1
= const0_rtx
;
12194 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12196 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12198 tmp
= gen_reg_rtx (mode
);
12199 emit_move_insn (tmp
, operands
[3]);
12202 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12204 tmp
= gen_reg_rtx (mode
);
12205 emit_move_insn (tmp
, operands
[2]);
12209 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12210 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12211 operands
[2], operands
[3])));
12213 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12214 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12215 operands
[3], operands
[0])));
12217 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12218 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12219 operands
[2], operands
[0])));
12224 /* Expand a floating-point vector conditional move; a vcond operation
12225 rather than a movcc operation. */
12228 ix86_expand_fp_vcond (rtx operands
[])
12230 enum rtx_code code
= GET_CODE (operands
[3]);
12233 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12234 &operands
[4], &operands
[5]);
12235 if (code
== UNKNOWN
)
12238 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12239 operands
[5], operands
[1], operands
[2]))
12242 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12243 operands
[1], operands
[2]);
12244 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12248 /* Expand a signed integral vector conditional move. */
12251 ix86_expand_int_vcond (rtx operands
[])
12253 enum machine_mode mode
= GET_MODE (operands
[0]);
12254 enum rtx_code code
= GET_CODE (operands
[3]);
12255 bool negate
= false;
12258 cop0
= operands
[4];
12259 cop1
= operands
[5];
12261 /* Canonicalize the comparison to EQ, GT, GTU. */
12272 code
= reverse_condition (code
);
12278 code
= reverse_condition (code
);
12284 code
= swap_condition (code
);
12285 x
= cop0
, cop0
= cop1
, cop1
= x
;
12289 gcc_unreachable ();
12292 /* Unsigned parallel compare is not supported by the hardware. Play some
12293 tricks to turn this into a signed comparison against 0. */
12296 cop0
= force_reg (mode
, cop0
);
12304 /* Perform a parallel modulo subtraction. */
12305 t1
= gen_reg_rtx (mode
);
12306 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12308 /* Extract the original sign bit of op0. */
12309 mask
= GEN_INT (-0x80000000);
12310 mask
= gen_rtx_CONST_VECTOR (mode
,
12311 gen_rtvec (4, mask
, mask
, mask
, mask
));
12312 mask
= force_reg (mode
, mask
);
12313 t2
= gen_reg_rtx (mode
);
12314 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12316 /* XOR it back into the result of the subtraction. This results
12317 in the sign bit set iff we saw unsigned underflow. */
12318 x
= gen_reg_rtx (mode
);
12319 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12327 /* Perform a parallel unsigned saturating subtraction. */
12328 x
= gen_reg_rtx (mode
);
12329 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12330 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12337 gcc_unreachable ();
12341 cop1
= CONST0_RTX (mode
);
12344 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12345 operands
[1+negate
], operands
[2-negate
]);
12347 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12348 operands
[2-negate
]);
12352 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12353 true if we should do zero extension, else sign extension. HIGH_P is
12354 true if we want the N/2 high elements, else the low elements. */
12357 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12359 enum machine_mode imode
= GET_MODE (operands
[1]);
12360 rtx (*unpack
)(rtx
, rtx
, rtx
);
12367 unpack
= gen_vec_interleave_highv16qi
;
12369 unpack
= gen_vec_interleave_lowv16qi
;
12373 unpack
= gen_vec_interleave_highv8hi
;
12375 unpack
= gen_vec_interleave_lowv8hi
;
12379 unpack
= gen_vec_interleave_highv4si
;
12381 unpack
= gen_vec_interleave_lowv4si
;
12384 gcc_unreachable ();
12387 dest
= gen_lowpart (imode
, operands
[0]);
12390 se
= force_reg (imode
, CONST0_RTX (imode
));
12392 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12393 operands
[1], pc_rtx
, pc_rtx
);
12395 emit_insn (unpack (dest
, operands
[1], se
));
12398 /* Expand conditional increment or decrement using adb/sbb instructions.
12399 The default case using setcc followed by the conditional move can be
12400 done by generic code. */
12402 ix86_expand_int_addcc (rtx operands
[])
12404 enum rtx_code code
= GET_CODE (operands
[1]);
12406 rtx val
= const0_rtx
;
12407 bool fpcmp
= false;
12408 enum machine_mode mode
= GET_MODE (operands
[0]);
12410 if (operands
[3] != const1_rtx
12411 && operands
[3] != constm1_rtx
)
12413 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12414 ix86_compare_op1
, &compare_op
))
12416 code
= GET_CODE (compare_op
);
12418 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12419 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12422 code
= ix86_fp_compare_code_to_integer (code
);
12429 PUT_CODE (compare_op
,
12430 reverse_condition_maybe_unordered
12431 (GET_CODE (compare_op
)));
12433 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12435 PUT_MODE (compare_op
, mode
);
12437 /* Construct either adc or sbb insn. */
12438 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12440 switch (GET_MODE (operands
[0]))
12443 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12446 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12449 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12452 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12455 gcc_unreachable ();
12460 switch (GET_MODE (operands
[0]))
12463 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12466 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12469 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12472 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12475 gcc_unreachable ();
12478 return 1; /* DONE */
12482 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12483 works for floating pointer parameters and nonoffsetable memories.
12484 For pushes, it returns just stack offsets; the values will be saved
12485 in the right order. Maximally three parts are generated. */
12488 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12493 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12495 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12497 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12498 gcc_assert (size
>= 2 && size
<= 3);
12500 /* Optimize constant pool reference to immediates. This is used by fp
12501 moves, that force all constants to memory to allow combining. */
12502 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12504 rtx tmp
= maybe_get_pool_constant (operand
);
12509 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12511 /* The only non-offsetable memories we handle are pushes. */
12512 int ok
= push_operand (operand
, VOIDmode
);
12516 operand
= copy_rtx (operand
);
12517 PUT_MODE (operand
, Pmode
);
12518 parts
[0] = parts
[1] = parts
[2] = operand
;
12522 if (GET_CODE (operand
) == CONST_VECTOR
)
12524 enum machine_mode imode
= int_mode_for_mode (mode
);
12525 /* Caution: if we looked through a constant pool memory above,
12526 the operand may actually have a different mode now. That's
12527 ok, since we want to pun this all the way back to an integer. */
12528 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12529 gcc_assert (operand
!= NULL
);
12535 if (mode
== DImode
)
12536 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12539 if (REG_P (operand
))
12541 gcc_assert (reload_completed
);
12542 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12543 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12545 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12547 else if (offsettable_memref_p (operand
))
12549 operand
= adjust_address (operand
, SImode
, 0);
12550 parts
[0] = operand
;
12551 parts
[1] = adjust_address (operand
, SImode
, 4);
12553 parts
[2] = adjust_address (operand
, SImode
, 8);
12555 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12560 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12564 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12565 parts
[2] = gen_int_mode (l
[2], SImode
);
12568 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12571 gcc_unreachable ();
12573 parts
[1] = gen_int_mode (l
[1], SImode
);
12574 parts
[0] = gen_int_mode (l
[0], SImode
);
12577 gcc_unreachable ();
12582 if (mode
== TImode
)
12583 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12584 if (mode
== XFmode
|| mode
== TFmode
)
12586 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12587 if (REG_P (operand
))
12589 gcc_assert (reload_completed
);
12590 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12591 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12593 else if (offsettable_memref_p (operand
))
12595 operand
= adjust_address (operand
, DImode
, 0);
12596 parts
[0] = operand
;
12597 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12599 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12604 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12605 real_to_target (l
, &r
, mode
);
12607 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12608 if (HOST_BITS_PER_WIDE_INT
>= 64)
12611 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12612 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12615 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12617 if (upper_mode
== SImode
)
12618 parts
[1] = gen_int_mode (l
[2], SImode
);
12619 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12622 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12623 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12626 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12629 gcc_unreachable ();
12636 /* Emit insns to perform a move or push of DI, DF, and XF values.
12637 Return false when normal moves are needed; true when all required
12638 insns have been emitted. Operands 2-4 contain the input values
12639 int the correct order; operands 5-7 contain the output values. */
12642 ix86_split_long_move (rtx operands
[])
12647 int collisions
= 0;
12648 enum machine_mode mode
= GET_MODE (operands
[0]);
12650 /* The DFmode expanders may ask us to move double.
12651 For 64bit target this is single move. By hiding the fact
12652 here we simplify i386.md splitters. */
12653 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12655 /* Optimize constant pool reference to immediates. This is used by
12656 fp moves, that force all constants to memory to allow combining. */
12658 if (MEM_P (operands
[1])
12659 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12660 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12661 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12662 if (push_operand (operands
[0], VOIDmode
))
12664 operands
[0] = copy_rtx (operands
[0]);
12665 PUT_MODE (operands
[0], Pmode
);
12668 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12669 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12670 emit_move_insn (operands
[0], operands
[1]);
12674 /* The only non-offsettable memory we handle is push. */
12675 if (push_operand (operands
[0], VOIDmode
))
12678 gcc_assert (!MEM_P (operands
[0])
12679 || offsettable_memref_p (operands
[0]));
12681 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12682 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12684 /* When emitting push, take care for source operands on the stack. */
12685 if (push
&& MEM_P (operands
[1])
12686 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12689 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12690 XEXP (part
[1][2], 0));
12691 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12692 XEXP (part
[1][1], 0));
12695 /* We need to do copy in the right order in case an address register
12696 of the source overlaps the destination. */
12697 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12699 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12701 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12704 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12707 /* Collision in the middle part can be handled by reordering. */
12708 if (collisions
== 1 && nparts
== 3
12709 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12712 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12713 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12716 /* If there are more collisions, we can't handle it by reordering.
12717 Do an lea to the last part and use only one colliding move. */
12718 else if (collisions
> 1)
12724 base
= part
[0][nparts
- 1];
12726 /* Handle the case when the last part isn't valid for lea.
12727 Happens in 64-bit mode storing the 12-byte XFmode. */
12728 if (GET_MODE (base
) != Pmode
)
12729 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12731 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12732 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12733 part
[1][1] = replace_equiv_address (part
[1][1],
12734 plus_constant (base
, UNITS_PER_WORD
));
12736 part
[1][2] = replace_equiv_address (part
[1][2],
12737 plus_constant (base
, 8));
12747 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12748 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12749 emit_move_insn (part
[0][2], part
[1][2]);
12754 /* In 64bit mode we don't have 32bit push available. In case this is
12755 register, it is OK - we will just use larger counterpart. We also
12756 retype memory - these comes from attempt to avoid REX prefix on
12757 moving of second half of TFmode value. */
12758 if (GET_MODE (part
[1][1]) == SImode
)
12760 switch (GET_CODE (part
[1][1]))
12763 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12767 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12771 gcc_unreachable ();
12774 if (GET_MODE (part
[1][0]) == SImode
)
12775 part
[1][0] = part
[1][1];
12778 emit_move_insn (part
[0][1], part
[1][1]);
12779 emit_move_insn (part
[0][0], part
[1][0]);
12783 /* Choose correct order to not overwrite the source before it is copied. */
12784 if ((REG_P (part
[0][0])
12785 && REG_P (part
[1][1])
12786 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12788 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12790 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12794 operands
[2] = part
[0][2];
12795 operands
[3] = part
[0][1];
12796 operands
[4] = part
[0][0];
12797 operands
[5] = part
[1][2];
12798 operands
[6] = part
[1][1];
12799 operands
[7] = part
[1][0];
12803 operands
[2] = part
[0][1];
12804 operands
[3] = part
[0][0];
12805 operands
[5] = part
[1][1];
12806 operands
[6] = part
[1][0];
12813 operands
[2] = part
[0][0];
12814 operands
[3] = part
[0][1];
12815 operands
[4] = part
[0][2];
12816 operands
[5] = part
[1][0];
12817 operands
[6] = part
[1][1];
12818 operands
[7] = part
[1][2];
12822 operands
[2] = part
[0][0];
12823 operands
[3] = part
[0][1];
12824 operands
[5] = part
[1][0];
12825 operands
[6] = part
[1][1];
12829 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12832 if (CONST_INT_P (operands
[5])
12833 && operands
[5] != const0_rtx
12834 && REG_P (operands
[2]))
12836 if (CONST_INT_P (operands
[6])
12837 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12838 operands
[6] = operands
[2];
12841 && CONST_INT_P (operands
[7])
12842 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12843 operands
[7] = operands
[2];
12847 && CONST_INT_P (operands
[6])
12848 && operands
[6] != const0_rtx
12849 && REG_P (operands
[3])
12850 && CONST_INT_P (operands
[7])
12851 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12852 operands
[7] = operands
[3];
12855 emit_move_insn (operands
[2], operands
[5]);
12856 emit_move_insn (operands
[3], operands
[6]);
12858 emit_move_insn (operands
[4], operands
[7]);
12863 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12864 left shift by a constant, either using a single shift or
12865 a sequence of add instructions. */
12868 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12872 emit_insn ((mode
== DImode
12874 : gen_adddi3
) (operand
, operand
, operand
));
12876 else if (!optimize_size
12877 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12880 for (i
=0; i
<count
; i
++)
12882 emit_insn ((mode
== DImode
12884 : gen_adddi3
) (operand
, operand
, operand
));
12888 emit_insn ((mode
== DImode
12890 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12894 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12896 rtx low
[2], high
[2];
12898 const int single_width
= mode
== DImode
? 32 : 64;
12900 if (CONST_INT_P (operands
[2]))
12902 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12903 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12905 if (count
>= single_width
)
12907 emit_move_insn (high
[0], low
[1]);
12908 emit_move_insn (low
[0], const0_rtx
);
12910 if (count
> single_width
)
12911 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12915 if (!rtx_equal_p (operands
[0], operands
[1]))
12916 emit_move_insn (operands
[0], operands
[1]);
12917 emit_insn ((mode
== DImode
12919 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12920 ix86_expand_ashl_const (low
[0], count
, mode
);
12925 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12927 if (operands
[1] == const1_rtx
)
12929 /* Assuming we've chosen a QImode capable registers, then 1 << N
12930 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12931 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12933 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12935 ix86_expand_clear (low
[0]);
12936 ix86_expand_clear (high
[0]);
12937 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12939 d
= gen_lowpart (QImode
, low
[0]);
12940 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12941 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12942 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12944 d
= gen_lowpart (QImode
, high
[0]);
12945 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12946 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12947 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12950 /* Otherwise, we can get the same results by manually performing
12951 a bit extract operation on bit 5/6, and then performing the two
12952 shifts. The two methods of getting 0/1 into low/high are exactly
12953 the same size. Avoiding the shift in the bit extract case helps
12954 pentium4 a bit; no one else seems to care much either way. */
12959 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12960 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12962 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12963 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12965 emit_insn ((mode
== DImode
12967 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12968 emit_insn ((mode
== DImode
12970 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12971 emit_move_insn (low
[0], high
[0]);
12972 emit_insn ((mode
== DImode
12974 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12977 emit_insn ((mode
== DImode
12979 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12980 emit_insn ((mode
== DImode
12982 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12986 if (operands
[1] == constm1_rtx
)
12988 /* For -1 << N, we can avoid the shld instruction, because we
12989 know that we're shifting 0...31/63 ones into a -1. */
12990 emit_move_insn (low
[0], constm1_rtx
);
12992 emit_move_insn (high
[0], low
[0]);
12994 emit_move_insn (high
[0], constm1_rtx
);
12998 if (!rtx_equal_p (operands
[0], operands
[1]))
12999 emit_move_insn (operands
[0], operands
[1]);
13001 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13002 emit_insn ((mode
== DImode
13004 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13007 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13009 if (TARGET_CMOVE
&& scratch
)
13011 ix86_expand_clear (scratch
);
13012 emit_insn ((mode
== DImode
13013 ? gen_x86_shift_adj_1
13014 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13017 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13021 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13023 rtx low
[2], high
[2];
13025 const int single_width
= mode
== DImode
? 32 : 64;
13027 if (CONST_INT_P (operands
[2]))
13029 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13030 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13032 if (count
== single_width
* 2 - 1)
13034 emit_move_insn (high
[0], high
[1]);
13035 emit_insn ((mode
== DImode
13037 : gen_ashrdi3
) (high
[0], high
[0],
13038 GEN_INT (single_width
- 1)));
13039 emit_move_insn (low
[0], high
[0]);
13042 else if (count
>= single_width
)
13044 emit_move_insn (low
[0], high
[1]);
13045 emit_move_insn (high
[0], low
[0]);
13046 emit_insn ((mode
== DImode
13048 : gen_ashrdi3
) (high
[0], high
[0],
13049 GEN_INT (single_width
- 1)));
13050 if (count
> single_width
)
13051 emit_insn ((mode
== DImode
13053 : gen_ashrdi3
) (low
[0], low
[0],
13054 GEN_INT (count
- single_width
)));
13058 if (!rtx_equal_p (operands
[0], operands
[1]))
13059 emit_move_insn (operands
[0], operands
[1]);
13060 emit_insn ((mode
== DImode
13062 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13063 emit_insn ((mode
== DImode
13065 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13070 if (!rtx_equal_p (operands
[0], operands
[1]))
13071 emit_move_insn (operands
[0], operands
[1]);
13073 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13075 emit_insn ((mode
== DImode
13077 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13078 emit_insn ((mode
== DImode
13080 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13082 if (TARGET_CMOVE
&& scratch
)
13084 emit_move_insn (scratch
, high
[0]);
13085 emit_insn ((mode
== DImode
13087 : gen_ashrdi3
) (scratch
, scratch
,
13088 GEN_INT (single_width
- 1)));
13089 emit_insn ((mode
== DImode
13090 ? gen_x86_shift_adj_1
13091 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13095 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13100 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13102 rtx low
[2], high
[2];
13104 const int single_width
= mode
== DImode
? 32 : 64;
13106 if (CONST_INT_P (operands
[2]))
13108 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13109 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13111 if (count
>= single_width
)
13113 emit_move_insn (low
[0], high
[1]);
13114 ix86_expand_clear (high
[0]);
13116 if (count
> single_width
)
13117 emit_insn ((mode
== DImode
13119 : gen_lshrdi3
) (low
[0], low
[0],
13120 GEN_INT (count
- single_width
)));
13124 if (!rtx_equal_p (operands
[0], operands
[1]))
13125 emit_move_insn (operands
[0], operands
[1]);
13126 emit_insn ((mode
== DImode
13128 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13129 emit_insn ((mode
== DImode
13131 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13136 if (!rtx_equal_p (operands
[0], operands
[1]))
13137 emit_move_insn (operands
[0], operands
[1]);
13139 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13141 emit_insn ((mode
== DImode
13143 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13144 emit_insn ((mode
== DImode
13146 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13148 /* Heh. By reversing the arguments, we can reuse this pattern. */
13149 if (TARGET_CMOVE
&& scratch
)
13151 ix86_expand_clear (scratch
);
13152 emit_insn ((mode
== DImode
13153 ? gen_x86_shift_adj_1
13154 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13158 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13162 /* Predict just emitted jump instruction to be taken with probability PROB. */
13164 predict_jump (int prob
)
13166 rtx insn
= get_last_insn ();
13167 gcc_assert (JUMP_P (insn
));
13169 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13174 /* Helper function for the string operations below. Dest VARIABLE whether
13175 it is aligned to VALUE bytes. If true, jump to the label. */
13177 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13179 rtx label
= gen_label_rtx ();
13180 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13181 if (GET_MODE (variable
) == DImode
)
13182 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13184 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13185 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13188 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13190 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13194 /* Adjust COUNTER by the VALUE. */
13196 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13198 if (GET_MODE (countreg
) == DImode
)
13199 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13201 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13204 /* Zero extend possibly SImode EXP to Pmode register. */
13206 ix86_zero_extend_to_Pmode (rtx exp
)
13209 if (GET_MODE (exp
) == VOIDmode
)
13210 return force_reg (Pmode
, exp
);
13211 if (GET_MODE (exp
) == Pmode
)
13212 return copy_to_mode_reg (Pmode
, exp
);
13213 r
= gen_reg_rtx (Pmode
);
13214 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13218 /* Divide COUNTREG by SCALE. */
13220 scale_counter (rtx countreg
, int scale
)
13223 rtx piece_size_mask
;
13227 if (CONST_INT_P (countreg
))
13228 return GEN_INT (INTVAL (countreg
) / scale
);
13229 gcc_assert (REG_P (countreg
));
13231 piece_size_mask
= GEN_INT (scale
- 1);
13232 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13233 GEN_INT (exact_log2 (scale
)),
13234 NULL
, 1, OPTAB_DIRECT
);
13238 /* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
13239 for constant loop counts. */
13241 static enum machine_mode
13242 counter_mode (rtx count_exp
)
13244 if (GET_MODE (count_exp
) != VOIDmode
)
13245 return GET_MODE (count_exp
);
13246 if (GET_CODE (count_exp
) != CONST_INT
)
13248 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13253 /* When SRCPTR is non-NULL, output simple loop to move memory
13254 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13255 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13256 equivalent loop to set memory by VALUE (supposed to be in MODE).
13258 The size is rounded down to whole number of chunk size moved at once.
13259 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13263 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13264 rtx destptr
, rtx srcptr
, rtx value
,
13265 rtx count
, enum machine_mode mode
, int unroll
,
13268 rtx out_label
, top_label
, iter
, tmp
;
13269 enum machine_mode iter_mode
= counter_mode (count
);
13270 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13271 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13277 top_label
= gen_label_rtx ();
13278 out_label
= gen_label_rtx ();
13279 iter
= gen_reg_rtx (iter_mode
);
13281 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13282 NULL
, 1, OPTAB_DIRECT
);
13283 /* Those two should combine. */
13284 if (piece_size
== const1_rtx
)
13286 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13288 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13290 emit_move_insn (iter
, const0_rtx
);
13292 emit_label (top_label
);
13294 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13295 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13296 destmem
= change_address (destmem
, mode
, x_addr
);
13300 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13301 srcmem
= change_address (srcmem
, mode
, y_addr
);
13303 /* When unrolling for chips that reorder memory reads and writes,
13304 we can save registers by using single temporary.
13305 Also using 4 temporaries is overkill in 32bit mode. */
13306 if (!TARGET_64BIT
&& 0)
13308 for (i
= 0; i
< unroll
; i
++)
13313 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13315 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13317 emit_move_insn (destmem
, srcmem
);
13323 gcc_assert (unroll
<= 4);
13324 for (i
= 0; i
< unroll
; i
++)
13326 tmpreg
[i
] = gen_reg_rtx (mode
);
13330 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13332 emit_move_insn (tmpreg
[i
], srcmem
);
13334 for (i
= 0; i
< unroll
; i
++)
13339 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13341 emit_move_insn (destmem
, tmpreg
[i
]);
13346 for (i
= 0; i
< unroll
; i
++)
13350 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13351 emit_move_insn (destmem
, value
);
13354 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13355 true, OPTAB_LIB_WIDEN
);
13357 emit_move_insn (iter
, tmp
);
13359 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13361 if (expected_size
!= -1)
13363 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13364 if (expected_size
== 0)
13366 else if (expected_size
> REG_BR_PROB_BASE
)
13367 predict_jump (REG_BR_PROB_BASE
- 1);
13369 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13372 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13373 iter
= ix86_zero_extend_to_Pmode (iter
);
13374 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13375 true, OPTAB_LIB_WIDEN
);
13376 if (tmp
!= destptr
)
13377 emit_move_insn (destptr
, tmp
);
13380 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13381 true, OPTAB_LIB_WIDEN
);
13383 emit_move_insn (srcptr
, tmp
);
13385 emit_label (out_label
);
13388 /* Output "rep; mov" instruction.
13389 Arguments have same meaning as for previous function */
13391 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13392 rtx destptr
, rtx srcptr
,
13394 enum machine_mode mode
)
13400 /* If the size is known, it is shorter to use rep movs. */
13401 if (mode
== QImode
&& CONST_INT_P (count
)
13402 && !(INTVAL (count
) & 3))
13405 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13406 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13407 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13408 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13409 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13410 if (mode
!= QImode
)
13412 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13413 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13414 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13415 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13416 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13417 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13421 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13422 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13424 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13428 /* Output "rep; stos" instruction.
13429 Arguments have same meaning as for previous function */
13431 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13433 enum machine_mode mode
)
13438 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13439 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13440 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13441 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13442 if (mode
!= QImode
)
13444 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13445 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13446 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13449 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13450 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13454 emit_strmov (rtx destmem
, rtx srcmem
,
13455 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13457 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13458 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13459 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13462 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13464 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13465 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13468 if (CONST_INT_P (count
))
13470 HOST_WIDE_INT countval
= INTVAL (count
);
13473 if ((countval
& 0x10) && max_size
> 16)
13477 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13478 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13481 gcc_unreachable ();
13484 if ((countval
& 0x08) && max_size
> 8)
13487 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13490 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13491 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13495 if ((countval
& 0x04) && max_size
> 4)
13497 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13500 if ((countval
& 0x02) && max_size
> 2)
13502 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13505 if ((countval
& 0x01) && max_size
> 1)
13507 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13514 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13515 count
, 1, OPTAB_DIRECT
);
13516 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13517 count
, QImode
, 1, 4);
13521 /* When there are stringops, we can cheaply increase dest and src pointers.
13522 Otherwise we save code size by maintaining offset (zero is readily
13523 available from preceding rep operation) and using x86 addressing modes.
13525 if (TARGET_SINGLE_STRINGOP
)
13529 rtx label
= ix86_expand_aligntest (count
, 4, true);
13530 src
= change_address (srcmem
, SImode
, srcptr
);
13531 dest
= change_address (destmem
, SImode
, destptr
);
13532 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13533 emit_label (label
);
13534 LABEL_NUSES (label
) = 1;
13538 rtx label
= ix86_expand_aligntest (count
, 2, true);
13539 src
= change_address (srcmem
, HImode
, srcptr
);
13540 dest
= change_address (destmem
, HImode
, destptr
);
13541 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13542 emit_label (label
);
13543 LABEL_NUSES (label
) = 1;
13547 rtx label
= ix86_expand_aligntest (count
, 1, true);
13548 src
= change_address (srcmem
, QImode
, srcptr
);
13549 dest
= change_address (destmem
, QImode
, destptr
);
13550 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13551 emit_label (label
);
13552 LABEL_NUSES (label
) = 1;
13557 rtx offset
= force_reg (Pmode
, const0_rtx
);
13562 rtx label
= ix86_expand_aligntest (count
, 4, true);
13563 src
= change_address (srcmem
, SImode
, srcptr
);
13564 dest
= change_address (destmem
, SImode
, destptr
);
13565 emit_move_insn (dest
, src
);
13566 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13567 true, OPTAB_LIB_WIDEN
);
13569 emit_move_insn (offset
, tmp
);
13570 emit_label (label
);
13571 LABEL_NUSES (label
) = 1;
13575 rtx label
= ix86_expand_aligntest (count
, 2, true);
13576 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13577 src
= change_address (srcmem
, HImode
, tmp
);
13578 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13579 dest
= change_address (destmem
, HImode
, tmp
);
13580 emit_move_insn (dest
, src
);
13581 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13582 true, OPTAB_LIB_WIDEN
);
13584 emit_move_insn (offset
, tmp
);
13585 emit_label (label
);
13586 LABEL_NUSES (label
) = 1;
13590 rtx label
= ix86_expand_aligntest (count
, 1, true);
13591 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13592 src
= change_address (srcmem
, QImode
, tmp
);
13593 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13594 dest
= change_address (destmem
, QImode
, tmp
);
13595 emit_move_insn (dest
, src
);
13596 emit_label (label
);
13597 LABEL_NUSES (label
) = 1;
13602 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13604 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13605 rtx count
, int max_size
)
13608 expand_simple_binop (counter_mode (count
), AND
, count
,
13609 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13610 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13611 gen_lowpart (QImode
, value
), count
, QImode
,
13615 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13617 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13621 if (CONST_INT_P (count
))
13623 HOST_WIDE_INT countval
= INTVAL (count
);
13626 if ((countval
& 0x10) && max_size
> 16)
13630 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13631 emit_insn (gen_strset (destptr
, dest
, value
));
13632 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13633 emit_insn (gen_strset (destptr
, dest
, value
));
13636 gcc_unreachable ();
13639 if ((countval
& 0x08) && max_size
> 8)
13643 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13644 emit_insn (gen_strset (destptr
, dest
, value
));
13648 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13649 emit_insn (gen_strset (destptr
, dest
, value
));
13650 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13651 emit_insn (gen_strset (destptr
, dest
, value
));
13655 if ((countval
& 0x04) && max_size
> 4)
13657 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13658 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13661 if ((countval
& 0x02) && max_size
> 2)
13663 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13664 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13667 if ((countval
& 0x01) && max_size
> 1)
13669 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13670 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13677 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13682 rtx label
= ix86_expand_aligntest (count
, 16, true);
13685 dest
= change_address (destmem
, DImode
, destptr
);
13686 emit_insn (gen_strset (destptr
, dest
, value
));
13687 emit_insn (gen_strset (destptr
, dest
, value
));
13691 dest
= change_address (destmem
, SImode
, destptr
);
13692 emit_insn (gen_strset (destptr
, dest
, value
));
13693 emit_insn (gen_strset (destptr
, dest
, value
));
13694 emit_insn (gen_strset (destptr
, dest
, value
));
13695 emit_insn (gen_strset (destptr
, dest
, value
));
13697 emit_label (label
);
13698 LABEL_NUSES (label
) = 1;
13702 rtx label
= ix86_expand_aligntest (count
, 8, true);
13705 dest
= change_address (destmem
, DImode
, destptr
);
13706 emit_insn (gen_strset (destptr
, dest
, value
));
13710 dest
= change_address (destmem
, SImode
, destptr
);
13711 emit_insn (gen_strset (destptr
, dest
, value
));
13712 emit_insn (gen_strset (destptr
, dest
, value
));
13714 emit_label (label
);
13715 LABEL_NUSES (label
) = 1;
13719 rtx label
= ix86_expand_aligntest (count
, 4, true);
13720 dest
= change_address (destmem
, SImode
, destptr
);
13721 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13722 emit_label (label
);
13723 LABEL_NUSES (label
) = 1;
13727 rtx label
= ix86_expand_aligntest (count
, 2, true);
13728 dest
= change_address (destmem
, HImode
, destptr
);
13729 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13730 emit_label (label
);
13731 LABEL_NUSES (label
) = 1;
13735 rtx label
= ix86_expand_aligntest (count
, 1, true);
13736 dest
= change_address (destmem
, QImode
, destptr
);
13737 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13738 emit_label (label
);
13739 LABEL_NUSES (label
) = 1;
13743 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13744 DESIRED_ALIGNMENT. */
13746 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13747 rtx destptr
, rtx srcptr
, rtx count
,
13748 int align
, int desired_alignment
)
13750 if (align
<= 1 && desired_alignment
> 1)
13752 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13753 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13754 destmem
= change_address (destmem
, QImode
, destptr
);
13755 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13756 ix86_adjust_counter (count
, 1);
13757 emit_label (label
);
13758 LABEL_NUSES (label
) = 1;
13760 if (align
<= 2 && desired_alignment
> 2)
13762 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13763 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13764 destmem
= change_address (destmem
, HImode
, destptr
);
13765 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13766 ix86_adjust_counter (count
, 2);
13767 emit_label (label
);
13768 LABEL_NUSES (label
) = 1;
13770 if (align
<= 4 && desired_alignment
> 4)
13772 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13773 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13774 destmem
= change_address (destmem
, SImode
, destptr
);
13775 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13776 ix86_adjust_counter (count
, 4);
13777 emit_label (label
);
13778 LABEL_NUSES (label
) = 1;
13780 gcc_assert (desired_alignment
<= 8);
13783 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13784 DESIRED_ALIGNMENT. */
13786 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13787 int align
, int desired_alignment
)
13789 if (align
<= 1 && desired_alignment
> 1)
13791 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13792 destmem
= change_address (destmem
, QImode
, destptr
);
13793 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13794 ix86_adjust_counter (count
, 1);
13795 emit_label (label
);
13796 LABEL_NUSES (label
) = 1;
13798 if (align
<= 2 && desired_alignment
> 2)
13800 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13801 destmem
= change_address (destmem
, HImode
, destptr
);
13802 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13803 ix86_adjust_counter (count
, 2);
13804 emit_label (label
);
13805 LABEL_NUSES (label
) = 1;
13807 if (align
<= 4 && desired_alignment
> 4)
13809 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13810 destmem
= change_address (destmem
, SImode
, destptr
);
13811 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13812 ix86_adjust_counter (count
, 4);
13813 emit_label (label
);
13814 LABEL_NUSES (label
) = 1;
13816 gcc_assert (desired_alignment
<= 8);
13819 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13820 static enum stringop_alg
13821 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
13822 int *dynamic_check
)
13824 const struct stringop_algs
* algs
;
13826 *dynamic_check
= -1;
13828 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
13830 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
13831 if (stringop_alg
!= no_stringop
)
13832 return stringop_alg
;
13833 /* rep; movq or rep; movl is the smallest variant. */
13834 else if (optimize_size
)
13836 if (!count
|| (count
& 3))
13837 return rep_prefix_1_byte
;
13839 return rep_prefix_4_byte
;
13841 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13843 else if (expected_size
!= -1 && expected_size
< 4)
13844 return loop_1_byte
;
13845 else if (expected_size
!= -1)
13848 enum stringop_alg alg
= libcall
;
13849 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13851 gcc_assert (algs
->size
[i
].max
);
13852 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
13854 if (algs
->size
[i
].alg
!= libcall
)
13855 alg
= algs
->size
[i
].alg
;
13856 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13857 last non-libcall inline algorithm. */
13858 if (TARGET_INLINE_ALL_STRINGOPS
)
13860 /* When the current size is best to be copied by a libcall,
13861 but we are still forced to inline, run the heuristic bellow
13862 that will pick code for medium sized blocks. */
13863 if (alg
!= libcall
)
13868 return algs
->size
[i
].alg
;
13871 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
13873 /* When asked to inline the call anyway, try to pick meaningful choice.
13874 We look for maximal size of block that is faster to copy by hand and
13875 take blocks of at most of that size guessing that average size will
13876 be roughly half of the block.
13878 If this turns out to be bad, we might simply specify the preferred
13879 choice in ix86_costs. */
13880 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13881 && algs
->unknown_size
== libcall
)
13884 enum stringop_alg alg
;
13887 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13888 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
13889 max
= algs
->size
[i
].max
;
13892 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
13893 gcc_assert (*dynamic_check
== -1);
13894 gcc_assert (alg
!= libcall
);
13895 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13896 *dynamic_check
= max
;
13899 return algs
->unknown_size
;
13902 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13903 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13905 decide_alignment (int align
,
13906 enum stringop_alg alg
,
13909 int desired_align
= 0;
13913 gcc_unreachable ();
13915 case unrolled_loop
:
13916 desired_align
= GET_MODE_SIZE (Pmode
);
13918 case rep_prefix_8_byte
:
13921 case rep_prefix_4_byte
:
13922 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13923 copying whole cacheline at once. */
13924 if (TARGET_PENTIUMPRO
)
13929 case rep_prefix_1_byte
:
13930 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13931 copying whole cacheline at once. */
13932 if (TARGET_PENTIUMPRO
)
13946 if (desired_align
< align
)
13947 desired_align
= align
;
13948 if (expected_size
!= -1 && expected_size
< 4)
13949 desired_align
= align
;
13950 return desired_align
;
13953 /* Return the smallest power of 2 greater than VAL. */
13955 smallest_pow2_greater_than (int val
)
13963 /* Expand string move (memcpy) operation. Use i386 string operations when
13964 profitable. expand_clrmem contains similar code. The code depends upon
13965 architecture, block size and alignment, but always has the same
13968 1) Prologue guard: Conditional that jumps up to epilogues for small
13969 blocks that can be handled by epilogue alone. This is faster but
13970 also needed for correctness, since the prologue assumes the block is larger
13971 than the desired alignment.
13973 Optional dynamic check for size and libcall for large
13974 blocks is emitted here too, with -minline-stringops-dynamically.
13976 2) Prologue: copy first few bytes in order to get destination aligned
13977 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
13978 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
13979 We emit either a jump tree on power of two sized blocks, or a byte loop.
13981 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
13982 with specified algorithm.
13984 4) Epilogue: code copying tail of the block that is too small to be
13985 handled by main body (or up to size guarded by prologue guard). */
13988 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
13989 rtx expected_align_exp
, rtx expected_size_exp
)
13995 rtx jump_around_label
= NULL
;
13996 HOST_WIDE_INT align
= 1;
13997 unsigned HOST_WIDE_INT count
= 0;
13998 HOST_WIDE_INT expected_size
= -1;
13999 int size_needed
= 0, epilogue_size_needed
;
14000 int desired_align
= 0;
14001 enum stringop_alg alg
;
14004 if (CONST_INT_P (align_exp
))
14005 align
= INTVAL (align_exp
);
14006 /* i386 can do misaligned access on reasonably increased cost. */
14007 if (CONST_INT_P (expected_align_exp
)
14008 && INTVAL (expected_align_exp
) > align
)
14009 align
= INTVAL (expected_align_exp
);
14010 if (CONST_INT_P (count_exp
))
14011 count
= expected_size
= INTVAL (count_exp
);
14012 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14013 expected_size
= INTVAL (expected_size_exp
);
14015 /* Step 0: Decide on preferred algorithm, desired alignment and
14016 size of chunks to be copied by main loop. */
14018 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14019 desired_align
= decide_alignment (align
, alg
, expected_size
);
14021 if (!TARGET_ALIGN_STRINGOPS
)
14022 align
= desired_align
;
14024 if (alg
== libcall
)
14026 gcc_assert (alg
!= no_stringop
);
14028 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14029 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14030 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14035 gcc_unreachable ();
14037 size_needed
= GET_MODE_SIZE (Pmode
);
14039 case unrolled_loop
:
14040 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14042 case rep_prefix_8_byte
:
14045 case rep_prefix_4_byte
:
14048 case rep_prefix_1_byte
:
14054 epilogue_size_needed
= size_needed
;
14056 /* Step 1: Prologue guard. */
14058 /* Alignment code needs count to be in register. */
14059 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14061 enum machine_mode mode
= SImode
;
14062 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14064 count_exp
= force_reg (mode
, count_exp
);
14066 gcc_assert (desired_align
>= 1 && align
>= 1);
14068 /* Ensure that alignment prologue won't copy past end of block. */
14069 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14071 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14072 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14073 Make sure it is power of 2. */
14074 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14076 label
= gen_label_rtx ();
14077 emit_cmp_and_jump_insns (count_exp
,
14078 GEN_INT (epilogue_size_needed
),
14079 LTU
, 0, counter_mode (count_exp
), 1, label
);
14080 if (GET_CODE (count_exp
) == CONST_INT
)
14082 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14083 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14085 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14087 /* Emit code to decide on runtime whether library call or inline should be
14089 if (dynamic_check
!= -1)
14091 rtx hot_label
= gen_label_rtx ();
14092 jump_around_label
= gen_label_rtx ();
14093 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14094 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14095 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14096 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14097 emit_jump (jump_around_label
);
14098 emit_label (hot_label
);
14101 /* Step 2: Alignment prologue. */
14103 if (desired_align
> align
)
14105 /* Except for the first move in epilogue, we no longer know
14106 constant offset in aliasing info.  It doesn't seem worth
14107 the pain to maintain it for the first move, so throw away
14109 src
= change_address (src
, BLKmode
, srcreg
);
14110 dst
= change_address (dst
, BLKmode
, destreg
);
14111 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14114 if (label
&& size_needed
== 1)
14116 emit_label (label
);
14117 LABEL_NUSES (label
) = 1;
14121 /* Step 3: Main loop. */
14127 gcc_unreachable ();
14129 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14130 count_exp
, QImode
, 1, expected_size
);
14133 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14134 count_exp
, Pmode
, 1, expected_size
);
14136 case unrolled_loop
:
14137 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14138 registers for 4 temporaries anyway. */
14139 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14140 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14143 case rep_prefix_8_byte
:
14144 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14147 case rep_prefix_4_byte
:
14148 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14151 case rep_prefix_1_byte
:
14152 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14156 /* Adjust properly the offset of src and dest memory for aliasing. */
14157 if (CONST_INT_P (count_exp
))
14159 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14160 (count
/ size_needed
) * size_needed
);
14161 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14162 (count
/ size_needed
) * size_needed
);
14166 src
= change_address (src
, BLKmode
, srcreg
);
14167 dst
= change_address (dst
, BLKmode
, destreg
);
14170 /* Step 4: Epilogue to copy the remaining bytes. */
14174 /* When the main loop is done, COUNT_EXP might hold original count,
14175 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14176 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14177 bytes. Compensate if needed. */
14179 if (size_needed
< epilogue_size_needed
)
14182 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14183 GEN_INT (size_needed
- 1), count_exp
, 1,
14185 if (tmp
!= count_exp
)
14186 emit_move_insn (count_exp
, tmp
);
14188 emit_label (label
);
14189 LABEL_NUSES (label
) = 1;
14192 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14193 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14194 epilogue_size_needed
);
14195 if (jump_around_label
)
14196 emit_label (jump_around_label
);
14200 /* Helper function for memcpy. For QImode value 0xXY produce
14201 0xXYXYXYXY of wide specified by MODE. This is essentially
14202 a * 0x10101010, but we can do slightly better than
14203 synth_mult by unwinding the sequence by hand on CPUs with
14206 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14208 enum machine_mode valmode
= GET_MODE (val
);
14210 int nops
= mode
== DImode
? 3 : 2;
14212 gcc_assert (mode
== SImode
|| mode
== DImode
);
14213 if (val
== const0_rtx
)
14214 return copy_to_mode_reg (mode
, const0_rtx
);
14215 if (CONST_INT_P (val
))
14217 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14221 if (mode
== DImode
)
14222 v
|= (v
<< 16) << 16;
14223 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14226 if (valmode
== VOIDmode
)
14228 if (valmode
!= QImode
)
14229 val
= gen_lowpart (QImode
, val
);
14230 if (mode
== QImode
)
14232 if (!TARGET_PARTIAL_REG_STALL
)
14234 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14235 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14236 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14237 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14239 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14240 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14241 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14246 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14248 if (!TARGET_PARTIAL_REG_STALL
)
14249 if (mode
== SImode
)
14250 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14252 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14255 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14256 NULL
, 1, OPTAB_DIRECT
);
14258 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14260 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14261 NULL
, 1, OPTAB_DIRECT
);
14262 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14263 if (mode
== SImode
)
14265 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14266 NULL
, 1, OPTAB_DIRECT
);
14267 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14272 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14273 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14274 alignment from ALIGN to DESIRED_ALIGN. */
14276 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14281 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14282 promoted_val
= promote_duplicated_reg (DImode
, val
);
14283 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14284 promoted_val
= promote_duplicated_reg (SImode
, val
);
14285 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14286 promoted_val
= promote_duplicated_reg (HImode
, val
);
14288 promoted_val
= val
;
14290 return promoted_val
;
14293 /* Expand string clear operation (bzero). Use i386 string operations when
14294 profitable. See expand_movmem comment for explanation of individual
14295 steps performed. */
14297 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14298 rtx expected_align_exp
, rtx expected_size_exp
)
14303 rtx jump_around_label
= NULL
;
14304 HOST_WIDE_INT align
= 1;
14305 unsigned HOST_WIDE_INT count
= 0;
14306 HOST_WIDE_INT expected_size
= -1;
14307 int size_needed
= 0, epilogue_size_needed
;
14308 int desired_align
= 0;
14309 enum stringop_alg alg
;
14310 rtx promoted_val
= NULL
;
14311 bool force_loopy_epilogue
= false;
14314 if (CONST_INT_P (align_exp
))
14315 align
= INTVAL (align_exp
);
14316 /* i386 can do misaligned access on reasonably increased cost. */
14317 if (CONST_INT_P (expected_align_exp
)
14318 && INTVAL (expected_align_exp
) > align
)
14319 align
= INTVAL (expected_align_exp
);
14320 if (CONST_INT_P (count_exp
))
14321 count
= expected_size
= INTVAL (count_exp
);
14322 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14323 expected_size
= INTVAL (expected_size_exp
);
14325 /* Step 0: Decide on preferred algorithm, desired alignment and
14326 size of chunks to be copied by main loop. */
14328 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14329 desired_align
= decide_alignment (align
, alg
, expected_size
);
14331 if (!TARGET_ALIGN_STRINGOPS
)
14332 align
= desired_align
;
14334 if (alg
== libcall
)
14336 gcc_assert (alg
!= no_stringop
);
14338 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14339 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14344 gcc_unreachable ();
14346 size_needed
= GET_MODE_SIZE (Pmode
);
14348 case unrolled_loop
:
14349 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14351 case rep_prefix_8_byte
:
14354 case rep_prefix_4_byte
:
14357 case rep_prefix_1_byte
:
14362 epilogue_size_needed
= size_needed
;
14364 /* Step 1: Prologue guard. */
14366 /* Alignment code needs count to be in register. */
14367 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14369 enum machine_mode mode
= SImode
;
14370 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14372 count_exp
= force_reg (mode
, count_exp
);
14374 /* Do the cheap promotion to allow better CSE across the
14375 main loop and epilogue (ie one load of the big constant in the
14376 front of all code. */
14377 if (CONST_INT_P (val_exp
))
14378 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14379 desired_align
, align
);
14380 /* Ensure that alignment prologue won't copy past end of block. */
14381 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14383 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14384 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14385 Make sure it is power of 2. */
14386 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14388 /* To improve performance of small blocks, we jump around the VAL
14389 promoting mode. This mean that if the promoted VAL is not constant,
14390 we might not use it in the epilogue and have to use byte
14392 if (epilogue_size_needed
> 2 && !promoted_val
)
14393 force_loopy_epilogue
= true;
14394 label
= gen_label_rtx ();
14395 emit_cmp_and_jump_insns (count_exp
,
14396 GEN_INT (epilogue_size_needed
),
14397 LTU
, 0, counter_mode (count_exp
), 1, label
);
14398 if (GET_CODE (count_exp
) == CONST_INT
)
14400 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14401 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14403 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14405 if (dynamic_check
!= -1)
14407 rtx hot_label
= gen_label_rtx ();
14408 jump_around_label
= gen_label_rtx ();
14409 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14410 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14411 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14412 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14413 emit_jump (jump_around_label
);
14414 emit_label (hot_label
);
14417 /* Step 2: Alignment prologue. */
14419 /* Do the expensive promotion once we branched off the small blocks. */
14421 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14422 desired_align
, align
);
14423 gcc_assert (desired_align
>= 1 && align
>= 1);
14425 if (desired_align
> align
)
14427 /* Except for the first move in epilogue, we no longer know
14428 constant offset in aliasing info.  It doesn't seem worth
14429 the pain to maintain it for the first move, so throw away
14431 dst
= change_address (dst
, BLKmode
, destreg
);
14432 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14435 if (label
&& size_needed
== 1)
14437 emit_label (label
);
14438 LABEL_NUSES (label
) = 1;
14442 /* Step 3: Main loop. */
14448 gcc_unreachable ();
14450 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14451 count_exp
, QImode
, 1, expected_size
);
14454 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14455 count_exp
, Pmode
, 1, expected_size
);
14457 case unrolled_loop
:
14458 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14459 count_exp
, Pmode
, 4, expected_size
);
14461 case rep_prefix_8_byte
:
14462 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14465 case rep_prefix_4_byte
:
14466 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14469 case rep_prefix_1_byte
:
14470 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14474 /* Adjust properly the offset of src and dest memory for aliasing. */
14475 if (CONST_INT_P (count_exp
))
14476 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14477 (count
/ size_needed
) * size_needed
);
14479 dst
= change_address (dst
, BLKmode
, destreg
);
14481 /* Step 4: Epilogue to copy the remaining bytes. */
14485 /* When the main loop is done, COUNT_EXP might hold original count,
14486 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14487 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14488 bytes. Compensate if needed. */
14490 if (size_needed
< desired_align
- align
)
14493 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14494 GEN_INT (size_needed
- 1), count_exp
, 1,
14496 size_needed
= desired_align
- align
+ 1;
14497 if (tmp
!= count_exp
)
14498 emit_move_insn (count_exp
, tmp
);
14500 emit_label (label
);
14501 LABEL_NUSES (label
) = 1;
14503 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14505 if (force_loopy_epilogue
)
14506 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14509 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14512 if (jump_around_label
)
14513 emit_label (jump_around_label
);
14517 /* Expand the appropriate insns for doing strlen if not just doing
14520 out = result, initialized with the start address
14521 align_rtx = alignment of the address.
14522 scratch = scratch register, initialized with the startaddress when
14523 not aligned, otherwise undefined
14525 This is just the body. It needs the initializations mentioned above and
14526 some address computing at the end. These things are done in i386.md. */
14529 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14533 rtx align_2_label
= NULL_RTX
;
14534 rtx align_3_label
= NULL_RTX
;
14535 rtx align_4_label
= gen_label_rtx ();
14536 rtx end_0_label
= gen_label_rtx ();
14538 rtx tmpreg
= gen_reg_rtx (SImode
);
14539 rtx scratch
= gen_reg_rtx (SImode
);
14543 if (CONST_INT_P (align_rtx
))
14544 align
= INTVAL (align_rtx
);
14546 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14548 /* Is there a known alignment and is it less than 4? */
14551 rtx scratch1
= gen_reg_rtx (Pmode
);
14552 emit_move_insn (scratch1
, out
);
14553 /* Is there a known alignment and is it not 2? */
14556 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14557 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14559 /* Leave just the 3 lower bits. */
14560 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14561 NULL_RTX
, 0, OPTAB_WIDEN
);
14563 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14564 Pmode
, 1, align_4_label
);
14565 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14566 Pmode
, 1, align_2_label
);
14567 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14568 Pmode
, 1, align_3_label
);
14572 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14573 check if is aligned to 4 - byte. */
14575 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14576 NULL_RTX
, 0, OPTAB_WIDEN
);
14578 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14579 Pmode
, 1, align_4_label
);
14582 mem
= change_address (src
, QImode
, out
);
14584 /* Now compare the bytes. */
14586 /* Compare the first n unaligned byte on a byte per byte basis. */
14587 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14588 QImode
, 1, end_0_label
);
14590 /* Increment the address. */
14592 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14594 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14596 /* Not needed with an alignment of 2 */
14599 emit_label (align_2_label
);
14601 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14605 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14607 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14609 emit_label (align_3_label
);
14612 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14616 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14618 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14621 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14622 align this loop. It gives only huge programs, but does not help to
14624 emit_label (align_4_label
);
14626 mem
= change_address (src
, SImode
, out
);
14627 emit_move_insn (scratch
, mem
);
14629 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14631 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14633 /* This formula yields a nonzero result iff one of the bytes is zero.
14634 This saves three branches inside loop and many cycles. */
14636 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14637 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14638 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14639 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14640 gen_int_mode (0x80808080, SImode
)));
14641 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14646 rtx reg
= gen_reg_rtx (SImode
);
14647 rtx reg2
= gen_reg_rtx (Pmode
);
14648 emit_move_insn (reg
, tmpreg
);
14649 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14651 /* If zero is not in the first two bytes, move two bytes forward. */
14652 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14653 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14654 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14655 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14656 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14659 /* Emit lea manually to avoid clobbering of flags. */
14660 emit_insn (gen_rtx_SET (SImode
, reg2
,
14661 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14663 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14664 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14665 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14666 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14673 rtx end_2_label
= gen_label_rtx ();
14674 /* Is zero in the first two bytes? */
14676 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14677 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14678 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14679 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14680 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14682 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14683 JUMP_LABEL (tmp
) = end_2_label
;
14685 /* Not in the first two. Move two bytes forward. */
14686 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14688 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14690 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14692 emit_label (end_2_label
);
14696 /* Avoid branch in fixing the byte. */
14697 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14698 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
14699 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14701 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14703 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14705 emit_label (end_0_label
);
14708 /* Expand strlen. */
14711 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14713 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14715 /* The generic case of strlen expander is long. Avoid it's
14716 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14718 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14719 && !TARGET_INLINE_ALL_STRINGOPS
14721 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14724 addr
= force_reg (Pmode
, XEXP (src
, 0));
14725 scratch1
= gen_reg_rtx (Pmode
);
14727 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14730 /* Well it seems that some optimizer does not combine a call like
14731 foo(strlen(bar), strlen(bar));
14732 when the move and the subtraction is done here. It does calculate
14733 the length just once when these instructions are done inside of
14734 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14735 often used and I use one fewer register for the lifetime of
14736 output_strlen_unroll() this is better. */
14738 emit_move_insn (out
, addr
);
14740 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14742 /* strlensi_unroll_1 returns the address of the zero at the end of
14743 the string, like memchr(), so compute the length by subtracting
14744 the start address. */
14746 emit_insn (gen_subdi3 (out
, out
, addr
));
14748 emit_insn (gen_subsi3 (out
, out
, addr
));
14753 scratch2
= gen_reg_rtx (Pmode
);
14754 scratch3
= gen_reg_rtx (Pmode
);
14755 scratch4
= force_reg (Pmode
, constm1_rtx
);
14757 emit_move_insn (scratch3
, addr
);
14758 eoschar
= force_reg (QImode
, eoschar
);
14760 src
= replace_equiv_address_nv (src
, scratch3
);
14762 /* If .md starts supporting :P, this can be done in .md. */
14763 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14764 scratch4
), UNSPEC_SCAS
);
14765 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14768 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14769 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14773 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14774 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14780 /* For given symbol (function) construct code to compute address of it's PLT
14781 entry in large x86-64 PIC model. */
14783 construct_plt_address (rtx symbol
)
14785 rtx tmp
= gen_reg_rtx (Pmode
);
14786 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
14788 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
14789 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
14791 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
14792 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
14797 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14798 rtx callarg2 ATTRIBUTE_UNUSED
,
14799 rtx pop
, int sibcall
)
14801 rtx use
= NULL
, call
;
14803 if (pop
== const0_rtx
)
14805 gcc_assert (!TARGET_64BIT
|| !pop
);
14807 if (TARGET_MACHO
&& !TARGET_64BIT
)
14810 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14811 fnaddr
= machopic_indirect_call_target (fnaddr
);
14816 /* Static functions and indirect calls don't need the pic register. */
14817 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
14818 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14819 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14820 use_reg (&use
, pic_offset_table_rtx
);
14823 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14825 rtx al
= gen_rtx_REG (QImode
, 0);
14826 emit_move_insn (al
, callarg2
);
14827 use_reg (&use
, al
);
14830 if (ix86_cmodel
== CM_LARGE_PIC
14831 && GET_CODE (fnaddr
) == MEM
14832 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14833 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
14834 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
14835 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14837 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14838 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14840 if (sibcall
&& TARGET_64BIT
14841 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
14844 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14845 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
14846 emit_move_insn (fnaddr
, addr
);
14847 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14850 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
14852 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
14855 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
14856 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
14857 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
14860 call
= emit_call_insn (call
);
14862 CALL_INSN_FUNCTION_USAGE (call
) = use
;
14866 /* Clear stack slot assignments remembered from previous functions.
14867 This is called from INIT_EXPANDERS once before RTL is emitted for each
14870 static struct machine_function
*
14871 ix86_init_machine_status (void)
14873 struct machine_function
*f
;
14875 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
14876 f
->use_fast_prologue_epilogue_nregs
= -1;
14877 f
->tls_descriptor_call_expanded_p
= 0;
14882 /* Return a MEM corresponding to a stack slot with mode MODE.
14883 Allocate a new slot if necessary.
14885 The RTL for a function can have several slots available: N is
14886 which slot to use. */
14889 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
14891 struct stack_local_entry
*s
;
14893 gcc_assert (n
< MAX_386_STACK_LOCALS
);
14895 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
14896 if (s
->mode
== mode
&& s
->n
== n
)
14897 return copy_rtx (s
->rtl
);
14899 s
= (struct stack_local_entry
*)
14900 ggc_alloc (sizeof (struct stack_local_entry
));
14903 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
14905 s
->next
= ix86_stack_locals
;
14906 ix86_stack_locals
= s
;
14910 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14912 static GTY(()) rtx ix86_tls_symbol
;
14914 ix86_tls_get_addr (void)
14917 if (!ix86_tls_symbol
)
14919 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14920 (TARGET_ANY_GNU_TLS
14922 ? "___tls_get_addr"
14923 : "__tls_get_addr");
14926 return ix86_tls_symbol
;
14929 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14931 static GTY(()) rtx ix86_tls_module_base_symbol
;
14933 ix86_tls_module_base (void)
14936 if (!ix86_tls_module_base_symbol
)
14938 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14939 "_TLS_MODULE_BASE_");
14940 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
14941 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
14944 return ix86_tls_module_base_symbol
;
14947 /* Calculate the length of the memory address in the instruction
14948 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14951 memory_address_length (rtx addr
)
14953 struct ix86_address parts
;
14954 rtx base
, index
, disp
;
14958 if (GET_CODE (addr
) == PRE_DEC
14959 || GET_CODE (addr
) == POST_INC
14960 || GET_CODE (addr
) == PRE_MODIFY
14961 || GET_CODE (addr
) == POST_MODIFY
)
14964 ok
= ix86_decompose_address (addr
, &parts
);
14967 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14968 parts
.base
= SUBREG_REG (parts
.base
);
14969 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14970 parts
.index
= SUBREG_REG (parts
.index
);
14973 index
= parts
.index
;
14978 - esp as the base always wants an index,
14979 - ebp as the base always wants a displacement. */
14981 /* Register Indirect. */
14982 if (base
&& !index
&& !disp
)
14984 /* esp (for its index) and ebp (for its displacement) need
14985 the two-byte modrm form. */
14986 if (addr
== stack_pointer_rtx
14987 || addr
== arg_pointer_rtx
14988 || addr
== frame_pointer_rtx
14989 || addr
== hard_frame_pointer_rtx
)
14993 /* Direct Addressing. */
14994 else if (disp
&& !base
&& !index
)
14999 /* Find the length of the displacement constant. */
15002 if (base
&& satisfies_constraint_K (disp
))
15007 /* ebp always wants a displacement. */
15008 else if (base
== hard_frame_pointer_rtx
)
15011 /* An index requires the two-byte modrm form.... */
15013 /* ...like esp, which always wants an index. */
15014 || base
== stack_pointer_rtx
15015 || base
== arg_pointer_rtx
15016 || base
== frame_pointer_rtx
)
15023 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15024 is set, expect that insn have 8bit immediate alternative. */
15026 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15030 extract_insn_cached (insn
);
15031 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15032 if (CONSTANT_P (recog_data
.operand
[i
]))
15035 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15039 switch (get_attr_mode (insn
))
15050 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15055 fatal_insn ("unknown insn mode", insn
);
15061 /* Compute default value for "length_address" attribute. */
15063 ix86_attr_length_address_default (rtx insn
)
15067 if (get_attr_type (insn
) == TYPE_LEA
)
15069 rtx set
= PATTERN (insn
);
15071 if (GET_CODE (set
) == PARALLEL
)
15072 set
= XVECEXP (set
, 0, 0);
15074 gcc_assert (GET_CODE (set
) == SET
);
15076 return memory_address_length (SET_SRC (set
));
15079 extract_insn_cached (insn
);
15080 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15081 if (MEM_P (recog_data
.operand
[i
]))
15083 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15089 /* Return the maximum number of instructions a cpu can issue. */
15092 ix86_issue_rate (void)
15096 case PROCESSOR_PENTIUM
:
15100 case PROCESSOR_PENTIUMPRO
:
15101 case PROCESSOR_PENTIUM4
:
15102 case PROCESSOR_ATHLON
:
15104 case PROCESSOR_AMDFAM10
:
15105 case PROCESSOR_NOCONA
:
15106 case PROCESSOR_GENERIC32
:
15107 case PROCESSOR_GENERIC64
:
15110 case PROCESSOR_CORE2
:
15118 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15119 by DEP_INSN and nothing set by DEP_INSN. */
15122 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15126 /* Simplify the test for uninteresting insns. */
15127 if (insn_type
!= TYPE_SETCC
15128 && insn_type
!= TYPE_ICMOV
15129 && insn_type
!= TYPE_FCMOV
15130 && insn_type
!= TYPE_IBR
)
15133 if ((set
= single_set (dep_insn
)) != 0)
15135 set
= SET_DEST (set
);
15138 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15139 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15140 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15141 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15143 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15144 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15149 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15152 /* This test is true if the dependent insn reads the flags but
15153 not any other potentially set register. */
15154 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15157 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15163 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15164 address with operands set by DEP_INSN. */
15167 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15171 if (insn_type
== TYPE_LEA
15174 addr
= PATTERN (insn
);
15176 if (GET_CODE (addr
) == PARALLEL
)
15177 addr
= XVECEXP (addr
, 0, 0);
15179 gcc_assert (GET_CODE (addr
) == SET
);
15181 addr
= SET_SRC (addr
);
15186 extract_insn_cached (insn
);
15187 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15188 if (MEM_P (recog_data
.operand
[i
]))
15190 addr
= XEXP (recog_data
.operand
[i
], 0);
15197 return modified_in_p (addr
, dep_insn
);
15201 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15203 enum attr_type insn_type
, dep_insn_type
;
15204 enum attr_memory memory
;
15206 int dep_insn_code_number
;
15208 /* Anti and output dependencies have zero cost on all CPUs. */
15209 if (REG_NOTE_KIND (link
) != 0)
15212 dep_insn_code_number
= recog_memoized (dep_insn
);
15214 /* If we can't recognize the insns, we can't really do anything. */
15215 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15218 insn_type
= get_attr_type (insn
);
15219 dep_insn_type
= get_attr_type (dep_insn
);
15223 case PROCESSOR_PENTIUM
:
15224 /* Address Generation Interlock adds a cycle of latency. */
15225 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15228 /* ??? Compares pair with jump/setcc. */
15229 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15232 /* Floating point stores require value to be ready one cycle earlier. */
15233 if (insn_type
== TYPE_FMOV
15234 && get_attr_memory (insn
) == MEMORY_STORE
15235 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15239 case PROCESSOR_PENTIUMPRO
:
15240 memory
= get_attr_memory (insn
);
15242 /* INT->FP conversion is expensive. */
15243 if (get_attr_fp_int_src (dep_insn
))
15246 /* There is one cycle extra latency between an FP op and a store. */
15247 if (insn_type
== TYPE_FMOV
15248 && (set
= single_set (dep_insn
)) != NULL_RTX
15249 && (set2
= single_set (insn
)) != NULL_RTX
15250 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15251 && MEM_P (SET_DEST (set2
)))
15254 /* Show ability of reorder buffer to hide latency of load by executing
15255 in parallel with previous instruction in case
15256 previous instruction is not needed to compute the address. */
15257 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15258 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15260 /* Claim moves to take one cycle, as core can issue one load
15261 at time and the next load can start cycle later. */
15262 if (dep_insn_type
== TYPE_IMOV
15263 || dep_insn_type
== TYPE_FMOV
)
15271 memory
= get_attr_memory (insn
);
15273 /* The esp dependency is resolved before the instruction is really
15275 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15276 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15279 /* INT->FP conversion is expensive. */
15280 if (get_attr_fp_int_src (dep_insn
))
15283 /* Show ability of reorder buffer to hide latency of load by executing
15284 in parallel with previous instruction in case
15285 previous instruction is not needed to compute the address. */
15286 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15287 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15289 /* Claim moves to take one cycle, as core can issue one load
15290 at time and the next load can start cycle later. */
15291 if (dep_insn_type
== TYPE_IMOV
15292 || dep_insn_type
== TYPE_FMOV
)
15301 case PROCESSOR_ATHLON
:
15303 case PROCESSOR_AMDFAM10
:
15304 case PROCESSOR_GENERIC32
:
15305 case PROCESSOR_GENERIC64
:
15306 memory
= get_attr_memory (insn
);
15308 /* Show ability of reorder buffer to hide latency of load by executing
15309 in parallel with previous instruction in case
15310 previous instruction is not needed to compute the address. */
15311 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15312 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15314 enum attr_unit unit
= get_attr_unit (insn
);
15317 /* Because of the difference between the length of integer and
15318 floating unit pipeline preparation stages, the memory operands
15319 for floating point are cheaper.
15321 ??? For Athlon it the difference is most probably 2. */
15322 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15325 loadcost
= TARGET_ATHLON
? 2 : 0;
15327 if (cost
>= loadcost
)
15340 /* How many alternative schedules to try. This should be as wide as the
15341 scheduling freedom in the DFA, but no wider. Making this value too
15342 large results extra work for the scheduler. */
15345 ia32_multipass_dfa_lookahead (void)
15347 if (ix86_tune
== PROCESSOR_PENTIUM
)
15350 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15351 || ix86_tune
== PROCESSOR_K6
)
15359 /* Compute the alignment given to a constant that is being placed in memory.
15360 EXP is the constant and ALIGN is the alignment that the object would
15362 The value of this function is used instead of that alignment to align
15366 ix86_constant_alignment (tree exp
, int align
)
15368 if (TREE_CODE (exp
) == REAL_CST
)
15370 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15372 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15375 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15376 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15377 return BITS_PER_WORD
;
15382 /* Compute the alignment for a static variable.
15383 TYPE is the data type, and ALIGN is the alignment that
15384 the object would ordinarily have. The value of this function is used
15385 instead of that alignment to align the object. */
15388 ix86_data_alignment (tree type
, int align
)
15390 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15392 if (AGGREGATE_TYPE_P (type
)
15393 && TYPE_SIZE (type
)
15394 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15395 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15396 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15397 && align
< max_align
)
15400 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15401 to 16byte boundary. */
15404 if (AGGREGATE_TYPE_P (type
)
15405 && TYPE_SIZE (type
)
15406 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15407 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15408 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15412 if (TREE_CODE (type
) == ARRAY_TYPE
)
15414 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15416 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15419 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15422 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15424 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15427 else if ((TREE_CODE (type
) == RECORD_TYPE
15428 || TREE_CODE (type
) == UNION_TYPE
15429 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15430 && TYPE_FIELDS (type
))
15432 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15434 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15437 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15438 || TREE_CODE (type
) == INTEGER_TYPE
)
15440 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15442 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15449 /* Compute the alignment for a local variable.
15450 TYPE is the data type, and ALIGN is the alignment that
15451 the object would ordinarily have. The value of this macro is used
15452 instead of that alignment to align the object. */
15455 ix86_local_alignment (tree type
, int align
)
15457 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15458 to 16byte boundary. */
15461 if (AGGREGATE_TYPE_P (type
)
15462 && TYPE_SIZE (type
)
15463 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15464 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15465 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15468 if (TREE_CODE (type
) == ARRAY_TYPE
)
15470 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15472 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15475 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15477 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15479 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15482 else if ((TREE_CODE (type
) == RECORD_TYPE
15483 || TREE_CODE (type
) == UNION_TYPE
15484 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15485 && TYPE_FIELDS (type
))
15487 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15489 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15492 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15493 || TREE_CODE (type
) == INTEGER_TYPE
)
15496 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15498 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15504 /* Emit RTL insns to initialize the variable parts of a trampoline.
15505 FNADDR is an RTX for the address of the function's pure code.
15506 CXT is an RTX for the static chain value for the function. */
15508 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15512 /* Compute offset from the end of the jmp to the target function. */
15513 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15514 plus_constant (tramp
, 10),
15515 NULL_RTX
, 1, OPTAB_DIRECT
);
15516 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15517 gen_int_mode (0xb9, QImode
));
15518 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15519 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15520 gen_int_mode (0xe9, QImode
));
15521 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15526 /* Try to load address using shorter movl instead of movabs.
15527 We may want to support movq for kernel mode, but kernel does not use
15528 trampolines at the moment. */
15529 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15531 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15532 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15533 gen_int_mode (0xbb41, HImode
));
15534 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15535 gen_lowpart (SImode
, fnaddr
));
15540 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15541 gen_int_mode (0xbb49, HImode
));
15542 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15546 /* Load static chain using movabs to r10. */
15547 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15548 gen_int_mode (0xba49, HImode
));
15549 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15552 /* Jump to the r11 */
15553 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15554 gen_int_mode (0xff49, HImode
));
15555 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15556 gen_int_mode (0xe3, QImode
));
15558 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15561 #ifdef ENABLE_EXECUTE_STACK
15562 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15563 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15567 /* Codes for all the SSE/MMX builtins. */
15570 IX86_BUILTIN_ADDPS
,
15571 IX86_BUILTIN_ADDSS
,
15572 IX86_BUILTIN_DIVPS
,
15573 IX86_BUILTIN_DIVSS
,
15574 IX86_BUILTIN_MULPS
,
15575 IX86_BUILTIN_MULSS
,
15576 IX86_BUILTIN_SUBPS
,
15577 IX86_BUILTIN_SUBSS
,
15579 IX86_BUILTIN_CMPEQPS
,
15580 IX86_BUILTIN_CMPLTPS
,
15581 IX86_BUILTIN_CMPLEPS
,
15582 IX86_BUILTIN_CMPGTPS
,
15583 IX86_BUILTIN_CMPGEPS
,
15584 IX86_BUILTIN_CMPNEQPS
,
15585 IX86_BUILTIN_CMPNLTPS
,
15586 IX86_BUILTIN_CMPNLEPS
,
15587 IX86_BUILTIN_CMPNGTPS
,
15588 IX86_BUILTIN_CMPNGEPS
,
15589 IX86_BUILTIN_CMPORDPS
,
15590 IX86_BUILTIN_CMPUNORDPS
,
15591 IX86_BUILTIN_CMPEQSS
,
15592 IX86_BUILTIN_CMPLTSS
,
15593 IX86_BUILTIN_CMPLESS
,
15594 IX86_BUILTIN_CMPNEQSS
,
15595 IX86_BUILTIN_CMPNLTSS
,
15596 IX86_BUILTIN_CMPNLESS
,
15597 IX86_BUILTIN_CMPNGTSS
,
15598 IX86_BUILTIN_CMPNGESS
,
15599 IX86_BUILTIN_CMPORDSS
,
15600 IX86_BUILTIN_CMPUNORDSS
,
15602 IX86_BUILTIN_COMIEQSS
,
15603 IX86_BUILTIN_COMILTSS
,
15604 IX86_BUILTIN_COMILESS
,
15605 IX86_BUILTIN_COMIGTSS
,
15606 IX86_BUILTIN_COMIGESS
,
15607 IX86_BUILTIN_COMINEQSS
,
15608 IX86_BUILTIN_UCOMIEQSS
,
15609 IX86_BUILTIN_UCOMILTSS
,
15610 IX86_BUILTIN_UCOMILESS
,
15611 IX86_BUILTIN_UCOMIGTSS
,
15612 IX86_BUILTIN_UCOMIGESS
,
15613 IX86_BUILTIN_UCOMINEQSS
,
15615 IX86_BUILTIN_CVTPI2PS
,
15616 IX86_BUILTIN_CVTPS2PI
,
15617 IX86_BUILTIN_CVTSI2SS
,
15618 IX86_BUILTIN_CVTSI642SS
,
15619 IX86_BUILTIN_CVTSS2SI
,
15620 IX86_BUILTIN_CVTSS2SI64
,
15621 IX86_BUILTIN_CVTTPS2PI
,
15622 IX86_BUILTIN_CVTTSS2SI
,
15623 IX86_BUILTIN_CVTTSS2SI64
,
15625 IX86_BUILTIN_MAXPS
,
15626 IX86_BUILTIN_MAXSS
,
15627 IX86_BUILTIN_MINPS
,
15628 IX86_BUILTIN_MINSS
,
15630 IX86_BUILTIN_LOADUPS
,
15631 IX86_BUILTIN_STOREUPS
,
15632 IX86_BUILTIN_MOVSS
,
15634 IX86_BUILTIN_MOVHLPS
,
15635 IX86_BUILTIN_MOVLHPS
,
15636 IX86_BUILTIN_LOADHPS
,
15637 IX86_BUILTIN_LOADLPS
,
15638 IX86_BUILTIN_STOREHPS
,
15639 IX86_BUILTIN_STORELPS
,
15641 IX86_BUILTIN_MASKMOVQ
,
15642 IX86_BUILTIN_MOVMSKPS
,
15643 IX86_BUILTIN_PMOVMSKB
,
15645 IX86_BUILTIN_MOVNTPS
,
15646 IX86_BUILTIN_MOVNTQ
,
15648 IX86_BUILTIN_LOADDQU
,
15649 IX86_BUILTIN_STOREDQU
,
15651 IX86_BUILTIN_PACKSSWB
,
15652 IX86_BUILTIN_PACKSSDW
,
15653 IX86_BUILTIN_PACKUSWB
,
15655 IX86_BUILTIN_PADDB
,
15656 IX86_BUILTIN_PADDW
,
15657 IX86_BUILTIN_PADDD
,
15658 IX86_BUILTIN_PADDQ
,
15659 IX86_BUILTIN_PADDSB
,
15660 IX86_BUILTIN_PADDSW
,
15661 IX86_BUILTIN_PADDUSB
,
15662 IX86_BUILTIN_PADDUSW
,
15663 IX86_BUILTIN_PSUBB
,
15664 IX86_BUILTIN_PSUBW
,
15665 IX86_BUILTIN_PSUBD
,
15666 IX86_BUILTIN_PSUBQ
,
15667 IX86_BUILTIN_PSUBSB
,
15668 IX86_BUILTIN_PSUBSW
,
15669 IX86_BUILTIN_PSUBUSB
,
15670 IX86_BUILTIN_PSUBUSW
,
15673 IX86_BUILTIN_PANDN
,
15677 IX86_BUILTIN_PAVGB
,
15678 IX86_BUILTIN_PAVGW
,
15680 IX86_BUILTIN_PCMPEQB
,
15681 IX86_BUILTIN_PCMPEQW
,
15682 IX86_BUILTIN_PCMPEQD
,
15683 IX86_BUILTIN_PCMPGTB
,
15684 IX86_BUILTIN_PCMPGTW
,
15685 IX86_BUILTIN_PCMPGTD
,
15687 IX86_BUILTIN_PMADDWD
,
15689 IX86_BUILTIN_PMAXSW
,
15690 IX86_BUILTIN_PMAXUB
,
15691 IX86_BUILTIN_PMINSW
,
15692 IX86_BUILTIN_PMINUB
,
15694 IX86_BUILTIN_PMULHUW
,
15695 IX86_BUILTIN_PMULHW
,
15696 IX86_BUILTIN_PMULLW
,
15698 IX86_BUILTIN_PSADBW
,
15699 IX86_BUILTIN_PSHUFW
,
15701 IX86_BUILTIN_PSLLW
,
15702 IX86_BUILTIN_PSLLD
,
15703 IX86_BUILTIN_PSLLQ
,
15704 IX86_BUILTIN_PSRAW
,
15705 IX86_BUILTIN_PSRAD
,
15706 IX86_BUILTIN_PSRLW
,
15707 IX86_BUILTIN_PSRLD
,
15708 IX86_BUILTIN_PSRLQ
,
15709 IX86_BUILTIN_PSLLWI
,
15710 IX86_BUILTIN_PSLLDI
,
15711 IX86_BUILTIN_PSLLQI
,
15712 IX86_BUILTIN_PSRAWI
,
15713 IX86_BUILTIN_PSRADI
,
15714 IX86_BUILTIN_PSRLWI
,
15715 IX86_BUILTIN_PSRLDI
,
15716 IX86_BUILTIN_PSRLQI
,
15718 IX86_BUILTIN_PUNPCKHBW
,
15719 IX86_BUILTIN_PUNPCKHWD
,
15720 IX86_BUILTIN_PUNPCKHDQ
,
15721 IX86_BUILTIN_PUNPCKLBW
,
15722 IX86_BUILTIN_PUNPCKLWD
,
15723 IX86_BUILTIN_PUNPCKLDQ
,
15725 IX86_BUILTIN_SHUFPS
,
15727 IX86_BUILTIN_RCPPS
,
15728 IX86_BUILTIN_RCPSS
,
15729 IX86_BUILTIN_RSQRTPS
,
15730 IX86_BUILTIN_RSQRTSS
,
15731 IX86_BUILTIN_SQRTPS
,
15732 IX86_BUILTIN_SQRTSS
,
15734 IX86_BUILTIN_UNPCKHPS
,
15735 IX86_BUILTIN_UNPCKLPS
,
15737 IX86_BUILTIN_ANDPS
,
15738 IX86_BUILTIN_ANDNPS
,
15740 IX86_BUILTIN_XORPS
,
15743 IX86_BUILTIN_LDMXCSR
,
15744 IX86_BUILTIN_STMXCSR
,
15745 IX86_BUILTIN_SFENCE
,
15747 /* 3DNow! Original */
15748 IX86_BUILTIN_FEMMS
,
15749 IX86_BUILTIN_PAVGUSB
,
15750 IX86_BUILTIN_PF2ID
,
15751 IX86_BUILTIN_PFACC
,
15752 IX86_BUILTIN_PFADD
,
15753 IX86_BUILTIN_PFCMPEQ
,
15754 IX86_BUILTIN_PFCMPGE
,
15755 IX86_BUILTIN_PFCMPGT
,
15756 IX86_BUILTIN_PFMAX
,
15757 IX86_BUILTIN_PFMIN
,
15758 IX86_BUILTIN_PFMUL
,
15759 IX86_BUILTIN_PFRCP
,
15760 IX86_BUILTIN_PFRCPIT1
,
15761 IX86_BUILTIN_PFRCPIT2
,
15762 IX86_BUILTIN_PFRSQIT1
,
15763 IX86_BUILTIN_PFRSQRT
,
15764 IX86_BUILTIN_PFSUB
,
15765 IX86_BUILTIN_PFSUBR
,
15766 IX86_BUILTIN_PI2FD
,
15767 IX86_BUILTIN_PMULHRW
,
15769 /* 3DNow! Athlon Extensions */
15770 IX86_BUILTIN_PF2IW
,
15771 IX86_BUILTIN_PFNACC
,
15772 IX86_BUILTIN_PFPNACC
,
15773 IX86_BUILTIN_PI2FW
,
15774 IX86_BUILTIN_PSWAPDSI
,
15775 IX86_BUILTIN_PSWAPDSF
,
15778 IX86_BUILTIN_ADDPD
,
15779 IX86_BUILTIN_ADDSD
,
15780 IX86_BUILTIN_DIVPD
,
15781 IX86_BUILTIN_DIVSD
,
15782 IX86_BUILTIN_MULPD
,
15783 IX86_BUILTIN_MULSD
,
15784 IX86_BUILTIN_SUBPD
,
15785 IX86_BUILTIN_SUBSD
,
15787 IX86_BUILTIN_CMPEQPD
,
15788 IX86_BUILTIN_CMPLTPD
,
15789 IX86_BUILTIN_CMPLEPD
,
15790 IX86_BUILTIN_CMPGTPD
,
15791 IX86_BUILTIN_CMPGEPD
,
15792 IX86_BUILTIN_CMPNEQPD
,
15793 IX86_BUILTIN_CMPNLTPD
,
15794 IX86_BUILTIN_CMPNLEPD
,
15795 IX86_BUILTIN_CMPNGTPD
,
15796 IX86_BUILTIN_CMPNGEPD
,
15797 IX86_BUILTIN_CMPORDPD
,
15798 IX86_BUILTIN_CMPUNORDPD
,
15799 IX86_BUILTIN_CMPEQSD
,
15800 IX86_BUILTIN_CMPLTSD
,
15801 IX86_BUILTIN_CMPLESD
,
15802 IX86_BUILTIN_CMPNEQSD
,
15803 IX86_BUILTIN_CMPNLTSD
,
15804 IX86_BUILTIN_CMPNLESD
,
15805 IX86_BUILTIN_CMPORDSD
,
15806 IX86_BUILTIN_CMPUNORDSD
,
15808 IX86_BUILTIN_COMIEQSD
,
15809 IX86_BUILTIN_COMILTSD
,
15810 IX86_BUILTIN_COMILESD
,
15811 IX86_BUILTIN_COMIGTSD
,
15812 IX86_BUILTIN_COMIGESD
,
15813 IX86_BUILTIN_COMINEQSD
,
15814 IX86_BUILTIN_UCOMIEQSD
,
15815 IX86_BUILTIN_UCOMILTSD
,
15816 IX86_BUILTIN_UCOMILESD
,
15817 IX86_BUILTIN_UCOMIGTSD
,
15818 IX86_BUILTIN_UCOMIGESD
,
15819 IX86_BUILTIN_UCOMINEQSD
,
15821 IX86_BUILTIN_MAXPD
,
15822 IX86_BUILTIN_MAXSD
,
15823 IX86_BUILTIN_MINPD
,
15824 IX86_BUILTIN_MINSD
,
15826 IX86_BUILTIN_ANDPD
,
15827 IX86_BUILTIN_ANDNPD
,
15829 IX86_BUILTIN_XORPD
,
15831 IX86_BUILTIN_SQRTPD
,
15832 IX86_BUILTIN_SQRTSD
,
15834 IX86_BUILTIN_UNPCKHPD
,
15835 IX86_BUILTIN_UNPCKLPD
,
15837 IX86_BUILTIN_SHUFPD
,
15839 IX86_BUILTIN_LOADUPD
,
15840 IX86_BUILTIN_STOREUPD
,
15841 IX86_BUILTIN_MOVSD
,
15843 IX86_BUILTIN_LOADHPD
,
15844 IX86_BUILTIN_LOADLPD
,
15846 IX86_BUILTIN_CVTDQ2PD
,
15847 IX86_BUILTIN_CVTDQ2PS
,
15849 IX86_BUILTIN_CVTPD2DQ
,
15850 IX86_BUILTIN_CVTPD2PI
,
15851 IX86_BUILTIN_CVTPD2PS
,
15852 IX86_BUILTIN_CVTTPD2DQ
,
15853 IX86_BUILTIN_CVTTPD2PI
,
15855 IX86_BUILTIN_CVTPI2PD
,
15856 IX86_BUILTIN_CVTSI2SD
,
15857 IX86_BUILTIN_CVTSI642SD
,
15859 IX86_BUILTIN_CVTSD2SI
,
15860 IX86_BUILTIN_CVTSD2SI64
,
15861 IX86_BUILTIN_CVTSD2SS
,
15862 IX86_BUILTIN_CVTSS2SD
,
15863 IX86_BUILTIN_CVTTSD2SI
,
15864 IX86_BUILTIN_CVTTSD2SI64
,
15866 IX86_BUILTIN_CVTPS2DQ
,
15867 IX86_BUILTIN_CVTPS2PD
,
15868 IX86_BUILTIN_CVTTPS2DQ
,
15870 IX86_BUILTIN_MOVNTI
,
15871 IX86_BUILTIN_MOVNTPD
,
15872 IX86_BUILTIN_MOVNTDQ
,
15875 IX86_BUILTIN_MASKMOVDQU
,
15876 IX86_BUILTIN_MOVMSKPD
,
15877 IX86_BUILTIN_PMOVMSKB128
,
15879 IX86_BUILTIN_PACKSSWB128
,
15880 IX86_BUILTIN_PACKSSDW128
,
15881 IX86_BUILTIN_PACKUSWB128
,
15883 IX86_BUILTIN_PADDB128
,
15884 IX86_BUILTIN_PADDW128
,
15885 IX86_BUILTIN_PADDD128
,
15886 IX86_BUILTIN_PADDQ128
,
15887 IX86_BUILTIN_PADDSB128
,
15888 IX86_BUILTIN_PADDSW128
,
15889 IX86_BUILTIN_PADDUSB128
,
15890 IX86_BUILTIN_PADDUSW128
,
15891 IX86_BUILTIN_PSUBB128
,
15892 IX86_BUILTIN_PSUBW128
,
15893 IX86_BUILTIN_PSUBD128
,
15894 IX86_BUILTIN_PSUBQ128
,
15895 IX86_BUILTIN_PSUBSB128
,
15896 IX86_BUILTIN_PSUBSW128
,
15897 IX86_BUILTIN_PSUBUSB128
,
15898 IX86_BUILTIN_PSUBUSW128
,
15900 IX86_BUILTIN_PAND128
,
15901 IX86_BUILTIN_PANDN128
,
15902 IX86_BUILTIN_POR128
,
15903 IX86_BUILTIN_PXOR128
,
15905 IX86_BUILTIN_PAVGB128
,
15906 IX86_BUILTIN_PAVGW128
,
15908 IX86_BUILTIN_PCMPEQB128
,
15909 IX86_BUILTIN_PCMPEQW128
,
15910 IX86_BUILTIN_PCMPEQD128
,
15911 IX86_BUILTIN_PCMPGTB128
,
15912 IX86_BUILTIN_PCMPGTW128
,
15913 IX86_BUILTIN_PCMPGTD128
,
15915 IX86_BUILTIN_PMADDWD128
,
15917 IX86_BUILTIN_PMAXSW128
,
15918 IX86_BUILTIN_PMAXUB128
,
15919 IX86_BUILTIN_PMINSW128
,
15920 IX86_BUILTIN_PMINUB128
,
15922 IX86_BUILTIN_PMULUDQ
,
15923 IX86_BUILTIN_PMULUDQ128
,
15924 IX86_BUILTIN_PMULHUW128
,
15925 IX86_BUILTIN_PMULHW128
,
15926 IX86_BUILTIN_PMULLW128
,
15928 IX86_BUILTIN_PSADBW128
,
15929 IX86_BUILTIN_PSHUFHW
,
15930 IX86_BUILTIN_PSHUFLW
,
15931 IX86_BUILTIN_PSHUFD
,
15933 IX86_BUILTIN_PSLLDQI128
,
15934 IX86_BUILTIN_PSLLWI128
,
15935 IX86_BUILTIN_PSLLDI128
,
15936 IX86_BUILTIN_PSLLQI128
,
15937 IX86_BUILTIN_PSRAWI128
,
15938 IX86_BUILTIN_PSRADI128
,
15939 IX86_BUILTIN_PSRLDQI128
,
15940 IX86_BUILTIN_PSRLWI128
,
15941 IX86_BUILTIN_PSRLDI128
,
15942 IX86_BUILTIN_PSRLQI128
,
15944 IX86_BUILTIN_PSLLDQ128
,
15945 IX86_BUILTIN_PSLLW128
,
15946 IX86_BUILTIN_PSLLD128
,
15947 IX86_BUILTIN_PSLLQ128
,
15948 IX86_BUILTIN_PSRAW128
,
15949 IX86_BUILTIN_PSRAD128
,
15950 IX86_BUILTIN_PSRLW128
,
15951 IX86_BUILTIN_PSRLD128
,
15952 IX86_BUILTIN_PSRLQ128
,
15954 IX86_BUILTIN_PUNPCKHBW128
,
15955 IX86_BUILTIN_PUNPCKHWD128
,
15956 IX86_BUILTIN_PUNPCKHDQ128
,
15957 IX86_BUILTIN_PUNPCKHQDQ128
,
15958 IX86_BUILTIN_PUNPCKLBW128
,
15959 IX86_BUILTIN_PUNPCKLWD128
,
15960 IX86_BUILTIN_PUNPCKLDQ128
,
15961 IX86_BUILTIN_PUNPCKLQDQ128
,
15963 IX86_BUILTIN_CLFLUSH
,
15964 IX86_BUILTIN_MFENCE
,
15965 IX86_BUILTIN_LFENCE
,
15967 /* Prescott New Instructions. */
15968 IX86_BUILTIN_ADDSUBPS
,
15969 IX86_BUILTIN_HADDPS
,
15970 IX86_BUILTIN_HSUBPS
,
15971 IX86_BUILTIN_MOVSHDUP
,
15972 IX86_BUILTIN_MOVSLDUP
,
15973 IX86_BUILTIN_ADDSUBPD
,
15974 IX86_BUILTIN_HADDPD
,
15975 IX86_BUILTIN_HSUBPD
,
15976 IX86_BUILTIN_LDDQU
,
15978 IX86_BUILTIN_MONITOR
,
15979 IX86_BUILTIN_MWAIT
,
15982 IX86_BUILTIN_PHADDW
,
15983 IX86_BUILTIN_PHADDD
,
15984 IX86_BUILTIN_PHADDSW
,
15985 IX86_BUILTIN_PHSUBW
,
15986 IX86_BUILTIN_PHSUBD
,
15987 IX86_BUILTIN_PHSUBSW
,
15988 IX86_BUILTIN_PMADDUBSW
,
15989 IX86_BUILTIN_PMULHRSW
,
15990 IX86_BUILTIN_PSHUFB
,
15991 IX86_BUILTIN_PSIGNB
,
15992 IX86_BUILTIN_PSIGNW
,
15993 IX86_BUILTIN_PSIGND
,
15994 IX86_BUILTIN_PALIGNR
,
15995 IX86_BUILTIN_PABSB
,
15996 IX86_BUILTIN_PABSW
,
15997 IX86_BUILTIN_PABSD
,
15999 IX86_BUILTIN_PHADDW128
,
16000 IX86_BUILTIN_PHADDD128
,
16001 IX86_BUILTIN_PHADDSW128
,
16002 IX86_BUILTIN_PHSUBW128
,
16003 IX86_BUILTIN_PHSUBD128
,
16004 IX86_BUILTIN_PHSUBSW128
,
16005 IX86_BUILTIN_PMADDUBSW128
,
16006 IX86_BUILTIN_PMULHRSW128
,
16007 IX86_BUILTIN_PSHUFB128
,
16008 IX86_BUILTIN_PSIGNB128
,
16009 IX86_BUILTIN_PSIGNW128
,
16010 IX86_BUILTIN_PSIGND128
,
16011 IX86_BUILTIN_PALIGNR128
,
16012 IX86_BUILTIN_PABSB128
,
16013 IX86_BUILTIN_PABSW128
,
16014 IX86_BUILTIN_PABSD128
,
16016 /* AMDFAM10 - SSE4A New Instructions. */
16017 IX86_BUILTIN_MOVNTSD
,
16018 IX86_BUILTIN_MOVNTSS
,
16019 IX86_BUILTIN_EXTRQI
,
16020 IX86_BUILTIN_EXTRQ
,
16021 IX86_BUILTIN_INSERTQI
,
16022 IX86_BUILTIN_INSERTQ
,
16024 IX86_BUILTIN_VEC_INIT_V2SI
,
16025 IX86_BUILTIN_VEC_INIT_V4HI
,
16026 IX86_BUILTIN_VEC_INIT_V8QI
,
16027 IX86_BUILTIN_VEC_EXT_V2DF
,
16028 IX86_BUILTIN_VEC_EXT_V2DI
,
16029 IX86_BUILTIN_VEC_EXT_V4SF
,
16030 IX86_BUILTIN_VEC_EXT_V4SI
,
16031 IX86_BUILTIN_VEC_EXT_V8HI
,
16032 IX86_BUILTIN_VEC_EXT_V2SI
,
16033 IX86_BUILTIN_VEC_EXT_V4HI
,
16034 IX86_BUILTIN_VEC_SET_V8HI
,
16035 IX86_BUILTIN_VEC_SET_V4HI
,
16040 /* Table for the ix86 builtin decls. */
16041 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16043 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16044 * if the target_flags include one of MASK. Stores the function decl
16045 * in the ix86_builtins array.
16046 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16049 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16051 tree decl
= NULL_TREE
;
16053 if (mask
& target_flags
16054 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16056 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16058 ix86_builtins
[(int) code
] = decl
;
16064 /* Like def_builtin, but also marks the function decl "const". */
16067 def_builtin_const (int mask
, const char *name
, tree type
,
16068 enum ix86_builtins code
)
16070 tree decl
= def_builtin (mask
, name
, type
, code
);
16072 TREE_READONLY (decl
) = 1;
16076 /* Bits for builtin_description.flag. */
16078 /* Set when we don't support the comparison natively, and should
16079 swap_comparison in order to support it. */
16080 #define BUILTIN_DESC_SWAP_OPERANDS 1
16082 struct builtin_description
16084 const unsigned int mask
;
16085 const enum insn_code icode
;
16086 const char *const name
;
16087 const enum ix86_builtins code
;
16088 const enum rtx_code comparison
;
16089 const unsigned int flag
;
16092 static const struct builtin_description bdesc_comi
[] =
16094 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16095 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16096 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16097 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16098 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16099 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16100 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16101 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16102 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16103 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16104 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16105 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16106 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16107 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16108 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16109 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16110 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16111 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16112 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16113 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16114 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16115 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16116 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16117 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16120 static const struct builtin_description bdesc_2arg
[] =
16123 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16124 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16125 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16126 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16127 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16128 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16129 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16130 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16132 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16133 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16134 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16135 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16136 BUILTIN_DESC_SWAP_OPERANDS
},
16137 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16138 BUILTIN_DESC_SWAP_OPERANDS
},
16139 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16140 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16141 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16142 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16143 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16144 BUILTIN_DESC_SWAP_OPERANDS
},
16145 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16146 BUILTIN_DESC_SWAP_OPERANDS
},
16147 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16148 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16149 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16150 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16151 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16152 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16153 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16154 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16155 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16156 BUILTIN_DESC_SWAP_OPERANDS
},
16157 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16158 BUILTIN_DESC_SWAP_OPERANDS
},
16159 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16161 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16162 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16163 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16164 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16166 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16167 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16168 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16169 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16171 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16172 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16173 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16174 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16175 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16178 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16179 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16180 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16181 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16182 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16183 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16184 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16185 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16187 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16188 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16189 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16190 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16191 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16192 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16193 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16194 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16196 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16197 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16198 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16200 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16201 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16202 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16203 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16205 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16206 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16208 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16209 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16210 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16211 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16212 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16213 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16215 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16216 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16217 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16218 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16220 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16221 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16222 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16223 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16224 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16225 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16228 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16229 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16230 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16232 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16233 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16234 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16236 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16237 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16238 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16239 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16240 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16241 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16243 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16244 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16245 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16246 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16247 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16248 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16250 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16251 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16252 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16253 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16255 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16256 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16259 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16260 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16261 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16262 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16263 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16264 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16265 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16266 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16268 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16269 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16270 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16271 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16272 BUILTIN_DESC_SWAP_OPERANDS
},
16273 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16274 BUILTIN_DESC_SWAP_OPERANDS
},
16275 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16276 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16277 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16278 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16279 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16280 BUILTIN_DESC_SWAP_OPERANDS
},
16281 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16282 BUILTIN_DESC_SWAP_OPERANDS
},
16283 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16284 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16285 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16286 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16287 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16288 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16289 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16290 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16291 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16293 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16294 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16295 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16296 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16298 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16299 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16300 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16301 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16303 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16304 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16305 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16308 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16309 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16310 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16311 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16312 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16313 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16314 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16315 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16317 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16318 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16319 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16320 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16321 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16322 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16323 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16324 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16326 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16327 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16329 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16330 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16331 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16332 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16334 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16335 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16337 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16338 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16339 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16340 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16341 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16342 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16344 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16345 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16346 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16347 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16349 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16350 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16351 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16352 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16353 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16354 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16355 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16356 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16358 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16359 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16360 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16362 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16363 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16365 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16366 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16368 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16369 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16370 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16372 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16373 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16374 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16376 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16377 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16379 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16381 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16382 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16383 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16384 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16387 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16388 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16389 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16390 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16391 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16392 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16395 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16396 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16397 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16398 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16399 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16400 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16401 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16402 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16403 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16404 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16405 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16406 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16407 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16408 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16409 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16410 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16411 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16412 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16413 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16414 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16415 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16416 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16417 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16418 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16421 static const struct builtin_description bdesc_1arg
[] =
16423 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16424 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16426 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16427 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16428 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16430 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16431 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16432 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16433 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16434 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16435 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16437 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16438 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16440 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16442 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16443 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16445 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16446 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16447 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16448 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16449 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16451 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16453 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16454 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16455 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16456 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16458 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16459 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16460 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16463 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16464 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16467 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16468 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16469 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16470 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16471 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16472 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16475 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16476 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16479 ix86_init_mmx_sse_builtins (void)
16481 const struct builtin_description
* d
;
16484 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16485 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16486 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16487 tree V2DI_type_node
16488 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16489 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16490 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16491 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16492 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16493 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16494 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16496 tree pchar_type_node
= build_pointer_type (char_type_node
);
16497 tree pcchar_type_node
= build_pointer_type (
16498 build_type_variant (char_type_node
, 1, 0));
16499 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16500 tree pcfloat_type_node
= build_pointer_type (
16501 build_type_variant (float_type_node
, 1, 0));
16502 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16503 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16504 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16507 tree int_ftype_v4sf_v4sf
16508 = build_function_type_list (integer_type_node
,
16509 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16510 tree v4si_ftype_v4sf_v4sf
16511 = build_function_type_list (V4SI_type_node
,
16512 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16513 /* MMX/SSE/integer conversions. */
16514 tree int_ftype_v4sf
16515 = build_function_type_list (integer_type_node
,
16516 V4SF_type_node
, NULL_TREE
);
16517 tree int64_ftype_v4sf
16518 = build_function_type_list (long_long_integer_type_node
,
16519 V4SF_type_node
, NULL_TREE
);
16520 tree int_ftype_v8qi
16521 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16522 tree v4sf_ftype_v4sf_int
16523 = build_function_type_list (V4SF_type_node
,
16524 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16525 tree v4sf_ftype_v4sf_int64
16526 = build_function_type_list (V4SF_type_node
,
16527 V4SF_type_node
, long_long_integer_type_node
,
16529 tree v4sf_ftype_v4sf_v2si
16530 = build_function_type_list (V4SF_type_node
,
16531 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16533 /* Miscellaneous. */
16534 tree v8qi_ftype_v4hi_v4hi
16535 = build_function_type_list (V8QI_type_node
,
16536 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16537 tree v4hi_ftype_v2si_v2si
16538 = build_function_type_list (V4HI_type_node
,
16539 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16540 tree v4sf_ftype_v4sf_v4sf_int
16541 = build_function_type_list (V4SF_type_node
,
16542 V4SF_type_node
, V4SF_type_node
,
16543 integer_type_node
, NULL_TREE
);
16544 tree v2si_ftype_v4hi_v4hi
16545 = build_function_type_list (V2SI_type_node
,
16546 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16547 tree v4hi_ftype_v4hi_int
16548 = build_function_type_list (V4HI_type_node
,
16549 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16550 tree v4hi_ftype_v4hi_di
16551 = build_function_type_list (V4HI_type_node
,
16552 V4HI_type_node
, long_long_unsigned_type_node
,
16554 tree v2si_ftype_v2si_di
16555 = build_function_type_list (V2SI_type_node
,
16556 V2SI_type_node
, long_long_unsigned_type_node
,
16558 tree void_ftype_void
16559 = build_function_type (void_type_node
, void_list_node
);
16560 tree void_ftype_unsigned
16561 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16562 tree void_ftype_unsigned_unsigned
16563 = build_function_type_list (void_type_node
, unsigned_type_node
,
16564 unsigned_type_node
, NULL_TREE
);
16565 tree void_ftype_pcvoid_unsigned_unsigned
16566 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16567 unsigned_type_node
, unsigned_type_node
,
16569 tree unsigned_ftype_void
16570 = build_function_type (unsigned_type_node
, void_list_node
);
16571 tree v2si_ftype_v4sf
16572 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16573 /* Loads/stores. */
16574 tree void_ftype_v8qi_v8qi_pchar
16575 = build_function_type_list (void_type_node
,
16576 V8QI_type_node
, V8QI_type_node
,
16577 pchar_type_node
, NULL_TREE
);
16578 tree v4sf_ftype_pcfloat
16579 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16580 /* @@@ the type is bogus */
16581 tree v4sf_ftype_v4sf_pv2si
16582 = build_function_type_list (V4SF_type_node
,
16583 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16584 tree void_ftype_pv2si_v4sf
16585 = build_function_type_list (void_type_node
,
16586 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16587 tree void_ftype_pfloat_v4sf
16588 = build_function_type_list (void_type_node
,
16589 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16590 tree void_ftype_pdi_di
16591 = build_function_type_list (void_type_node
,
16592 pdi_type_node
, long_long_unsigned_type_node
,
16594 tree void_ftype_pv2di_v2di
16595 = build_function_type_list (void_type_node
,
16596 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16597 /* Normal vector unops. */
16598 tree v4sf_ftype_v4sf
16599 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16600 tree v16qi_ftype_v16qi
16601 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16602 tree v8hi_ftype_v8hi
16603 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16604 tree v4si_ftype_v4si
16605 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16606 tree v8qi_ftype_v8qi
16607 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16608 tree v4hi_ftype_v4hi
16609 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16611 /* Normal vector binops. */
16612 tree v4sf_ftype_v4sf_v4sf
16613 = build_function_type_list (V4SF_type_node
,
16614 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16615 tree v8qi_ftype_v8qi_v8qi
16616 = build_function_type_list (V8QI_type_node
,
16617 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16618 tree v4hi_ftype_v4hi_v4hi
16619 = build_function_type_list (V4HI_type_node
,
16620 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16621 tree v2si_ftype_v2si_v2si
16622 = build_function_type_list (V2SI_type_node
,
16623 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16624 tree di_ftype_di_di
16625 = build_function_type_list (long_long_unsigned_type_node
,
16626 long_long_unsigned_type_node
,
16627 long_long_unsigned_type_node
, NULL_TREE
);
16629 tree di_ftype_di_di_int
16630 = build_function_type_list (long_long_unsigned_type_node
,
16631 long_long_unsigned_type_node
,
16632 long_long_unsigned_type_node
,
16633 integer_type_node
, NULL_TREE
);
16635 tree v2si_ftype_v2sf
16636 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16637 tree v2sf_ftype_v2si
16638 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16639 tree v2si_ftype_v2si
16640 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16641 tree v2sf_ftype_v2sf
16642 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16643 tree v2sf_ftype_v2sf_v2sf
16644 = build_function_type_list (V2SF_type_node
,
16645 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16646 tree v2si_ftype_v2sf_v2sf
16647 = build_function_type_list (V2SI_type_node
,
16648 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16649 tree pint_type_node
= build_pointer_type (integer_type_node
);
16650 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16651 tree pcdouble_type_node
= build_pointer_type (
16652 build_type_variant (double_type_node
, 1, 0));
16653 tree int_ftype_v2df_v2df
16654 = build_function_type_list (integer_type_node
,
16655 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16657 tree void_ftype_pcvoid
16658 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16659 tree v4sf_ftype_v4si
16660 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16661 tree v4si_ftype_v4sf
16662 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16663 tree v2df_ftype_v4si
16664 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16665 tree v4si_ftype_v2df
16666 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16667 tree v2si_ftype_v2df
16668 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16669 tree v4sf_ftype_v2df
16670 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16671 tree v2df_ftype_v2si
16672 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16673 tree v2df_ftype_v4sf
16674 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16675 tree int_ftype_v2df
16676 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16677 tree int64_ftype_v2df
16678 = build_function_type_list (long_long_integer_type_node
,
16679 V2DF_type_node
, NULL_TREE
);
16680 tree v2df_ftype_v2df_int
16681 = build_function_type_list (V2DF_type_node
,
16682 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16683 tree v2df_ftype_v2df_int64
16684 = build_function_type_list (V2DF_type_node
,
16685 V2DF_type_node
, long_long_integer_type_node
,
16687 tree v4sf_ftype_v4sf_v2df
16688 = build_function_type_list (V4SF_type_node
,
16689 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16690 tree v2df_ftype_v2df_v4sf
16691 = build_function_type_list (V2DF_type_node
,
16692 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16693 tree v2df_ftype_v2df_v2df_int
16694 = build_function_type_list (V2DF_type_node
,
16695 V2DF_type_node
, V2DF_type_node
,
16698 tree v2df_ftype_v2df_pcdouble
16699 = build_function_type_list (V2DF_type_node
,
16700 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16701 tree void_ftype_pdouble_v2df
16702 = build_function_type_list (void_type_node
,
16703 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16704 tree void_ftype_pint_int
16705 = build_function_type_list (void_type_node
,
16706 pint_type_node
, integer_type_node
, NULL_TREE
);
16707 tree void_ftype_v16qi_v16qi_pchar
16708 = build_function_type_list (void_type_node
,
16709 V16QI_type_node
, V16QI_type_node
,
16710 pchar_type_node
, NULL_TREE
);
16711 tree v2df_ftype_pcdouble
16712 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16713 tree v2df_ftype_v2df_v2df
16714 = build_function_type_list (V2DF_type_node
,
16715 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16716 tree v16qi_ftype_v16qi_v16qi
16717 = build_function_type_list (V16QI_type_node
,
16718 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16719 tree v8hi_ftype_v8hi_v8hi
16720 = build_function_type_list (V8HI_type_node
,
16721 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16722 tree v4si_ftype_v4si_v4si
16723 = build_function_type_list (V4SI_type_node
,
16724 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16725 tree v2di_ftype_v2di_v2di
16726 = build_function_type_list (V2DI_type_node
,
16727 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16728 tree v2di_ftype_v2df_v2df
16729 = build_function_type_list (V2DI_type_node
,
16730 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16731 tree v2df_ftype_v2df
16732 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16733 tree v2di_ftype_v2di_int
16734 = build_function_type_list (V2DI_type_node
,
16735 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16736 tree v2di_ftype_v2di_v2di_int
16737 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16738 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16739 tree v4si_ftype_v4si_int
16740 = build_function_type_list (V4SI_type_node
,
16741 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16742 tree v8hi_ftype_v8hi_int
16743 = build_function_type_list (V8HI_type_node
,
16744 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16745 tree v4si_ftype_v8hi_v8hi
16746 = build_function_type_list (V4SI_type_node
,
16747 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16748 tree di_ftype_v8qi_v8qi
16749 = build_function_type_list (long_long_unsigned_type_node
,
16750 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16751 tree di_ftype_v2si_v2si
16752 = build_function_type_list (long_long_unsigned_type_node
,
16753 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16754 tree v2di_ftype_v16qi_v16qi
16755 = build_function_type_list (V2DI_type_node
,
16756 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16757 tree v2di_ftype_v4si_v4si
16758 = build_function_type_list (V2DI_type_node
,
16759 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16760 tree int_ftype_v16qi
16761 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16762 tree v16qi_ftype_pcchar
16763 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16764 tree void_ftype_pchar_v16qi
16765 = build_function_type_list (void_type_node
,
16766 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16768 tree v2di_ftype_v2di_unsigned_unsigned
16769 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16770 unsigned_type_node
, unsigned_type_node
,
16772 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16773 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
16774 unsigned_type_node
, unsigned_type_node
,
16776 tree v2di_ftype_v2di_v16qi
16777 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
16781 tree float128_type
;
16784 /* The __float80 type. */
16785 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16786 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16790 /* The __float80 type. */
16791 float80_type
= make_node (REAL_TYPE
);
16792 TYPE_PRECISION (float80_type
) = 80;
16793 layout_type (float80_type
);
16794 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16799 float128_type
= make_node (REAL_TYPE
);
16800 TYPE_PRECISION (float128_type
) = 128;
16801 layout_type (float128_type
);
16802 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16805 /* Add all builtins that are more or less simple operations on two
16807 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16809 /* Use one of the operands; the target can have a different mode for
16810 mask-generating compares. */
16811 enum machine_mode mode
;
16816 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16821 type
= v16qi_ftype_v16qi_v16qi
;
16824 type
= v8hi_ftype_v8hi_v8hi
;
16827 type
= v4si_ftype_v4si_v4si
;
16830 type
= v2di_ftype_v2di_v2di
;
16833 type
= v2df_ftype_v2df_v2df
;
16836 type
= v4sf_ftype_v4sf_v4sf
;
16839 type
= v8qi_ftype_v8qi_v8qi
;
16842 type
= v4hi_ftype_v4hi_v4hi
;
16845 type
= v2si_ftype_v2si_v2si
;
16848 type
= di_ftype_di_di
;
16852 gcc_unreachable ();
16855 /* Override for comparisons. */
16856 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16857 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
16858 type
= v4si_ftype_v4sf_v4sf
;
16860 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16861 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16862 type
= v2di_ftype_v2df_v2df
;
16864 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16867 /* Add all builtins that are more or less simple operations on 1 operand. */
16868 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16870 enum machine_mode mode
;
16875 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16880 type
= v16qi_ftype_v16qi
;
16883 type
= v8hi_ftype_v8hi
;
16886 type
= v4si_ftype_v4si
;
16889 type
= v2df_ftype_v2df
;
16892 type
= v4sf_ftype_v4sf
;
16895 type
= v8qi_ftype_v8qi
;
16898 type
= v4hi_ftype_v4hi
;
16901 type
= v2si_ftype_v2si
;
16908 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16911 /* Add the remaining MMX insns with somewhat more complicated types. */
16912 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
16913 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
16914 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
16915 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
16917 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
16918 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
16919 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
16921 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
16922 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
16924 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
16925 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
16927 /* comi/ucomi insns. */
16928 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16929 if (d
->mask
== MASK_SSE2
)
16930 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
16932 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
16934 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
16935 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
16936 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
16938 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
16939 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
16940 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
16941 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
16942 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
16943 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
16944 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
16945 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
16946 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
16947 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
16948 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
16950 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
16952 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
16953 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
16955 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
16956 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
16957 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
16958 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
16960 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
16961 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
16962 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
16963 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
16965 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
16967 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
16969 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
16970 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
16971 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
16972 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
16973 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
16974 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
16976 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
16978 /* Original 3DNow! */
16979 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
16980 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
16981 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
16982 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
16983 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
16984 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
16985 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
16986 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
16987 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
16988 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
16989 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
16990 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
16991 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
16992 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
16993 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
16994 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
16995 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
16996 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
16997 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
16998 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17000 /* 3DNow! extension as used in the Athlon CPU. */
17001 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17002 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17003 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17004 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17005 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17006 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17009 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17011 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17012 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17014 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17015 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17017 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17018 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17019 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17020 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17021 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17023 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17024 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17025 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17026 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17028 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17029 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17031 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17033 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17034 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17036 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17037 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17038 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17039 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17040 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17042 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17044 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17045 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17046 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17047 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17049 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17050 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17051 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17053 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17054 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17055 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17056 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17058 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17059 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17060 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17062 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17063 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17065 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17066 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17068 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17069 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17070 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17071 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17072 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
17073 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
17074 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17076 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17077 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17078 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17079 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17080 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
17081 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
17082 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17084 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17085 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17086 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
17087 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
17089 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17091 /* Prescott New Instructions. */
17092 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17093 void_ftype_pcvoid_unsigned_unsigned
,
17094 IX86_BUILTIN_MONITOR
);
17095 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17096 void_ftype_unsigned_unsigned
,
17097 IX86_BUILTIN_MWAIT
);
17098 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17099 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17102 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17103 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17104 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17105 IX86_BUILTIN_PALIGNR
);
17107 /* AMDFAM10 SSE4A New built-ins */
17108 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17109 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17110 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17111 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17112 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17113 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17114 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17115 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17116 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17117 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17118 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17119 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17121 /* Access to the vec_init patterns. */
17122 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17123 integer_type_node
, NULL_TREE
);
17124 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17125 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17127 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17128 short_integer_type_node
,
17129 short_integer_type_node
,
17130 short_integer_type_node
, NULL_TREE
);
17131 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17132 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17134 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17135 char_type_node
, char_type_node
,
17136 char_type_node
, char_type_node
,
17137 char_type_node
, char_type_node
,
17138 char_type_node
, NULL_TREE
);
17139 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17140 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17142 /* Access to the vec_extract patterns. */
17143 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17144 integer_type_node
, NULL_TREE
);
17145 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17146 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17148 ftype
= build_function_type_list (long_long_integer_type_node
,
17149 V2DI_type_node
, integer_type_node
,
17151 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17152 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17154 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17155 integer_type_node
, NULL_TREE
);
17156 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17157 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17159 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17160 integer_type_node
, NULL_TREE
);
17161 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17162 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17164 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17165 integer_type_node
, NULL_TREE
);
17166 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17167 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17169 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17170 integer_type_node
, NULL_TREE
);
17171 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17172 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17174 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17175 integer_type_node
, NULL_TREE
);
17176 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17177 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17179 /* Access to the vec_set patterns. */
17180 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17182 integer_type_node
, NULL_TREE
);
17183 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17184 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17186 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17188 integer_type_node
, NULL_TREE
);
17189 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17190 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
/* Target hook: register all i386-specific builtin functions.
   Delegates to the MMX/SSE registration routine.
   NOTE(review): the extraction dropped surrounding lines (return type,
   braces, and any enabling guard such as TARGET_MMX) — confirm against
   the original file before relying on this fragment.  */
17194 ix86_init_builtins (void)
17197 ix86_init_mmx_sse_builtins ();
17200 /* Errors in the source file can cause expand_expr to return const0_rtx
17201 where we expect a vector. To avoid crashing, use one of the vector
17202 clear instructions. */
17204 safe_vector_operand (rtx x
, enum machine_mode mode
)
17206 if (x
== const0_rtx
)
17207 x
= CONST0_RTX (mode
);
17211 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17214 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17217 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17218 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17219 rtx op0
= expand_normal (arg0
);
17220 rtx op1
= expand_normal (arg1
);
17221 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17222 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17223 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17225 if (VECTOR_MODE_P (mode0
))
17226 op0
= safe_vector_operand (op0
, mode0
);
17227 if (VECTOR_MODE_P (mode1
))
17228 op1
= safe_vector_operand (op1
, mode1
);
17230 if (optimize
|| !target
17231 || GET_MODE (target
) != tmode
17232 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17233 target
= gen_reg_rtx (tmode
);
17235 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17237 rtx x
= gen_reg_rtx (V4SImode
);
17238 emit_insn (gen_sse2_loadd (x
, op1
));
17239 op1
= gen_lowpart (TImode
, x
);
17242 /* The insn must want input operands in the same modes as the
17244 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17245 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17247 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17248 op0
= copy_to_mode_reg (mode0
, op0
);
17249 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17250 op1
= copy_to_mode_reg (mode1
, op1
);
17252 /* ??? Using ix86_fixup_binary_operands is problematic when
17253 we've got mismatched modes. Fake it. */
17259 if (tmode
== mode0
&& tmode
== mode1
)
17261 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17265 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17267 op0
= force_reg (mode0
, op0
);
17268 op1
= force_reg (mode1
, op1
);
17269 target
= gen_reg_rtx (tmode
);
17272 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17279 /* Subroutine of ix86_expand_builtin to take care of stores. */
17282 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17285 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17286 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17287 rtx op0
= expand_normal (arg0
);
17288 rtx op1
= expand_normal (arg1
);
17289 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17290 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17292 if (VECTOR_MODE_P (mode1
))
17293 op1
= safe_vector_operand (op1
, mode1
);
17295 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17296 op1
= copy_to_mode_reg (mode1
, op1
);
17298 pat
= GEN_FCN (icode
) (op0
, op1
);
17304 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17307 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17308 rtx target
, int do_load
)
17311 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17312 rtx op0
= expand_normal (arg0
);
17313 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17314 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17316 if (optimize
|| !target
17317 || GET_MODE (target
) != tmode
17318 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17319 target
= gen_reg_rtx (tmode
);
17321 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17324 if (VECTOR_MODE_P (mode0
))
17325 op0
= safe_vector_operand (op0
, mode0
);
17327 if ((optimize
&& !register_operand (op0
, mode0
))
17328 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17329 op0
= copy_to_mode_reg (mode0
, op0
);
17332 pat
= GEN_FCN (icode
) (target
, op0
);
17339 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17340 sqrtss, rsqrtss, rcpss. */
17343 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17346 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17347 rtx op1
, op0
= expand_normal (arg0
);
17348 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17349 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17351 if (optimize
|| !target
17352 || GET_MODE (target
) != tmode
17353 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17354 target
= gen_reg_rtx (tmode
);
17356 if (VECTOR_MODE_P (mode0
))
17357 op0
= safe_vector_operand (op0
, mode0
);
17359 if ((optimize
&& !register_operand (op0
, mode0
))
17360 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17361 op0
= copy_to_mode_reg (mode0
, op0
);
17364 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17365 op1
= copy_to_mode_reg (mode0
, op1
);
17367 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17374 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17377 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17381 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17382 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17383 rtx op0
= expand_normal (arg0
);
17384 rtx op1
= expand_normal (arg1
);
17386 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17387 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17388 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17389 enum rtx_code comparison
= d
->comparison
;
17391 if (VECTOR_MODE_P (mode0
))
17392 op0
= safe_vector_operand (op0
, mode0
);
17393 if (VECTOR_MODE_P (mode1
))
17394 op1
= safe_vector_operand (op1
, mode1
);
17396 /* Swap operands if we have a comparison that isn't available in
17398 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17400 rtx tmp
= gen_reg_rtx (mode1
);
17401 emit_move_insn (tmp
, op1
);
17406 if (optimize
|| !target
17407 || GET_MODE (target
) != tmode
17408 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17409 target
= gen_reg_rtx (tmode
);
17411 if ((optimize
&& !register_operand (op0
, mode0
))
17412 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17413 op0
= copy_to_mode_reg (mode0
, op0
);
17414 if ((optimize
&& !register_operand (op1
, mode1
))
17415 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17416 op1
= copy_to_mode_reg (mode1
, op1
);
17418 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17419 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17426 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17429 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17433 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17434 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17435 rtx op0
= expand_normal (arg0
);
17436 rtx op1
= expand_normal (arg1
);
17438 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17439 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17440 enum rtx_code comparison
= d
->comparison
;
17442 if (VECTOR_MODE_P (mode0
))
17443 op0
= safe_vector_operand (op0
, mode0
);
17444 if (VECTOR_MODE_P (mode1
))
17445 op1
= safe_vector_operand (op1
, mode1
);
17447 /* Swap operands if we have a comparison that isn't available in
17449 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17456 target
= gen_reg_rtx (SImode
);
17457 emit_move_insn (target
, const0_rtx
);
17458 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17460 if ((optimize
&& !register_operand (op0
, mode0
))
17461 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17462 op0
= copy_to_mode_reg (mode0
, op0
);
17463 if ((optimize
&& !register_operand (op1
, mode1
))
17464 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17465 op1
= copy_to_mode_reg (mode1
, op1
);
17467 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17468 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17472 emit_insn (gen_rtx_SET (VOIDmode
,
17473 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17474 gen_rtx_fmt_ee (comparison
, QImode
,
17478 return SUBREG_REG (target
);
17481 /* Return the integer constant in ARG. Constrain it to be in the range
17482 of the subparts of VEC_TYPE; issue an error if not. */
17485 get_element_number (tree vec_type
, tree arg
)
17487 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17489 if (!host_integerp (arg
, 1)
17490 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17492 error ("selector must be an integer constant in the range 0..%wi", max
);
17499 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17500 ix86_expand_vector_init. We DO have language-level syntax for this, in
17501 the form of (type){ init-list }. Except that since we can't place emms
17502 instructions from inside the compiler, we can't allow the use of MMX
17503 registers unless the user explicitly asks for it. So we do *not* define
17504 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17505 we have builtins invoked by mmintrin.h that gives us license to emit
17506 these sorts of instructions. */
17509 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17511 enum machine_mode tmode
= TYPE_MODE (type
);
17512 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17513 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17514 rtvec v
= rtvec_alloc (n_elt
);
17516 gcc_assert (VECTOR_MODE_P (tmode
));
17517 gcc_assert (call_expr_nargs (exp
) == n_elt
);
17519 for (i
= 0; i
< n_elt
; ++i
)
17521 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17522 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17525 if (!target
|| !register_operand (target
, tmode
))
17526 target
= gen_reg_rtx (tmode
);
17528 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17532 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17533 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17534 had a language-level syntax for referencing vector elements. */
17537 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17539 enum machine_mode tmode
, mode0
;
17544 arg0
= CALL_EXPR_ARG (exp
, 0);
17545 arg1
= CALL_EXPR_ARG (exp
, 1);
17547 op0
= expand_normal (arg0
);
17548 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17550 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17551 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17552 gcc_assert (VECTOR_MODE_P (mode0
));
17554 op0
= force_reg (mode0
, op0
);
17556 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17557 target
= gen_reg_rtx (tmode
);
17559 ix86_expand_vector_extract (true, target
, op0
, elt
);
17564 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17565 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17566 a language-level syntax for referencing vector elements. */
17569 ix86_expand_vec_set_builtin (tree exp
)
17571 enum machine_mode tmode
, mode1
;
17572 tree arg0
, arg1
, arg2
;
17576 arg0
= CALL_EXPR_ARG (exp
, 0);
17577 arg1
= CALL_EXPR_ARG (exp
, 1);
17578 arg2
= CALL_EXPR_ARG (exp
, 2);
17580 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17581 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17582 gcc_assert (VECTOR_MODE_P (tmode
));
17584 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17585 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17586 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17588 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17589 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17591 op0
= force_reg (tmode
, op0
);
17592 op1
= force_reg (mode1
, op1
);
17594 ix86_expand_vector_set (true, op0
, op1
, elt
);
17599 /* Expand an expression EXP that calls a built-in function,
17600 with result going to TARGET if that's convenient
17601 (and in mode MODE if that's convenient).
17602 SUBTARGET may be used as the target for computing one of EXP's operands.
17603 IGNORE is nonzero if the value is to be ignored. */
17606 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17607 enum machine_mode mode ATTRIBUTE_UNUSED
,
17608 int ignore ATTRIBUTE_UNUSED
)
17610 const struct builtin_description
*d
;
17612 enum insn_code icode
;
17613 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17614 tree arg0
, arg1
, arg2
, arg3
;
17615 rtx op0
, op1
, op2
, op3
, pat
;
17616 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17617 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17621 case IX86_BUILTIN_EMMS
:
17622 emit_insn (gen_mmx_emms ());
17625 case IX86_BUILTIN_SFENCE
:
17626 emit_insn (gen_sse_sfence ());
17629 case IX86_BUILTIN_MASKMOVQ
:
17630 case IX86_BUILTIN_MASKMOVDQU
:
17631 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17632 ? CODE_FOR_mmx_maskmovq
17633 : CODE_FOR_sse2_maskmovdqu
);
17634 /* Note the arg order is different from the operand order. */
17635 arg1
= CALL_EXPR_ARG (exp
, 0);
17636 arg2
= CALL_EXPR_ARG (exp
, 1);
17637 arg0
= CALL_EXPR_ARG (exp
, 2);
17638 op0
= expand_normal (arg0
);
17639 op1
= expand_normal (arg1
);
17640 op2
= expand_normal (arg2
);
17641 mode0
= insn_data
[icode
].operand
[0].mode
;
17642 mode1
= insn_data
[icode
].operand
[1].mode
;
17643 mode2
= insn_data
[icode
].operand
[2].mode
;
17645 op0
= force_reg (Pmode
, op0
);
17646 op0
= gen_rtx_MEM (mode1
, op0
);
17648 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17649 op0
= copy_to_mode_reg (mode0
, op0
);
17650 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17651 op1
= copy_to_mode_reg (mode1
, op1
);
17652 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17653 op2
= copy_to_mode_reg (mode2
, op2
);
17654 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17660 case IX86_BUILTIN_SQRTSS
:
17661 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17662 case IX86_BUILTIN_RSQRTSS
:
17663 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17664 case IX86_BUILTIN_RCPSS
:
17665 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17667 case IX86_BUILTIN_LOADUPS
:
17668 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17670 case IX86_BUILTIN_STOREUPS
:
17671 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17673 case IX86_BUILTIN_LOADHPS
:
17674 case IX86_BUILTIN_LOADLPS
:
17675 case IX86_BUILTIN_LOADHPD
:
17676 case IX86_BUILTIN_LOADLPD
:
17677 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17678 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17679 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17680 : CODE_FOR_sse2_loadlpd
);
17681 arg0
= CALL_EXPR_ARG (exp
, 0);
17682 arg1
= CALL_EXPR_ARG (exp
, 1);
17683 op0
= expand_normal (arg0
);
17684 op1
= expand_normal (arg1
);
17685 tmode
= insn_data
[icode
].operand
[0].mode
;
17686 mode0
= insn_data
[icode
].operand
[1].mode
;
17687 mode1
= insn_data
[icode
].operand
[2].mode
;
17689 op0
= force_reg (mode0
, op0
);
17690 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17691 if (optimize
|| target
== 0
17692 || GET_MODE (target
) != tmode
17693 || !register_operand (target
, tmode
))
17694 target
= gen_reg_rtx (tmode
);
17695 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17701 case IX86_BUILTIN_STOREHPS
:
17702 case IX86_BUILTIN_STORELPS
:
17703 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17704 : CODE_FOR_sse_storelps
);
17705 arg0
= CALL_EXPR_ARG (exp
, 0);
17706 arg1
= CALL_EXPR_ARG (exp
, 1);
17707 op0
= expand_normal (arg0
);
17708 op1
= expand_normal (arg1
);
17709 mode0
= insn_data
[icode
].operand
[0].mode
;
17710 mode1
= insn_data
[icode
].operand
[1].mode
;
17712 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17713 op1
= force_reg (mode1
, op1
);
17715 pat
= GEN_FCN (icode
) (op0
, op1
);
17721 case IX86_BUILTIN_MOVNTPS
:
17722 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
17723 case IX86_BUILTIN_MOVNTQ
:
17724 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
17726 case IX86_BUILTIN_LDMXCSR
:
17727 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
17728 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17729 emit_move_insn (target
, op0
);
17730 emit_insn (gen_sse_ldmxcsr (target
));
17733 case IX86_BUILTIN_STMXCSR
:
17734 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17735 emit_insn (gen_sse_stmxcsr (target
));
17736 return copy_to_mode_reg (SImode
, target
);
17738 case IX86_BUILTIN_SHUFPS
:
17739 case IX86_BUILTIN_SHUFPD
:
17740 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17741 ? CODE_FOR_sse_shufps
17742 : CODE_FOR_sse2_shufpd
);
17743 arg0
= CALL_EXPR_ARG (exp
, 0);
17744 arg1
= CALL_EXPR_ARG (exp
, 1);
17745 arg2
= CALL_EXPR_ARG (exp
, 2);
17746 op0
= expand_normal (arg0
);
17747 op1
= expand_normal (arg1
);
17748 op2
= expand_normal (arg2
);
17749 tmode
= insn_data
[icode
].operand
[0].mode
;
17750 mode0
= insn_data
[icode
].operand
[1].mode
;
17751 mode1
= insn_data
[icode
].operand
[2].mode
;
17752 mode2
= insn_data
[icode
].operand
[3].mode
;
17754 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17755 op0
= copy_to_mode_reg (mode0
, op0
);
17756 if ((optimize
&& !register_operand (op1
, mode1
))
17757 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17758 op1
= copy_to_mode_reg (mode1
, op1
);
17759 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17761 /* @@@ better error message */
17762 error ("mask must be an immediate");
17763 return gen_reg_rtx (tmode
);
17765 if (optimize
|| target
== 0
17766 || GET_MODE (target
) != tmode
17767 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17768 target
= gen_reg_rtx (tmode
);
17769 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17775 case IX86_BUILTIN_PSHUFW
:
17776 case IX86_BUILTIN_PSHUFD
:
17777 case IX86_BUILTIN_PSHUFHW
:
17778 case IX86_BUILTIN_PSHUFLW
:
17779 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17780 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17781 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17782 : CODE_FOR_mmx_pshufw
);
17783 arg0
= CALL_EXPR_ARG (exp
, 0);
17784 arg1
= CALL_EXPR_ARG (exp
, 1);
17785 op0
= expand_normal (arg0
);
17786 op1
= expand_normal (arg1
);
17787 tmode
= insn_data
[icode
].operand
[0].mode
;
17788 mode1
= insn_data
[icode
].operand
[1].mode
;
17789 mode2
= insn_data
[icode
].operand
[2].mode
;
17791 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17792 op0
= copy_to_mode_reg (mode1
, op0
);
17793 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17795 /* @@@ better error message */
17796 error ("mask must be an immediate");
17800 || GET_MODE (target
) != tmode
17801 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17802 target
= gen_reg_rtx (tmode
);
17803 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17809 case IX86_BUILTIN_PSLLWI128
:
17810 icode
= CODE_FOR_ashlv8hi3
;
17812 case IX86_BUILTIN_PSLLDI128
:
17813 icode
= CODE_FOR_ashlv4si3
;
17815 case IX86_BUILTIN_PSLLQI128
:
17816 icode
= CODE_FOR_ashlv2di3
;
17818 case IX86_BUILTIN_PSRAWI128
:
17819 icode
= CODE_FOR_ashrv8hi3
;
17821 case IX86_BUILTIN_PSRADI128
:
17822 icode
= CODE_FOR_ashrv4si3
;
17824 case IX86_BUILTIN_PSRLWI128
:
17825 icode
= CODE_FOR_lshrv8hi3
;
17827 case IX86_BUILTIN_PSRLDI128
:
17828 icode
= CODE_FOR_lshrv4si3
;
17830 case IX86_BUILTIN_PSRLQI128
:
17831 icode
= CODE_FOR_lshrv2di3
;
17834 arg0
= CALL_EXPR_ARG (exp
, 0);
17835 arg1
= CALL_EXPR_ARG (exp
, 1);
17836 op0
= expand_normal (arg0
);
17837 op1
= expand_normal (arg1
);
17839 if (!CONST_INT_P (op1
))
17841 error ("shift must be an immediate");
17844 if (INTVAL (op1
) < 0 || INTVAL (op1
) > 255)
17845 op1
= GEN_INT (255);
17847 tmode
= insn_data
[icode
].operand
[0].mode
;
17848 mode1
= insn_data
[icode
].operand
[1].mode
;
17849 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17850 op0
= copy_to_reg (op0
);
17852 target
= gen_reg_rtx (tmode
);
17853 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17859 case IX86_BUILTIN_PSLLW128
:
17860 icode
= CODE_FOR_ashlv8hi3
;
17862 case IX86_BUILTIN_PSLLD128
:
17863 icode
= CODE_FOR_ashlv4si3
;
17865 case IX86_BUILTIN_PSLLQ128
:
17866 icode
= CODE_FOR_ashlv2di3
;
17868 case IX86_BUILTIN_PSRAW128
:
17869 icode
= CODE_FOR_ashrv8hi3
;
17871 case IX86_BUILTIN_PSRAD128
:
17872 icode
= CODE_FOR_ashrv4si3
;
17874 case IX86_BUILTIN_PSRLW128
:
17875 icode
= CODE_FOR_lshrv8hi3
;
17877 case IX86_BUILTIN_PSRLD128
:
17878 icode
= CODE_FOR_lshrv4si3
;
17880 case IX86_BUILTIN_PSRLQ128
:
17881 icode
= CODE_FOR_lshrv2di3
;
17884 arg0
= CALL_EXPR_ARG (exp
, 0);
17885 arg1
= CALL_EXPR_ARG (exp
, 1);
17886 op0
= expand_normal (arg0
);
17887 op1
= expand_normal (arg1
);
17889 tmode
= insn_data
[icode
].operand
[0].mode
;
17890 mode1
= insn_data
[icode
].operand
[1].mode
;
17892 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17893 op0
= copy_to_reg (op0
);
17895 op1
= simplify_gen_subreg (TImode
, op1
, GET_MODE (op1
), 0);
17896 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, TImode
))
17897 op1
= copy_to_reg (op1
);
17899 target
= gen_reg_rtx (tmode
);
17900 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17906 case IX86_BUILTIN_PSLLDQI128
:
17907 case IX86_BUILTIN_PSRLDQI128
:
17908 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17909 : CODE_FOR_sse2_lshrti3
);
17910 arg0
= CALL_EXPR_ARG (exp
, 0);
17911 arg1
= CALL_EXPR_ARG (exp
, 1);
17912 op0
= expand_normal (arg0
);
17913 op1
= expand_normal (arg1
);
17914 tmode
= insn_data
[icode
].operand
[0].mode
;
17915 mode1
= insn_data
[icode
].operand
[1].mode
;
17916 mode2
= insn_data
[icode
].operand
[2].mode
;
17918 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17920 op0
= copy_to_reg (op0
);
17921 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17923 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17925 error ("shift must be an immediate");
17928 target
= gen_reg_rtx (V2DImode
);
17929 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
17936 case IX86_BUILTIN_FEMMS
:
17937 emit_insn (gen_mmx_femms ());
17940 case IX86_BUILTIN_PAVGUSB
:
17941 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
17943 case IX86_BUILTIN_PF2ID
:
17944 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
17946 case IX86_BUILTIN_PFACC
:
17947 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
17949 case IX86_BUILTIN_PFADD
:
17950 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
17952 case IX86_BUILTIN_PFCMPEQ
:
17953 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
17955 case IX86_BUILTIN_PFCMPGE
:
17956 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
17958 case IX86_BUILTIN_PFCMPGT
:
17959 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
17961 case IX86_BUILTIN_PFMAX
:
17962 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
17964 case IX86_BUILTIN_PFMIN
:
17965 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
17967 case IX86_BUILTIN_PFMUL
:
17968 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
17970 case IX86_BUILTIN_PFRCP
:
17971 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
17973 case IX86_BUILTIN_PFRCPIT1
:
17974 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
17976 case IX86_BUILTIN_PFRCPIT2
:
17977 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
17979 case IX86_BUILTIN_PFRSQIT1
:
17980 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
17982 case IX86_BUILTIN_PFRSQRT
:
17983 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
17985 case IX86_BUILTIN_PFSUB
:
17986 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
17988 case IX86_BUILTIN_PFSUBR
:
17989 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
17991 case IX86_BUILTIN_PI2FD
:
17992 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
17994 case IX86_BUILTIN_PMULHRW
:
17995 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
17997 case IX86_BUILTIN_PF2IW
:
17998 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18000 case IX86_BUILTIN_PFNACC
:
18001 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18003 case IX86_BUILTIN_PFPNACC
:
18004 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18006 case IX86_BUILTIN_PI2FW
:
18007 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18009 case IX86_BUILTIN_PSWAPDSI
:
18010 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18012 case IX86_BUILTIN_PSWAPDSF
:
18013 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18015 case IX86_BUILTIN_SQRTSD
:
18016 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18017 case IX86_BUILTIN_LOADUPD
:
18018 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18019 case IX86_BUILTIN_STOREUPD
:
18020 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18022 case IX86_BUILTIN_MFENCE
:
18023 emit_insn (gen_sse2_mfence ());
18025 case IX86_BUILTIN_LFENCE
:
18026 emit_insn (gen_sse2_lfence ());
18029 case IX86_BUILTIN_CLFLUSH
:
18030 arg0
= CALL_EXPR_ARG (exp
, 0);
18031 op0
= expand_normal (arg0
);
18032 icode
= CODE_FOR_sse2_clflush
;
18033 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18034 op0
= copy_to_mode_reg (Pmode
, op0
);
18036 emit_insn (gen_sse2_clflush (op0
));
18039 case IX86_BUILTIN_MOVNTPD
:
18040 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18041 case IX86_BUILTIN_MOVNTDQ
:
18042 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18043 case IX86_BUILTIN_MOVNTI
:
18044 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18046 case IX86_BUILTIN_LOADDQU
:
18047 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18048 case IX86_BUILTIN_STOREDQU
:
18049 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18051 case IX86_BUILTIN_MONITOR
:
18052 arg0
= CALL_EXPR_ARG (exp
, 0);
18053 arg1
= CALL_EXPR_ARG (exp
, 1);
18054 arg2
= CALL_EXPR_ARG (exp
, 2);
18055 op0
= expand_normal (arg0
);
18056 op1
= expand_normal (arg1
);
18057 op2
= expand_normal (arg2
);
18059 op0
= copy_to_mode_reg (Pmode
, op0
);
18061 op1
= copy_to_mode_reg (SImode
, op1
);
18063 op2
= copy_to_mode_reg (SImode
, op2
);
18065 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18067 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18070 case IX86_BUILTIN_MWAIT
:
18071 arg0
= CALL_EXPR_ARG (exp
, 0);
18072 arg1
= CALL_EXPR_ARG (exp
, 1);
18073 op0
= expand_normal (arg0
);
18074 op1
= expand_normal (arg1
);
18076 op0
= copy_to_mode_reg (SImode
, op0
);
18078 op1
= copy_to_mode_reg (SImode
, op1
);
18079 emit_insn (gen_sse3_mwait (op0
, op1
));
18082 case IX86_BUILTIN_LDDQU
:
18083 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18086 case IX86_BUILTIN_PALIGNR
:
18087 case IX86_BUILTIN_PALIGNR128
:
18088 if (fcode
== IX86_BUILTIN_PALIGNR
)
18090 icode
= CODE_FOR_ssse3_palignrdi
;
18095 icode
= CODE_FOR_ssse3_palignrti
;
18098 arg0
= CALL_EXPR_ARG (exp
, 0);
18099 arg1
= CALL_EXPR_ARG (exp
, 1);
18100 arg2
= CALL_EXPR_ARG (exp
, 2);
18101 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18102 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18103 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18104 tmode
= insn_data
[icode
].operand
[0].mode
;
18105 mode1
= insn_data
[icode
].operand
[1].mode
;
18106 mode2
= insn_data
[icode
].operand
[2].mode
;
18107 mode3
= insn_data
[icode
].operand
[3].mode
;
18109 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18111 op0
= copy_to_reg (op0
);
18112 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18114 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18116 op1
= copy_to_reg (op1
);
18117 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18119 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18121 error ("shift must be an immediate");
18124 target
= gen_reg_rtx (mode
);
18125 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18132 case IX86_BUILTIN_MOVNTSD
:
18133 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18135 case IX86_BUILTIN_MOVNTSS
:
18136 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18138 case IX86_BUILTIN_INSERTQ
:
18139 case IX86_BUILTIN_EXTRQ
:
18140 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18141 ? CODE_FOR_sse4a_extrq
18142 : CODE_FOR_sse4a_insertq
);
18143 arg0
= CALL_EXPR_ARG (exp
, 0);
18144 arg1
= CALL_EXPR_ARG (exp
, 1);
18145 op0
= expand_normal (arg0
);
18146 op1
= expand_normal (arg1
);
18147 tmode
= insn_data
[icode
].operand
[0].mode
;
18148 mode1
= insn_data
[icode
].operand
[1].mode
;
18149 mode2
= insn_data
[icode
].operand
[2].mode
;
18150 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18151 op0
= copy_to_mode_reg (mode1
, op0
);
18152 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18153 op1
= copy_to_mode_reg (mode2
, op1
);
18154 if (optimize
|| target
== 0
18155 || GET_MODE (target
) != tmode
18156 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18157 target
= gen_reg_rtx (tmode
);
18158 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18164 case IX86_BUILTIN_EXTRQI
:
18165 icode
= CODE_FOR_sse4a_extrqi
;
18166 arg0
= CALL_EXPR_ARG (exp
, 0);
18167 arg1
= CALL_EXPR_ARG (exp
, 1);
18168 arg2
= CALL_EXPR_ARG (exp
, 2);
18169 op0
= expand_normal (arg0
);
18170 op1
= expand_normal (arg1
);
18171 op2
= expand_normal (arg2
);
18172 tmode
= insn_data
[icode
].operand
[0].mode
;
18173 mode1
= insn_data
[icode
].operand
[1].mode
;
18174 mode2
= insn_data
[icode
].operand
[2].mode
;
18175 mode3
= insn_data
[icode
].operand
[3].mode
;
18176 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18177 op0
= copy_to_mode_reg (mode1
, op0
);
18178 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18180 error ("index mask must be an immediate");
18181 return gen_reg_rtx (tmode
);
18183 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18185 error ("length mask must be an immediate");
18186 return gen_reg_rtx (tmode
);
18188 if (optimize
|| target
== 0
18189 || GET_MODE (target
) != tmode
18190 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18191 target
= gen_reg_rtx (tmode
);
18192 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18198 case IX86_BUILTIN_INSERTQI
:
18199 icode
= CODE_FOR_sse4a_insertqi
;
18200 arg0
= CALL_EXPR_ARG (exp
, 0);
18201 arg1
= CALL_EXPR_ARG (exp
, 1);
18202 arg2
= CALL_EXPR_ARG (exp
, 2);
18203 arg3
= CALL_EXPR_ARG (exp
, 3);
18204 op0
= expand_normal (arg0
);
18205 op1
= expand_normal (arg1
);
18206 op2
= expand_normal (arg2
);
18207 op3
= expand_normal (arg3
);
18208 tmode
= insn_data
[icode
].operand
[0].mode
;
18209 mode1
= insn_data
[icode
].operand
[1].mode
;
18210 mode2
= insn_data
[icode
].operand
[2].mode
;
18211 mode3
= insn_data
[icode
].operand
[3].mode
;
18212 mode4
= insn_data
[icode
].operand
[4].mode
;
18214 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18215 op0
= copy_to_mode_reg (mode1
, op0
);
18217 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18218 op1
= copy_to_mode_reg (mode2
, op1
);
18220 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18222 error ("index mask must be an immediate");
18223 return gen_reg_rtx (tmode
);
18225 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18227 error ("length mask must be an immediate");
18228 return gen_reg_rtx (tmode
);
18230 if (optimize
|| target
== 0
18231 || GET_MODE (target
) != tmode
18232 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18233 target
= gen_reg_rtx (tmode
);
18234 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18240 case IX86_BUILTIN_VEC_INIT_V2SI
:
18241 case IX86_BUILTIN_VEC_INIT_V4HI
:
18242 case IX86_BUILTIN_VEC_INIT_V8QI
:
18243 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18245 case IX86_BUILTIN_VEC_EXT_V2DF
:
18246 case IX86_BUILTIN_VEC_EXT_V2DI
:
18247 case IX86_BUILTIN_VEC_EXT_V4SF
:
18248 case IX86_BUILTIN_VEC_EXT_V4SI
:
18249 case IX86_BUILTIN_VEC_EXT_V8HI
:
18250 case IX86_BUILTIN_VEC_EXT_V2SI
:
18251 case IX86_BUILTIN_VEC_EXT_V4HI
:
18252 return ix86_expand_vec_ext_builtin (exp
, target
);
18254 case IX86_BUILTIN_VEC_SET_V8HI
:
18255 case IX86_BUILTIN_VEC_SET_V4HI
:
18256 return ix86_expand_vec_set_builtin (exp
);
18262 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18263 if (d
->code
== fcode
)
18265 /* Compares are treated specially. */
18266 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18267 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18268 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18269 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18270 return ix86_expand_sse_compare (d
, exp
, target
);
18272 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18275 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18276 if (d
->code
== fcode
)
18277 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18279 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18280 if (d
->code
== fcode
)
18281 return ix86_expand_sse_comi (d
, exp
, target
);
18283 gcc_unreachable ();
18286 /* Returns a function decl for a vectorized version of the builtin function
18287 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18288 if it is not available. */
18291 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18294 enum machine_mode in_mode
, out_mode
;
18297 if (TREE_CODE (type_out
) != VECTOR_TYPE
18298 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18301 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18302 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18303 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18304 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18308 case BUILT_IN_SQRT
:
18309 if (out_mode
== DFmode
&& out_n
== 2
18310 && in_mode
== DFmode
&& in_n
== 2)
18311 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18314 case BUILT_IN_SQRTF
:
18315 if (out_mode
== SFmode
&& out_n
== 4
18316 && in_mode
== SFmode
&& in_n
== 4)
18317 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18320 case BUILT_IN_LRINTF
:
18321 if (out_mode
== SImode
&& out_n
== 4
18322 && in_mode
== SFmode
&& in_n
== 4)
18323 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18333 /* Returns a decl of a function that implements conversion of the
18334 input vector of type TYPE, or NULL_TREE if it is not available. */
18337 ix86_builtin_conversion (enum tree_code code
, tree type
)
18339 if (TREE_CODE (type
) != VECTOR_TYPE
)
18345 switch (TYPE_MODE (type
))
18348 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
18353 case FIX_TRUNC_EXPR
:
18354 switch (TYPE_MODE (type
))
18357 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18367 /* Store OPERAND to the memory after reload is completed. This means
18368 that we can't easily use assign_stack_local. */
18370 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18374 gcc_assert (reload_completed
);
18375 if (TARGET_RED_ZONE
)
18377 result
= gen_rtx_MEM (mode
,
18378 gen_rtx_PLUS (Pmode
,
18380 GEN_INT (-RED_ZONE_SIZE
)));
18381 emit_move_insn (result
, operand
);
18383 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18389 operand
= gen_lowpart (DImode
, operand
);
18393 gen_rtx_SET (VOIDmode
,
18394 gen_rtx_MEM (DImode
,
18395 gen_rtx_PRE_DEC (DImode
,
18396 stack_pointer_rtx
)),
18400 gcc_unreachable ();
18402 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18411 split_di (&operand
, 1, operands
, operands
+ 1);
18413 gen_rtx_SET (VOIDmode
,
18414 gen_rtx_MEM (SImode
,
18415 gen_rtx_PRE_DEC (Pmode
,
18416 stack_pointer_rtx
)),
18419 gen_rtx_SET (VOIDmode
,
18420 gen_rtx_MEM (SImode
,
18421 gen_rtx_PRE_DEC (Pmode
,
18422 stack_pointer_rtx
)),
18427 /* Store HImodes as SImodes. */
18428 operand
= gen_lowpart (SImode
, operand
);
18432 gen_rtx_SET (VOIDmode
,
18433 gen_rtx_MEM (GET_MODE (operand
),
18434 gen_rtx_PRE_DEC (SImode
,
18435 stack_pointer_rtx
)),
18439 gcc_unreachable ();
18441 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18446 /* Free operand from the memory. */
18448 ix86_free_from_memory (enum machine_mode mode
)
18450 if (!TARGET_RED_ZONE
)
18454 if (mode
== DImode
|| TARGET_64BIT
)
18458 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18459 to pop or add instruction if registers are available. */
18460 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18461 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18466 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18467 QImode must go into class Q_REGS.
18468 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18469 movdf to do mem-to-mem moves through integer regs. */
18471 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18473 enum machine_mode mode
= GET_MODE (x
);
18475 /* We're only allowed to return a subclass of CLASS. Many of the
18476 following checks fail for NO_REGS, so eliminate that early. */
18477 if (class == NO_REGS
)
18480 /* All classes can load zeros. */
18481 if (x
== CONST0_RTX (mode
))
18484 /* Force constants into memory if we are loading a (nonzero) constant into
18485 an MMX or SSE register. This is because there are no MMX/SSE instructions
18486 to load from a constant. */
18488 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18491 /* Prefer SSE regs only, if we can use them for math. */
18492 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18493 return SSE_CLASS_P (class) ? class : NO_REGS
;
18495 /* Floating-point constants need more complex checks. */
18496 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18498 /* General regs can load everything. */
18499 if (reg_class_subset_p (class, GENERAL_REGS
))
18502 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18503 zero above. We only want to wind up preferring 80387 registers if
18504 we plan on doing computation with them. */
18506 && standard_80387_constant_p (x
))
18508 /* Limit class to non-sse. */
18509 if (class == FLOAT_SSE_REGS
)
18511 if (class == FP_TOP_SSE_REGS
)
18513 if (class == FP_SECOND_SSE_REGS
)
18514 return FP_SECOND_REG
;
18515 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18522 /* Generally when we see PLUS here, it's the function invariant
18523 (plus soft-fp const_int). Which can only be computed into general
18525 if (GET_CODE (x
) == PLUS
)
18526 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18528 /* QImode constants are easy to load, but non-constant QImode data
18529 must go into Q_REGS. */
18530 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18532 if (reg_class_subset_p (class, Q_REGS
))
18534 if (reg_class_subset_p (Q_REGS
, class))
18542 /* Discourage putting floating-point values in SSE registers unless
18543 SSE math is being used, and likewise for the 387 registers. */
18545 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18547 enum machine_mode mode
= GET_MODE (x
);
18549 /* Restrict the output reload class to the register bank that we are doing
18550 math on. If we would like not to return a subset of CLASS, reject this
18551 alternative: if reload cannot do this, it will still use its choice. */
18552 mode
= GET_MODE (x
);
18553 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18554 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18556 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18558 if (class == FP_TOP_SSE_REGS
)
18560 else if (class == FP_SECOND_SSE_REGS
)
18561 return FP_SECOND_REG
;
18563 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18569 /* If we are copying between general and FP registers, we need a memory
18570 location. The same is true for SSE and MMX registers.
18572 The macro can't work reliably when one of the CLASSES is class containing
18573 registers from multiple units (SSE, MMX, integer). We avoid this by never
18574 combining those units in single alternative in the machine description.
18575 Ensure that this constraint holds to avoid unexpected surprises.
18577 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18578 enforce these sanity checks. */
18581 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18582 enum machine_mode mode
, int strict
)
18584 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18585 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18586 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18587 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18588 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18589 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18591 gcc_assert (!strict
);
18595 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18598 /* ??? This is a lie. We do have moves between mmx/general, and for
18599 mmx/sse2. But by saying we need secondary memory we discourage the
18600 register allocator from using the mmx registers unless needed. */
18601 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18604 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18606 /* SSE1 doesn't have any direct moves from other classes. */
18610 /* If the target says that inter-unit moves are more expensive
18611 than moving through memory, then don't generate them. */
18612 if (!TARGET_INTER_UNIT_MOVES
)
18615 /* Between SSE and general, we have moves no larger than word size. */
18616 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18623 /* Return true if the registers in CLASS cannot represent the change from
18624 modes FROM to TO. */
18627 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18628 enum reg_class
class)
18633 /* x87 registers can't do subreg at all, as all values are reformatted
18634 to extended precision. */
18635 if (MAYBE_FLOAT_CLASS_P (class))
18638 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18640 /* Vector registers do not support QI or HImode loads. If we don't
18641 disallow a change to these modes, reload will assume it's ok to
18642 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18643 the vec_dupv4hi pattern. */
18644 if (GET_MODE_SIZE (from
) < 4)
18647 /* Vector registers do not support subreg with nonzero offsets, which
18648 are otherwise valid for integer registers. Since we can't see
18649 whether we have a nonzero offset from here, prohibit all
18650 nonparadoxical subregs changing size. */
18651 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18658 /* Return the cost of moving data from a register in class CLASS1 to
18659 one in class CLASS2.
18661 It is not required that the cost always equal 2 when FROM is the same as TO;
18662 on some machines it is expensive to move between registers if they are not
18663 general registers. */
18666 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18667 enum reg_class class2
)
18669 /* In case we require secondary memory, compute cost of the store followed
18670 by load. In order to avoid bad register allocation choices, we need
18671 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18673 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
18677 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18678 MEMORY_MOVE_COST (mode
, class1
, 1));
18679 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18680 MEMORY_MOVE_COST (mode
, class2
, 1));
18682 /* In case of copying from general_purpose_register we may emit multiple
18683 stores followed by single load causing memory size mismatch stall.
18684 Count this as arbitrarily high cost of 20. */
18685 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18688 /* In the case of FP/MMX moves, the registers actually overlap, and we
18689 have to switch modes in order to treat them differently. */
18690 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18691 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18697 /* Moves between SSE/MMX and integer unit are expensive. */
18698 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18699 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18700 return ix86_cost
->mmxsse_to_integer
;
18701 if (MAYBE_FLOAT_CLASS_P (class1
))
18702 return ix86_cost
->fp_move
;
18703 if (MAYBE_SSE_CLASS_P (class1
))
18704 return ix86_cost
->sse_move
;
18705 if (MAYBE_MMX_CLASS_P (class1
))
18706 return ix86_cost
->mmx_move
;
18710 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18713 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18715 /* Flags and only flags can only hold CCmode values. */
18716 if (CC_REGNO_P (regno
))
18717 return GET_MODE_CLASS (mode
) == MODE_CC
;
18718 if (GET_MODE_CLASS (mode
) == MODE_CC
18719 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18720 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18722 if (FP_REGNO_P (regno
))
18723 return VALID_FP_MODE_P (mode
);
18724 if (SSE_REGNO_P (regno
))
18726 /* We implement the move patterns for all vector modes into and
18727 out of SSE registers, even when no operation instructions
18729 return (VALID_SSE_REG_MODE (mode
)
18730 || VALID_SSE2_REG_MODE (mode
)
18731 || VALID_MMX_REG_MODE (mode
)
18732 || VALID_MMX_REG_MODE_3DNOW (mode
));
18734 if (MMX_REGNO_P (regno
))
18736 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18737 so if the register is available at all, then we can move data of
18738 the given mode into or out of it. */
18739 return (VALID_MMX_REG_MODE (mode
)
18740 || VALID_MMX_REG_MODE_3DNOW (mode
));
18743 if (mode
== QImode
)
18745 /* Take care for QImode values - they can be in non-QI regs,
18746 but then they do cause partial register stalls. */
18747 if (regno
< 4 || TARGET_64BIT
)
18749 if (!TARGET_PARTIAL_REG_STALL
)
18751 return reload_in_progress
|| reload_completed
;
18753 /* We handle both integer and floats in the general purpose registers. */
18754 else if (VALID_INT_MODE_P (mode
))
18756 else if (VALID_FP_MODE_P (mode
))
18758 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18759 on to use that value in smaller contexts, this can easily force a
18760 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18761 supporting DImode, allow it. */
18762 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18768 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18769 tieable integer mode. */
18772 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18781 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18784 return TARGET_64BIT
;
18791 /* Return true if MODE1 is accessible in a register that can hold MODE2
18792 without copying. That is, all register classes that can hold MODE2
18793 can also hold MODE1. */
18796 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18798 if (mode1
== mode2
)
18801 if (ix86_tieable_integer_mode_p (mode1
)
18802 && ix86_tieable_integer_mode_p (mode2
))
18805 /* MODE2 being XFmode implies fp stack or general regs, which means we
18806 can tie any smaller floating point modes to it. Note that we do not
18807 tie this with TFmode. */
18808 if (mode2
== XFmode
)
18809 return mode1
== SFmode
|| mode1
== DFmode
;
18811 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18812 that we can tie it with SFmode. */
18813 if (mode2
== DFmode
)
18814 return mode1
== SFmode
;
18816 /* If MODE2 is only appropriate for an SSE register, then tie with
18817 any other mode acceptable to SSE registers. */
18818 if (GET_MODE_SIZE (mode2
) == 16
18819 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18820 return (GET_MODE_SIZE (mode1
) == 16
18821 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18823 /* If MODE2 is appropriate for an MMX register, then tie
18824 with any other mode acceptable to MMX registers. */
18825 if (GET_MODE_SIZE (mode2
) == 8
18826 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18827 return (GET_MODE_SIZE (mode1
) == 8
18828 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
18833 /* Return the cost of moving data of mode M between a
18834 register and memory. A value of 2 is the default; this cost is
18835 relative to those in `REGISTER_MOVE_COST'.
18837 If moving between registers and memory is more expensive than
18838 between two registers, you should define this macro to express the
18841 Model also increased moving costs of QImode registers in non
18845 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
18847 if (FLOAT_CLASS_P (class))
18864 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
18866 if (SSE_CLASS_P (class))
18869 switch (GET_MODE_SIZE (mode
))
18883 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
18885 if (MMX_CLASS_P (class))
18888 switch (GET_MODE_SIZE (mode
))
18899 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
18901 switch (GET_MODE_SIZE (mode
))
18905 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18906 : ix86_cost
->movzbl_load
);
18908 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18909 : ix86_cost
->int_store
[0] + 4);
18912 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18914 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18915 if (mode
== TFmode
)
18917 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18918 * (((int) GET_MODE_SIZE (mode
)
18919 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18923 /* Compute a (partial) cost for rtx X. Return true if the complete
18924 cost has been computed, and false if subexpressions should be
18925 scanned. In either case, *TOTAL contains the cost result. */
18928 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
18930 enum machine_mode mode
= GET_MODE (x
);
18938 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
18940 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
18942 else if (flag_pic
&& SYMBOLIC_CONST (x
)
18944 || (!GET_CODE (x
) != LABEL_REF
18945 && (GET_CODE (x
) != SYMBOL_REF
18946 || !SYMBOL_REF_LOCAL_P (x
)))))
18953 if (mode
== VOIDmode
)
18956 switch (standard_80387_constant_p (x
))
18961 default: /* Other constants */
18966 /* Start with (MEM (SYMBOL_REF)), since that's where
18967 it'll probably end up. Add a penalty for size. */
18968 *total
= (COSTS_N_INSNS (1)
18969 + (flag_pic
!= 0 && !TARGET_64BIT
)
18970 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
18976 /* The zero extensions is often completely free on x86_64, so make
18977 it as cheap as possible. */
18978 if (TARGET_64BIT
&& mode
== DImode
18979 && GET_MODE (XEXP (x
, 0)) == SImode
)
18981 else if (TARGET_ZERO_EXTEND_WITH_AND
)
18982 *total
= ix86_cost
->add
;
18984 *total
= ix86_cost
->movzx
;
18988 *total
= ix86_cost
->movsx
;
18992 if (CONST_INT_P (XEXP (x
, 1))
18993 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
18995 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18998 *total
= ix86_cost
->add
;
19001 if ((value
== 2 || value
== 3)
19002 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19004 *total
= ix86_cost
->lea
;
19014 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19016 if (CONST_INT_P (XEXP (x
, 1)))
19018 if (INTVAL (XEXP (x
, 1)) > 32)
19019 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19021 *total
= ix86_cost
->shift_const
* 2;
19025 if (GET_CODE (XEXP (x
, 1)) == AND
)
19026 *total
= ix86_cost
->shift_var
* 2;
19028 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19033 if (CONST_INT_P (XEXP (x
, 1)))
19034 *total
= ix86_cost
->shift_const
;
19036 *total
= ix86_cost
->shift_var
;
19041 if (FLOAT_MODE_P (mode
))
19043 *total
= ix86_cost
->fmul
;
19048 rtx op0
= XEXP (x
, 0);
19049 rtx op1
= XEXP (x
, 1);
19051 if (CONST_INT_P (XEXP (x
, 1)))
19053 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19054 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19058 /* This is arbitrary. */
19061 /* Compute costs correctly for widening multiplication. */
19062 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19063 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19064 == GET_MODE_SIZE (mode
))
19066 int is_mulwiden
= 0;
19067 enum machine_mode inner_mode
= GET_MODE (op0
);
19069 if (GET_CODE (op0
) == GET_CODE (op1
))
19070 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19071 else if (CONST_INT_P (op1
))
19073 if (GET_CODE (op0
) == SIGN_EXTEND
)
19074 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19077 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19081 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19084 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19085 + nbits
* ix86_cost
->mult_bit
19086 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19095 if (FLOAT_MODE_P (mode
))
19096 *total
= ix86_cost
->fdiv
;
19098 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19102 if (FLOAT_MODE_P (mode
))
19103 *total
= ix86_cost
->fadd
;
19104 else if (GET_MODE_CLASS (mode
) == MODE_INT
19105 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19107 if (GET_CODE (XEXP (x
, 0)) == PLUS
19108 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19109 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19110 && CONSTANT_P (XEXP (x
, 1)))
19112 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19113 if (val
== 2 || val
== 4 || val
== 8)
19115 *total
= ix86_cost
->lea
;
19116 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19117 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19119 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19123 else if (GET_CODE (XEXP (x
, 0)) == MULT
19124 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19126 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19127 if (val
== 2 || val
== 4 || val
== 8)
19129 *total
= ix86_cost
->lea
;
19130 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19131 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19135 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19137 *total
= ix86_cost
->lea
;
19138 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19139 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19140 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19147 if (FLOAT_MODE_P (mode
))
19149 *total
= ix86_cost
->fadd
;
19157 if (!TARGET_64BIT
&& mode
== DImode
)
19159 *total
= (ix86_cost
->add
* 2
19160 + (rtx_cost (XEXP (x
, 0), outer_code
)
19161 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19162 + (rtx_cost (XEXP (x
, 1), outer_code
)
19163 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19169 if (FLOAT_MODE_P (mode
))
19171 *total
= ix86_cost
->fchs
;
19177 if (!TARGET_64BIT
&& mode
== DImode
)
19178 *total
= ix86_cost
->add
* 2;
19180 *total
= ix86_cost
->add
;
19184 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19185 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19186 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19187 && XEXP (x
, 1) == const0_rtx
)
19189 /* This kind of construct is implemented using test[bwl].
19190 Treat it as if we had an AND. */
19191 *total
= (ix86_cost
->add
19192 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19193 + rtx_cost (const1_rtx
, outer_code
));
19199 if (!TARGET_SSE_MATH
19201 || (mode
== DFmode
&& !TARGET_SSE2
))
19206 if (FLOAT_MODE_P (mode
))
19207 *total
= ix86_cost
->fabs
;
19211 if (FLOAT_MODE_P (mode
))
19212 *total
= ix86_cost
->fsqrt
;
19216 if (XINT (x
, 1) == UNSPEC_TP
)
19227 static int current_machopic_label_num
;
19229 /* Given a symbol name and its associated stub, write out the
19230 definition of the stub. */
19233 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19235 unsigned int length
;
19236 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19237 int label
= ++current_machopic_label_num
;
19239 /* For 64-bit we shouldn't get here. */
19240 gcc_assert (!TARGET_64BIT
);
19242 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19243 symb
= (*targetm
.strip_name_encoding
) (symb
);
19245 length
= strlen (stub
);
19246 binder_name
= alloca (length
+ 32);
19247 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19249 length
= strlen (symb
);
19250 symbol_name
= alloca (length
+ 32);
19251 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19253 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19256 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19258 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19260 fprintf (file
, "%s:\n", stub
);
19261 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19265 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19266 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19267 fprintf (file
, "\tjmp\t*%%edx\n");
19270 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19272 fprintf (file
, "%s:\n", binder_name
);
19276 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19277 fprintf (file
, "\tpushl\t%%eax\n");
19280 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19282 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19284 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19285 fprintf (file
, "%s:\n", lazy_ptr_name
);
19286 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19287 fprintf (file
, "\t.long %s\n", binder_name
);
19291 darwin_x86_file_end (void)
19293 darwin_file_end ();
19296 #endif /* TARGET_MACHO */
19298 /* Order the registers for register allocator. */
19301 x86_order_regs_for_local_alloc (void)
19306 /* First allocate the local general purpose registers. */
19307 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19308 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19309 reg_alloc_order
[pos
++] = i
;
19311 /* Global general purpose registers. */
19312 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19313 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19314 reg_alloc_order
[pos
++] = i
;
19316 /* x87 registers come first in case we are doing FP math
19318 if (!TARGET_SSE_MATH
)
19319 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19320 reg_alloc_order
[pos
++] = i
;
19322 /* SSE registers. */
19323 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19324 reg_alloc_order
[pos
++] = i
;
19325 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19326 reg_alloc_order
[pos
++] = i
;
19328 /* x87 registers. */
19329 if (TARGET_SSE_MATH
)
19330 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19331 reg_alloc_order
[pos
++] = i
;
19333 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19334 reg_alloc_order
[pos
++] = i
;
19336 /* Initialize the rest of array as we do not allocate some registers
19338 while (pos
< FIRST_PSEUDO_REGISTER
)
19339 reg_alloc_order
[pos
++] = 0;
19342 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19343 struct attribute_spec.handler. */
19345 ix86_handle_struct_attribute (tree
*node
, tree name
,
19346 tree args ATTRIBUTE_UNUSED
,
19347 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19350 if (DECL_P (*node
))
19352 if (TREE_CODE (*node
) == TYPE_DECL
)
19353 type
= &TREE_TYPE (*node
);
19358 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19359 || TREE_CODE (*type
) == UNION_TYPE
)))
19361 warning (OPT_Wattributes
, "%qs attribute ignored",
19362 IDENTIFIER_POINTER (name
));
19363 *no_add_attrs
= true;
19366 else if ((is_attribute_p ("ms_struct", name
)
19367 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19368 || ((is_attribute_p ("gcc_struct", name
)
19369 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19371 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19372 IDENTIFIER_POINTER (name
));
19373 *no_add_attrs
= true;
19380 ix86_ms_bitfield_layout_p (tree record_type
)
19382 return (TARGET_MS_BITFIELD_LAYOUT
&&
19383 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19384 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19387 /* Returns an expression indicating where the this parameter is
19388 located on entry to the FUNCTION. */
19391 x86_this_parameter (tree function
)
19393 tree type
= TREE_TYPE (function
);
19397 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19398 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19401 if (ix86_function_regparm (type
, function
) > 0)
19405 parm
= TYPE_ARG_TYPES (type
);
19406 /* Figure out whether or not the function has a variable number of
19408 for (; parm
; parm
= TREE_CHAIN (parm
))
19409 if (TREE_VALUE (parm
) == void_type_node
)
19411 /* If not, the this parameter is in the first argument. */
19415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19417 return gen_rtx_REG (SImode
, regno
);
19421 if (aggregate_value_p (TREE_TYPE (type
), type
))
19422 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19424 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19427 /* Determine whether x86_output_mi_thunk can succeed. */
19430 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19431 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19432 HOST_WIDE_INT vcall_offset
, tree function
)
19434 /* 64-bit can handle anything. */
19438 /* For 32-bit, everything's fine if we have one free register. */
19439 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19442 /* Need a free register for vcall_offset. */
19446 /* Need a free register for GOT references. */
19447 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19450 /* Otherwise ok. */
19454 /* Output the assembler code for a thunk function. THUNK_DECL is the
19455 declaration for the thunk function itself, FUNCTION is the decl for
19456 the target function. DELTA is an immediate constant offset to be
19457 added to THIS. If VCALL_OFFSET is nonzero, the word at
19458 *(*this + vcall_offset) should be added to THIS. */
19461 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19462 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19463 HOST_WIDE_INT vcall_offset
, tree function
)
19466 rtx
this = x86_this_parameter (function
);
19469 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19470 pull it in now and let DELTA benefit. */
19473 else if (vcall_offset
)
19475 /* Put the this parameter into %eax. */
19477 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19478 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19481 this_reg
= NULL_RTX
;
19483 /* Adjust the this parameter by a fixed constant. */
19486 xops
[0] = GEN_INT (delta
);
19487 xops
[1] = this_reg
? this_reg
: this;
19490 if (!x86_64_general_operand (xops
[0], DImode
))
19492 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19494 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19498 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19501 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19504 /* Adjust the this parameter by a value stored in the vtable. */
19508 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19511 int tmp_regno
= 2 /* ECX */;
19512 if (lookup_attribute ("fastcall",
19513 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19514 tmp_regno
= 0 /* EAX */;
19515 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19518 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19521 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19523 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19525 /* Adjust the this parameter. */
19526 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19527 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19529 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19530 xops
[0] = GEN_INT (vcall_offset
);
19532 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19533 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19535 xops
[1] = this_reg
;
19537 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19539 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19542 /* If necessary, drop THIS back to its stack slot. */
19543 if (this_reg
&& this_reg
!= this)
19545 xops
[0] = this_reg
;
19547 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19550 xops
[0] = XEXP (DECL_RTL (function
), 0);
19553 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19554 output_asm_insn ("jmp\t%P0", xops
);
19557 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19558 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19559 tmp
= gen_rtx_MEM (QImode
, tmp
);
19561 output_asm_insn ("jmp\t%A0", xops
);
19566 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19567 output_asm_insn ("jmp\t%P0", xops
);
19572 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19573 tmp
= (gen_rtx_SYMBOL_REF
19575 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19576 tmp
= gen_rtx_MEM (QImode
, tmp
);
19578 output_asm_insn ("jmp\t%0", xops
);
19581 #endif /* TARGET_MACHO */
19583 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19584 output_set_got (tmp
, NULL_RTX
);
19587 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19588 output_asm_insn ("jmp\t{*}%1", xops
);
19594 x86_file_start (void)
19596 default_file_start ();
19598 darwin_file_start ();
19600 if (X86_FILE_START_VERSION_DIRECTIVE
)
19601 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19602 if (X86_FILE_START_FLTUSED
)
19603 fputs ("\t.global\t__fltused\n", asm_out_file
);
19604 if (ix86_asm_dialect
== ASM_INTEL
)
19605 fputs ("\t.intel_syntax\n", asm_out_file
);
19609 x86_field_alignment (tree field
, int computed
)
19611 enum machine_mode mode
;
19612 tree type
= TREE_TYPE (field
);
19614 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19616 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19617 ? get_inner_array_type (type
) : type
);
19618 if (mode
== DFmode
|| mode
== DCmode
19619 || GET_MODE_CLASS (mode
) == MODE_INT
19620 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19621 return MIN (32, computed
);
19625 /* Output assembler code to FILE to increment profiler label # LABELNO
19626 for profiling a function entry. */
19628 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19633 #ifndef NO_PROFILE_COUNTERS
19634 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19636 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19640 #ifndef NO_PROFILE_COUNTERS
19641 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19643 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19647 #ifndef NO_PROFILE_COUNTERS
19648 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19649 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19651 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19655 #ifndef NO_PROFILE_COUNTERS
19656 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19657 PROFILE_COUNT_REGISTER
);
19659 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19663 /* We don't have exact information about the insn sizes, but we may assume
19664 quite safely that we are informed about all 1 byte insns and memory
19665 address sizes. This is enough to eliminate unnecessary padding in
19669 min_insn_size (rtx insn
)
19673 if (!INSN_P (insn
) || !active_insn_p (insn
))
19676 /* Discard alignments we've emit and jump instructions. */
19677 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19678 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19681 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19682 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19685 /* Important case - calls are always 5 bytes.
19686 It is common to have many calls in the row. */
19688 && symbolic_reference_mentioned_p (PATTERN (insn
))
19689 && !SIBLING_CALL_P (insn
))
19691 if (get_attr_length (insn
) <= 1)
19694 /* For normal instructions we may rely on the sizes of addresses
19695 and the presence of symbol to require 4 bytes of encoding.
19696 This is not the case for jumps where references are PC relative. */
19697 if (!JUMP_P (insn
))
19699 l
= get_attr_length_address (insn
);
19700 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19709 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19713 ix86_avoid_jump_misspredicts (void)
19715 rtx insn
, start
= get_insns ();
19716 int nbytes
= 0, njumps
= 0;
19719 /* Look for all minimal intervals of instructions containing 4 jumps.
19720 The intervals are bounded by START and INSN. NBYTES is the total
19721 size of instructions in the interval including INSN and not including
19722 START. When the NBYTES is smaller than 16 bytes, it is possible
19723 that the end of START and INSN ends up in the same 16byte page.
19725 The smallest offset in the page INSN can start is the case where START
19726 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19727 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19729 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19732 nbytes
+= min_insn_size (insn
);
19734 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19735 INSN_UID (insn
), min_insn_size (insn
));
19737 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19738 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19746 start
= NEXT_INSN (start
);
19747 if ((JUMP_P (start
)
19748 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19749 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19751 njumps
--, isjump
= 1;
19754 nbytes
-= min_insn_size (start
);
19756 gcc_assert (njumps
>= 0);
19758 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19759 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19761 if (njumps
== 3 && isjump
&& nbytes
< 16)
19763 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19766 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19767 INSN_UID (insn
), padsize
);
19768 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19773 /* AMD Athlon works faster
19774 when RET is not destination of conditional jump or directly preceded
19775 by other jump instruction. We avoid the penalty by inserting NOP just
19776 before the RET instructions in such cases. */
19778 ix86_pad_returns (void)
19783 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19785 basic_block bb
= e
->src
;
19786 rtx ret
= BB_END (bb
);
19788 bool replace
= false;
19790 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19791 || !maybe_hot_bb_p (bb
))
19793 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19794 if (active_insn_p (prev
) || LABEL_P (prev
))
19796 if (prev
&& LABEL_P (prev
))
19801 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19802 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19803 && !(e
->flags
& EDGE_FALLTHRU
))
19808 prev
= prev_active_insn (ret
);
19810 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19813 /* Empty functions get branch mispredict even when the jump destination
19814 is not visible to us. */
19815 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19820 emit_insn_before (gen_return_internal_long (), ret
);
19826 /* Implement machine specific optimizations. We implement padding of returns
19827 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19831 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19832 ix86_pad_returns ();
19833 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19834 ix86_avoid_jump_misspredicts ();
19837 /* Return nonzero when QImode register that must be represented via REX prefix
19840 x86_extended_QIreg_mentioned_p (rtx insn
)
19843 extract_insn_cached (insn
);
19844 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19845 if (REG_P (recog_data
.operand
[i
])
19846 && REGNO (recog_data
.operand
[i
]) >= 4)
19851 /* Return nonzero when P points to register encoded via REX prefix.
19852 Called via for_each_rtx. */
19854 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19856 unsigned int regno
;
19859 regno
= REGNO (*p
);
19860 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19863 /* Return true when INSN mentions register that must be encoded using REX
19866 x86_extended_reg_mentioned_p (rtx insn
)
19868 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19871 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19872 optabs would emit if we didn't have TFmode patterns. */
19875 x86_emit_floatuns (rtx operands
[2])
19877 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19878 enum machine_mode mode
, inmode
;
19880 inmode
= GET_MODE (operands
[1]);
19881 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19884 in
= force_reg (inmode
, operands
[1]);
19885 mode
= GET_MODE (out
);
19886 neglab
= gen_label_rtx ();
19887 donelab
= gen_label_rtx ();
19888 f0
= gen_reg_rtx (mode
);
19890 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
19892 expand_float (out
, in
, 0);
19894 emit_jump_insn (gen_jump (donelab
));
19897 emit_label (neglab
);
19899 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
19901 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
19903 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19905 expand_float (f0
, i0
, 0);
19907 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19909 emit_label (donelab
);
19912 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19913 with all elements equal to VAR. Return true if successful. */
19916 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19917 rtx target
, rtx val
)
19919 enum machine_mode smode
, wsmode
, wvmode
;
19934 val
= force_reg (GET_MODE_INNER (mode
), val
);
19935 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19936 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19942 if (TARGET_SSE
|| TARGET_3DNOW_A
)
19944 val
= gen_lowpart (SImode
, val
);
19945 x
= gen_rtx_TRUNCATE (HImode
, val
);
19946 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
19947 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19969 /* Extend HImode to SImode using a paradoxical SUBREG. */
19970 tmp1
= gen_reg_rtx (SImode
);
19971 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19972 /* Insert the SImode value as low element of V4SImode vector. */
19973 tmp2
= gen_reg_rtx (V4SImode
);
19974 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19975 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19976 CONST0_RTX (V4SImode
),
19978 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19979 /* Cast the V4SImode vector back to a V8HImode vector. */
19980 tmp1
= gen_reg_rtx (V8HImode
);
19981 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
19982 /* Duplicate the low short through the whole low SImode word. */
19983 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
19984 /* Cast the V8HImode vector back to a V4SImode vector. */
19985 tmp2
= gen_reg_rtx (V4SImode
);
19986 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19987 /* Replicate the low element of the V4SImode vector. */
19988 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19989 /* Cast the V2SImode back to V8HImode, and store in target. */
19990 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20001 /* Extend QImode to SImode using a paradoxical SUBREG. */
20002 tmp1
= gen_reg_rtx (SImode
);
20003 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20004 /* Insert the SImode value as low element of V4SImode vector. */
20005 tmp2
= gen_reg_rtx (V4SImode
);
20006 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20007 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20008 CONST0_RTX (V4SImode
),
20010 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20011 /* Cast the V4SImode vector back to a V16QImode vector. */
20012 tmp1
= gen_reg_rtx (V16QImode
);
20013 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20014 /* Duplicate the low byte through the whole low SImode word. */
20015 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20016 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20017 /* Cast the V16QImode vector back to a V4SImode vector. */
20018 tmp2
= gen_reg_rtx (V4SImode
);
20019 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20020 /* Replicate the low element of the V4SImode vector. */
20021 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20022 /* Cast the V2SImode back to V16QImode, and store in target. */
20023 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20031 /* Replicate the value once into the next wider mode and recurse. */
20032 val
= convert_modes (wsmode
, smode
, val
, true);
20033 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20034 GEN_INT (GET_MODE_BITSIZE (smode
)),
20035 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20036 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20038 x
= gen_reg_rtx (wvmode
);
20039 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20040 gcc_unreachable ();
20041 emit_move_insn (target
, gen_lowpart (mode
, x
));
20049 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20050 whose ONE_VAR element is VAR, and other elements are zero. Return true
20054 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20055 rtx target
, rtx var
, int one_var
)
20057 enum machine_mode vsimode
;
20073 var
= force_reg (GET_MODE_INNER (mode
), var
);
20074 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20075 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20080 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20081 new_target
= gen_reg_rtx (mode
);
20083 new_target
= target
;
20084 var
= force_reg (GET_MODE_INNER (mode
), var
);
20085 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20086 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20087 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20090 /* We need to shuffle the value to the correct position, so
20091 create a new pseudo to store the intermediate result. */
20093 /* With SSE2, we can use the integer shuffle insns. */
20094 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20096 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20098 GEN_INT (one_var
== 1 ? 0 : 1),
20099 GEN_INT (one_var
== 2 ? 0 : 1),
20100 GEN_INT (one_var
== 3 ? 0 : 1)));
20101 if (target
!= new_target
)
20102 emit_move_insn (target
, new_target
);
20106 /* Otherwise convert the intermediate result to V4SFmode and
20107 use the SSE1 shuffle instructions. */
20108 if (mode
!= V4SFmode
)
20110 tmp
= gen_reg_rtx (V4SFmode
);
20111 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20116 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20118 GEN_INT (one_var
== 1 ? 0 : 1),
20119 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20120 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20122 if (mode
!= V4SFmode
)
20123 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20124 else if (tmp
!= target
)
20125 emit_move_insn (target
, tmp
);
20127 else if (target
!= new_target
)
20128 emit_move_insn (target
, new_target
);
20133 vsimode
= V4SImode
;
20139 vsimode
= V2SImode
;
20145 /* Zero extend the variable element to SImode and recurse. */
20146 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20148 x
= gen_reg_rtx (vsimode
);
20149 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20151 gcc_unreachable ();
20153 emit_move_insn (target
, gen_lowpart (mode
, x
));
20161 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20162 consisting of the values in VALS. It is known that all elements
20163 except ONE_VAR are constants. Return true if successful. */
20166 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20167 rtx target
, rtx vals
, int one_var
)
20169 rtx var
= XVECEXP (vals
, 0, one_var
);
20170 enum machine_mode wmode
;
20173 const_vec
= copy_rtx (vals
);
20174 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20175 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20183 /* For the two element vectors, it's just as easy to use
20184 the general case. */
20200 /* There's no way to set one QImode entry easily. Combine
20201 the variable value with its adjacent constant value, and
20202 promote to an HImode set. */
20203 x
= XVECEXP (vals
, 0, one_var
^ 1);
20206 var
= convert_modes (HImode
, QImode
, var
, true);
20207 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20208 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20209 x
= GEN_INT (INTVAL (x
) & 0xff);
20213 var
= convert_modes (HImode
, QImode
, var
, true);
20214 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20216 if (x
!= const0_rtx
)
20217 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20218 1, OPTAB_LIB_WIDEN
);
20220 x
= gen_reg_rtx (wmode
);
20221 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20222 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20224 emit_move_insn (target
, gen_lowpart (mode
, x
));
20231 emit_move_insn (target
, const_vec
);
20232 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20236 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20237 all values variable, and none identical. */
20240 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20241 rtx target
, rtx vals
)
20243 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20244 rtx op0
= NULL
, op1
= NULL
;
20245 bool use_vec_concat
= false;
20251 if (!mmx_ok
&& !TARGET_SSE
)
20257 /* For the two element vectors, we always implement VEC_CONCAT. */
20258 op0
= XVECEXP (vals
, 0, 0);
20259 op1
= XVECEXP (vals
, 0, 1);
20260 use_vec_concat
= true;
20264 half_mode
= V2SFmode
;
20267 half_mode
= V2SImode
;
20273 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20274 Recurse to load the two halves. */
20276 op0
= gen_reg_rtx (half_mode
);
20277 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20278 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20280 op1
= gen_reg_rtx (half_mode
);
20281 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20282 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20284 use_vec_concat
= true;
20295 gcc_unreachable ();
20298 if (use_vec_concat
)
20300 if (!register_operand (op0
, half_mode
))
20301 op0
= force_reg (half_mode
, op0
);
20302 if (!register_operand (op1
, half_mode
))
20303 op1
= force_reg (half_mode
, op1
);
20305 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20306 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20310 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20311 enum machine_mode inner_mode
;
20312 rtx words
[4], shift
;
20314 inner_mode
= GET_MODE_INNER (mode
);
20315 n_elts
= GET_MODE_NUNITS (mode
);
20316 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20317 n_elt_per_word
= n_elts
/ n_words
;
20318 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20320 for (i
= 0; i
< n_words
; ++i
)
20322 rtx word
= NULL_RTX
;
20324 for (j
= 0; j
< n_elt_per_word
; ++j
)
20326 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20327 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20333 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20334 word
, 1, OPTAB_LIB_WIDEN
);
20335 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20336 word
, 1, OPTAB_LIB_WIDEN
);
20344 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20345 else if (n_words
== 2)
20347 rtx tmp
= gen_reg_rtx (mode
);
20348 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20349 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20350 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20351 emit_move_insn (target
, tmp
);
20353 else if (n_words
== 4)
20355 rtx tmp
= gen_reg_rtx (V4SImode
);
20356 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20357 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20358 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20361 gcc_unreachable ();
20365 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20366 instructions unless MMX_OK is true. */
20369 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20371 enum machine_mode mode
= GET_MODE (target
);
20372 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20373 int n_elts
= GET_MODE_NUNITS (mode
);
20374 int n_var
= 0, one_var
= -1;
20375 bool all_same
= true, all_const_zero
= true;
20379 for (i
= 0; i
< n_elts
; ++i
)
20381 x
= XVECEXP (vals
, 0, i
);
20382 if (!CONSTANT_P (x
))
20383 n_var
++, one_var
= i
;
20384 else if (x
!= CONST0_RTX (inner_mode
))
20385 all_const_zero
= false;
20386 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20390 /* Constants are best loaded from the constant pool. */
20393 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20397 /* If all values are identical, broadcast the value. */
20399 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20400 XVECEXP (vals
, 0, 0)))
20403 /* Values where only one field is non-constant are best loaded from
20404 the pool and overwritten via move later. */
20408 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20409 XVECEXP (vals
, 0, one_var
),
20413 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20417 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20421 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20423 enum machine_mode mode
= GET_MODE (target
);
20424 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20425 bool use_vec_merge
= false;
20434 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20435 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20437 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20439 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20440 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20450 /* For the two element vectors, we implement a VEC_CONCAT with
20451 the extraction of the other element. */
20453 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20454 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20457 op0
= val
, op1
= tmp
;
20459 op0
= tmp
, op1
= val
;
20461 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20462 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20470 use_vec_merge
= true;
20474 /* tmp = target = A B C D */
20475 tmp
= copy_to_reg (target
);
20476 /* target = A A B B */
20477 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20478 /* target = X A B B */
20479 ix86_expand_vector_set (false, target
, val
, 0);
20480 /* target = A X C D */
20481 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20482 GEN_INT (1), GEN_INT (0),
20483 GEN_INT (2+4), GEN_INT (3+4)));
20487 /* tmp = target = A B C D */
20488 tmp
= copy_to_reg (target
);
20489 /* tmp = X B C D */
20490 ix86_expand_vector_set (false, tmp
, val
, 0);
20491 /* target = A B X D */
20492 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20493 GEN_INT (0), GEN_INT (1),
20494 GEN_INT (0+4), GEN_INT (3+4)));
20498 /* tmp = target = A B C D */
20499 tmp
= copy_to_reg (target
);
20500 /* tmp = X B C D */
20501 ix86_expand_vector_set (false, tmp
, val
, 0);
20502 /* target = A B X D */
20503 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20504 GEN_INT (0), GEN_INT (1),
20505 GEN_INT (2+4), GEN_INT (0+4)));
20509 gcc_unreachable ();
20514 /* Element 0 handled by vec_merge below. */
20517 use_vec_merge
= true;
20523 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20524 store into element 0, then shuffle them back. */
20528 order
[0] = GEN_INT (elt
);
20529 order
[1] = const1_rtx
;
20530 order
[2] = const2_rtx
;
20531 order
[3] = GEN_INT (3);
20532 order
[elt
] = const0_rtx
;
20534 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20535 order
[1], order
[2], order
[3]));
20537 ix86_expand_vector_set (false, target
, val
, 0);
20539 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20540 order
[1], order
[2], order
[3]));
20544 /* For SSE1, we have to reuse the V4SF code. */
20545 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20546 gen_lowpart (SFmode
, val
), elt
);
20551 use_vec_merge
= TARGET_SSE2
;
20554 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20565 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20566 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20567 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20571 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20573 emit_move_insn (mem
, target
);
20575 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20576 emit_move_insn (tmp
, val
);
20578 emit_move_insn (target
, mem
);
20583 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20585 enum machine_mode mode
= GET_MODE (vec
);
20586 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20587 bool use_vec_extr
= false;
20600 use_vec_extr
= true;
20612 tmp
= gen_reg_rtx (mode
);
20613 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20614 GEN_INT (elt
), GEN_INT (elt
),
20615 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20619 tmp
= gen_reg_rtx (mode
);
20620 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20624 gcc_unreachable ();
20627 use_vec_extr
= true;
20642 tmp
= gen_reg_rtx (mode
);
20643 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20644 GEN_INT (elt
), GEN_INT (elt
),
20645 GEN_INT (elt
), GEN_INT (elt
)));
20649 tmp
= gen_reg_rtx (mode
);
20650 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20654 gcc_unreachable ();
20657 use_vec_extr
= true;
20662 /* For SSE1, we have to reuse the V4SF code. */
20663 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20664 gen_lowpart (V4SFmode
, vec
), elt
);
20670 use_vec_extr
= TARGET_SSE2
;
20673 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20678 /* ??? Could extract the appropriate HImode element and shift. */
20685 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20686 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20688 /* Let the rtl optimizers know about the zero extension performed. */
20689 if (inner_mode
== HImode
)
20691 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20692 target
= gen_lowpart (SImode
, target
);
20695 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20699 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20701 emit_move_insn (mem
, vec
);
20703 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20704 emit_move_insn (target
, tmp
);
20708 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20709 pattern to reduce; DEST is the destination; IN is the input vector. */
20712 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20714 rtx tmp1
, tmp2
, tmp3
;
20716 tmp1
= gen_reg_rtx (V4SFmode
);
20717 tmp2
= gen_reg_rtx (V4SFmode
);
20718 tmp3
= gen_reg_rtx (V4SFmode
);
20720 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20721 emit_insn (fn (tmp2
, tmp1
, in
));
20723 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20724 GEN_INT (1), GEN_INT (1),
20725 GEN_INT (1+4), GEN_INT (1+4)));
20726 emit_insn (fn (dest
, tmp2
, tmp3
));
20729 /* Target hook for scalar_mode_supported_p. */
20731 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20733 if (DECIMAL_FLOAT_MODE_P (mode
))
20736 return default_scalar_mode_supported_p (mode
);
20739 /* Implements target hook vector_mode_supported_p. */
20741 ix86_vector_mode_supported_p (enum machine_mode mode
)
20743 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20745 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20747 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20749 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20754 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20756 We do this in the new i386 backend to maintain source compatibility
20757 with the old cc0-based compiler. */
20760 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20761 tree inputs ATTRIBUTE_UNUSED
,
20764 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20766 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20771 /* Implementes target vector targetm.asm.encode_section_info. This
20772 is not used by netware. */
20774 static void ATTRIBUTE_UNUSED
20775 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20777 default_encode_section_info (decl
, rtl
, first
);
20779 if (TREE_CODE (decl
) == VAR_DECL
20780 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20781 && ix86_in_large_data_p (decl
))
20782 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20785 /* Worker function for REVERSE_CONDITION. */
20788 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20790 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20791 ? reverse_condition (code
)
20792 : reverse_condition_maybe_unordered (code
));
20795 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20799 output_387_reg_move (rtx insn
, rtx
*operands
)
20801 if (REG_P (operands
[1])
20802 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20804 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20805 return output_387_ffreep (operands
, 0);
20806 return "fstp\t%y0";
20808 if (STACK_TOP_P (operands
[0]))
20809 return "fld%z1\t%y1";
20813 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20814 FP status register is set. */
20817 ix86_emit_fp_unordered_jump (rtx label
)
20819 rtx reg
= gen_reg_rtx (HImode
);
20822 emit_insn (gen_x86_fnstsw_1 (reg
));
20824 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
20826 emit_insn (gen_x86_sahf_1 (reg
));
20828 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20829 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
20833 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20835 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20836 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20839 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20840 gen_rtx_LABEL_REF (VOIDmode
, label
),
20842 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20844 emit_jump_insn (temp
);
20845 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
20848 /* Output code to perform a log1p XFmode calculation. */
20850 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20852 rtx label1
= gen_label_rtx ();
20853 rtx label2
= gen_label_rtx ();
20855 rtx tmp
= gen_reg_rtx (XFmode
);
20856 rtx tmp2
= gen_reg_rtx (XFmode
);
20858 emit_insn (gen_absxf2 (tmp
, op1
));
20859 emit_insn (gen_cmpxf (tmp
,
20860 CONST_DOUBLE_FROM_REAL_VALUE (
20861 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20863 emit_jump_insn (gen_bge (label1
));
20865 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20866 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20867 emit_jump (label2
);
20869 emit_label (label1
);
20870 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20871 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20872 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20873 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20875 emit_label (label2
);
20878 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20880 static void ATTRIBUTE_UNUSED
20881 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20884 /* With Binutils 2.15, the "@unwind" marker must be specified on
20885 every occurrence of the ".eh_frame" section, not just the first
20888 && strcmp (name
, ".eh_frame") == 0)
20890 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20891 flags
& SECTION_WRITE
? "aw" : "a");
20894 default_elf_asm_named_section (name
, flags
, decl
);
20897 /* Return the mangling of TYPE if it is an extended fundamental type. */
20899 static const char *
20900 ix86_mangle_fundamental_type (tree type
)
20902 switch (TYPE_MODE (type
))
20905 /* __float128 is "g". */
20908 /* "long double" or __float80 is "e". */
20915 /* For 32-bit code we can save PIC register setup by using
20916 __stack_chk_fail_local hidden function instead of calling
20917 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20918 register, so it is better to call __stack_chk_fail directly. */
20921 ix86_stack_protect_fail (void)
20923 return TARGET_64BIT
20924 ? default_external_stack_protect_fail ()
20925 : default_hidden_stack_protect_fail ();
20928 /* Select a format to encode pointers in exception handling data. CODE
20929 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20930 true if the symbol may be affected by dynamic relocations.
20932 ??? All x86 object file formats are capable of representing this.
20933 After all, the relocation needed is the same as for the call insn.
20934 Whether or not a particular assembler allows us to enter such, I
20935 guess we'll have to see. */
20937 asm_preferred_eh_data_format (int code
, int global
)
20941 int type
= DW_EH_PE_sdata8
;
20943 || ix86_cmodel
== CM_SMALL_PIC
20944 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
20945 type
= DW_EH_PE_sdata4
;
20946 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
20948 if (ix86_cmodel
== CM_SMALL
20949 || (ix86_cmodel
== CM_MEDIUM
&& code
))
20950 return DW_EH_PE_udata4
;
20951 return DW_EH_PE_absptr
;
20954 /* Expand copysign from SIGN to the positive value ABS_VALUE
20955 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20958 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
20960 enum machine_mode mode
= GET_MODE (sign
);
20961 rtx sgn
= gen_reg_rtx (mode
);
20962 if (mask
== NULL_RTX
)
20964 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
20965 if (!VECTOR_MODE_P (mode
))
20967 /* We need to generate a scalar mode mask in this case. */
20968 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20969 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20970 mask
= gen_reg_rtx (mode
);
20971 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20975 mask
= gen_rtx_NOT (mode
, mask
);
20976 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
20977 gen_rtx_AND (mode
, mask
, sign
)));
20978 emit_insn (gen_rtx_SET (VOIDmode
, result
,
20979 gen_rtx_IOR (mode
, abs_value
, sgn
)));
20982 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20983 mask for masking out the sign-bit is stored in *SMASK, if that is
20986 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
20988 enum machine_mode mode
= GET_MODE (op0
);
20991 xa
= gen_reg_rtx (mode
);
20992 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
20993 if (!VECTOR_MODE_P (mode
))
20995 /* We need to generate a scalar mode mask in this case. */
20996 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20997 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20998 mask
= gen_reg_rtx (mode
);
20999 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21001 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21002 gen_rtx_AND (mode
, op0
, mask
)));
21010 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21011 swapping the operands if SWAP_OPERANDS is true. The expanded
21012 code is a forward jump to a newly created label in case the
21013 comparison is true. The generated label rtx is returned. */
21015 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21016 bool swap_operands
)
21027 label
= gen_label_rtx ();
21028 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21029 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21030 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21031 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21032 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21033 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21034 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21035 JUMP_LABEL (tmp
) = label
;
21040 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21041 using comparison code CODE. Operands are swapped for the comparison if
21042 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21044 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21045 bool swap_operands
)
21047 enum machine_mode mode
= GET_MODE (op0
);
21048 rtx mask
= gen_reg_rtx (mode
);
21057 if (mode
== DFmode
)
21058 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21059 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21061 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21062 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21067 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21068 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21070 ix86_gen_TWO52 (enum machine_mode mode
)
21072 REAL_VALUE_TYPE TWO52r
;
21075 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21076 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21077 TWO52
= force_reg (mode
, TWO52
);
21082 /* Expand SSE sequence for computing lround from OP1 storing
21085 ix86_expand_lround (rtx op0
, rtx op1
)
21087 /* C code for the stuff we're doing below:
21088 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21091 enum machine_mode mode
= GET_MODE (op1
);
21092 const struct real_format
*fmt
;
21093 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21096 /* load nextafter (0.5, 0.0) */
21097 fmt
= REAL_MODE_FORMAT (mode
);
21098 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21099 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21101 /* adj = copysign (0.5, op1) */
21102 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21103 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21105 /* adj = op1 + adj */
21106 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21108 /* op0 = (imode)adj */
21109 expand_fix (op0
, adj
, 0);
21112 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21115 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21117 /* C code for the stuff we're doing below (for do_floor):
21119 xi -= (double)xi > op1 ? 1 : 0;
21122 enum machine_mode fmode
= GET_MODE (op1
);
21123 enum machine_mode imode
= GET_MODE (op0
);
21124 rtx ireg
, freg
, label
, tmp
;
21126 /* reg = (long)op1 */
21127 ireg
= gen_reg_rtx (imode
);
21128 expand_fix (ireg
, op1
, 0);
21130 /* freg = (double)reg */
21131 freg
= gen_reg_rtx (fmode
);
21132 expand_float (freg
, ireg
, 0);
21134 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21135 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21136 freg
, op1
, !do_floor
);
21137 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21138 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21139 emit_move_insn (ireg
, tmp
);
21141 emit_label (label
);
21142 LABEL_NUSES (label
) = 1;
21144 emit_move_insn (op0
, ireg
);
21147 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21148 result in OPERAND0. */
21150 ix86_expand_rint (rtx operand0
, rtx operand1
)
21152 /* C code for the stuff we're doing below:
21153 xa = fabs (operand1);
21154 if (!isless (xa, 2**52))
21156 xa = xa + 2**52 - 2**52;
21157 return copysign (xa, operand1);
21159 enum machine_mode mode
= GET_MODE (operand0
);
21160 rtx res
, xa
, label
, TWO52
, mask
;
21162 res
= gen_reg_rtx (mode
);
21163 emit_move_insn (res
, operand1
);
21165 /* xa = abs (operand1) */
21166 xa
= ix86_expand_sse_fabs (res
, &mask
);
21168 /* if (!isless (xa, TWO52)) goto label; */
21169 TWO52
= ix86_gen_TWO52 (mode
);
21170 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21172 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21173 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21175 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21177 emit_label (label
);
21178 LABEL_NUSES (label
) = 1;
21180 emit_move_insn (operand0
, res
);
21183 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21186 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21188 /* C code for the stuff we expand below.
21189 double xa = fabs (x), x2;
21190 if (!isless (xa, TWO52))
21192 xa = xa + TWO52 - TWO52;
21193 x2 = copysign (xa, x);
21202 enum machine_mode mode
= GET_MODE (operand0
);
21203 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21205 TWO52
= ix86_gen_TWO52 (mode
);
21207 /* Temporary for holding the result, initialized to the input
21208 operand to ease control flow. */
21209 res
= gen_reg_rtx (mode
);
21210 emit_move_insn (res
, operand1
);
21212 /* xa = abs (operand1) */
21213 xa
= ix86_expand_sse_fabs (res
, &mask
);
21215 /* if (!isless (xa, TWO52)) goto label; */
21216 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21218 /* xa = xa + TWO52 - TWO52; */
21219 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21220 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21222 /* xa = copysign (xa, operand1) */
21223 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21225 /* generate 1.0 or -1.0 */
21226 one
= force_reg (mode
,
21227 const_double_from_real_value (do_floor
21228 ? dconst1
: dconstm1
, mode
));
21230 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21231 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21232 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21233 gen_rtx_AND (mode
, one
, tmp
)));
21234 /* We always need to subtract here to preserve signed zero. */
21235 tmp
= expand_simple_binop (mode
, MINUS
,
21236 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21237 emit_move_insn (res
, tmp
);
21239 emit_label (label
);
21240 LABEL_NUSES (label
) = 1;
21242 emit_move_insn (operand0
, res
);
21245 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21248 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21250 /* C code for the stuff we expand below.
21251 double xa = fabs (x), x2;
21252 if (!isless (xa, TWO52))
21254 x2 = (double)(long)x;
21261 if (HONOR_SIGNED_ZEROS (mode))
21262 return copysign (x2, x);
21265 enum machine_mode mode
= GET_MODE (operand0
);
21266 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21268 TWO52
= ix86_gen_TWO52 (mode
);
21270 /* Temporary for holding the result, initialized to the input
21271 operand to ease control flow. */
21272 res
= gen_reg_rtx (mode
);
21273 emit_move_insn (res
, operand1
);
21275 /* xa = abs (operand1) */
21276 xa
= ix86_expand_sse_fabs (res
, &mask
);
21278 /* if (!isless (xa, TWO52)) goto label; */
21279 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21281 /* xa = (double)(long)x */
21282 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21283 expand_fix (xi
, res
, 0);
21284 expand_float (xa
, xi
, 0);
21287 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21289 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21290 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21291 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21292 gen_rtx_AND (mode
, one
, tmp
)));
21293 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21294 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21295 emit_move_insn (res
, tmp
);
21297 if (HONOR_SIGNED_ZEROS (mode
))
21298 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21300 emit_label (label
);
21301 LABEL_NUSES (label
) = 1;
21303 emit_move_insn (operand0
, res
);
21306 /* Expand SSE sequence for computing round from OPERAND1 storing
21307 into OPERAND0. Sequence that works without relying on DImode truncation
21308 via cvttsd2siq that is only available on 64bit targets. */
21310 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21312 /* C code for the stuff we expand below.
21313 double xa = fabs (x), xa2, x2;
21314 if (!isless (xa, TWO52))
21316 Using the absolute value and copying back sign makes
21317 -0.0 -> -0.0 correct.
21318 xa2 = xa + TWO52 - TWO52;
21323 else if (dxa > 0.5)
21325 x2 = copysign (xa2, x);
21328 enum machine_mode mode
= GET_MODE (operand0
);
21329 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21331 TWO52
= ix86_gen_TWO52 (mode
);
21333 /* Temporary for holding the result, initialized to the input
21334 operand to ease control flow. */
21335 res
= gen_reg_rtx (mode
);
21336 emit_move_insn (res
, operand1
);
21338 /* xa = abs (operand1) */
21339 xa
= ix86_expand_sse_fabs (res
, &mask
);
21341 /* if (!isless (xa, TWO52)) goto label; */
21342 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21344 /* xa2 = xa + TWO52 - TWO52; */
21345 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21346 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21348 /* dxa = xa2 - xa; */
21349 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21351 /* generate 0.5, 1.0 and -0.5 */
21352 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21353 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21354 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21358 tmp
= gen_reg_rtx (mode
);
21359 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21360 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21361 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21362 gen_rtx_AND (mode
, one
, tmp
)));
21363 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21364 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21365 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21366 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21367 gen_rtx_AND (mode
, one
, tmp
)));
21368 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21370 /* res = copysign (xa2, operand1) */
21371 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21373 emit_label (label
);
21374 LABEL_NUSES (label
) = 1;
21376 emit_move_insn (operand0
, res
);
21379 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21382 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21384 /* C code for SSE variant we expand below.
21385 double xa = fabs (x), x2;
21386 if (!isless (xa, TWO52))
21388 x2 = (double)(long)x;
21389 if (HONOR_SIGNED_ZEROS (mode))
21390 return copysign (x2, x);
21393 enum machine_mode mode
= GET_MODE (operand0
);
21394 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21396 TWO52
= ix86_gen_TWO52 (mode
);
21398 /* Temporary for holding the result, initialized to the input
21399 operand to ease control flow. */
21400 res
= gen_reg_rtx (mode
);
21401 emit_move_insn (res
, operand1
);
21403 /* xa = abs (operand1) */
21404 xa
= ix86_expand_sse_fabs (res
, &mask
);
21406 /* if (!isless (xa, TWO52)) goto label; */
21407 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21409 /* x = (double)(long)x */
21410 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21411 expand_fix (xi
, res
, 0);
21412 expand_float (res
, xi
, 0);
21414 if (HONOR_SIGNED_ZEROS (mode
))
21415 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21417 emit_label (label
);
21418 LABEL_NUSES (label
) = 1;
21420 emit_move_insn (operand0
, res
);
21423 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21426 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21428 enum machine_mode mode
= GET_MODE (operand0
);
21429 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21431 /* C code for SSE variant we expand below.
21432 double xa = fabs (x), x2;
21433 if (!isless (xa, TWO52))
21435 xa2 = xa + TWO52 - TWO52;
21439 x2 = copysign (xa2, x);
21443 TWO52
= ix86_gen_TWO52 (mode
);
21445 /* Temporary for holding the result, initialized to the input
21446 operand to ease control flow. */
21447 res
= gen_reg_rtx (mode
);
21448 emit_move_insn (res
, operand1
);
21450 /* xa = abs (operand1) */
21451 xa
= ix86_expand_sse_fabs (res
, &smask
);
21453 /* if (!isless (xa, TWO52)) goto label; */
21454 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21456 /* res = xa + TWO52 - TWO52; */
21457 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21458 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21459 emit_move_insn (res
, tmp
);
21462 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21464 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21465 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21466 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21467 gen_rtx_AND (mode
, mask
, one
)));
21468 tmp
= expand_simple_binop (mode
, MINUS
,
21469 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21470 emit_move_insn (res
, tmp
);
21472 /* res = copysign (res, operand1) */
21473 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21475 emit_label (label
);
21476 LABEL_NUSES (label
) = 1;
21478 emit_move_insn (operand0
, res
);
21481 /* Expand SSE sequence for computing round from OPERAND1 storing
21484 ix86_expand_round (rtx operand0
, rtx operand1
)
21486 /* C code for the stuff we're doing below:
21487 double xa = fabs (x);
21488 if (!isless (xa, TWO52))
21490 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21491 return copysign (xa, x);
21493 enum machine_mode mode
= GET_MODE (operand0
);
21494 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21495 const struct real_format
*fmt
;
21496 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21498 /* Temporary for holding the result, initialized to the input
21499 operand to ease control flow. */
21500 res
= gen_reg_rtx (mode
);
21501 emit_move_insn (res
, operand1
);
21503 TWO52
= ix86_gen_TWO52 (mode
);
21504 xa
= ix86_expand_sse_fabs (res
, &mask
);
21505 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21507 /* load nextafter (0.5, 0.0) */
21508 fmt
= REAL_MODE_FORMAT (mode
);
21509 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21510 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21512 /* xa = xa + 0.5 */
21513 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21514 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21516 /* xa = (double)(int64_t)xa */
21517 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21518 expand_fix (xi
, xa
, 0);
21519 expand_float (xa
, xi
, 0);
21521 /* res = copysign (xa, operand1) */
21522 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21524 emit_label (label
);
21525 LABEL_NUSES (label
) = 1;
21527 emit_move_insn (operand0
, res
);
21531 /* Table of valid machine attributes. */
21532 static const struct attribute_spec ix86_attribute_table
[] =
21534 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
21535 /* Stdcall attribute says callee is responsible for popping arguments
21536 if they are not variable. */
21537 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
21538 /* Fastcall attribute says callee is responsible for popping arguments
21539 if they are not variable. */
21540 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
21541 /* Cdecl attribute says the callee is a normal C declaration */
21542 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
21543 /* Regparm attribute specifies how many integer arguments are to be
21544 passed in registers. */
21545 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
21546 /* Sseregparm attribute says we are using x86_64 calling conventions
21547 for FP arguments. */
21548 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
21549 /* force_align_arg_pointer says this function realigns the stack at entry. */
21550 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
21551 false, true, true, ix86_handle_cconv_attribute
},
21552 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21553 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
21554 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
21555 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
21557 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
21558 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
21559 #ifdef SUBTARGET_ATTRIBUTE_TABLE
21560 SUBTARGET_ATTRIBUTE_TABLE
,
21562 { NULL
, 0, 0, false, false, false, NULL
}
21565 /* Initialize the GCC target structure. */
21566 #undef TARGET_ATTRIBUTE_TABLE
21567 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
21568 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21569 # undef TARGET_MERGE_DECL_ATTRIBUTES
21570 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
21573 #undef TARGET_COMP_TYPE_ATTRIBUTES
21574 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
21576 #undef TARGET_INIT_BUILTINS
21577 #define TARGET_INIT_BUILTINS ix86_init_builtins
21578 #undef TARGET_EXPAND_BUILTIN
21579 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
21581 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
21582 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
21583 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
21584 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
21586 #undef TARGET_ASM_FUNCTION_EPILOGUE
21587 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
21589 #undef TARGET_ENCODE_SECTION_INFO
21590 #ifndef SUBTARGET_ENCODE_SECTION_INFO
21591 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
21593 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
21596 #undef TARGET_ASM_OPEN_PAREN
21597 #define TARGET_ASM_OPEN_PAREN ""
21598 #undef TARGET_ASM_CLOSE_PAREN
21599 #define TARGET_ASM_CLOSE_PAREN ""
21601 #undef TARGET_ASM_ALIGNED_HI_OP
21602 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
21603 #undef TARGET_ASM_ALIGNED_SI_OP
21604 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
21606 #undef TARGET_ASM_ALIGNED_DI_OP
21607 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
21610 #undef TARGET_ASM_UNALIGNED_HI_OP
21611 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
21612 #undef TARGET_ASM_UNALIGNED_SI_OP
21613 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
21614 #undef TARGET_ASM_UNALIGNED_DI_OP
21615 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
21617 #undef TARGET_SCHED_ADJUST_COST
21618 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
21619 #undef TARGET_SCHED_ISSUE_RATE
21620 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
21621 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
21622 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
21623 ia32_multipass_dfa_lookahead
21625 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
21626 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
21629 #undef TARGET_HAVE_TLS
21630 #define TARGET_HAVE_TLS true
21632 #undef TARGET_CANNOT_FORCE_CONST_MEM
21633 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
21634 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
21635 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
21637 #undef TARGET_DELEGITIMIZE_ADDRESS
21638 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
21640 #undef TARGET_MS_BITFIELD_LAYOUT_P
21641 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
21644 #undef TARGET_BINDS_LOCAL_P
21645 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
21648 #undef TARGET_ASM_OUTPUT_MI_THUNK
21649 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
21650 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
21651 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
21653 #undef TARGET_ASM_FILE_START
21654 #define TARGET_ASM_FILE_START x86_file_start
21656 #undef TARGET_DEFAULT_TARGET_FLAGS
21657 #define TARGET_DEFAULT_TARGET_FLAGS \
21659 | TARGET_64BIT_DEFAULT \
21660 | TARGET_SUBTARGET_DEFAULT \
21661 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
21663 #undef TARGET_HANDLE_OPTION
21664 #define TARGET_HANDLE_OPTION ix86_handle_option
21666 #undef TARGET_RTX_COSTS
21667 #define TARGET_RTX_COSTS ix86_rtx_costs
21668 #undef TARGET_ADDRESS_COST
21669 #define TARGET_ADDRESS_COST ix86_address_cost
21671 #undef TARGET_FIXED_CONDITION_CODE_REGS
21672 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
21673 #undef TARGET_CC_MODES_COMPATIBLE
21674 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
21676 #undef TARGET_MACHINE_DEPENDENT_REORG
21677 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
21679 #undef TARGET_BUILD_BUILTIN_VA_LIST
21680 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
21682 #undef TARGET_MD_ASM_CLOBBERS
21683 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
21685 #undef TARGET_PROMOTE_PROTOTYPES
21686 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
21687 #undef TARGET_STRUCT_VALUE_RTX
21688 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
21689 #undef TARGET_SETUP_INCOMING_VARARGS
21690 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
21691 #undef TARGET_MUST_PASS_IN_STACK
21692 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
21693 #undef TARGET_PASS_BY_REFERENCE
21694 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
21695 #undef TARGET_INTERNAL_ARG_POINTER
21696 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
21697 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
21698 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
21700 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
21701 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
21703 #undef TARGET_SCALAR_MODE_SUPPORTED_P
21704 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
21706 #undef TARGET_VECTOR_MODE_SUPPORTED_P
21707 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
21710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
21711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
21714 #ifdef SUBTARGET_INSERT_ATTRIBUTES
21715 #undef TARGET_INSERT_ATTRIBUTES
21716 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
21719 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
21720 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
21722 #undef TARGET_STACK_PROTECT_FAIL
21723 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
21725 #undef TARGET_FUNCTION_VALUE
21726 #define TARGET_FUNCTION_VALUE ix86_function_value
21728 struct gcc_target targetm
= TARGET_INITIALIZER
;
21730 #include "gt-i386.h"