1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit: -1 disables stack-limit checking unless the
   target configuration overrides it.
   NOTE(review): the matching #endif is not visible in this extract —
   confirm it is present in the full file.  */
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables. */
/* Maps QImode/HImode/SImode/DImode to indexes 0-3 of the per-mode cost
   arrays in struct processor_costs below.
   NOTE(review): the final fallback arm of this conditional (index for
   "other" modes) appears truncated in this extract — verify the macro is
   complete in the full file.  */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* COSTS_N_BYTES expresses a cost as a byte count of the instruction
   encoding; used by size_cost below when optimizing for size.  */
69 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy entry for the word-size slot a cost table
   does not care about: unconditionally fall back to a library call.  */
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for size rather than speed; all entries are
   byte counts via COSTS_N_BYTES.
   NOTE(review): the embedded source line numbers jump (92-93, 117, 128-129
   missing), so some fields and the closing brace of this initializer were
   dropped by the extraction — verify against the full i386.c.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* Cost table for the Intel 386; entries in COSTS_N_INSNS units.
   NOTE(review): several original lines (e.g. 151, 175, closing brace) are
   missing from this extract — verify against the full i386.c.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Cost table for the Intel 486; entries in COSTS_N_INSNS units.
   NOTE(review): the tail of this initializer (second stringop entry and
   closing brace) was dropped by the extraction — verify upstream.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Cost table for the Intel Pentium; entries in COSTS_N_INSNS units.
   NOTE(review): the tail of this initializer (final stringop entry and
   closing brace) was dropped by the extraction — verify upstream.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Cost table for the Intel PentiumPro/P6 family; entries in COSTS_N_INSNS
   units.  NOTE(review): the tail of this initializer (closing brace) was
   dropped by the extraction — verify upstream.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD Geode; entries in COSTS_N_INSNS units.
   NOTE(review): the tail of this initializer (closing brace) was dropped
   by the extraction — verify upstream.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD K6; entries in COSTS_N_INSNS units.
   NOTE(review): the tail of this initializer (closing brace) was dropped
   by the extraction — verify upstream.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD Athlon; entries in COSTS_N_INSNS units.
   NOTE(review): the tail of this initializer (closing brace) was dropped
   by the extraction — verify upstream.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cost table for the AMD K8 (Opteron/Athlon 64); entries in COSTS_N_INSNS
   units.  NOTE(review): the closing "};" of this initializer is not
   visible in this extract — verify upstream.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table for AMD Family 10h (Barcelona); entries in COSTS_N_INSNS
   units.  NOTE(review): lines 641-644 below look like fragments of a
   dropped comment block (an xmm->GPR latency table fused into the code by
   the extraction), and the closing "};" is missing — verify upstream.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table for the Intel Pentium 4; entries in COSTS_N_INSNS units.
   NOTE(review): the tail of this initializer (end of the second stringop
   entry and the closing brace) was dropped by the extraction — verify
   upstream.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Cost table for the Intel Nocona (64-bit Pentium 4); entries in
   COSTS_N_INSNS units.  NOTE(review): the closing "};" and at least one
   line of the memset table are missing from this extract — verify
   upstream.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cost table for the Intel Core 2; entries in COSTS_N_INSNS units.
   NOTE(review): the closing "};" of this initializer is not visible in
   this extract, and the comment on line 828 says "loading" where the
   field position suggests "storing fp registers" — verify upstream.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of loading integer registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* The active cost table used by the rtx-cost hooks.  Initialized to
   pentium_cost; presumably repointed at the table matching the selected
   -mtune processor during option processing -- TODO(review): confirm
   where this is reassigned.  */
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
1008 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
1009 generic because it is not working well with PPro base chips. */
1010 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1012 const int x86_push_memory
= m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1013 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1014 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
1015 /* Enable to zero extend integer registers to avoid partial dependencies */
1016 const int x86_movx
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1017 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */;
1018 const int x86_double_with_add
= ~m_386
;
1019 const int x86_use_bit_test
= m_386
;
1020 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
1021 | m_K6
| m_CORE2
| m_GENERIC
;
1022 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1024 const int x86_3dnow_a
= m_ATHLON_K8_AMDFAM10
;
1025 const int x86_deep_branch
= m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
1026 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1027 /* Branch hints were put in P4 based on simulation result. But
1028 after P4 was made, no performance benefit was observed with
1029 branch hints. It also increases the code size. As a result,
1030 icc never generates branch hints. */
1031 const int x86_branch_hints
= 0;
1032 const int x86_use_sahf
= m_PPRO
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_GENERIC32
;
1033 /*m_GENERIC | m_ATHLON_K8 ? */
1034 /* We probably ought to watch for partial register stalls on Generic32
1035 compilation setting as well. However in current implementation the
1036 partial register stalls are not eliminated very well - they can
1037 be introduced via subregs synthesized by combine and can happen
1038 in caller/callee saving sequences.
1039 Because this option pays back little on PPro based chips and is in conflict
1040 with partial reg. dependencies used by Athlon/P4 based chips, it is better
1041 to leave it off for generic32 for now. */
1042 const int x86_partial_reg_stall
= m_PPRO
;
1043 const int x86_partial_flag_reg_stall
= m_CORE2
| m_GENERIC
;
1044 const int x86_use_himode_fiop
= m_386
| m_486
| m_K6_GEODE
;
1045 const int x86_use_simode_fiop
= ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
1046 | m_CORE2
| m_GENERIC
);
1047 const int x86_use_mov0
= m_K6
;
1048 const int x86_use_cltd
= ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
);
1049 /* Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1050 const int x86_use_xchgb
= m_PENT4
;
1051 const int x86_read_modify_write
= ~m_PENT
;
1052 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
1053 const int x86_split_long_moves
= m_PPRO
;
1054 const int x86_promote_QImode
= m_K6_GEODE
| m_PENT
| m_386
| m_486
1055 | m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
;
1057 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
1058 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
1059 const int x86_qimode_math
= ~(0);
1060 const int x86_promote_qi_regs
= 0;
1061 /* On PPro this flag is meant to avoid partial register stalls. Just like
1062 the x86_partial_reg_stall this option might be considered for Generic32
1063 if our scheme for avoiding partial stalls was more effective. */
1064 const int x86_himode_math
= ~(m_PPRO
);
1065 const int x86_promote_hi_regs
= m_PPRO
;
1066 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1067 const int x86_sub_esp_4
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1068 | m_CORE2
| m_GENERIC
;
1069 const int x86_sub_esp_8
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1070 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1071 const int x86_add_esp_4
= m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
1072 | m_CORE2
| m_GENERIC
;
1073 const int x86_add_esp_8
= m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1074 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
;
1075 /* Enable if integer moves are preferred for DFmode copies */
1076 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1077 | m_PPRO
| m_CORE2
| m_GENERIC
| m_GEODE
);
1078 const int x86_partial_reg_dependency
= m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1079 | m_CORE2
| m_GENERIC
;
1080 const int x86_memory_mismatch_stall
= m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
1081 | m_CORE2
| m_GENERIC
;
1082 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1083 for outgoing arguments will be computed and placed into the variable
1084 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1085 for each call; instead, the function prologue should increase the stack frame
1086 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1088 const int x86_accumulate_outgoing_args
= m_ATHLON_K8_AMDFAM10
| m_PENT4
1089 | m_NOCONA
| m_PPRO
| m_CORE2
1091 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
1092 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
;
1093 const int x86_shift1
= ~m_486
;
1094 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
1095 | m_ATHLON_K8_AMDFAM10
| m_PENT4
1096 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1097 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
1098 that treat 128bit SSE registers as single units versus K8 based chips that
1099 divide SSE registers to two 64bit halves.
1100 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
1101 to allow register renaming on 128bit SSE units, but usually results in one
1102 extra microop on 64bit SSE units. Experimental results shows that disabling
1103 this option on P4 brings over 20% SPECfp regression, while enabling it on
1104 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
1106 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1107 | m_GENERIC
| m_AMDFAM10
;
1108 /* Set for machines where the type and dependencies are resolved on SSE
1109 register parts instead of whole registers, so we may maintain just
1110 lower part of scalar values in proper format leaving the upper part
1112 const int x86_sse_split_regs
= m_ATHLON_K8
;
1113 /* Code generation for scalar reg-reg moves of single and double precision data:
1114 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1118 if (x86_sse_partial_reg_dependency == true)
1123 Code generation for scalar loads of double precision data:
1124 if (x86_sse_split_regs == true)
1125 movlpd mem, reg (gas syntax)
1129 Code generation for unaligned packed loads of single precision data
1130 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1131 if (x86_sse_unaligned_move_optimal)
1134 if (x86_sse_partial_reg_dependency == true)
1146 Code generation for unaligned packed loads of double precision data
1147 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1148 if (x86_sse_unaligned_move_optimal)
1151 if (x86_sse_split_regs == true)
1162 const int x86_sse_unaligned_move_optimal
= m_AMDFAM10
;
1163 const int x86_sse_typeless_stores
= m_ATHLON_K8_AMDFAM10
;
1164 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
1165 const int x86_use_ffreep
= m_ATHLON_K8_AMDFAM10
;
1166 const int x86_use_incdec
= ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
);
1168 const int x86_inter_unit_moves
= ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
);
1170 const int x86_ext_80387_constants
= m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
1171 | m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
1172 /* Some CPU cores are not able to predict more than 4 branch instructions in
1173 the 16 byte window. */
1174 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1175 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1176 const int x86_schedule
= m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
1177 | m_CORE2
| m_GENERIC
;
1178 const int x86_use_bt
= m_ATHLON_K8_AMDFAM10
;
1179 /* Compare and exchange was added for 80486. */
1180 const int x86_cmpxchg
= ~m_386
;
1181 /* Compare and exchange 8 bytes was added for pentium. */
1182 const int x86_cmpxchg8b
= ~(m_386
| m_486
);
1183 /* Exchange and add was added for 80486. */
1184 const int x86_xadd
= ~m_386
;
1185 /* Byteswap was added for 80486. */
1186 const int x86_bswap
= ~m_386
;
1187 const int x86_pad_returns
= m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
;
/* User-requested override for the string-operation (memcpy/memset)
   expansion strategy; no_stringop means "no override, consult the cost
   tables".  NOTE(review): presumably set from a command-line option
   string -- confirm against the option-handling code.  */
1189 static enum stringop_alg stringop_alg
= no_stringop
;
1191 /* In case the average insn count for single function invocation is
1192 lower than this constant, emit fast (but longer) prologue and
epilogue code.  */
1194 #define FAST_PROLOGUE_INSN_COUNT 20
1196 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1197 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1198 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1199 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1201 /* Array of the smallest class containing reg number REGNO, indexed by
1202 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1204 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1206 /* ax, dx, cx, bx */
1207 AREG
, DREG
, CREG
, BREG
,
1208 /* si, di, bp, sp */
1209 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1211 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1212 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1215 /* flags, fpsr, fpcr, frame */
1216 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1217 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1219 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1221 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1222 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1223 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1227 /* The "default" register map used in 32bit mode. */
1229 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1231 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1232 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1233 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1234 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1235 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1236 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1237 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1240 static int const x86_64_int_parameter_registers
[6] =
1242 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1243 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Hard register numbers used for integer return values in 64-bit mode
   (regno/name mapping as in x86_64_int_parameter_registers above).  */
1246 static int const x86_64_int_return_registers
[4] =
1248 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1251 /* The "default" register map used in 64bit mode. */
1252 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1254 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1255 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1256 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1257 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1258 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1259 8,9,10,11,12,13,14,15, /* extended integer registers */
1260 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1263 /* Define the register numbers to be used in Dwarf debugging information.
1264 The SVR4 reference port C compiler uses the following register numbers
1265 in its Dwarf output code:
1266 0 for %eax (gcc regno = 0)
1267 1 for %ecx (gcc regno = 2)
1268 2 for %edx (gcc regno = 1)
1269 3 for %ebx (gcc regno = 3)
1270 4 for %esp (gcc regno = 7)
1271 5 for %ebp (gcc regno = 6)
1272 6 for %esi (gcc regno = 4)
1273 7 for %edi (gcc regno = 5)
1274 The following three DWARF register numbers are never generated by
1275 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1276 believes these numbers have these meanings.
1277 8 for %eip (no gcc equivalent)
1278 9 for %eflags (gcc regno = 17)
1279 10 for %trapno (no gcc equivalent)
1280 It is not at all clear how we should number the FP stack registers
1281 for the x86 architecture. If the version of SDB on x86/svr4 were
1282 a bit less brain dead with respect to floating-point then we would
1283 have a precedent to follow with respect to DWARF register numbers
1284 for x86 FP registers, but the SDB on x86/svr4 is so completely
1285 broken with respect to FP registers that it is hardly worth thinking
1286 of it as something to strive for compatibility with.
1287 The version of x86/svr4 SDB I have at the moment does (partially)
1288 seem to believe that DWARF register number 11 is associated with
1289 the x86 register %st(0), but that's about all. Higher DWARF
1290 register numbers don't seem to be associated with anything in
1291 particular, and even for DWARF regno 11, SDB only seems to under-
1292 stand that it should say that a variable lives in %st(0) (when
1293 asked via an `=' command) if we said it was in DWARF regno 11,
1294 but SDB still prints garbage when asked for the value of the
1295 variable in question (via a `/' command).
1296 (Also note that the labels SDB prints for various FP stack regs
1297 when doing an `x' command are all wrong.)
1298 Note that these problems generally don't affect the native SVR4
1299 C compiler because it doesn't allow the use of -O with -g and
1300 because when it is *not* optimizing, it allocates a memory
1301 location for each floating-point variable, and the memory
1302 location is what gets described in the DWARF AT_location
1303 attribute for the variable in question.
1304 Regardless of the severe mental illness of the x86/svr4 SDB, we
1305 do something sensible here and we use the following DWARF
1306 register numbers. Note that these are all stack-top-relative
1308 11 for %st(0) (gcc regno = 8)
1309 12 for %st(1) (gcc regno = 9)
1310 13 for %st(2) (gcc regno = 10)
1311 14 for %st(3) (gcc regno = 11)
1312 15 for %st(4) (gcc regno = 12)
1313 16 for %st(5) (gcc regno = 13)
1314 17 for %st(6) (gcc regno = 14)
1315 18 for %st(7) (gcc regno = 15)
1317 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1319 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1320 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1321 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1322 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1323 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1324 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1325 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1328 /* Test and compare insns in i386.md store the information needed to
1329 generate branch and scc insns here. */
/* First operand of the pending comparison.  */
1331 rtx ix86_compare_op0
= NULL_RTX
;
/* Second operand of the pending comparison.  */
1332 rtx ix86_compare_op1
= NULL_RTX
;
/* NOTE(review): presumably a flags-register rtx when the compare insn
   has already been emitted, NULL_RTX otherwise -- confirm against the
   i386.md users of this variable.  */
1333 rtx ix86_compare_emitted
= NULL_RTX
;
1335 /* Size of the register save area. */
1336 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1338 /* Define the structure for the machine field in struct function. */
1340 struct stack_local_entry
GTY(())
1342 unsigned short mode
;
1345 struct stack_local_entry
*next
;
1348 /* Structure describing stack frame layout.
1349 Stack grows downward:
1355 saved frame pointer if frame_pointer_needed
1356 <- HARD_FRAME_POINTER
1361 [va_arg registers] (
1362 > to_allocate <- FRAME_POINTER
1372 HOST_WIDE_INT frame
;
1374 int outgoing_arguments_size
;
1377 HOST_WIDE_INT to_allocate
;
1378 /* The offsets relative to ARG_POINTER. */
1379 HOST_WIDE_INT frame_pointer_offset
;
1380 HOST_WIDE_INT hard_frame_pointer_offset
;
1381 HOST_WIDE_INT stack_pointer_offset
;
1383 /* When save_regs_using_mov is set, emit prologue using
1384 move instead of push instructions. */
1385 bool save_regs_using_mov
;
1388 /* Code model option. */
1389 enum cmodel ix86_cmodel
;
1391 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1393 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1395 /* Which unit we are generating floating point math for. */
1396 enum fpmath_unit ix86_fpmath
;
1398 /* Which cpu are we scheduling for. */
1399 enum processor_type ix86_tune
;
1402 /* Which instruction set architecture to use. */
1403 enum processor_type ix86_arch
;
1406 /* true if sse prefetch instruction is not NOOP. */
1407 int x86_prefetch_sse
;
1409 /* true if cmpxchg16b is supported. */
1412 /* ix86_regparm_string as a number */
1413 static int ix86_regparm
;
1415 /* -mstackrealign option */
1416 extern int ix86_force_align_arg_pointer
;
1417 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1419 /* Preferred alignment for stack boundary in bits. */
1420 unsigned int ix86_preferred_stack_boundary
;
1422 /* Values 1-5: see jump.c */
1423 int ix86_branch_cost
;
1425 /* Variables which are this size or smaller are put in the data/bss
1426 or ldata/lbss sections. */
1428 int ix86_section_threshold
= 65536;
1430 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1431 char internal_label_prefix
[16];
1432 int internal_label_prefix_len
;
1434 static bool ix86_handle_option (size_t, const char *, int);
1435 static void output_pic_addr_const (FILE *, rtx
, int);
1436 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1438 static const char *get_some_local_dynamic_name (void);
1439 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1440 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1441 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1443 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1444 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1446 static rtx
get_thread_pointer (int);
1447 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1448 static void get_pc_thunk_name (char [32], unsigned int);
1449 static rtx
gen_push (rtx
);
1450 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1451 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1452 static struct machine_function
* ix86_init_machine_status (void);
1453 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1454 static int ix86_nsaved_regs (void);
1455 static void ix86_emit_save_regs (void);
1456 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1457 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1458 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1459 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1460 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1461 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1462 static int ix86_issue_rate (void);
1463 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1464 static int ia32_multipass_dfa_lookahead (void);
1465 static void ix86_init_mmx_sse_builtins (void);
1466 static rtx
x86_this_parameter (tree
);
1467 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1468 HOST_WIDE_INT
, tree
);
1469 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1470 static void x86_file_start (void);
1471 static void ix86_reorg (void);
1472 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1473 static tree
ix86_build_builtin_va_list (void);
1474 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1476 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1477 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1478 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1480 static int ix86_address_cost (rtx
);
1481 static bool ix86_cannot_force_const_mem (rtx
);
1482 static rtx
ix86_delegitimize_address (rtx
);
1484 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1486 struct builtin_description
;
1487 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1489 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1491 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1492 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1493 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1494 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1495 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1496 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1497 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1498 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1499 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1500 static int ix86_fp_comparison_cost (enum rtx_code code
);
1501 static unsigned int ix86_select_alt_pic_regnum (void);
1502 static int ix86_save_reg (unsigned int, int);
1503 static void ix86_compute_frame_layout (struct ix86_frame
*);
1504 static int ix86_comp_type_attributes (tree
, tree
);
1505 static int ix86_function_regparm (tree
, tree
);
1506 const struct attribute_spec ix86_attribute_table
[];
1507 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1508 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1509 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1510 static bool contains_128bit_aligned_vector_p (tree
);
1511 static rtx
ix86_struct_value_rtx (tree
, int);
1512 static bool ix86_ms_bitfield_layout_p (tree
);
1513 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1514 static int extended_reg_mentioned_1 (rtx
*, void *);
1515 static bool ix86_rtx_costs (rtx
, int, int, int *);
1516 static int min_insn_size (rtx
);
1517 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1518 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1519 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1521 static void ix86_init_builtins (void);
1522 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1523 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1524 static tree
ix86_builtin_conversion (enum tree_code
, tree
);
1525 static const char *ix86_mangle_fundamental_type (tree
);
1526 static tree
ix86_stack_protect_fail (void);
1527 static rtx
ix86_internal_arg_pointer (void);
1528 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1529 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1532 /* This function is only used on Solaris. */
1533 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1536 /* Register class used for passing given 64bit part of the argument.
1537 These represent classes as documented by the PS ABI, with the exception
1538 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1539 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1541 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1542 whenever possible (upper half does contain padding).
1544 enum x86_64_reg_class
1547 X86_64_INTEGER_CLASS
,
1548 X86_64_INTEGERSI_CLASS
,
1555 X86_64_COMPLEX_X87_CLASS
,
1558 static const char * const x86_64_reg_class_name
[] = {
1559 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1560 "sseup", "x87", "x87up", "cplx87", "no"
1563 #define MAX_CLASSES 4
1565 /* Table of constants used by fldpi, fldln2, etc.... */
1566 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1567 static bool ext_80387_constants_init
= 0;
1568 static void init_ext_80387_constants (void);
1569 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1570 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1571 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1572 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1573 unsigned HOST_WIDE_INT align
)
1576 /* Initialize the GCC target structure. */
1577 #undef TARGET_ATTRIBUTE_TABLE
1578 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1579 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1580 # undef TARGET_MERGE_DECL_ATTRIBUTES
1581 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1584 #undef TARGET_COMP_TYPE_ATTRIBUTES
1585 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1587 #undef TARGET_INIT_BUILTINS
1588 #define TARGET_INIT_BUILTINS ix86_init_builtins
1589 #undef TARGET_EXPAND_BUILTIN
1590 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1592 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1593 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1594 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1595 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1597 #undef TARGET_ASM_FUNCTION_EPILOGUE
1598 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1600 #undef TARGET_ENCODE_SECTION_INFO
1601 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1602 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1604 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1607 #undef TARGET_ASM_OPEN_PAREN
1608 #define TARGET_ASM_OPEN_PAREN ""
1609 #undef TARGET_ASM_CLOSE_PAREN
1610 #define TARGET_ASM_CLOSE_PAREN ""
1612 #undef TARGET_ASM_ALIGNED_HI_OP
1613 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1614 #undef TARGET_ASM_ALIGNED_SI_OP
1615 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1617 #undef TARGET_ASM_ALIGNED_DI_OP
1618 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1621 #undef TARGET_ASM_UNALIGNED_HI_OP
1622 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1623 #undef TARGET_ASM_UNALIGNED_SI_OP
1624 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1625 #undef TARGET_ASM_UNALIGNED_DI_OP
1626 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1628 #undef TARGET_SCHED_ADJUST_COST
1629 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1630 #undef TARGET_SCHED_ISSUE_RATE
1631 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1632 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1633 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1634 ia32_multipass_dfa_lookahead
1636 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1637 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1640 #undef TARGET_HAVE_TLS
1641 #define TARGET_HAVE_TLS true
1643 #undef TARGET_CANNOT_FORCE_CONST_MEM
1644 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1645 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1646 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1648 #undef TARGET_DELEGITIMIZE_ADDRESS
1649 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1651 #undef TARGET_MS_BITFIELD_LAYOUT_P
1652 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1655 #undef TARGET_BINDS_LOCAL_P
1656 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1659 #undef TARGET_ASM_OUTPUT_MI_THUNK
1660 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1661 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1662 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1664 #undef TARGET_ASM_FILE_START
1665 #define TARGET_ASM_FILE_START x86_file_start
1667 #undef TARGET_DEFAULT_TARGET_FLAGS
1668 #define TARGET_DEFAULT_TARGET_FLAGS \
1670 | TARGET_64BIT_DEFAULT \
1671 | TARGET_SUBTARGET_DEFAULT \
1672 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1674 #undef TARGET_HANDLE_OPTION
1675 #define TARGET_HANDLE_OPTION ix86_handle_option
1677 #undef TARGET_RTX_COSTS
1678 #define TARGET_RTX_COSTS ix86_rtx_costs
1679 #undef TARGET_ADDRESS_COST
1680 #define TARGET_ADDRESS_COST ix86_address_cost
1682 #undef TARGET_FIXED_CONDITION_CODE_REGS
1683 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1684 #undef TARGET_CC_MODES_COMPATIBLE
1685 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1687 #undef TARGET_MACHINE_DEPENDENT_REORG
1688 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1690 #undef TARGET_BUILD_BUILTIN_VA_LIST
1691 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1693 #undef TARGET_MD_ASM_CLOBBERS
1694 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1696 #undef TARGET_PROMOTE_PROTOTYPES
1697 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1698 #undef TARGET_STRUCT_VALUE_RTX
1699 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1700 #undef TARGET_SETUP_INCOMING_VARARGS
1701 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1702 #undef TARGET_MUST_PASS_IN_STACK
1703 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1704 #undef TARGET_PASS_BY_REFERENCE
1705 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1706 #undef TARGET_INTERNAL_ARG_POINTER
1707 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1708 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1709 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1711 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1712 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1714 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1715 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1717 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1718 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1721 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1722 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1725 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1726 #undef TARGET_INSERT_ATTRIBUTES
1727 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1730 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1731 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1733 #undef TARGET_STACK_PROTECT_FAIL
1734 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1736 #undef TARGET_FUNCTION_VALUE
1737 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The i386 target hook vector.  TARGET_INITIALIZER collects every
   TARGET_* macro defined above into this structure; it is the single
   point through which language- and middle-end code reaches the
   back end.  */
1739 struct gcc_target targetm
= TARGET_INITIALIZER
;
1742 /* The svr4 ABI for the i386 says that records and unions are returned
1744 #ifndef DEFAULT_PCC_STRUCT_RETURN
1745 #define DEFAULT_PCC_STRUCT_RETURN 1
1748 /* Implement TARGET_HANDLE_OPTION. */
1751 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
/* NOTE(review): this extraction has lost the function's return-type line
   and the switch (code) / case / return scaffolding.  Each surviving pair
   of statements below clears a feature mask from target_flags and records
   it in target_flags_explicit — presumably these are the "option was
   explicitly negated" arms, so the subsets implied by the negated option
   (e.g. -mno-mmx also disabling 3DNow!) are turned off too.  Confirm
   against the full source before editing.  */
1758 target_flags
&= ~MASK_3DNOW_A
;
1759 target_flags_explicit
|= MASK_3DNOW_A
;
/* Disabling 3DNow! also disables the Athlon 3DNow! extensions.  */
1766 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1767 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
/* Disabling SSE disables every later SSE generation as well.  */
1774 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1775 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1782 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1783 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1790 target_flags
&= ~MASK_SSE4A
;
1791 target_flags_explicit
|= MASK_SSE4A
;
1800 /* Sometimes certain combinations of command options do not make
1801 sense on a particular target machine. You can define a macro
1802 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1803 defined, is executed once just after all the command options have
1806 Don't use this macro to turn on various extra optimizations for
1807 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1810 override_options (void)
/* NOTE(review): the extraction is missing this function's storage-class /
   return-type line, its braces, and many interior lines; all surviving
   fragments are preserved byte-for-byte below.  The function implements
   OVERRIDE_OPTIONS: it validates every i386 -m* command-line option and
   derives the final target_flags, code model, alignments, FP math mode
   and cost tables.  */
1813 int ix86_tune_defaulted
= 0;
1815 /* Comes from final.c -- no real reason to change it. */
1816 #define MAX_CODE_ALIGN 16
/* Per-processor tuning record: cost model, target flags to force
   on/off, and default loop/jump/function alignments (with the
   corresponding max-skip values).  */
1820 const struct processor_costs
*cost
; /* Processor costs */
1821 const int target_enable
; /* Target flags to enable. */
1822 const int target_disable
; /* Target flags to disable. */
1823 const int align_loop
; /* Default alignments. */
1824 const int align_loop_max_skip
;
1825 const int align_jump
;
1826 const int align_jump_max_skip
;
1827 const int align_func
;
1829 const processor_target_table
[PROCESSOR_max
] =
1831 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1832 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1833 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1834 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1835 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1836 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1837 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1838 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1839 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1840 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1841 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1842 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1843 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1844 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1847 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
/* Alias table mapping each -march=/-mtune= CPU name to its processor
   enum plus the PTA_* ISA-feature flags that name implies.  */
1850 const char *const name
; /* processor name or nickname. */
1851 const enum processor_type processor
;
1852 const enum pta_flags
1858 PTA_PREFETCH_SSE
= 16,
1869 const processor_alias_table
[] =
1871 {"i386", PROCESSOR_I386
, 0},
1872 {"i486", PROCESSOR_I486
, 0},
1873 {"i586", PROCESSOR_PENTIUM
, 0},
1874 {"pentium", PROCESSOR_PENTIUM
, 0},
1875 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1876 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1877 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1878 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1879 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1880 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1881 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1882 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1883 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1884 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1885 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1886 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1887 | PTA_MMX
| PTA_PREFETCH_SSE
},
1888 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1889 | PTA_MMX
| PTA_PREFETCH_SSE
},
1890 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1891 | PTA_MMX
| PTA_PREFETCH_SSE
},
1892 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1893 | PTA_MMX
| PTA_PREFETCH_SSE
| PTA_CX16
},
1894 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1895 | PTA_64BIT
| PTA_MMX
1896 | PTA_PREFETCH_SSE
| PTA_CX16
},
1897 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1899 {"k6", PROCESSOR_K6
, PTA_MMX
},
1900 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1901 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1902 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1904 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1905 | PTA_3DNOW
| PTA_3DNOW_A
},
1906 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1907 | PTA_3DNOW_A
| PTA_SSE
},
1908 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1909 | PTA_3DNOW_A
| PTA_SSE
},
1910 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1911 | PTA_3DNOW_A
| PTA_SSE
},
1912 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1913 | PTA_SSE
| PTA_SSE2
},
1914 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1915 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1916 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1917 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1918 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1919 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1920 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1921 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1922 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1923 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1924 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1925 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1926 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1927 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1930 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1932 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1933 SUBTARGET_OVERRIDE_OPTIONS
;
1936 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1937 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1940 /* -fPIC is the default for x86_64. */
1941 if (TARGET_MACHO
&& TARGET_64BIT
)
1944 /* Set the default values for switches whose default depends on TARGET_64BIT
1945 in case they weren't overwritten by command line options. */
/* The value 2 is the "not set on the command line" sentinel planted by
   optimization_options; replace it with the real default here.  */
1948 /* Mach-O doesn't support omitting the frame pointer for now. */
1949 if (flag_omit_frame_pointer
== 2)
1950 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1951 if (flag_asynchronous_unwind_tables
== 2)
1952 flag_asynchronous_unwind_tables
= 1;
1953 if (flag_pcc_struct_return
== 2)
1954 flag_pcc_struct_return
= 0;
1958 if (flag_omit_frame_pointer
== 2)
1959 flag_omit_frame_pointer
= 0;
1960 if (flag_asynchronous_unwind_tables
== 2)
1961 flag_asynchronous_unwind_tables
= 0;
1962 if (flag_pcc_struct_return
== 2)
1963 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1966 /* Need to check -mtune=generic first. */
1967 if (ix86_tune_string
)
1969 if (!strcmp (ix86_tune_string
, "generic")
1970 || !strcmp (ix86_tune_string
, "i686")
1971 /* As special support for cross compilers we read -mtune=native
1972 as -mtune=generic. With native compilers we won't see the
1973 -mtune=native, as it was changed by the driver. */
1974 || !strcmp (ix86_tune_string
, "native"))
1977 ix86_tune_string
= "generic64";
1979 ix86_tune_string
= "generic32";
1981 else if (!strncmp (ix86_tune_string
, "generic", 7))
1982 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1986 if (ix86_arch_string
)
1987 ix86_tune_string
= ix86_arch_string
;
1988 if (!ix86_tune_string
)
1990 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1991 ix86_tune_defaulted
= 1;
1994 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1995 need to use a sensible tune option. */
1996 if (!strcmp (ix86_tune_string
, "generic")
1997 || !strcmp (ix86_tune_string
, "x86-64")
1998 || !strcmp (ix86_tune_string
, "i686"))
2001 ix86_tune_string
= "generic64";
2003 ix86_tune_string
= "generic32";
/* Parse -mstringop-strategy= into the stringop_alg enum.  */
2006 if (ix86_stringop_string
)
2008 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2009 stringop_alg
= rep_prefix_1_byte
;
2010 else if (!strcmp (ix86_stringop_string
, "libcall"))
2011 stringop_alg
= libcall
;
2012 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2013 stringop_alg
= rep_prefix_4_byte
;
2014 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2015 stringop_alg
= rep_prefix_8_byte
;
2016 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2017 stringop_alg
= loop_1_byte
;
2018 else if (!strcmp (ix86_stringop_string
, "loop"))
2019 stringop_alg
= loop
;
2020 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2021 stringop_alg
= unrolled_loop
;
2023 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2025 if (!strcmp (ix86_tune_string
, "x86-64"))
2026 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2027 "-mtune=generic instead as appropriate.");
2029 if (!ix86_arch_string
)
2030 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2031 if (!strcmp (ix86_arch_string
, "generic"))
2032 error ("generic CPU can be used only for -mtune= switch");
2033 if (!strncmp (ix86_arch_string
, "generic", 7))
2034 error ("bad value (%s) for -march= switch", ix86_arch_string
);
/* Parse -mcmodel=; PIC restricts which code models are available.  */
2036 if (ix86_cmodel_string
!= 0)
2038 if (!strcmp (ix86_cmodel_string
, "small"))
2039 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2040 else if (!strcmp (ix86_cmodel_string
, "medium"))
2041 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2043 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
2044 else if (!strcmp (ix86_cmodel_string
, "32"))
2045 ix86_cmodel
= CM_32
;
2046 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2047 ix86_cmodel
= CM_KERNEL
;
2048 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
2049 ix86_cmodel
= CM_LARGE
;
2051 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2055 ix86_cmodel
= CM_32
;
2057 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2059 if (ix86_asm_string
!= 0)
2062 && !strcmp (ix86_asm_string
, "intel"))
2063 ix86_asm_dialect
= ASM_INTEL
;
2064 else if (!strcmp (ix86_asm_string
, "att"))
2065 ix86_asm_dialect
= ASM_ATT
;
2067 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2069 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2070 error ("code model %qs not supported in the %s bit mode",
2071 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2072 if (ix86_cmodel
== CM_LARGE
)
2073 sorry ("code model %<large%> not supported yet");
2074 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2075 sorry ("%i-bit mode not compiled in",
2076 (target_flags
& MASK_64BIT
) ? 64 : 32);
/* Look up -march= in the alias table; turn on every ISA-feature mask
   the chosen architecture implies, unless that flag was set explicitly
   on the command line.  */
2078 for (i
= 0; i
< pta_size
; i
++)
2079 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2081 ix86_arch
= processor_alias_table
[i
].processor
;
2082 /* Default cpu tuning to the architecture. */
2083 ix86_tune
= ix86_arch
;
2084 if (processor_alias_table
[i
].flags
& PTA_MMX
2085 && !(target_flags_explicit
& MASK_MMX
))
2086 target_flags
|= MASK_MMX
;
2087 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2088 && !(target_flags_explicit
& MASK_3DNOW
))
2089 target_flags
|= MASK_3DNOW
;
2090 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2091 && !(target_flags_explicit
& MASK_3DNOW_A
))
2092 target_flags
|= MASK_3DNOW_A
;
2093 if (processor_alias_table
[i
].flags
& PTA_SSE
2094 && !(target_flags_explicit
& MASK_SSE
))
2095 target_flags
|= MASK_SSE
;
2096 if (processor_alias_table
[i
].flags
& PTA_SSE2
2097 && !(target_flags_explicit
& MASK_SSE2
))
2098 target_flags
|= MASK_SSE2
;
2099 if (processor_alias_table
[i
].flags
& PTA_SSE3
2100 && !(target_flags_explicit
& MASK_SSE3
))
2101 target_flags
|= MASK_SSE3
;
2102 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2103 && !(target_flags_explicit
& MASK_SSSE3
))
2104 target_flags
|= MASK_SSSE3
;
2105 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2106 x86_prefetch_sse
= true;
2107 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2108 x86_cmpxchg16b
= true;
2109 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2110 && !(target_flags_explicit
& MASK_POPCNT
))
2111 target_flags
|= MASK_POPCNT
;
2112 if (processor_alias_table
[i
].flags
& PTA_ABM
2113 && !(target_flags_explicit
& MASK_ABM
))
2114 target_flags
|= MASK_ABM
;
2115 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2116 && !(target_flags_explicit
& MASK_SSE4A
))
2117 target_flags
|= MASK_SSE4A
;
2118 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2119 error ("CPU you selected does not support x86-64 "
2125 error ("bad value (%s) for -march= switch", ix86_arch_string
);
/* Look up -mtune= in the alias table the same way.  */
2127 for (i
= 0; i
< pta_size
; i
++)
2128 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2130 ix86_tune
= processor_alias_table
[i
].processor
;
2131 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2133 if (ix86_tune_defaulted
)
2135 ix86_tune_string
= "x86-64";
2136 for (i
= 0; i
< pta_size
; i
++)
2137 if (! strcmp (ix86_tune_string
,
2138 processor_alias_table
[i
].name
))
2140 ix86_tune
= processor_alias_table
[i
].processor
;
2143 error ("CPU you selected does not support x86-64 "
2146 /* Intel CPUs have always interpreted SSE prefetch instructions as
2147 NOPs; so, we can enable SSE prefetch instructions even when
2148 -mtune (rather than -march) points us to a processor that has them.
2149 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2150 higher processors. */
2151 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2152 x86_prefetch_sse
= true;
2156 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2158 ix86_arch_mask
= 1 << ix86_arch
;
2159 ix86_tune_mask
= 1 << ix86_tune
;
/* Pick the cost table: the size-optimizing one for -Os, otherwise the
   table for the CPU being tuned for.  */
2162 ix86_cost
= &size_cost
;
2164 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2165 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2166 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2168 /* Arrange to set up i386_stack_locals for all functions. */
2169 init_machine_status
= ix86_init_machine_status
;
2171 /* Validate -mregparm= value. */
2172 if (ix86_regparm_string
)
2174 i
= atoi (ix86_regparm_string
);
2175 if (i
< 0 || i
> REGPARM_MAX
)
2176 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2182 ix86_regparm
= REGPARM_MAX
;
2184 /* If the user has provided any of the -malign-* options,
2185 warn and use that value only if -falign-* is not set.
2186 Remove this code in GCC 3.2 or later. */
2187 if (ix86_align_loops_string
)
2189 warning (0, "-malign-loops is obsolete, use -falign-loops");
2190 if (align_loops
== 0)
2192 i
= atoi (ix86_align_loops_string
);
2193 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2194 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2196 align_loops
= 1 << i
;
2200 if (ix86_align_jumps_string
)
2202 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2203 if (align_jumps
== 0)
2205 i
= atoi (ix86_align_jumps_string
);
2206 if (i
< 0 || i
> MAX_CODE_ALIGN
)
/* NOTE(review): copy-paste bug — this is the -malign-jumps path but the
   message says "-malign-loops"; it should read "-malign-jumps=%d ...".
   (Fixed in later upstream GCC.)  */
2207 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2209 align_jumps
= 1 << i
;
2213 if (ix86_align_funcs_string
)
2215 warning (0, "-malign-functions is obsolete, use -falign-functions");
2216 if (align_functions
== 0)
2218 i
= atoi (ix86_align_funcs_string
);
2219 if (i
< 0 || i
> MAX_CODE_ALIGN
)
/* NOTE(review): same copy-paste bug — should say "-malign-functions".  */
2220 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2222 align_functions
= 1 << i
;
2226 /* Default align_* from the processor table. */
2227 if (align_loops
== 0)
2229 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2230 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2232 if (align_jumps
== 0)
2234 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2235 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2237 if (align_functions
== 0)
2239 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2242 /* Validate -mbranch-cost= value, or provide default. */
2243 ix86_branch_cost
= ix86_cost
->branch_cost
;
2244 if (ix86_branch_cost_string
)
2246 i
= atoi (ix86_branch_cost_string
);
2248 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2250 ix86_branch_cost
= i
;
2252 if (ix86_section_threshold_string
)
2254 i
= atoi (ix86_section_threshold_string
);
2256 error ("-mlarge-data-threshold=%d is negative", i
);
2258 ix86_section_threshold
= i
;
/* Parse -mtls-dialect=.  */
2261 if (ix86_tls_dialect_string
)
2263 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2264 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2265 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2266 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2267 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2268 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2270 error ("bad value (%s) for -mtls-dialect= switch",
2271 ix86_tls_dialect_string
);
2274 /* Keep nonleaf frame pointers. */
2275 if (flag_omit_frame_pointer
)
2276 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2277 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2278 flag_omit_frame_pointer
= 1;
2280 /* If we're doing fast math, we don't care about comparison order
2281 wrt NaNs. This lets us use a shorter comparison sequence. */
2282 if (flag_finite_math_only
)
2283 target_flags
&= ~MASK_IEEE_FP
;
2285 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2286 since the insns won't need emulation. */
2287 if (x86_arch_always_fancy_math_387
& ARCHMASK
)
2288 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2290 /* Likewise, if the target doesn't have a 387, or we've specified
2291 software floating point, don't use 387 inline intrinsics. */
2293 target_flags
|= MASK_NO_FANCY_MATH_387
;
/* Each SSE generation implies the previous one; cascade the enables.  */
2295 /* Turn on SSE3 builtins for -mssse3. */
2297 target_flags
|= MASK_SSE3
;
2299 /* Turn on SSE3 builtins for -msse4a. */
2301 target_flags
|= MASK_SSE3
;
2303 /* Turn on SSE2 builtins for -msse3. */
2305 target_flags
|= MASK_SSE2
;
2307 /* Turn on SSE builtins for -msse2. */
2309 target_flags
|= MASK_SSE
;
2311 /* Turn on MMX builtins for -msse. */
2314 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2315 x86_prefetch_sse
= true;
2318 /* Turn on MMX builtins for 3Dnow. */
2320 target_flags
|= MASK_MMX
;
2322 /* Turn on POPCNT builtins for -mabm. */
2324 target_flags
|= MASK_POPCNT
;
2328 if (TARGET_ALIGN_DOUBLE
)
2329 error ("-malign-double makes no sense in the 64bit mode");
2331 error ("-mrtd calling convention not supported in the 64bit mode");
2333 /* Enable by default the SSE and MMX builtins. Do allow the user to
2334 explicitly disable any of these. In particular, disabling SSE and
2335 MMX for kernel code is extremely useful. */
2337 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2338 & ~target_flags_explicit
);
2342 /* i386 ABI does not specify red zone. It still makes sense to use it
2343 when programmer takes care to stack from being destroyed. */
2344 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2345 target_flags
|= MASK_NO_RED_ZONE
;
2348 /* Validate -mpreferred-stack-boundary= value, or provide default.
2349 The default of 128 bits is for Pentium III's SSE __m128. We can't
2350 change it because of optimize_size. Otherwise, we can't mix object
2351 files compiled with -Os and -On. */
2352 ix86_preferred_stack_boundary
= 128;
2353 if (ix86_preferred_stack_boundary_string
)
2355 i
= atoi (ix86_preferred_stack_boundary_string
);
2356 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2357 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2358 TARGET_64BIT
? 4 : 2);
2360 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2363 /* Accept -msseregparm only if at least SSE support is enabled. */
2364 if (TARGET_SSEREGPARM
2366 error ("-msseregparm used without SSE enabled");
2368 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
/* Parse -mfpmath=; fall back to 387 (or SSE) with a warning when the
   requested unit is not enabled.  */
2370 if (ix86_fpmath_string
!= 0)
2372 if (! strcmp (ix86_fpmath_string
, "387"))
2373 ix86_fpmath
= FPMATH_387
;
2374 else if (! strcmp (ix86_fpmath_string
, "sse"))
2378 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2379 ix86_fpmath
= FPMATH_387
;
2382 ix86_fpmath
= FPMATH_SSE
;
2384 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2385 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2389 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2390 ix86_fpmath
= FPMATH_387
;
2392 else if (!TARGET_80387
)
2394 warning (0, "387 instruction set disabled, using SSE arithmetics");
2395 ix86_fpmath
= FPMATH_SSE
;
2398 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2401 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2404 /* If the i387 is disabled, then do not return values in it. */
2406 target_flags
&= ~MASK_FLOAT_RETURNS
;
2408 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2409 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2411 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2413 /* ??? Unwind info is not correct around the CFG unless either a frame
2414 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2415 unwind info generation to be aware of the CFG and propagating states
2417 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2418 || flag_exceptions
|| flag_non_call_exceptions
)
2419 && flag_omit_frame_pointer
2420 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2422 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2423 warning (0, "unwind tables currently require either a frame pointer "
2424 "or -maccumulate-outgoing-args for correctness");
2425 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2428 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2431 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2432 p
= strchr (internal_label_prefix
, 'X');
2433 internal_label_prefix_len
= p
- internal_label_prefix
;
2437 /* When scheduling description is not available, disable scheduler pass
2438 so it won't slow down the compilation and make x87 code slower. */
2439 if (!TARGET_SCHEDULE
)
2440 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2442 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2443 set_param_value ("simultaneous-prefetches",
2444 ix86_cost
->simultaneous_prefetches
);
2445 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2446 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2449 /* switch to the appropriate section for output of DECL.
2450 DECL is either a `VAR_DECL' node or a constant of some sort.
2451 RELOC indicates whether forming the initial value of DECL requires
2452 link-time relocations. */
2455 x86_64_elf_select_section (tree decl
, int reloc
,
2456 unsigned HOST_WIDE_INT align
)
/* For the x86-64 medium code model, large data objects go into the
   .ldata* family of sections; everything else falls through to the
   default ELF section selection.  NOTE(review): the case labels and
   break statements of the switch are missing from this extraction.  */
2458 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2459 && ix86_in_large_data_p (decl
))
2461 const char *sname
= NULL
;
2462 unsigned int flags
= SECTION_WRITE
;
2463 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2468 case SECCAT_DATA_REL
:
2469 sname
= ".ldata.rel";
2471 case SECCAT_DATA_REL_LOCAL
:
2472 sname
= ".ldata.rel.local";
2474 case SECCAT_DATA_REL_RO
:
2475 sname
= ".ldata.rel.ro";
2477 case SECCAT_DATA_REL_RO_LOCAL
:
2478 sname
= ".ldata.rel.ro.local";
2482 flags
|= SECTION_BSS
;
2485 case SECCAT_RODATA_MERGE_STR
:
2486 case SECCAT_RODATA_MERGE_STR_INIT
:
2487 case SECCAT_RODATA_MERGE_CONST
:
2491 case SECCAT_SRODATA
:
2498 /* We don't split these for medium model. Place them into
2499 default sections and hope for best. */
2504 /* We might get called with string constants, but get_named_section
2505 doesn't like them as they are not DECLs. Also, we need to set
2506 flags in that case. */
2508 return get_section (sname
, flags
, NULL
);
2509 return get_named_section (decl
, sname
, reloc
);
2512 return default_elf_select_section (decl
, reloc
, align
);
2515 /* Build up a unique section name, expressed as a
2516 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2517 RELOC indicates whether the initial value of EXP requires
2518 link-time relocations. */
2521 x86_64_elf_unique_section (tree decl
, int reloc
)
/* Medium-model counterpart of default_unique_section: large-data DECLs
   get a ".l"-prefixed (or .gnu.linkonce.l*) section name built from the
   stripped assembler name.  NOTE(review): several case labels / break
   statements are missing from this extraction.  */
2523 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2524 && ix86_in_large_data_p (decl
))
2526 const char *prefix
= NULL
;
2527 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2528 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2530 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2533 case SECCAT_DATA_REL
:
2534 case SECCAT_DATA_REL_LOCAL
:
2535 case SECCAT_DATA_REL_RO
:
2536 case SECCAT_DATA_REL_RO_LOCAL
:
2537 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2540 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2543 case SECCAT_RODATA_MERGE_STR
:
2544 case SECCAT_RODATA_MERGE_STR_INIT
:
2545 case SECCAT_RODATA_MERGE_CONST
:
2546 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2548 case SECCAT_SRODATA
:
2555 /* We don't split these for medium model. Place them into
2556 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer and
   install it as the decl's section name.  */
2564 plen
= strlen (prefix
);
2566 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2567 name
= targetm
.strip_name_encoding (name
);
2568 nlen
= strlen (name
);
2570 string
= alloca (nlen
+ plen
+ 1);
2571 memcpy (string
, prefix
, plen
);
2572 memcpy (string
+ plen
, name
, nlen
+ 1);
2574 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2578 default_unique_section (decl
, reloc
);
2581 #ifdef COMMON_ASM_OP
2582 /* This says how to output assembler code to declare an
2583 uninitialized external linkage data object.
2585 For medium model x86-64 we need to use .largecomm opcode for
2588 x86_elf_aligned_common (FILE *file
,
2589 const char *name
, unsigned HOST_WIDE_INT size
,
/* Emits ".largecomm" for medium-model objects larger than the
   -mlarge-data-threshold, otherwise the normal COMMON_ASM_OP directive,
   followed by the symbol name, its size, and alignment in bytes.  */
2592 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2593 && size
> (unsigned int)ix86_section_threshold
)
2594 fprintf (file
, ".largecomm\t");
2596 fprintf (file
, "%s", COMMON_ASM_OP
);
2597 assemble_name (file
, name
);
2598 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2599 size
, align
/ BITS_PER_UNIT
);
2602 /* Utility function for targets to use in implementing
2603 ASM_OUTPUT_ALIGNED_BSS. */
2606 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2607 const char *name
, unsigned HOST_WIDE_INT size
,
/* Places medium-model large objects in .lbss instead of .bss, aligns,
   declares/labels the object, and reserves SIZE bytes (at least 1, since
   ASM_OUTPUT_SKIP of zero would emit nothing for a zero-sized object).  */
2610 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2611 && size
> (unsigned int)ix86_section_threshold
)
2612 switch_to_section (get_named_section (decl
, ".lbss", 0));
2614 switch_to_section (bss_section
);
2615 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2616 #ifdef ASM_DECLARE_OBJECT_NAME
2617 last_assemble_variable_decl
= decl
;
2618 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2620 /* Standard thing is just output label for the object. */
2621 ASM_OUTPUT_LABEL (file
, name
);
2622 #endif /* ASM_DECLARE_OBJECT_NAME */
2623 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2627 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2629 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2630 make the problem with not enough registers even worse. */
2631 #ifdef INSN_SCHEDULING
2633 flag_schedule_insns
= 0;
2637 /* The Darwin libraries never set errno, so we might as well
2638 avoid calling them when that's the only reason we would. */
2639 flag_errno_math
= 0;
2641 /* The default values of these switches depend on the TARGET_64BIT
2642 that is not known at this moment. Mark these values with 2 and
2643 let user the to override these. In case there is no command line option
2644 specifying them, we will set the defaults in override_options. */
2646 flag_omit_frame_pointer
= 2;
2647 flag_pcc_struct_return
= 2;
2648 flag_asynchronous_unwind_tables
= 2;
2649 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2650 SUBTARGET_OPTIMIZATION_OPTIONS
;
2654 /* Table of valid machine attributes. */
2655 const struct attribute_spec ix86_attribute_table
[] =
2657 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2658 /* Stdcall attribute says callee is responsible for popping arguments
2659 if they are not variable. */
2660 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2661 /* Fastcall attribute says callee is responsible for popping arguments
2662 if they are not variable. */
2663 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2664 /* Cdecl attribute says the callee is a normal C declaration */
2665 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2666 /* Regparm attribute specifies how many integer arguments are to be
2667 passed in registers. */
2668 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2669 /* Sseregparm attribute says we are using x86_64 calling conventions
2670 for FP arguments. */
2671 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2672 /* force_align_arg_pointer says this function realigns the stack at entry. */
2673 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2674 false, true, true, ix86_handle_cconv_attribute
},
2675 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2676 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2677 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2678 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2680 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2681 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2682 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2683 SUBTARGET_ATTRIBUTE_TABLE
,
2685 { NULL
, 0, 0, false, false, false, NULL
}
2688 /* Decide whether we can make a sibling call to a function. DECL is the
2689 declaration of the function being targeted by the call and EXP is the
2690 CALL_EXPR representing the call. */
2693 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2698 /* If we are generating position-independent code, we cannot sibcall
2699 optimize any indirect call, or a direct call to a global function,
2700 as the PLT requires %ebx be live. */
2701 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2708 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2709 if (POINTER_TYPE_P (func
))
2710 func
= TREE_TYPE (func
);
2713 /* Check that the return value locations are the same. Like
2714 if we are returning floats on the 80387 register stack, we cannot
2715 make a sibcall from a function that doesn't return a float to a
2716 function that does or, conversely, from a function that does return
2717 a float to a function that doesn't; the necessary stack adjustment
2718 would not be executed. This is also the place we notice
2719 differences in the return value ABI. Note that it is ok for one
2720 of the functions to have void return type as long as the return
2721 value of the other is passed in a register. */
2722 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2723 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2725 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2727 if (!rtx_equal_p (a
, b
))
2730 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2732 else if (!rtx_equal_p (a
, b
))
2735 /* If this call is indirect, we'll need to be able to use a call-clobbered
2736 register for the address of the target function. Make sure that all
2737 such registers are not used for passing parameters. */
2738 if (!decl
&& !TARGET_64BIT
)
2742 /* We're looking at the CALL_EXPR, we need the type of the function. */
2743 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2744 type
= TREE_TYPE (type
); /* pointer type */
2745 type
= TREE_TYPE (type
); /* function type */
2747 if (ix86_function_regparm (type
, NULL
) >= 3)
2749 /* ??? Need to count the actual number of registers to be used,
2750 not the possible number of registers. Fix later. */
2755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2756 /* Dllimport'd functions are also called indirectly. */
2757 if (decl
&& DECL_DLLIMPORT_P (decl
)
2758 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2762 /* If we forced aligned the stack, then sibcalling would unalign the
2763 stack, which may break the called function. */
2764 if (cfun
->machine
->force_align_arg_pointer
)
2767 /* Otherwise okay. That also includes certain types of indirect calls. */
2771 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2772 calling convention attributes;
2773 arguments as in struct attribute_spec.handler. */
2776 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2778 int flags ATTRIBUTE_UNUSED
,
2781 if (TREE_CODE (*node
) != FUNCTION_TYPE
2782 && TREE_CODE (*node
) != METHOD_TYPE
2783 && TREE_CODE (*node
) != FIELD_DECL
2784 && TREE_CODE (*node
) != TYPE_DECL
)
2786 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2787 IDENTIFIER_POINTER (name
));
2788 *no_add_attrs
= true;
2792 /* Can combine regparm with all attributes but fastcall. */
2793 if (is_attribute_p ("regparm", name
))
2797 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2799 error ("fastcall and regparm attributes are not compatible");
2802 cst
= TREE_VALUE (args
);
2803 if (TREE_CODE (cst
) != INTEGER_CST
)
2805 warning (OPT_Wattributes
,
2806 "%qs attribute requires an integer constant argument",
2807 IDENTIFIER_POINTER (name
));
2808 *no_add_attrs
= true;
2810 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2812 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2813 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2814 *no_add_attrs
= true;
2818 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2819 TYPE_ATTRIBUTES (*node
))
2820 && compare_tree_int (cst
, REGPARM_MAX
-1))
2822 error ("%s functions limited to %d register parameters",
2823 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2831 warning (OPT_Wattributes
, "%qs attribute ignored",
2832 IDENTIFIER_POINTER (name
));
2833 *no_add_attrs
= true;
2837 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2838 if (is_attribute_p ("fastcall", name
))
2840 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2842 error ("fastcall and cdecl attributes are not compatible");
2844 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2846 error ("fastcall and stdcall attributes are not compatible");
2848 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2850 error ("fastcall and regparm attributes are not compatible");
2854 /* Can combine stdcall with fastcall (redundant), regparm and
2856 else if (is_attribute_p ("stdcall", name
))
2858 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2860 error ("stdcall and cdecl attributes are not compatible");
2862 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2864 error ("stdcall and fastcall attributes are not compatible");
2868 /* Can combine cdecl with regparm and sseregparm. */
2869 else if (is_attribute_p ("cdecl", name
))
2871 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2873 error ("stdcall and cdecl attributes are not compatible");
2875 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2877 error ("fastcall and cdecl attributes are not compatible");
2881 /* Can combine sseregparm with all attributes. */
2886 /* Return 0 if the attributes for two types are incompatible, 1 if they
2887 are compatible, and 2 if they are nearly compatible (which causes a
2888 warning to be generated). */
2891 ix86_comp_type_attributes (tree type1
, tree type2
)
2893 /* Check for mismatch of non-default calling convention. */
2894 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2896 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2899 /* Check for mismatched fastcall/regparm types. */
2900 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2901 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2902 || (ix86_function_regparm (type1
, NULL
)
2903 != ix86_function_regparm (type2
, NULL
)))
2906 /* Check for mismatched sseregparm types. */
2907 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2908 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2911 /* Check for mismatched return types (cdecl vs stdcall). */
2912 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2913 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2919 /* Return the regparm value for a function with the indicated TYPE and DECL.
2920 DECL may be NULL when calling function indirectly
2921 or considering a libcall. */
2924 ix86_function_regparm (tree type
, tree decl
)
2927 int regparm
= ix86_regparm
;
2928 bool user_convention
= false;
2932 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2935 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2936 user_convention
= true;
2939 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2942 user_convention
= true;
2945 /* Use register calling convention for local functions when possible. */
2946 if (!TARGET_64BIT
&& !user_convention
&& decl
2947 && flag_unit_at_a_time
&& !profile_flag
)
2949 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2952 int local_regparm
, globals
= 0, regno
;
2954 /* Make sure no regparm register is taken by a global register
2956 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2957 if (global_regs
[local_regparm
])
2959 /* We can't use regparm(3) for nested functions as these use
2960 static chain pointer in third argument. */
2961 if (local_regparm
== 3
2962 && decl_function_context (decl
)
2963 && !DECL_NO_STATIC_CHAIN (decl
))
2965 /* If the function realigns its stackpointer, the
2966 prologue will clobber %ecx. If we've already
2967 generated code for the callee, the callee
2968 DECL_STRUCT_FUNCTION is gone, so we fall back to
2969 scanning the attributes for the self-realigning
2971 if ((DECL_STRUCT_FUNCTION (decl
)
2972 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2973 || (!DECL_STRUCT_FUNCTION (decl
)
2974 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2975 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2977 /* Each global register variable increases register preassure,
2978 so the more global reg vars there are, the smaller regparm
2979 optimization use, unless requested by the user explicitly. */
2980 for (regno
= 0; regno
< 6; regno
++)
2981 if (global_regs
[regno
])
2984 = globals
< local_regparm
? local_regparm
- globals
: 0;
2986 if (local_regparm
> regparm
)
2987 regparm
= local_regparm
;
2994 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2995 DFmode (2) arguments in SSE registers for a function with the
2996 indicated TYPE and DECL. DECL may be NULL when calling function
2997 indirectly or considering a libcall. Otherwise return 0. */
3000 ix86_function_sseregparm (tree type
, tree decl
)
3002 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3003 by the sseregparm attribute. */
3004 if (TARGET_SSEREGPARM
3006 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3011 error ("Calling %qD with attribute sseregparm without "
3012 "SSE/SSE2 enabled", decl
);
3014 error ("Calling %qT with attribute sseregparm without "
3015 "SSE/SSE2 enabled", type
);
3022 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3023 (and DFmode for SSE2) arguments in SSE registers,
3024 even for 32-bit targets. */
3025 if (!TARGET_64BIT
&& decl
3026 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3028 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3030 return TARGET_SSE2
? 2 : 1;
3036 /* Return true if EAX is live at the start of the function. Used by
3037 ix86_expand_prologue to determine if we need special help before
3038 calling allocate_stack_worker. */
3041 ix86_eax_live_at_start_p (void)
3043 /* Cheat. Don't bother working forward from ix86_function_regparm
3044 to the function type to whether an actual argument is located in
3045 eax. Instead just look at cfg info, which is still close enough
3046 to correct at this point. This gives false positives for broken
3047 functions that might use uninitialized data that happens to be
3048 allocated in eax, but who cares? */
3049 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3052 /* Value is the number of bytes of arguments automatically
3053 popped when returning from a subroutine call.
3054 FUNDECL is the declaration node of the function (as a tree),
3055 FUNTYPE is the data type of the function (as a tree),
3056 or for a library call it is an identifier node for the subroutine name.
3057 SIZE is the number of bytes of arguments passed on the stack.
3059 On the 80386, the RTD insn may be used to pop them if the number
3060 of args is fixed, but if the number is variable then the caller
3061 must pop them all. RTD can't be used for library calls now
3062 because the library is compiled with the Unix compiler.
3063 Use of RTD is a selectable option, since it is incompatible with
3064 standard Unix calling sequences. If the option is not selected,
3065 the caller must always pop the args.
3067 The attribute stdcall is equivalent to RTD on a per module basis. */
3070 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3072 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3074 /* Cdecl functions override -mrtd, and never pop the stack. */
3075 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3077 /* Stdcall and fastcall functions will pop the stack if not
3079 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3080 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3084 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3085 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3086 == void_type_node
)))
3090 /* Lose any fake structure return argument if it is passed on the stack. */
3091 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3093 && !KEEP_AGGREGATE_RETURN_POINTER
)
3095 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3098 return GET_MODE_SIZE (Pmode
);
3104 /* Argument support functions. */
3106 /* Return true when register may be used to pass function parameters. */
3108 ix86_function_arg_regno_p (int regno
)
3114 return (regno
< REGPARM_MAX
3115 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3117 return (regno
< REGPARM_MAX
3118 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3119 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3120 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3121 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3126 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3131 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3132 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3135 /* RAX is used as hidden argument to va_arg functions. */
3138 for (i
= 0; i
< REGPARM_MAX
; i
++)
3139 if (regno
== x86_64_int_parameter_registers
[i
])
3144 /* Return if we do not know how to pass TYPE solely in registers. */
3147 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3149 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3152 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3153 The layout_type routine is crafty and tries to trick us into passing
3154 currently unsupported vector types on the stack by using TImode. */
3155 return (!TARGET_64BIT
&& mode
== TImode
3156 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3159 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3160 for a call to a function whose data type is FNTYPE.
3161 For a library call, FNTYPE is 0. */
3164 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3165 tree fntype
, /* tree ptr for function decl */
3166 rtx libname
, /* SYMBOL_REF of library name or 0 */
3169 static CUMULATIVE_ARGS zero_cum
;
3170 tree param
, next_param
;
3172 if (TARGET_DEBUG_ARG
)
3174 fprintf (stderr
, "\ninit_cumulative_args (");
3176 fprintf (stderr
, "fntype code = %s, ret code = %s",
3177 tree_code_name
[(int) TREE_CODE (fntype
)],
3178 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3180 fprintf (stderr
, "no fntype");
3183 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3188 /* Set up the number of registers to use for passing arguments. */
3189 cum
->nregs
= ix86_regparm
;
3191 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3193 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3194 cum
->warn_sse
= true;
3195 cum
->warn_mmx
= true;
3196 cum
->maybe_vaarg
= false;
3198 /* Use ecx and edx registers if function has fastcall attribute,
3199 else look for regparm information. */
3200 if (fntype
&& !TARGET_64BIT
)
3202 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3208 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3211 /* Set up the number of SSE registers used for passing SFmode
3212 and DFmode arguments. Warn for mismatching ABI. */
3213 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3215 /* Determine if this function has variable arguments. This is
3216 indicated by the last argument being 'void_type_mode' if there
3217 are no variable arguments. If there are variable arguments, then
3218 we won't pass anything in registers in 32-bit mode. */
3220 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3222 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3223 param
!= 0; param
= next_param
)
3225 next_param
= TREE_CHAIN (param
);
3226 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3236 cum
->float_in_sse
= 0;
3238 cum
->maybe_vaarg
= true;
3242 if ((!fntype
&& !libname
)
3243 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3244 cum
->maybe_vaarg
= true;
3246 if (TARGET_DEBUG_ARG
)
3247 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3252 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3253 But in the case of vector types, it is some vector mode.
3255 When we have only some of our vector isa extensions enabled, then there
3256 are some modes for which vector_mode_supported_p is false. For these
3257 modes, the generic vector support in gcc will choose some non-vector mode
3258 in order to implement the type. By computing the natural mode, we'll
3259 select the proper ABI location for the operand and not depend on whatever
3260 the middle-end decides to do with these vector types. */
3262 static enum machine_mode
3263 type_natural_mode (tree type
)
3265 enum machine_mode mode
= TYPE_MODE (type
);
3267 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3269 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3270 if ((size
== 8 || size
== 16)
3271 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3272 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3274 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3276 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3277 mode
= MIN_MODE_VECTOR_FLOAT
;
3279 mode
= MIN_MODE_VECTOR_INT
;
3281 /* Get the mode which has this inner mode and number of units. */
3282 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3283 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3284 && GET_MODE_INNER (mode
) == innermode
)
3294 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3295 this may not agree with the mode that the type system has chosen for the
3296 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3297 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3300 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3305 if (orig_mode
!= BLKmode
)
3306 tmp
= gen_rtx_REG (orig_mode
, regno
);
3309 tmp
= gen_rtx_REG (mode
, regno
);
3310 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3311 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3317 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3318 of this code is to classify each 8bytes of incoming argument by the register
3319 class and assign registers accordingly. */
3321 /* Return the union class of CLASS1 and CLASS2.
3322 See the x86-64 PS ABI for details. */
3324 static enum x86_64_reg_class
3325 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3327 /* Rule #1: If both classes are equal, this is the resulting class. */
3328 if (class1
== class2
)
3331 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3333 if (class1
== X86_64_NO_CLASS
)
3335 if (class2
== X86_64_NO_CLASS
)
3338 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3339 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3340 return X86_64_MEMORY_CLASS
;
3342 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3343 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3344 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3345 return X86_64_INTEGERSI_CLASS
;
3346 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3347 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3348 return X86_64_INTEGER_CLASS
;
3350 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3352 if (class1
== X86_64_X87_CLASS
3353 || class1
== X86_64_X87UP_CLASS
3354 || class1
== X86_64_COMPLEX_X87_CLASS
3355 || class2
== X86_64_X87_CLASS
3356 || class2
== X86_64_X87UP_CLASS
3357 || class2
== X86_64_COMPLEX_X87_CLASS
)
3358 return X86_64_MEMORY_CLASS
;
3360 /* Rule #6: Otherwise class SSE is used. */
3361 return X86_64_SSE_CLASS
;
3364 /* Classify the argument of type TYPE and mode MODE.
3365 CLASSES will be filled by the register class used to pass each word
3366 of the operand. The number of words is returned. In case the parameter
3367 should be passed in memory, 0 is returned. As a special case for zero
3368 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3370 BIT_OFFSET is used internally for handling records and specifies offset
3371 of the offset in bits modulo 256 to avoid overflow cases.
3373 See the x86-64 PS ABI for details.
3377 classify_argument (enum machine_mode mode
, tree type
,
3378 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3380 HOST_WIDE_INT bytes
=
3381 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3382 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3384 /* Variable sized entities are always passed/returned in memory. */
3388 if (mode
!= VOIDmode
3389 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3392 if (type
&& AGGREGATE_TYPE_P (type
))
3396 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3398 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3402 for (i
= 0; i
< words
; i
++)
3403 classes
[i
] = X86_64_NO_CLASS
;
3405 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3406 signalize memory class, so handle it as special case. */
3409 classes
[0] = X86_64_NO_CLASS
;
3413 /* Classify each field of record and merge classes. */
3414 switch (TREE_CODE (type
))
3417 /* And now merge the fields of structure. */
3418 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3420 if (TREE_CODE (field
) == FIELD_DECL
)
3424 if (TREE_TYPE (field
) == error_mark_node
)
3427 /* Bitfields are always classified as integer. Handle them
3428 early, since later code would consider them to be
3429 misaligned integers. */
3430 if (DECL_BIT_FIELD (field
))
3432 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3433 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3434 + tree_low_cst (DECL_SIZE (field
), 0)
3437 merge_classes (X86_64_INTEGER_CLASS
,
3442 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3443 TREE_TYPE (field
), subclasses
,
3444 (int_bit_position (field
)
3445 + bit_offset
) % 256);
3448 for (i
= 0; i
< num
; i
++)
3451 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3453 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3461 /* Arrays are handled as small records. */
3464 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3465 TREE_TYPE (type
), subclasses
, bit_offset
);
3469 /* The partial classes are now full classes. */
3470 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3471 subclasses
[0] = X86_64_SSE_CLASS
;
3472 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3473 subclasses
[0] = X86_64_INTEGER_CLASS
;
3475 for (i
= 0; i
< words
; i
++)
3476 classes
[i
] = subclasses
[i
% num
];
3481 case QUAL_UNION_TYPE
:
3482 /* Unions are similar to RECORD_TYPE but offset is always 0.
3484 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3486 if (TREE_CODE (field
) == FIELD_DECL
)
3490 if (TREE_TYPE (field
) == error_mark_node
)
3493 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3494 TREE_TYPE (field
), subclasses
,
3498 for (i
= 0; i
< num
; i
++)
3499 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3508 /* Final merger cleanup. */
3509 for (i
= 0; i
< words
; i
++)
3511 /* If one class is MEMORY, everything should be passed in
3513 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3516 /* The X86_64_SSEUP_CLASS should be always preceded by
3517 X86_64_SSE_CLASS. */
3518 if (classes
[i
] == X86_64_SSEUP_CLASS
3519 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3520 classes
[i
] = X86_64_SSE_CLASS
;
3522 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3523 if (classes
[i
] == X86_64_X87UP_CLASS
3524 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3525 classes
[i
] = X86_64_SSE_CLASS
;
3530 /* Compute alignment needed. We align all types to natural boundaries with
3531 exception of XFmode that is aligned to 64bits. */
3532 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3534 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3537 mode_alignment
= 128;
3538 else if (mode
== XCmode
)
3539 mode_alignment
= 256;
3540 if (COMPLEX_MODE_P (mode
))
3541 mode_alignment
/= 2;
3542 /* Misaligned fields are always returned in memory. */
3543 if (bit_offset
% mode_alignment
)
3547 /* for V1xx modes, just use the base mode */
3548 if (VECTOR_MODE_P (mode
)
3549 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3550 mode
= GET_MODE_INNER (mode
);
3552 /* Classification of atomic types. */
3557 classes
[0] = X86_64_SSE_CLASS
;
3560 classes
[0] = X86_64_SSE_CLASS
;
3561 classes
[1] = X86_64_SSEUP_CLASS
;
3570 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3571 classes
[0] = X86_64_INTEGERSI_CLASS
;
3573 classes
[0] = X86_64_INTEGER_CLASS
;
3577 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3582 if (!(bit_offset
% 64))
3583 classes
[0] = X86_64_SSESF_CLASS
;
3585 classes
[0] = X86_64_SSE_CLASS
;
3588 classes
[0] = X86_64_SSEDF_CLASS
;
3591 classes
[0] = X86_64_X87_CLASS
;
3592 classes
[1] = X86_64_X87UP_CLASS
;
3595 classes
[0] = X86_64_SSE_CLASS
;
3596 classes
[1] = X86_64_SSEUP_CLASS
;
3599 classes
[0] = X86_64_SSE_CLASS
;
3602 classes
[0] = X86_64_SSEDF_CLASS
;
3603 classes
[1] = X86_64_SSEDF_CLASS
;
3606 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3609 /* This modes is larger than 16 bytes. */
3617 classes
[0] = X86_64_SSE_CLASS
;
3618 classes
[1] = X86_64_SSEUP_CLASS
;
3624 classes
[0] = X86_64_SSE_CLASS
;
3630 gcc_assert (VECTOR_MODE_P (mode
));
3635 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3637 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3638 classes
[0] = X86_64_INTEGERSI_CLASS
;
3640 classes
[0] = X86_64_INTEGER_CLASS
;
3641 classes
[1] = X86_64_INTEGER_CLASS
;
3642 return 1 + (bytes
> 8);
3646 /* Examine the argument and return set number of register required in each
3647 class. Return 0 iff parameter should be passed in memory. */
3649 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3650 int *int_nregs
, int *sse_nregs
)
3652 enum x86_64_reg_class
class[MAX_CLASSES
];
3653 int n
= classify_argument (mode
, type
, class, 0);
3659 for (n
--; n
>= 0; n
--)
3662 case X86_64_INTEGER_CLASS
:
3663 case X86_64_INTEGERSI_CLASS
:
3666 case X86_64_SSE_CLASS
:
3667 case X86_64_SSESF_CLASS
:
3668 case X86_64_SSEDF_CLASS
:
3671 case X86_64_NO_CLASS
:
3672 case X86_64_SSEUP_CLASS
:
3674 case X86_64_X87_CLASS
:
3675 case X86_64_X87UP_CLASS
:
3679 case X86_64_COMPLEX_X87_CLASS
:
3680 return in_return
? 2 : 0;
3681 case X86_64_MEMORY_CLASS
:
3687 /* Construct container for the argument used by GCC interface. See
3688 FUNCTION_ARG for the detailed description. */
3691 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3692 tree type
, int in_return
, int nintregs
, int nsseregs
,
3693 const int *intreg
, int sse_regno
)
3695 /* The following variables hold the static issued_error state. */
3696 static bool issued_sse_arg_error
;
3697 static bool issued_sse_ret_error
;
3698 static bool issued_x87_ret_error
;
3700 enum machine_mode tmpmode
;
3702 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3703 enum x86_64_reg_class
class[MAX_CLASSES
];
3707 int needed_sseregs
, needed_intregs
;
3708 rtx exp
[MAX_CLASSES
];
3711 n
= classify_argument (mode
, type
, class, 0);
3712 if (TARGET_DEBUG_ARG
)
3715 fprintf (stderr
, "Memory class\n");
3718 fprintf (stderr
, "Classes:");
3719 for (i
= 0; i
< n
; i
++)
3721 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3723 fprintf (stderr
, "\n");
3728 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3731 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3734 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3735 some less clueful developer tries to use floating-point anyway. */
3736 if (needed_sseregs
&& !TARGET_SSE
)
3740 if (!issued_sse_ret_error
)
3742 error ("SSE register return with SSE disabled");
3743 issued_sse_ret_error
= true;
3746 else if (!issued_sse_arg_error
)
3748 error ("SSE register argument with SSE disabled");
3749 issued_sse_arg_error
= true;
3754 /* Likewise, error if the ABI requires us to return values in the
3755 x87 registers and the user specified -mno-80387. */
3756 if (!TARGET_80387
&& in_return
)
3757 for (i
= 0; i
< n
; i
++)
3758 if (class[i
] == X86_64_X87_CLASS
3759 || class[i
] == X86_64_X87UP_CLASS
3760 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3762 if (!issued_x87_ret_error
)
3764 error ("x87 register return with x87 disabled");
3765 issued_x87_ret_error
= true;
3770 /* First construct simple cases. Avoid SCmode, since we want to use
3771 single register to pass this type. */
3772 if (n
== 1 && mode
!= SCmode
)
3775 case X86_64_INTEGER_CLASS
:
3776 case X86_64_INTEGERSI_CLASS
:
3777 return gen_rtx_REG (mode
, intreg
[0]);
3778 case X86_64_SSE_CLASS
:
3779 case X86_64_SSESF_CLASS
:
3780 case X86_64_SSEDF_CLASS
:
3781 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3782 case X86_64_X87_CLASS
:
3783 case X86_64_COMPLEX_X87_CLASS
:
3784 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3785 case X86_64_NO_CLASS
:
3786 /* Zero sized array, struct or class. */
3791 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3793 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3795 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3796 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3797 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3798 && class[1] == X86_64_INTEGER_CLASS
3799 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3800 && intreg
[0] + 1 == intreg
[1])
3801 return gen_rtx_REG (mode
, intreg
[0]);
3803 /* Otherwise figure out the entries of the PARALLEL. */
3804 for (i
= 0; i
< n
; i
++)
3808 case X86_64_NO_CLASS
:
3810 case X86_64_INTEGER_CLASS
:
3811 case X86_64_INTEGERSI_CLASS
:
3812 /* Merge TImodes on aligned occasions here too. */
3813 if (i
* 8 + 8 > bytes
)
3814 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3815 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3819 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3820 if (tmpmode
== BLKmode
)
3822 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3823 gen_rtx_REG (tmpmode
, *intreg
),
3827 case X86_64_SSESF_CLASS
:
3828 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3829 gen_rtx_REG (SFmode
,
3830 SSE_REGNO (sse_regno
)),
3834 case X86_64_SSEDF_CLASS
:
3835 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3836 gen_rtx_REG (DFmode
,
3837 SSE_REGNO (sse_regno
)),
3841 case X86_64_SSE_CLASS
:
3842 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3846 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3847 gen_rtx_REG (tmpmode
,
3848 SSE_REGNO (sse_regno
)),
3850 if (tmpmode
== TImode
)
3859 /* Empty aligned struct, union or class. */
3863 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3864 for (i
= 0; i
< nexps
; i
++)
3865 XVECEXP (ret
, 0, i
) = exp
[i
];
3869 /* Update the data in CUM to advance over an argument
3870 of mode MODE and data type TYPE.
3871 (TYPE is null for libcalls where that information may not be available.) */
3874 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3875 tree type
, int named
)
3878 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3879 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3882 mode
= type_natural_mode (type
);
3884 if (TARGET_DEBUG_ARG
)
3885 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3886 "mode=%s, named=%d)\n\n",
3887 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3888 GET_MODE_NAME (mode
), named
);
3892 int int_nregs
, sse_nregs
;
3893 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3894 cum
->words
+= words
;
3895 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3897 cum
->nregs
-= int_nregs
;
3898 cum
->sse_nregs
-= sse_nregs
;
3899 cum
->regno
+= int_nregs
;
3900 cum
->sse_regno
+= sse_nregs
;
3903 cum
->words
+= words
;
3921 cum
->words
+= words
;
3922 cum
->nregs
-= words
;
3923 cum
->regno
+= words
;
3925 if (cum
->nregs
<= 0)
3933 if (cum
->float_in_sse
< 2)
3936 if (cum
->float_in_sse
< 1)
3947 if (!type
|| !AGGREGATE_TYPE_P (type
))
3949 cum
->sse_words
+= words
;
3950 cum
->sse_nregs
-= 1;
3951 cum
->sse_regno
+= 1;
3952 if (cum
->sse_nregs
<= 0)
3964 if (!type
|| !AGGREGATE_TYPE_P (type
))
3966 cum
->mmx_words
+= words
;
3967 cum
->mmx_nregs
-= 1;
3968 cum
->mmx_regno
+= 1;
3969 if (cum
->mmx_nregs
<= 0)
3980 /* Define where to put the arguments to a function.
3981 Value is zero to push the argument on the stack,
3982 or a hard register in which to store the argument.
3984 MODE is the argument's machine mode.
3985 TYPE is the data type of the argument (as a tree).
3986 This is null for libcalls where that information may
3988 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3989 the preceding args and about the function being called.
3990 NAMED is nonzero if this argument is a named parameter
3991 (otherwise it is an extra parameter matching an ellipsis). */
3994 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3995 tree type
, int named
)
3997 enum machine_mode mode
= orig_mode
;
4000 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
4001 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4002 static bool warnedsse
, warnedmmx
;
4004 /* To simplify the code below, represent vector types with a vector mode
4005 even if MMX/SSE are not active. */
4006 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4007 mode
= type_natural_mode (type
);
4009 /* Handle a hidden AL argument containing number of registers for varargs
4010 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4012 if (mode
== VOIDmode
)
4015 return GEN_INT (cum
->maybe_vaarg
4016 ? (cum
->sse_nregs
< 0
4024 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4026 &x86_64_int_parameter_registers
[cum
->regno
],
4031 /* For now, pass fp/complex values on the stack. */
4043 if (words
<= cum
->nregs
)
4045 int regno
= cum
->regno
;
4047 /* Fastcall allocates the first two DWORD (SImode) or
4048 smaller arguments to ECX and EDX. */
4051 if (mode
== BLKmode
|| mode
== DImode
)
4054 /* ECX not EAX is the first allocated register. */
4058 ret
= gen_rtx_REG (mode
, regno
);
4062 if (cum
->float_in_sse
< 2)
4065 if (cum
->float_in_sse
< 1)
4075 if (!type
|| !AGGREGATE_TYPE_P (type
))
4077 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4080 warning (0, "SSE vector argument without SSE enabled "
4084 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4085 cum
->sse_regno
+ FIRST_SSE_REG
);
4092 if (!type
|| !AGGREGATE_TYPE_P (type
))
4094 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4097 warning (0, "MMX vector argument without MMX enabled "
4101 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4102 cum
->mmx_regno
+ FIRST_MMX_REG
);
4107 if (TARGET_DEBUG_ARG
)
4110 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4111 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4114 print_simple_rtl (stderr
, ret
);
4116 fprintf (stderr
, ", stack");
4118 fprintf (stderr
, " )\n");
4124 /* A C expression that indicates when an argument must be passed by
4125 reference. If nonzero for an argument, a copy of that argument is
4126 made in memory and a pointer to the argument is passed instead of
4127 the argument itself. The pointer is passed in whatever way is
4128 appropriate for passing a pointer to that type. */
4131 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4132 enum machine_mode mode ATTRIBUTE_UNUSED
,
4133 tree type
, bool named ATTRIBUTE_UNUSED
)
4138 if (type
&& int_size_in_bytes (type
) == -1)
4140 if (TARGET_DEBUG_ARG
)
4141 fprintf (stderr
, "function_arg_pass_by_reference\n");
4148 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4149 ABI. Only called if TARGET_SSE. */
4151 contains_128bit_aligned_vector_p (tree type
)
4153 enum machine_mode mode
= TYPE_MODE (type
);
4154 if (SSE_REG_MODE_P (mode
)
4155 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4157 if (TYPE_ALIGN (type
) < 128)
4160 if (AGGREGATE_TYPE_P (type
))
4162 /* Walk the aggregates recursively. */
4163 switch (TREE_CODE (type
))
4167 case QUAL_UNION_TYPE
:
4171 /* Walk all the structure fields. */
4172 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4174 if (TREE_CODE (field
) == FIELD_DECL
4175 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4182 /* Just for use if some languages passes arrays by value. */
4183 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4194 /* Gives the alignment boundary, in bits, of an argument with the
4195 specified mode and type. */
4198 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4202 align
= TYPE_ALIGN (type
);
4204 align
= GET_MODE_ALIGNMENT (mode
);
4205 if (align
< PARM_BOUNDARY
)
4206 align
= PARM_BOUNDARY
;
4209 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4210 make an exception for SSE modes since these require 128bit
4213 The handling here differs from field_alignment. ICC aligns MMX
4214 arguments to 4 byte boundaries, while structure fields are aligned
4215 to 8 byte boundaries. */
4217 align
= PARM_BOUNDARY
;
4220 if (!SSE_REG_MODE_P (mode
))
4221 align
= PARM_BOUNDARY
;
4225 if (!contains_128bit_aligned_vector_p (type
))
4226 align
= PARM_BOUNDARY
;
4234 /* Return true if N is a possible register number of function value. */
4236 ix86_function_value_regno_p (int regno
)
4242 return ((regno
) == 0
4243 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4244 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4246 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4247 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4248 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4253 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4254 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4258 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4265 /* Define how to find the value returned by a function.
4266 VALTYPE is the data type of the value (as a tree).
4267 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4268 otherwise, FUNC is 0. */
4270 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4271 bool outgoing ATTRIBUTE_UNUSED
)
4273 enum machine_mode natmode
= type_natural_mode (valtype
);
4277 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4278 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4279 x86_64_int_return_registers
, 0);
4280 /* For zero sized structures, construct_container return NULL, but we
4281 need to keep rest of compiler happy by returning meaningful value. */
4283 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4288 tree fn
= NULL_TREE
, fntype
;
4290 && DECL_P (fntype_or_decl
))
4291 fn
= fntype_or_decl
;
4292 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4293 return gen_rtx_REG (TYPE_MODE (valtype
),
4294 ix86_value_regno (natmode
, fn
, fntype
));
4298 /* Return true iff type is returned in memory. */
4300 ix86_return_in_memory (tree type
)
4302 int needed_intregs
, needed_sseregs
, size
;
4303 enum machine_mode mode
= type_natural_mode (type
);
4306 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4308 if (mode
== BLKmode
)
4311 size
= int_size_in_bytes (type
);
4313 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4316 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4318 /* User-created vectors small enough to fit in EAX. */
4322 /* MMX/3dNow values are returned in MM0,
4323 except when it doesn't exits. */
4325 return (TARGET_MMX
? 0 : 1);
4327 /* SSE values are returned in XMM0, except when it doesn't exist. */
4329 return (TARGET_SSE
? 0 : 1);
4343 /* When returning SSE vector types, we have a choice of either
4344 (1) being abi incompatible with a -march switch, or
4345 (2) generating an error.
4346 Given no good solution, I think the safest thing is one warning.
4347 The user won't be able to use -Werror, but....
4349 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4350 called in response to actually generating a caller or callee that
4351 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4352 via aggregate_value_p for general type probing from tree-ssa. */
4355 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4357 static bool warnedsse
, warnedmmx
;
4361 /* Look at the return type of the function, not the function type. */
4362 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4364 if (!TARGET_SSE
&& !warnedsse
)
4367 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4370 warning (0, "SSE vector return without SSE enabled "
4375 if (!TARGET_MMX
&& !warnedmmx
)
4377 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4380 warning (0, "MMX vector return without MMX enabled "
4389 /* Define how to find the value returned by a library function
4390 assuming the value has mode MODE. */
4392 ix86_libcall_value (enum machine_mode mode
)
4406 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4409 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4413 return gen_rtx_REG (mode
, 0);
4417 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4420 /* Given a mode, return the register to use for a return value. */
4423 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4425 gcc_assert (!TARGET_64BIT
);
4427 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4428 we normally prevent this case when mmx is not available. However
4429 some ABIs may require the result to be returned like DImode. */
4430 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4431 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4433 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4434 we prevent this case when sse is not available. However some ABIs
4435 may require the result to be returned like integer TImode. */
4436 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4437 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4439 /* Decimal floating point values can go in %eax, unlike other float modes. */
4440 if (DECIMAL_FLOAT_MODE_P (mode
))
4443 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4444 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4447 /* Floating point return values in %st(0), except for local functions when
4448 SSE math is enabled or for functions with sseregparm attribute. */
4449 if ((func
|| fntype
)
4450 && (mode
== SFmode
|| mode
== DFmode
))
4452 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4453 if ((sse_level
>= 1 && mode
== SFmode
)
4454 || (sse_level
== 2 && mode
== DFmode
))
4455 return FIRST_SSE_REG
;
4458 return FIRST_FLOAT_REG
;
4461 /* Create the va_list data type. */
4464 ix86_build_builtin_va_list (void)
4466 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4468 /* For i386 we use plain pointer to argument area. */
4470 return build_pointer_type (char_type_node
);
4472 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4473 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4475 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4476 unsigned_type_node
);
4477 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4478 unsigned_type_node
);
4479 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4481 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4484 va_list_gpr_counter_field
= f_gpr
;
4485 va_list_fpr_counter_field
= f_fpr
;
4487 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4488 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4489 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4490 DECL_FIELD_CONTEXT (f_sav
) = record
;
4492 TREE_CHAIN (record
) = type_decl
;
4493 TYPE_NAME (record
) = type_decl
;
4494 TYPE_FIELDS (record
) = f_gpr
;
4495 TREE_CHAIN (f_gpr
) = f_fpr
;
4496 TREE_CHAIN (f_fpr
) = f_ovf
;
4497 TREE_CHAIN (f_ovf
) = f_sav
;
4499 layout_type (record
);
4501 /* The correct type is an array type of one element. */
4502 return build_array_type (record
, build_index_type (size_zero_node
));
4505 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4508 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4509 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4512 CUMULATIVE_ARGS next_cum
;
4513 rtx save_area
= NULL_RTX
, mem
;
4526 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4529 /* Indicate to allocate space on the stack for varargs save area. */
4530 ix86_save_varrargs_registers
= 1;
4532 cfun
->stack_alignment_needed
= 128;
4534 fntype
= TREE_TYPE (current_function_decl
);
4535 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4536 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4537 != void_type_node
));
4539 /* For varargs, we do not want to skip the dummy va_dcl argument.
4540 For stdargs, we do want to skip the last named argument. */
4543 function_arg_advance (&next_cum
, mode
, type
, 1);
4546 save_area
= frame_pointer_rtx
;
4548 set
= get_varargs_alias_set ();
4550 for (i
= next_cum
.regno
;
4552 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4555 mem
= gen_rtx_MEM (Pmode
,
4556 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4557 MEM_NOTRAP_P (mem
) = 1;
4558 set_mem_alias_set (mem
, set
);
4559 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4560 x86_64_int_parameter_registers
[i
]));
4563 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4565 /* Now emit code to save SSE registers. The AX parameter contains number
4566 of SSE parameter registers used to call this function. We use
4567 sse_prologue_save insn template that produces computed jump across
4568 SSE saves. We need some preparation work to get this working. */
4570 label
= gen_label_rtx ();
4571 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4573 /* Compute address to jump to :
4574 label - 5*eax + nnamed_sse_arguments*5 */
4575 tmp_reg
= gen_reg_rtx (Pmode
);
4576 nsse_reg
= gen_reg_rtx (Pmode
);
4577 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4578 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4579 gen_rtx_MULT (Pmode
, nsse_reg
,
4581 if (next_cum
.sse_regno
)
4584 gen_rtx_CONST (DImode
,
4585 gen_rtx_PLUS (DImode
,
4587 GEN_INT (next_cum
.sse_regno
* 4))));
4589 emit_move_insn (nsse_reg
, label_ref
);
4590 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4592 /* Compute address of memory block we save into. We always use pointer
4593 pointing 127 bytes after first byte to store - this is needed to keep
4594 instruction size limited by 4 bytes. */
4595 tmp_reg
= gen_reg_rtx (Pmode
);
4596 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4597 plus_constant (save_area
,
4598 8 * REGPARM_MAX
+ 127)));
4599 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4600 MEM_NOTRAP_P (mem
) = 1;
4601 set_mem_alias_set (mem
, set
);
4602 set_mem_align (mem
, BITS_PER_WORD
);
4604 /* And finally do the dirty job! */
4605 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4606 GEN_INT (next_cum
.sse_regno
), label
));
4611 /* Implement va_start. */
4614 ix86_va_start (tree valist
, rtx nextarg
)
4616 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4617 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4618 tree gpr
, fpr
, ovf
, sav
, t
;
4621 /* Only 64bit target needs something special. */
4624 std_expand_builtin_va_start (valist
, nextarg
);
4628 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4629 f_fpr
= TREE_CHAIN (f_gpr
);
4630 f_ovf
= TREE_CHAIN (f_fpr
);
4631 f_sav
= TREE_CHAIN (f_ovf
);
4633 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4634 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4635 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4636 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4637 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4639 /* Count number of gp and fp argument registers used. */
4640 words
= current_function_args_info
.words
;
4641 n_gpr
= current_function_args_info
.regno
;
4642 n_fpr
= current_function_args_info
.sse_regno
;
4644 if (TARGET_DEBUG_ARG
)
4645 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4646 (int) words
, (int) n_gpr
, (int) n_fpr
);
4648 if (cfun
->va_list_gpr_size
)
4650 type
= TREE_TYPE (gpr
);
4651 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4652 build_int_cst (type
, n_gpr
* 8));
4653 TREE_SIDE_EFFECTS (t
) = 1;
4654 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4657 if (cfun
->va_list_fpr_size
)
4659 type
= TREE_TYPE (fpr
);
4660 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4661 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4662 TREE_SIDE_EFFECTS (t
) = 1;
4663 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4666 /* Find the overflow area. */
4667 type
= TREE_TYPE (ovf
);
4668 t
= make_tree (type
, virtual_incoming_args_rtx
);
4670 t
= build2 (PLUS_EXPR
, type
, t
,
4671 build_int_cst (type
, words
* UNITS_PER_WORD
));
4672 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4673 TREE_SIDE_EFFECTS (t
) = 1;
4674 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4676 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4678 /* Find the register save area.
4679 Prologue of the function save it right above stack frame. */
4680 type
= TREE_TYPE (sav
);
4681 t
= make_tree (type
, frame_pointer_rtx
);
4682 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4683 TREE_SIDE_EFFECTS (t
) = 1;
4684 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4688 /* Implement va_arg. */
4691 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4693 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4694 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4695 tree gpr
, fpr
, ovf
, sav
, t
;
4697 tree lab_false
, lab_over
= NULL_TREE
;
4702 enum machine_mode nat_mode
;
4704 /* Only 64bit target needs something special. */
4706 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4708 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4709 f_fpr
= TREE_CHAIN (f_gpr
);
4710 f_ovf
= TREE_CHAIN (f_fpr
);
4711 f_sav
= TREE_CHAIN (f_ovf
);
4713 valist
= build_va_arg_indirect_ref (valist
);
4714 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4715 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4716 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4717 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4719 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4721 type
= build_pointer_type (type
);
4722 size
= int_size_in_bytes (type
);
4723 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4725 nat_mode
= type_natural_mode (type
);
4726 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4727 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4729 /* Pull the value out of the saved registers. */
4731 addr
= create_tmp_var (ptr_type_node
, "addr");
4732 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4736 int needed_intregs
, needed_sseregs
;
4738 tree int_addr
, sse_addr
;
4740 lab_false
= create_artificial_label ();
4741 lab_over
= create_artificial_label ();
4743 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4745 need_temp
= (!REG_P (container
)
4746 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4747 || TYPE_ALIGN (type
) > 128));
4749 /* In case we are passing structure, verify that it is consecutive block
4750 on the register save area. If not we need to do moves. */
4751 if (!need_temp
&& !REG_P (container
))
4753 /* Verify that all registers are strictly consecutive */
4754 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4758 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4760 rtx slot
= XVECEXP (container
, 0, i
);
4761 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4762 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4770 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4772 rtx slot
= XVECEXP (container
, 0, i
);
4773 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4774 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4786 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4787 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4788 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4789 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4792 /* First ensure that we fit completely in registers. */
4795 t
= build_int_cst (TREE_TYPE (gpr
),
4796 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4797 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4798 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4799 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4800 gimplify_and_add (t
, pre_p
);
4804 t
= build_int_cst (TREE_TYPE (fpr
),
4805 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4807 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4808 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4809 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4810 gimplify_and_add (t
, pre_p
);
4813 /* Compute index to start of area used for integer regs. */
4816 /* int_addr = gpr + sav; */
4817 t
= fold_convert (ptr_type_node
, gpr
);
4818 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4819 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4820 gimplify_and_add (t
, pre_p
);
4824 /* sse_addr = fpr + sav; */
4825 t
= fold_convert (ptr_type_node
, fpr
);
4826 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4827 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4828 gimplify_and_add (t
, pre_p
);
4833 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4836 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4837 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4838 gimplify_and_add (t
, pre_p
);
4840 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4842 rtx slot
= XVECEXP (container
, 0, i
);
4843 rtx reg
= XEXP (slot
, 0);
4844 enum machine_mode mode
= GET_MODE (reg
);
4845 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4846 tree addr_type
= build_pointer_type (piece_type
);
4849 tree dest_addr
, dest
;
4851 if (SSE_REGNO_P (REGNO (reg
)))
4853 src_addr
= sse_addr
;
4854 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4858 src_addr
= int_addr
;
4859 src_offset
= REGNO (reg
) * 8;
4861 src_addr
= fold_convert (addr_type
, src_addr
);
4862 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4863 size_int (src_offset
)));
4864 src
= build_va_arg_indirect_ref (src_addr
);
4866 dest_addr
= fold_convert (addr_type
, addr
);
4867 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4868 size_int (INTVAL (XEXP (slot
, 1)))));
4869 dest
= build_va_arg_indirect_ref (dest_addr
);
4871 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4872 gimplify_and_add (t
, pre_p
);
4878 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4879 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4880 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4881 gimplify_and_add (t
, pre_p
);
4885 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4886 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4887 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4888 gimplify_and_add (t
, pre_p
);
4891 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4892 gimplify_and_add (t
, pre_p
);
4894 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4895 append_to_statement_list (t
, pre_p
);
4898 /* ... otherwise out of the overflow area. */
4900 /* Care for on-stack alignment if needed. */
4901 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4902 || integer_zerop (TYPE_SIZE (type
)))
4906 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4907 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4908 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4909 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4910 build_int_cst (TREE_TYPE (t
), -align
));
4912 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4914 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4915 gimplify_and_add (t2
, pre_p
);
4917 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4918 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4919 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4920 gimplify_and_add (t
, pre_p
);
4924 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4925 append_to_statement_list (t
, pre_p
);
4928 ptrtype
= build_pointer_type (type
);
4929 addr
= fold_convert (ptrtype
, addr
);
4932 addr
= build_va_arg_indirect_ref (addr
);
4933 return build_va_arg_indirect_ref (addr
);
4936 /* Return nonzero if OPNUM's MEM should be matched
4937 in movabs* patterns. */
4940 ix86_check_movabs (rtx insn
, int opnum
)
4944 set
= PATTERN (insn
);
4945 if (GET_CODE (set
) == PARALLEL
)
4946 set
= XVECEXP (set
, 0, 0);
4947 gcc_assert (GET_CODE (set
) == SET
);
4948 mem
= XEXP (set
, opnum
);
4949 while (GET_CODE (mem
) == SUBREG
)
4950 mem
= SUBREG_REG (mem
);
4951 gcc_assert (MEM_P (mem
));
4952 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4955 /* Initialize the table of extra 80387 mathematical constants. */
4958 init_ext_80387_constants (void)
4960 static const char * cst
[5] =
4962 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4963 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4964 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4965 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4966 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4970 for (i
= 0; i
< 5; i
++)
4972 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4973 /* Ensure each constant is rounded to XFmode precision. */
4974 real_convert (&ext_80387_constants_table
[i
],
4975 XFmode
, &ext_80387_constants_table
[i
]);
4978 ext_80387_constants_init
= 1;
4981 /* Return true if the constant is something that can be loaded with
4982 a special instruction. */
4985 standard_80387_constant_p (rtx x
)
4989 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4992 if (x
== CONST0_RTX (GET_MODE (x
)))
4994 if (x
== CONST1_RTX (GET_MODE (x
)))
4997 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4999 /* For XFmode constants, try to find a special 80387 instruction when
5000 optimizing for size or on those CPUs that benefit from them. */
5001 if (GET_MODE (x
) == XFmode
5002 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
5006 if (! ext_80387_constants_init
)
5007 init_ext_80387_constants ();
5009 for (i
= 0; i
< 5; i
++)
5010 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5014 /* Load of the constant -0.0 or -1.0 will be split as
5015 fldz;fchs or fld1;fchs sequence. */
5016 if (real_isnegzero (&r
))
5018 if (real_identical (&r
, &dconstm1
))
5024 /* Return the opcode of the special instruction to be used to load
5028 standard_80387_constant_opcode (rtx x
)
5030 switch (standard_80387_constant_p (x
))
5054 /* Return the CONST_DOUBLE representing the 80387 constant that is
5055 loaded by the specified special instruction. The argument IDX
5056 matches the return value from standard_80387_constant_p. */
5059 standard_80387_constant_rtx (int idx
)
5063 if (! ext_80387_constants_init
)
5064 init_ext_80387_constants ();
5080 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5084 /* Return 1 if mode is a valid mode for sse. */
5086 standard_sse_mode_p (enum machine_mode mode
)
5103 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5106 standard_sse_constant_p (rtx x
)
5108 enum machine_mode mode
= GET_MODE (x
);
5110 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5112 if (vector_all_ones_operand (x
, mode
)
5113 && standard_sse_mode_p (mode
))
5114 return TARGET_SSE2
? 2 : -1;
5119 /* Return the opcode of the special instruction to be used to load
5123 standard_sse_constant_opcode (rtx insn
, rtx x
)
5125 switch (standard_sse_constant_p (x
))
5128 if (get_attr_mode (insn
) == MODE_V4SF
)
5129 return "xorps\t%0, %0";
5130 else if (get_attr_mode (insn
) == MODE_V2DF
)
5131 return "xorpd\t%0, %0";
5133 return "pxor\t%0, %0";
5135 return "pcmpeqd\t%0, %0";
5140 /* Returns 1 if OP contains a symbol reference */
5143 symbolic_reference_mentioned_p (rtx op
)
5148 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5151 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5152 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5158 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5159 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5163 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5170 /* Return 1 if it is appropriate to emit `ret' instructions in the
5171 body of a function. Do this only if the epilogue is simple, needing a
5172 couple of insns. Prior to reloading, we can't tell how many registers
5173 must be saved, so return 0 then. Return 0 if there is no frame
5174 marker to de-allocate. */
5177 ix86_can_use_return_insn_p (void)
5179 struct ix86_frame frame
;
5181 if (! reload_completed
|| frame_pointer_needed
)
5184 /* Don't allow more than 32 pop, since that's all we can do
5185 with one instruction. */
5186 if (current_function_pops_args
5187 && current_function_args_size
>= 32768)
5190 ix86_compute_frame_layout (&frame
);
5191 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5194 /* Value should be nonzero if functions must have frame pointers.
5195 Zero means the frame pointer need not be set up (and parms may
5196 be accessed via the stack pointer) in functions that seem suitable. */
5199 ix86_frame_pointer_required (void)
5201 /* If we accessed previous frames, then the generated code expects
5202 to be able to access the saved ebp value in our frame. */
5203 if (cfun
->machine
->accesses_prev_frame
)
5206 /* Several x86 os'es need a frame pointer for other reasons,
5207 usually pertaining to setjmp. */
5208 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5211 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5212 the frame pointer by default. Turn it back on now if we've not
5213 got a leaf function. */
5214 if (TARGET_OMIT_LEAF_FRAME_POINTER
5215 && (!current_function_is_leaf
5216 || ix86_current_function_calls_tls_descriptor
))
5219 if (current_function_profile
)
5225 /* Record that the current function accesses previous call frames. */
5228 ix86_setup_frame_addresses (void)
5230 cfun
->machine
->accesses_prev_frame
= 1;
5233 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5234 # define USE_HIDDEN_LINKONCE 1
5236 # define USE_HIDDEN_LINKONCE 0
5239 static int pic_labels_used
;
5241 /* Fills in the label name that should be used for a pc thunk for
5242 the given register. */
5245 get_pc_thunk_name (char name
[32], unsigned int regno
)
5247 gcc_assert (!TARGET_64BIT
);
5249 if (USE_HIDDEN_LINKONCE
)
5250 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5252 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5256 /* This function generates code for -fpic that loads %ebx with
5257 the return address of the caller and then returns. */
5260 ix86_file_end (void)
5265 for (regno
= 0; regno
< 8; ++regno
)
5269 if (! ((pic_labels_used
>> regno
) & 1))
5272 get_pc_thunk_name (name
, regno
);
5277 switch_to_section (darwin_sections
[text_coal_section
]);
5278 fputs ("\t.weak_definition\t", asm_out_file
);
5279 assemble_name (asm_out_file
, name
);
5280 fputs ("\n\t.private_extern\t", asm_out_file
);
5281 assemble_name (asm_out_file
, name
);
5282 fputs ("\n", asm_out_file
);
5283 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5287 if (USE_HIDDEN_LINKONCE
)
5291 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5293 TREE_PUBLIC (decl
) = 1;
5294 TREE_STATIC (decl
) = 1;
5295 DECL_ONE_ONLY (decl
) = 1;
5297 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5298 switch_to_section (get_named_section (decl
, NULL
, 0));
5300 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5301 fputs ("\t.hidden\t", asm_out_file
);
5302 assemble_name (asm_out_file
, name
);
5303 fputc ('\n', asm_out_file
);
5304 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5308 switch_to_section (text_section
);
5309 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5312 xops
[0] = gen_rtx_REG (SImode
, regno
);
5313 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5314 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5315 output_asm_insn ("ret", xops
);
5318 if (NEED_INDICATE_EXEC_STACK
)
5319 file_end_indicate_exec_stack ();
5322 /* Emit code for the SET_GOT patterns. */
5325 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5330 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5332 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5334 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5337 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5339 output_asm_insn ("call\t%a2", xops
);
5342 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5343 is what will be referenced by the Mach-O PIC subsystem. */
5345 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5348 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5349 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5352 output_asm_insn ("pop{l}\t%0", xops
);
5357 get_pc_thunk_name (name
, REGNO (dest
));
5358 pic_labels_used
|= 1 << REGNO (dest
);
5360 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5361 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5362 output_asm_insn ("call\t%X2", xops
);
5363 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5364 is what will be referenced by the Mach-O PIC subsystem. */
5367 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5369 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5370 CODE_LABEL_NUMBER (label
));
5377 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5378 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5380 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5385 /* Generate an "push" pattern for input ARG. */
5390 return gen_rtx_SET (VOIDmode
,
5392 gen_rtx_PRE_DEC (Pmode
,
5393 stack_pointer_rtx
)),
5397 /* Return >= 0 if there is an unused call-clobbered register available
5398 for the entire function. */
5401 ix86_select_alt_pic_regnum (void)
5403 if (current_function_is_leaf
&& !current_function_profile
5404 && !ix86_current_function_calls_tls_descriptor
)
5407 for (i
= 2; i
>= 0; --i
)
5408 if (!regs_ever_live
[i
])
5412 return INVALID_REGNUM
;
5415 /* Return 1 if we need to save REGNO. */
5417 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5419 if (pic_offset_table_rtx
5420 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5421 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5422 || current_function_profile
5423 || current_function_calls_eh_return
5424 || current_function_uses_const_pool
))
5426 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5431 if (current_function_calls_eh_return
&& maybe_eh_return
)
5436 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5437 if (test
== INVALID_REGNUM
)
5444 if (cfun
->machine
->force_align_arg_pointer
5445 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5448 return (regs_ever_live
[regno
]
5449 && !call_used_regs
[regno
]
5450 && !fixed_regs
[regno
]
5451 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5454 /* Return number of registers to be saved on the stack. */
5457 ix86_nsaved_regs (void)
5462 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5463 if (ix86_save_reg (regno
, true))
5468 /* Return the offset between two registers, one to be eliminated, and the other
5469 its replacement, at the start of a routine. */
5472 ix86_initial_elimination_offset (int from
, int to
)
5474 struct ix86_frame frame
;
5475 ix86_compute_frame_layout (&frame
);
5477 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5478 return frame
.hard_frame_pointer_offset
;
5479 else if (from
== FRAME_POINTER_REGNUM
5480 && to
== HARD_FRAME_POINTER_REGNUM
)
5481 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5484 gcc_assert (to
== STACK_POINTER_REGNUM
);
5486 if (from
== ARG_POINTER_REGNUM
)
5487 return frame
.stack_pointer_offset
;
5489 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5490 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5494 /* Fill structure ix86_frame about frame of currently computed function. */
5497 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5499 HOST_WIDE_INT total_size
;
5500 unsigned int stack_alignment_needed
;
5501 HOST_WIDE_INT offset
;
5502 unsigned int preferred_alignment
;
5503 HOST_WIDE_INT size
= get_frame_size ();
5505 frame
->nregs
= ix86_nsaved_regs ();
5508 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5509 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5511 /* During reload iteration the amount of registers saved can change.
5512 Recompute the value as needed. Do not recompute when amount of registers
5513 didn't change as reload does multiple calls to the function and does not
5514 expect the decision to change within single iteration. */
5516 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5518 int count
= frame
->nregs
;
5520 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5521 /* The fast prologue uses move instead of push to save registers. This
5522 is significantly longer, but also executes faster as modern hardware
5523 can execute the moves in parallel, but can't do that for push/pop.
5525 Be careful about choosing what prologue to emit: When function takes
5526 many instructions to execute we may use slow version as well as in
5527 case function is known to be outside hot spot (this is known with
5528 feedback only). Weight the size of function by number of registers
5529 to save as it is cheap to use one or two push instructions but very
5530 slow to use many of them. */
5532 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5533 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5534 || (flag_branch_probabilities
5535 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5536 cfun
->machine
->use_fast_prologue_epilogue
= false;
5538 cfun
->machine
->use_fast_prologue_epilogue
5539 = !expensive_function_p (count
);
5541 if (TARGET_PROLOGUE_USING_MOVE
5542 && cfun
->machine
->use_fast_prologue_epilogue
)
5543 frame
->save_regs_using_mov
= true;
5545 frame
->save_regs_using_mov
= false;
5548 /* Skip return address and saved base pointer. */
5549 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5551 frame
->hard_frame_pointer_offset
= offset
;
5553 /* Do some sanity checking of stack_alignment_needed and
5554 preferred_alignment, since i386 port is the only using those features
5555 that may break easily. */
5557 gcc_assert (!size
|| stack_alignment_needed
);
5558 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5559 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5560 gcc_assert (stack_alignment_needed
5561 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5563 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5564 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5566 /* Register save area */
5567 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5570 if (ix86_save_varrargs_registers
)
5572 offset
+= X86_64_VARARGS_SIZE
;
5573 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5576 frame
->va_arg_size
= 0;
5578 /* Align start of frame for local function. */
5579 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5580 & -stack_alignment_needed
) - offset
;
5582 offset
+= frame
->padding1
;
5584 /* Frame pointer points here. */
5585 frame
->frame_pointer_offset
= offset
;
5589 /* Add outgoing arguments area. Can be skipped if we eliminated
5590 all the function calls as dead code.
5591 Skipping is however impossible when function calls alloca. Alloca
5592 expander assumes that last current_function_outgoing_args_size
5593 of stack frame are unused. */
5594 if (ACCUMULATE_OUTGOING_ARGS
5595 && (!current_function_is_leaf
|| current_function_calls_alloca
5596 || ix86_current_function_calls_tls_descriptor
))
5598 offset
+= current_function_outgoing_args_size
;
5599 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5602 frame
->outgoing_arguments_size
= 0;
5604 /* Align stack boundary. Only needed if we're calling another function
5606 if (!current_function_is_leaf
|| current_function_calls_alloca
5607 || ix86_current_function_calls_tls_descriptor
)
5608 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5609 & -preferred_alignment
) - offset
;
5611 frame
->padding2
= 0;
5613 offset
+= frame
->padding2
;
5615 /* We've reached end of stack frame. */
5616 frame
->stack_pointer_offset
= offset
;
5618 /* Size prologue needs to allocate. */
5619 frame
->to_allocate
=
5620 (size
+ frame
->padding1
+ frame
->padding2
5621 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5623 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5624 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5625 frame
->save_regs_using_mov
= false;
5627 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5628 && current_function_is_leaf
5629 && !ix86_current_function_calls_tls_descriptor
)
5631 frame
->red_zone_size
= frame
->to_allocate
;
5632 if (frame
->save_regs_using_mov
)
5633 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5634 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5635 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5638 frame
->red_zone_size
= 0;
5639 frame
->to_allocate
-= frame
->red_zone_size
;
5640 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5642 fprintf (stderr
, "\n");
5643 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5644 fprintf (stderr
, "size: %ld\n", (long)size
);
5645 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5646 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5647 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5648 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5649 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5650 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5651 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5652 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5653 (long)frame
->hard_frame_pointer_offset
);
5654 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5655 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5656 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5657 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5661 /* Emit code to save registers in the prologue. */
5664 ix86_emit_save_regs (void)
5669 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5670 if (ix86_save_reg (regno
, true))
5672 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5673 RTX_FRAME_RELATED_P (insn
) = 1;
5677 /* Emit code to save registers using MOV insns. First register
5678 is restored from POINTER + OFFSET. */
5680 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5685 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5686 if (ix86_save_reg (regno
, true))
5688 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5690 gen_rtx_REG (Pmode
, regno
));
5691 RTX_FRAME_RELATED_P (insn
) = 1;
5692 offset
+= UNITS_PER_WORD
;
5696 /* Expand prologue or epilogue stack adjustment.
5697 The pattern exist to put a dependency on all ebp-based memory accesses.
5698 STYLE should be negative if instructions should be marked as frame related,
5699 zero if %r11 register is live and cannot be freely used and positive
5703 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5708 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5709 else if (x86_64_immediate_operand (offset
, DImode
))
5710 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5714 /* r11 is used by indirect sibcall return as well, set before the
5715 epilogue and used after the epilogue. ATM indirect sibcall
5716 shouldn't be used together with huge frame sizes in one
5717 function because of the frame_size check in sibcall.c. */
5719 r11
= gen_rtx_REG (DImode
, R11_REG
);
5720 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5722 RTX_FRAME_RELATED_P (insn
) = 1;
5723 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5727 RTX_FRAME_RELATED_P (insn
) = 1;
5730 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5733 ix86_internal_arg_pointer (void)
5735 bool has_force_align_arg_pointer
=
5736 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5737 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5738 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5739 && DECL_NAME (current_function_decl
)
5740 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5741 && DECL_FILE_SCOPE_P (current_function_decl
))
5742 || ix86_force_align_arg_pointer
5743 || has_force_align_arg_pointer
)
5745 /* Nested functions can't realign the stack due to a register
5747 if (DECL_CONTEXT (current_function_decl
)
5748 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5750 if (ix86_force_align_arg_pointer
)
5751 warning (0, "-mstackrealign ignored for nested functions");
5752 if (has_force_align_arg_pointer
)
5753 error ("%s not supported for nested functions",
5754 ix86_force_align_arg_pointer_string
);
5755 return virtual_incoming_args_rtx
;
5757 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5758 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5761 return virtual_incoming_args_rtx
;
5764 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5765 This is called from dwarf2out.c to emit call frame instructions
5766 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5768 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5770 rtx unspec
= SET_SRC (pattern
);
5771 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5775 case UNSPEC_REG_SAVE
:
5776 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5777 SET_DEST (pattern
));
5779 case UNSPEC_DEF_CFA
:
5780 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5781 INTVAL (XVECEXP (unspec
, 0, 0)));
5788 /* Expand the prologue into a bunch of separate insns. */
5791 ix86_expand_prologue (void)
5795 struct ix86_frame frame
;
5796 HOST_WIDE_INT allocate
;
5798 ix86_compute_frame_layout (&frame
);
5800 if (cfun
->machine
->force_align_arg_pointer
)
5804 /* Grab the argument pointer. */
5805 x
= plus_constant (stack_pointer_rtx
, 4);
5806 y
= cfun
->machine
->force_align_arg_pointer
;
5807 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5808 RTX_FRAME_RELATED_P (insn
) = 1;
5810 /* The unwind info consists of two parts: install the fafp as the cfa,
5811 and record the fafp as the "save register" of the stack pointer.
5812 The later is there in order that the unwinder can see where it
5813 should restore the stack pointer across the and insn. */
5814 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5815 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5816 RTX_FRAME_RELATED_P (x
) = 1;
5817 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5819 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5820 RTX_FRAME_RELATED_P (y
) = 1;
5821 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5822 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5823 REG_NOTES (insn
) = x
;
5825 /* Align the stack. */
5826 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5829 /* And here we cheat like madmen with the unwind info. We force the
5830 cfa register back to sp+4, which is exactly what it was at the
5831 start of the function. Re-pushing the return address results in
5832 the return at the same spot relative to the cfa, and thus is
5833 correct wrt the unwind info. */
5834 x
= cfun
->machine
->force_align_arg_pointer
;
5835 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5836 insn
= emit_insn (gen_push (x
));
5837 RTX_FRAME_RELATED_P (insn
) = 1;
5840 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5841 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5842 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5843 REG_NOTES (insn
) = x
;
5846 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5847 slower on all targets. Also sdb doesn't like it. */
5849 if (frame_pointer_needed
)
5851 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5852 RTX_FRAME_RELATED_P (insn
) = 1;
5854 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5855 RTX_FRAME_RELATED_P (insn
) = 1;
5858 allocate
= frame
.to_allocate
;
5860 if (!frame
.save_regs_using_mov
)
5861 ix86_emit_save_regs ();
5863 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5865 /* When using red zone we may start register saving before allocating
5866 the stack frame saving one cycle of the prologue. */
5867 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5868 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5869 : stack_pointer_rtx
,
5870 -frame
.nregs
* UNITS_PER_WORD
);
5874 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5875 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5876 GEN_INT (-allocate
), -1);
5879 /* Only valid for Win32. */
5880 rtx eax
= gen_rtx_REG (SImode
, 0);
5881 bool eax_live
= ix86_eax_live_at_start_p ();
5884 gcc_assert (!TARGET_64BIT
);
5888 emit_insn (gen_push (eax
));
5892 emit_move_insn (eax
, GEN_INT (allocate
));
5894 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5895 RTX_FRAME_RELATED_P (insn
) = 1;
5896 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5897 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5898 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5899 t
, REG_NOTES (insn
));
5903 if (frame_pointer_needed
)
5904 t
= plus_constant (hard_frame_pointer_rtx
,
5907 - frame
.nregs
* UNITS_PER_WORD
);
5909 t
= plus_constant (stack_pointer_rtx
, allocate
);
5910 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5914 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5916 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5917 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5919 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5920 -frame
.nregs
* UNITS_PER_WORD
);
5923 pic_reg_used
= false;
5924 if (pic_offset_table_rtx
5925 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5926 || current_function_profile
))
5928 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5930 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5931 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5933 pic_reg_used
= true;
5939 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5941 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5943 /* Even with accurate pre-reload life analysis, we can wind up
5944 deleting all references to the pic register after reload.
5945 Consider if cross-jumping unifies two sides of a branch
5946 controlled by a comparison vs the only read from a global.
5947 In which case, allow the set_got to be deleted, though we're
5948 too late to do anything about the ebx save in the prologue. */
5949 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5952 /* Prevent function calls from be scheduled before the call to mcount.
5953 In the pic_reg_used case, make sure that the got load isn't deleted. */
5954 if (current_function_profile
)
5955 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5958 /* Emit code to restore saved registers using MOV insns. First register
5959 is restored from POINTER + OFFSET. */
5961 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5962 int maybe_eh_return
)
5965 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5967 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5968 if (ix86_save_reg (regno
, maybe_eh_return
))
5970 /* Ensure that adjust_address won't be forced to produce pointer
5971 out of range allowed by x86-64 instruction set. */
5972 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5976 r11
= gen_rtx_REG (DImode
, R11_REG
);
5977 emit_move_insn (r11
, GEN_INT (offset
));
5978 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5979 base_address
= gen_rtx_MEM (Pmode
, r11
);
5982 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5983 adjust_address (base_address
, Pmode
, offset
));
5984 offset
+= UNITS_PER_WORD
;
5988 /* Restore function stack, frame, and registers. */
5991 ix86_expand_epilogue (int style
)
5994 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5995 struct ix86_frame frame
;
5996 HOST_WIDE_INT offset
;
5998 ix86_compute_frame_layout (&frame
);
6000 /* Calculate start of saved registers relative to ebp. Special care
6001 must be taken for the normal return case of a function using
6002 eh_return: the eax and edx registers are marked as saved, but not
6003 restored along this path. */
6004 offset
= frame
.nregs
;
6005 if (current_function_calls_eh_return
&& style
!= 2)
6007 offset
*= -UNITS_PER_WORD
;
6009 /* If we're only restoring one register and sp is not valid then
6010 using a move instruction to restore the register since it's
6011 less work than reloading sp and popping the register.
6013 The default code result in stack adjustment using add/lea instruction,
6014 while this code results in LEAVE instruction (or discrete equivalent),
6015 so it is profitable in some other cases as well. Especially when there
6016 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6017 and there is exactly one register to pop. This heuristic may need some
6018 tuning in future. */
6019 if ((!sp_valid
&& frame
.nregs
<= 1)
6020 || (TARGET_EPILOGUE_USING_MOVE
6021 && cfun
->machine
->use_fast_prologue_epilogue
6022 && (frame
.nregs
> 1 || frame
.to_allocate
))
6023 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6024 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6025 && cfun
->machine
->use_fast_prologue_epilogue
6026 && frame
.nregs
== 1)
6027 || current_function_calls_eh_return
)
6029 /* Restore registers. We can use ebp or esp to address the memory
6030 locations. If both are available, default to ebp, since offsets
6031 are known to be small. Only exception is esp pointing directly to the
6032 end of block of saved registers, where we may simplify addressing
6035 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6036 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6037 frame
.to_allocate
, style
== 2);
6039 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6040 offset
, style
== 2);
6042 /* eh_return epilogues need %ecx added to the stack pointer. */
6045 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6047 if (frame_pointer_needed
)
6049 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6050 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6051 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6053 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6054 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6056 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6061 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6062 tmp
= plus_constant (tmp
, (frame
.to_allocate
6063 + frame
.nregs
* UNITS_PER_WORD
));
6064 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6067 else if (!frame_pointer_needed
)
6068 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6069 GEN_INT (frame
.to_allocate
6070 + frame
.nregs
* UNITS_PER_WORD
),
6072 /* If not an i386, mov & pop is faster than "leave". */
6073 else if (TARGET_USE_LEAVE
|| optimize_size
6074 || !cfun
->machine
->use_fast_prologue_epilogue
)
6075 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6078 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6079 hard_frame_pointer_rtx
,
6082 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6084 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6089 /* First step is to deallocate the stack frame so that we can
6090 pop the registers. */
6093 gcc_assert (frame_pointer_needed
);
6094 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6095 hard_frame_pointer_rtx
,
6096 GEN_INT (offset
), style
);
6098 else if (frame
.to_allocate
)
6099 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6100 GEN_INT (frame
.to_allocate
), style
);
6102 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6103 if (ix86_save_reg (regno
, false))
6106 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6108 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6110 if (frame_pointer_needed
)
6112 /* Leave results in shorter dependency chains on CPUs that are
6113 able to grok it fast. */
6114 if (TARGET_USE_LEAVE
)
6115 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6116 else if (TARGET_64BIT
)
6117 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6119 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6123 if (cfun
->machine
->force_align_arg_pointer
)
6125 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6126 cfun
->machine
->force_align_arg_pointer
,
6130 /* Sibcall epilogues don't want a return instruction. */
6134 if (current_function_pops_args
&& current_function_args_size
)
6136 rtx popc
= GEN_INT (current_function_pops_args
);
6138 /* i386 can only pop 64K bytes. If asked to pop more, pop
6139 return address, do explicit add, and jump indirectly to the
6142 if (current_function_pops_args
>= 65536)
6144 rtx ecx
= gen_rtx_REG (SImode
, 2);
6146 /* There is no "pascal" calling convention in 64bit ABI. */
6147 gcc_assert (!TARGET_64BIT
);
6149 emit_insn (gen_popsi1 (ecx
));
6150 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6151 emit_jump_insn (gen_return_indirect_internal (ecx
));
6154 emit_jump_insn (gen_return_pop_internal (popc
));
6157 emit_jump_insn (gen_return_internal ());
6160 /* Reset from the function's potential modifications. */
6163 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6164 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6166 if (pic_offset_table_rtx
)
6167 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6169 /* Mach-O doesn't support labels at the end of objects, so if
6170 it looks like we might want one, insert a NOP. */
6172 rtx insn
= get_last_insn ();
6175 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6176 insn
= PREV_INSN (insn
);
6180 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6181 fputs ("\tnop\n", file
);
6187 /* Extract the parts of an RTL expression that is a valid memory address
6188 for an instruction. Return 0 if the structure of the address is
6189 grossly off. Return -1 if the address contains ASHIFT, so it is not
6190 strictly valid, but still used for computing length of lea instruction. */
6193 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6195 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6196 rtx base_reg
, index_reg
;
6197 HOST_WIDE_INT scale
= 1;
6198 rtx scale_rtx
= NULL_RTX
;
6200 enum ix86_address_seg seg
= SEG_DEFAULT
;
6202 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6204 else if (GET_CODE (addr
) == PLUS
)
6214 addends
[n
++] = XEXP (op
, 1);
6217 while (GET_CODE (op
) == PLUS
);
6222 for (i
= n
; i
>= 0; --i
)
6225 switch (GET_CODE (op
))
6230 index
= XEXP (op
, 0);
6231 scale_rtx
= XEXP (op
, 1);
6235 if (XINT (op
, 1) == UNSPEC_TP
6236 && TARGET_TLS_DIRECT_SEG_REFS
6237 && seg
== SEG_DEFAULT
)
6238 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6267 else if (GET_CODE (addr
) == MULT
)
6269 index
= XEXP (addr
, 0); /* index*scale */
6270 scale_rtx
= XEXP (addr
, 1);
6272 else if (GET_CODE (addr
) == ASHIFT
)
6276 /* We're called for lea too, which implements ashift on occasion. */
6277 index
= XEXP (addr
, 0);
6278 tmp
= XEXP (addr
, 1);
6279 if (!CONST_INT_P (tmp
))
6281 scale
= INTVAL (tmp
);
6282 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6288 disp
= addr
; /* displacement */
6290 /* Extract the integral value of scale. */
6293 if (!CONST_INT_P (scale_rtx
))
6295 scale
= INTVAL (scale_rtx
);
6298 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6299 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6301 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6302 if (base_reg
&& index_reg
&& scale
== 1
6303 && (index_reg
== arg_pointer_rtx
6304 || index_reg
== frame_pointer_rtx
6305 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6308 tmp
= base
, base
= index
, index
= tmp
;
6309 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6312 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6313 if ((base_reg
== hard_frame_pointer_rtx
6314 || base_reg
== frame_pointer_rtx
6315 || base_reg
== arg_pointer_rtx
) && !disp
)
6318 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6319 Avoid this by transforming to [%esi+0]. */
6320 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6321 && base_reg
&& !index_reg
&& !disp
6323 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6326 /* Special case: encode reg+reg instead of reg*2. */
6327 if (!base
&& index
&& scale
&& scale
== 2)
6328 base
= index
, base_reg
= index_reg
, scale
= 1;
6330 /* Special case: scaling cannot be encoded without base or displacement. */
6331 if (!base
&& !disp
&& index
&& scale
!= 1)
6343 /* Return cost of the memory address x.
6344 For i386, it is better to use a complex address than let gcc copy
6345 the address into a reg and make a new pseudo. But not if the address
6346 requires to two regs - that would mean more pseudos with longer
6349 ix86_address_cost (rtx x
)
6351 struct ix86_address parts
;
6353 int ok
= ix86_decompose_address (x
, &parts
);
6357 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6358 parts
.base
= SUBREG_REG (parts
.base
);
6359 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6360 parts
.index
= SUBREG_REG (parts
.index
);
6362 /* More complex memory references are better. */
6363 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6365 if (parts
.seg
!= SEG_DEFAULT
)
6368 /* Attempt to minimize number of registers in the address. */
6370 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6372 && (!REG_P (parts
.index
)
6373 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6377 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6379 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6380 && parts
.base
!= parts
.index
)
6383 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6384 since it's predecode logic can't detect the length of instructions
6385 and it degenerates to vector decoded. Increase cost of such
6386 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6387 to split such addresses or even refuse such addresses at all.
6389 Following addressing modes are affected:
6394 The first and last case may be avoidable by explicitly coding the zero in
6395 memory address, but I don't have AMD-K6 machine handy to check this
6399 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6400 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6401 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6407 /* If X is a machine specific address (i.e. a symbol or label being
6408 referenced as a displacement from the GOT implemented using an
6409 UNSPEC), then return the base term. Otherwise return X. */
6412 ix86_find_base_term (rtx x
)
6418 if (GET_CODE (x
) != CONST
)
6421 if (GET_CODE (term
) == PLUS
6422 && (CONST_INT_P (XEXP (term
, 1))
6423 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6424 term
= XEXP (term
, 0);
6425 if (GET_CODE (term
) != UNSPEC
6426 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6429 term
= XVECEXP (term
, 0, 0);
6431 if (GET_CODE (term
) != SYMBOL_REF
6432 && GET_CODE (term
) != LABEL_REF
)
6438 term
= ix86_delegitimize_address (x
);
6440 if (GET_CODE (term
) != SYMBOL_REF
6441 && GET_CODE (term
) != LABEL_REF
)
6447 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6448 this is used for to form addresses to local data when -fPIC is in
6452 darwin_local_data_pic (rtx disp
)
6454 if (GET_CODE (disp
) == MINUS
)
6456 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6457 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6458 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6460 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6461 if (! strcmp (sym_name
, "<pic base>"))
6469 /* Determine if a given RTX is a valid constant. We already know this
6470 satisfies CONSTANT_P. */
6473 legitimate_constant_p (rtx x
)
6475 switch (GET_CODE (x
))
6480 if (GET_CODE (x
) == PLUS
)
6482 if (!CONST_INT_P (XEXP (x
, 1)))
6487 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6490 /* Only some unspecs are valid as "constants". */
6491 if (GET_CODE (x
) == UNSPEC
)
6492 switch (XINT (x
, 1))
6495 return TARGET_64BIT
;
6498 x
= XVECEXP (x
, 0, 0);
6499 return (GET_CODE (x
) == SYMBOL_REF
6500 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6502 x
= XVECEXP (x
, 0, 0);
6503 return (GET_CODE (x
) == SYMBOL_REF
6504 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6509 /* We must have drilled down to a symbol. */
6510 if (GET_CODE (x
) == LABEL_REF
)
6512 if (GET_CODE (x
) != SYMBOL_REF
)
6517 /* TLS symbols are never valid. */
6518 if (SYMBOL_REF_TLS_MODEL (x
))
6523 if (GET_MODE (x
) == TImode
6524 && x
!= CONST0_RTX (TImode
)
6530 if (x
== CONST0_RTX (GET_MODE (x
)))
6538 /* Otherwise we handle everything else in the move patterns. */
6542 /* Determine if it's legal to put X into the constant pool. This
6543 is not possible for the address of thread-local symbols, which
6544 is checked above. */
6547 ix86_cannot_force_const_mem (rtx x
)
6549 /* We can always put integral constants and vectors in memory. */
6550 switch (GET_CODE (x
))
6560 return !legitimate_constant_p (x
);
6563 /* Determine if a given RTX is a valid constant address. */
6566 constant_address_p (rtx x
)
6568 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6571 /* Nonzero if the constant value X is a legitimate general operand
6572 when generating PIC code. It is given that flag_pic is on and
6573 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6576 legitimate_pic_operand_p (rtx x
)
6580 switch (GET_CODE (x
))
6583 inner
= XEXP (x
, 0);
6584 if (GET_CODE (inner
) == PLUS
6585 && CONST_INT_P (XEXP (inner
, 1)))
6586 inner
= XEXP (inner
, 0);
6588 /* Only some unspecs are valid as "constants". */
6589 if (GET_CODE (inner
) == UNSPEC
)
6590 switch (XINT (inner
, 1))
6593 return TARGET_64BIT
;
6595 x
= XVECEXP (inner
, 0, 0);
6596 return (GET_CODE (x
) == SYMBOL_REF
6597 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6605 return legitimate_pic_address_disp_p (x
);
6612 /* Determine if a given CONST RTX is a valid memory displacement
6616 legitimate_pic_address_disp_p (rtx disp
)
6620 /* In 64bit mode we can allow direct addresses of symbols and labels
6621 when they are not dynamic symbols. */
6624 rtx op0
= disp
, op1
;
6626 switch (GET_CODE (disp
))
6632 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6634 op0
= XEXP (XEXP (disp
, 0), 0);
6635 op1
= XEXP (XEXP (disp
, 0), 1);
6636 if (!CONST_INT_P (op1
)
6637 || INTVAL (op1
) >= 16*1024*1024
6638 || INTVAL (op1
) < -16*1024*1024)
6640 if (GET_CODE (op0
) == LABEL_REF
)
6642 if (GET_CODE (op0
) != SYMBOL_REF
)
6647 /* TLS references should always be enclosed in UNSPEC. */
6648 if (SYMBOL_REF_TLS_MODEL (op0
))
6650 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6658 if (GET_CODE (disp
) != CONST
)
6660 disp
= XEXP (disp
, 0);
6664 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6665 of GOT tables. We should not need these anyway. */
6666 if (GET_CODE (disp
) != UNSPEC
6667 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6668 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6671 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6672 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6678 if (GET_CODE (disp
) == PLUS
)
6680 if (!CONST_INT_P (XEXP (disp
, 1)))
6682 disp
= XEXP (disp
, 0);
6686 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6689 if (GET_CODE (disp
) != UNSPEC
)
6692 switch (XINT (disp
, 1))
6697 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6699 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6700 While ABI specify also 32bit relocation but we don't produce it in
6701 small PIC model at all. */
6702 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6703 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6705 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6707 case UNSPEC_GOTTPOFF
:
6708 case UNSPEC_GOTNTPOFF
:
6709 case UNSPEC_INDNTPOFF
:
6712 disp
= XVECEXP (disp
, 0, 0);
6713 return (GET_CODE (disp
) == SYMBOL_REF
6714 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6716 disp
= XVECEXP (disp
, 0, 0);
6717 return (GET_CODE (disp
) == SYMBOL_REF
6718 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6720 disp
= XVECEXP (disp
, 0, 0);
6721 return (GET_CODE (disp
) == SYMBOL_REF
6722 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6728 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6729 memory address for an instruction. The MODE argument is the machine mode
6730 for the MEM expression that wants to use this address.
6732 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6733 convert common non-canonical forms to canonical form so that they will
6737 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6739 struct ix86_address parts
;
6740 rtx base
, index
, disp
;
6741 HOST_WIDE_INT scale
;
6742 const char *reason
= NULL
;
6743 rtx reason_rtx
= NULL_RTX
;
6745 if (TARGET_DEBUG_ADDR
)
6748 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6749 GET_MODE_NAME (mode
), strict
);
6753 if (ix86_decompose_address (addr
, &parts
) <= 0)
6755 reason
= "decomposition failed";
6760 index
= parts
.index
;
6762 scale
= parts
.scale
;
6764 /* Validate base register.
6766 Don't allow SUBREG's that span more than a word here. It can lead to spill
6767 failures when the base is one word out of a two word structure, which is
6768 represented internally as a DImode int. */
6777 else if (GET_CODE (base
) == SUBREG
6778 && REG_P (SUBREG_REG (base
))
6779 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6781 reg
= SUBREG_REG (base
);
6784 reason
= "base is not a register";
6788 if (GET_MODE (base
) != Pmode
)
6790 reason
= "base is not in Pmode";
6794 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6795 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6797 reason
= "base is not valid";
6802 /* Validate index register.
6804 Don't allow SUBREG's that span more than a word here -- same as above. */
6813 else if (GET_CODE (index
) == SUBREG
6814 && REG_P (SUBREG_REG (index
))
6815 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6817 reg
= SUBREG_REG (index
);
6820 reason
= "index is not a register";
6824 if (GET_MODE (index
) != Pmode
)
6826 reason
= "index is not in Pmode";
6830 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6831 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6833 reason
= "index is not valid";
6838 /* Validate scale factor. */
6841 reason_rtx
= GEN_INT (scale
);
6844 reason
= "scale without index";
6848 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6850 reason
= "scale is not a valid multiplier";
6855 /* Validate displacement. */
6860 if (GET_CODE (disp
) == CONST
6861 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6862 switch (XINT (XEXP (disp
, 0), 1))
6864 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6865 used. While ABI specify also 32bit relocations, we don't produce
6866 them at all and use IP relative instead. */
6869 gcc_assert (flag_pic
);
6871 goto is_legitimate_pic
;
6872 reason
= "64bit address unspec";
6875 case UNSPEC_GOTPCREL
:
6876 gcc_assert (flag_pic
);
6877 goto is_legitimate_pic
;
6879 case UNSPEC_GOTTPOFF
:
6880 case UNSPEC_GOTNTPOFF
:
6881 case UNSPEC_INDNTPOFF
:
6887 reason
= "invalid address unspec";
6891 else if (SYMBOLIC_CONST (disp
)
6895 && MACHOPIC_INDIRECT
6896 && !machopic_operand_p (disp
)
6902 if (TARGET_64BIT
&& (index
|| base
))
6904 /* foo@dtpoff(%rX) is ok. */
6905 if (GET_CODE (disp
) != CONST
6906 || GET_CODE (XEXP (disp
, 0)) != PLUS
6907 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6908 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6909 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6910 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6912 reason
= "non-constant pic memory reference";
6916 else if (! legitimate_pic_address_disp_p (disp
))
6918 reason
= "displacement is an invalid pic construct";
6922 /* This code used to verify that a symbolic pic displacement
6923 includes the pic_offset_table_rtx register.
6925 While this is good idea, unfortunately these constructs may
6926 be created by "adds using lea" optimization for incorrect
6935 This code is nonsensical, but results in addressing
6936 GOT table with pic_offset_table_rtx base. We can't
6937 just refuse it easily, since it gets matched by
6938 "addsi3" pattern, that later gets split to lea in the
6939 case output register differs from input. While this
6940 can be handled by separate addsi pattern for this case
6941 that never results in lea, this seems to be easier and
6942 correct fix for crash to disable this test. */
6944 else if (GET_CODE (disp
) != LABEL_REF
6945 && !CONST_INT_P (disp
)
6946 && (GET_CODE (disp
) != CONST
6947 || !legitimate_constant_p (disp
))
6948 && (GET_CODE (disp
) != SYMBOL_REF
6949 || !legitimate_constant_p (disp
)))
6951 reason
= "displacement is not constant";
6954 else if (TARGET_64BIT
6955 && !x86_64_immediate_operand (disp
, VOIDmode
))
6957 reason
= "displacement is out of range";
6962 /* Everything looks valid. */
6963 if (TARGET_DEBUG_ADDR
)
6964 fprintf (stderr
, "Success.\n");
6968 if (TARGET_DEBUG_ADDR
)
6970 fprintf (stderr
, "Error: %s\n", reason
);
6971 debug_rtx (reason_rtx
);
6976 /* Return a unique alias set for the GOT. */
6978 static HOST_WIDE_INT
6979 ix86_GOT_alias_set (void)
6981 static HOST_WIDE_INT set
= -1;
6983 set
= new_alias_set ();
6987 /* Return a legitimate reference for ORIG (an address) using the
6988 register REG. If REG is 0, a new pseudo is generated.
6990 There are two types of references that must be handled:
6992 1. Global data references must load the address from the GOT, via
6993 the PIC reg. An insn is emitted to do this load, and the reg is
6996 2. Static data references, constant pool addresses, and code labels
6997 compute the address as an offset from the GOT, whose base is in
6998 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6999 differentiate them from global data objects. The returned
7000 address is the PIC reg + an unspec constant.
7002 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7003 reg also appears in the address. */
7006 legitimize_pic_address (rtx orig
, rtx reg
)
7013 if (TARGET_MACHO
&& !TARGET_64BIT
)
7016 reg
= gen_reg_rtx (Pmode
);
7017 /* Use the generic Mach-O PIC machinery. */
7018 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7022 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7024 else if (TARGET_64BIT
7025 && ix86_cmodel
!= CM_SMALL_PIC
7026 && local_symbolic_operand (addr
, Pmode
))
7029 /* This symbol may be referenced via a displacement from the PIC
7030 base address (@GOTOFF). */
7032 if (reload_in_progress
)
7033 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7034 if (GET_CODE (addr
) == CONST
)
7035 addr
= XEXP (addr
, 0);
7036 if (GET_CODE (addr
) == PLUS
)
7038 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7039 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7042 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7043 new = gen_rtx_CONST (Pmode
, new);
7045 tmpreg
= gen_reg_rtx (Pmode
);
7048 emit_move_insn (tmpreg
, new);
7052 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7053 tmpreg
, 1, OPTAB_DIRECT
);
7056 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7058 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
7060 /* This symbol may be referenced via a displacement from the PIC
7061 base address (@GOTOFF). */
7063 if (reload_in_progress
)
7064 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7065 if (GET_CODE (addr
) == CONST
)
7066 addr
= XEXP (addr
, 0);
7067 if (GET_CODE (addr
) == PLUS
)
7069 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7070 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7073 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7074 new = gen_rtx_CONST (Pmode
, new);
7075 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7079 emit_move_insn (reg
, new);
7083 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7087 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7088 new = gen_rtx_CONST (Pmode
, new);
7089 new = gen_const_mem (Pmode
, new);
7090 set_mem_alias_set (new, ix86_GOT_alias_set ());
7093 reg
= gen_reg_rtx (Pmode
);
7094 /* Use directly gen_movsi, otherwise the address is loaded
7095 into register for CSE. We don't want to CSE this addresses,
7096 instead we CSE addresses from the GOT table, so skip this. */
7097 emit_insn (gen_movsi (reg
, new));
7102 /* This symbol must be referenced via a load from the
7103 Global Offset Table (@GOT). */
7105 if (reload_in_progress
)
7106 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7107 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7108 new = gen_rtx_CONST (Pmode
, new);
7109 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7110 new = gen_const_mem (Pmode
, new);
7111 set_mem_alias_set (new, ix86_GOT_alias_set ());
7114 reg
= gen_reg_rtx (Pmode
);
7115 emit_move_insn (reg
, new);
7121 if (CONST_INT_P (addr
)
7122 && !x86_64_immediate_operand (addr
, VOIDmode
))
7126 emit_move_insn (reg
, addr
);
7130 new = force_reg (Pmode
, addr
);
7132 else if (GET_CODE (addr
) == CONST
)
7134 addr
= XEXP (addr
, 0);
7136 /* We must match stuff we generate before. Assume the only
7137 unspecs that can get here are ours. Not that we could do
7138 anything with them anyway.... */
7139 if (GET_CODE (addr
) == UNSPEC
7140 || (GET_CODE (addr
) == PLUS
7141 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7143 gcc_assert (GET_CODE (addr
) == PLUS
);
7145 if (GET_CODE (addr
) == PLUS
)
7147 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7149 /* Check first to see if this is a constant offset from a @GOTOFF
7150 symbol reference. */
7151 if (local_symbolic_operand (op0
, Pmode
)
7152 && CONST_INT_P (op1
))
7156 if (reload_in_progress
)
7157 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7158 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7160 new = gen_rtx_PLUS (Pmode
, new, op1
);
7161 new = gen_rtx_CONST (Pmode
, new);
7162 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7166 emit_move_insn (reg
, new);
7172 if (INTVAL (op1
) < -16*1024*1024
7173 || INTVAL (op1
) >= 16*1024*1024)
7175 if (!x86_64_immediate_operand (op1
, Pmode
))
7176 op1
= force_reg (Pmode
, op1
);
7177 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7183 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7184 new = legitimize_pic_address (XEXP (addr
, 1),
7185 base
== reg
? NULL_RTX
: reg
);
7187 if (CONST_INT_P (new))
7188 new = plus_constant (base
, INTVAL (new));
7191 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7193 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7194 new = XEXP (new, 1);
7196 new = gen_rtx_PLUS (Pmode
, base
, new);
7204 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7207 get_thread_pointer (int to_reg
)
7211 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7215 reg
= gen_reg_rtx (Pmode
);
7216 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7217 insn
= emit_insn (insn
);
7222 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7223 false if we expect this to be used for a memory address and true if
7224 we expect to load the address into a register. */
7227 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7229 rtx dest
, base
, off
, pic
, tp
;
7234 case TLS_MODEL_GLOBAL_DYNAMIC
:
7235 dest
= gen_reg_rtx (Pmode
);
7236 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7238 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7240 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7243 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7244 insns
= get_insns ();
7247 emit_libcall_block (insns
, dest
, rax
, x
);
7249 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7250 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7252 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7254 if (TARGET_GNU2_TLS
)
7256 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7258 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7262 case TLS_MODEL_LOCAL_DYNAMIC
:
7263 base
= gen_reg_rtx (Pmode
);
7264 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7266 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7268 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7271 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7272 insns
= get_insns ();
7275 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7276 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7277 emit_libcall_block (insns
, base
, rax
, note
);
7279 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7280 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7282 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7284 if (TARGET_GNU2_TLS
)
7286 rtx x
= ix86_tls_module_base ();
7288 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7289 gen_rtx_MINUS (Pmode
, x
, tp
));
7292 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7293 off
= gen_rtx_CONST (Pmode
, off
);
7295 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7297 if (TARGET_GNU2_TLS
)
7299 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7301 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7306 case TLS_MODEL_INITIAL_EXEC
:
7310 type
= UNSPEC_GOTNTPOFF
;
7314 if (reload_in_progress
)
7315 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7316 pic
= pic_offset_table_rtx
;
7317 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7319 else if (!TARGET_ANY_GNU_TLS
)
7321 pic
= gen_reg_rtx (Pmode
);
7322 emit_insn (gen_set_got (pic
));
7323 type
= UNSPEC_GOTTPOFF
;
7328 type
= UNSPEC_INDNTPOFF
;
7331 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7332 off
= gen_rtx_CONST (Pmode
, off
);
7334 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7335 off
= gen_const_mem (Pmode
, off
);
7336 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7338 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7340 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7341 off
= force_reg (Pmode
, off
);
7342 return gen_rtx_PLUS (Pmode
, base
, off
);
7346 base
= get_thread_pointer (true);
7347 dest
= gen_reg_rtx (Pmode
);
7348 emit_insn (gen_subsi3 (dest
, base
, off
));
7352 case TLS_MODEL_LOCAL_EXEC
:
7353 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7354 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7355 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7356 off
= gen_rtx_CONST (Pmode
, off
);
7358 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7360 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7361 return gen_rtx_PLUS (Pmode
, base
, off
);
7365 base
= get_thread_pointer (true);
7366 dest
= gen_reg_rtx (Pmode
);
7367 emit_insn (gen_subsi3 (dest
, base
, off
));
7378 /* Try machine-dependent ways of modifying an illegitimate address
7379 to be legitimate. If we find one, return the new, valid address.
7380 This macro is used in only one place: `memory_address' in explow.c.
7382 OLDX is the address as it was before break_out_memory_refs was called.
7383 In some cases it is useful to look at this to decide what needs to be done.
7385 MODE and WIN are passed so that this macro can use
7386 GO_IF_LEGITIMATE_ADDRESS.
7388 It is always safe for this macro to do nothing. It exists to recognize
7389 opportunities to optimize the output.
7391 For the 80386, we handle X+REG by loading X into a register R and
7392 using R+REG. R will go in a general reg and indexing will be used.
7393 However, if REG is a broken-out memory address or multiplication,
7394 nothing needs to be done because REG can certainly go in a general reg.
7396 When -fpic is used, special handling is needed for symbolic references.
7397 See comments by legitimize_pic_address in i386.c for details. */
7400 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7405 if (TARGET_DEBUG_ADDR
)
7407 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7408 GET_MODE_NAME (mode
));
7412 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7414 return legitimize_tls_address (x
, log
, false);
7415 if (GET_CODE (x
) == CONST
7416 && GET_CODE (XEXP (x
, 0)) == PLUS
7417 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7418 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7420 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7421 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7424 if (flag_pic
&& SYMBOLIC_CONST (x
))
7425 return legitimize_pic_address (x
, 0);
7427 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7428 if (GET_CODE (x
) == ASHIFT
7429 && CONST_INT_P (XEXP (x
, 1))
7430 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7433 log
= INTVAL (XEXP (x
, 1));
7434 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7435 GEN_INT (1 << log
));
7438 if (GET_CODE (x
) == PLUS
)
7440 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7442 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7443 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7444 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7447 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7448 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7449 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7450 GEN_INT (1 << log
));
7453 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7454 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7455 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7458 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7459 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7460 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7461 GEN_INT (1 << log
));
7464 /* Put multiply first if it isn't already. */
7465 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7467 rtx tmp
= XEXP (x
, 0);
7468 XEXP (x
, 0) = XEXP (x
, 1);
7473 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7474 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7475 created by virtual register instantiation, register elimination, and
7476 similar optimizations. */
7477 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7480 x
= gen_rtx_PLUS (Pmode
,
7481 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7482 XEXP (XEXP (x
, 1), 0)),
7483 XEXP (XEXP (x
, 1), 1));
7487 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7488 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7489 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7490 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7491 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7492 && CONSTANT_P (XEXP (x
, 1)))
7495 rtx other
= NULL_RTX
;
7497 if (CONST_INT_P (XEXP (x
, 1)))
7499 constant
= XEXP (x
, 1);
7500 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7502 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7504 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7505 other
= XEXP (x
, 1);
7513 x
= gen_rtx_PLUS (Pmode
,
7514 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7515 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7516 plus_constant (other
, INTVAL (constant
)));
7520 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7523 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7526 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7529 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7532 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7536 && REG_P (XEXP (x
, 1))
7537 && REG_P (XEXP (x
, 0)))
7540 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7543 x
= legitimize_pic_address (x
, 0);
7546 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7549 if (REG_P (XEXP (x
, 0)))
7551 rtx temp
= gen_reg_rtx (Pmode
);
7552 rtx val
= force_operand (XEXP (x
, 1), temp
);
7554 emit_move_insn (temp
, val
);
7560 else if (REG_P (XEXP (x
, 1)))
7562 rtx temp
= gen_reg_rtx (Pmode
);
7563 rtx val
= force_operand (XEXP (x
, 0), temp
);
7565 emit_move_insn (temp
, val
);
7575 /* Print an integer constant expression in assembler syntax. Addition
7576 and subtraction are the only arithmetic that may appear in these
7577 expressions. FILE is the stdio stream to write to, X is the rtx, and
7578 CODE is the operand print code from the output string. */
7581 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7585 switch (GET_CODE (x
))
7588 gcc_assert (flag_pic
);
7593 if (! TARGET_MACHO
|| TARGET_64BIT
)
7594 output_addr_const (file
, x
);
7597 const char *name
= XSTR (x
, 0);
7599 /* Mark the decl as referenced so that cgraph will output the function. */
7600 if (SYMBOL_REF_DECL (x
))
7601 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7604 if (MACHOPIC_INDIRECT
7605 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7606 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7608 assemble_name (file
, name
);
7610 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7611 fputs ("@PLT", file
);
7618 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7619 assemble_name (asm_out_file
, buf
);
7623 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7627 /* This used to output parentheses around the expression,
7628 but that does not work on the 386 (either ATT or BSD assembler). */
7629 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7633 if (GET_MODE (x
) == VOIDmode
)
7635 /* We can use %d if the number is <32 bits and positive. */
7636 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7637 fprintf (file
, "0x%lx%08lx",
7638 (unsigned long) CONST_DOUBLE_HIGH (x
),
7639 (unsigned long) CONST_DOUBLE_LOW (x
));
7641 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7644 /* We can't handle floating point constants;
7645 PRINT_OPERAND must handle them. */
7646 output_operand_lossage ("floating constant misused");
7650 /* Some assemblers need integer constants to appear first. */
7651 if (CONST_INT_P (XEXP (x
, 0)))
7653 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7655 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7659 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7660 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7662 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7668 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7669 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7671 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7673 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7677 gcc_assert (XVECLEN (x
, 0) == 1);
7678 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7679 switch (XINT (x
, 1))
7682 fputs ("@GOT", file
);
7685 fputs ("@GOTOFF", file
);
7687 case UNSPEC_GOTPCREL
:
7688 fputs ("@GOTPCREL(%rip)", file
);
7690 case UNSPEC_GOTTPOFF
:
7691 /* FIXME: This might be @TPOFF in Sun ld too. */
7692 fputs ("@GOTTPOFF", file
);
7695 fputs ("@TPOFF", file
);
7699 fputs ("@TPOFF", file
);
7701 fputs ("@NTPOFF", file
);
7704 fputs ("@DTPOFF", file
);
7706 case UNSPEC_GOTNTPOFF
:
7708 fputs ("@GOTTPOFF(%rip)", file
);
7710 fputs ("@GOTNTPOFF", file
);
7712 case UNSPEC_INDNTPOFF
:
7713 fputs ("@INDNTPOFF", file
);
7716 output_operand_lossage ("invalid UNSPEC as operand");
7722 output_operand_lossage ("invalid expression as operand");
7726 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7727 We need to emit DTP-relative relocations. */
7730 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7732 fputs (ASM_LONG
, file
);
7733 output_addr_const (file
, x
);
7734 fputs ("@DTPOFF", file
);
7740 fputs (", 0", file
);
7747 /* In the name of slightly smaller debug output, and to cater to
7748 general assembler lossage, recognize PIC+GOTOFF and turn it back
7749 into a direct symbol reference.
7751 On Darwin, this is necessary to avoid a crash, because Darwin
7752 has a different PIC label for each routine but the DWARF debugging
7753 information is not associated with any particular routine, so it's
7754 necessary to remove references to the PIC label from RTL stored by
7755 the DWARF output code. */
7758 ix86_delegitimize_address (rtx orig_x
)
7761 /* reg_addend is NULL or a multiple of some register. */
7762 rtx reg_addend
= NULL_RTX
;
7763 /* const_addend is NULL or a const_int. */
7764 rtx const_addend
= NULL_RTX
;
7765 /* This is the result, or NULL. */
7766 rtx result
= NULL_RTX
;
7773 if (GET_CODE (x
) != CONST
7774 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7775 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7778 return XVECEXP (XEXP (x
, 0), 0, 0);
7781 if (GET_CODE (x
) != PLUS
7782 || GET_CODE (XEXP (x
, 1)) != CONST
)
7785 if (REG_P (XEXP (x
, 0))
7786 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7787 /* %ebx + GOT/GOTOFF */
7789 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7791 /* %ebx + %reg * scale + GOT/GOTOFF */
7792 reg_addend
= XEXP (x
, 0);
7793 if (REG_P (XEXP (reg_addend
, 0))
7794 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7795 reg_addend
= XEXP (reg_addend
, 1);
7796 else if (REG_P (XEXP (reg_addend
, 1))
7797 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7798 reg_addend
= XEXP (reg_addend
, 0);
7801 if (!REG_P (reg_addend
)
7802 && GET_CODE (reg_addend
) != MULT
7803 && GET_CODE (reg_addend
) != ASHIFT
)
7809 x
= XEXP (XEXP (x
, 1), 0);
7810 if (GET_CODE (x
) == PLUS
7811 && CONST_INT_P (XEXP (x
, 1)))
7813 const_addend
= XEXP (x
, 1);
7817 if (GET_CODE (x
) == UNSPEC
7818 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7819 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7820 result
= XVECEXP (x
, 0, 0);
7822 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7824 result
= XEXP (x
, 0);
7830 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7832 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7837 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7842 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7844 enum rtx_code second_code
, bypass_code
;
7845 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7846 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7847 code
= ix86_fp_compare_code_to_integer (code
);
7851 code
= reverse_condition (code
);
7862 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7866 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7867 Those same assemblers have the same but opposite lossage on cmov. */
7868 gcc_assert (mode
== CCmode
);
7869 suffix
= fp
? "nbe" : "a";
7889 gcc_assert (mode
== CCmode
);
7911 gcc_assert (mode
== CCmode
);
7912 suffix
= fp
? "nb" : "ae";
7915 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7919 gcc_assert (mode
== CCmode
);
7923 suffix
= fp
? "u" : "p";
7926 suffix
= fp
? "nu" : "np";
7931 fputs (suffix
, file
);
7934 /* Print the name of register X to FILE based on its machine mode and number.
7935 If CODE is 'w', pretend the mode is HImode.
7936 If CODE is 'b', pretend the mode is QImode.
7937 If CODE is 'k', pretend the mode is SImode.
7938 If CODE is 'q', pretend the mode is DImode.
7939 If CODE is 'h', pretend the reg is the 'high' byte register.
7940 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7943 print_reg (rtx x
, int code
, FILE *file
)
7945 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7946 && REGNO (x
) != FRAME_POINTER_REGNUM
7947 && REGNO (x
) != FLAGS_REG
7948 && REGNO (x
) != FPSR_REG
7949 && REGNO (x
) != FPCR_REG
);
7951 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7954 if (code
== 'w' || MMX_REG_P (x
))
7956 else if (code
== 'b')
7958 else if (code
== 'k')
7960 else if (code
== 'q')
7962 else if (code
== 'y')
7964 else if (code
== 'h')
7967 code
= GET_MODE_SIZE (GET_MODE (x
));
7969 /* Irritatingly, AMD extended registers use different naming convention
7970 from the normal registers. */
7971 if (REX_INT_REG_P (x
))
7973 gcc_assert (TARGET_64BIT
);
7977 error ("extended registers have no high halves");
7980 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7983 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7986 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7989 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7992 error ("unsupported operand size for extended register");
8000 if (STACK_TOP_P (x
))
8002 fputs ("st(0)", file
);
8009 if (! ANY_FP_REG_P (x
))
8010 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8015 fputs (hi_reg_name
[REGNO (x
)], file
);
8018 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8020 fputs (qi_reg_name
[REGNO (x
)], file
);
8023 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8025 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8032 /* Locate some local-dynamic symbol still in use by this function
8033 so that we can print its name in some tls_local_dynamic_base
8037 get_some_local_dynamic_name (void)
8041 if (cfun
->machine
->some_ld_name
)
8042 return cfun
->machine
->some_ld_name
;
8044 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8046 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8047 return cfun
->machine
->some_ld_name
;
8053 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8057 if (GET_CODE (x
) == SYMBOL_REF
8058 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8060 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8068 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8069 C -- print opcode suffix for set/cmov insn.
8070 c -- like C, but print reversed condition
8071 F,f -- likewise, but for floating-point.
8072 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8074 R -- print the prefix for register names.
8075 z -- print the opcode suffix for the size of the current operand.
8076 * -- print a star (in certain assembler syntax)
8077 A -- print an absolute memory reference.
8078 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8079 s -- print a shift double count, followed by the assemblers argument
8081 b -- print the QImode name of the register for the indicated operand.
8082 %b0 would print %al if operands[0] is reg 0.
8083 w -- likewise, print the HImode name of the register.
8084 k -- likewise, print the SImode name of the register.
8085 q -- likewise, print the DImode name of the register.
8086 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8087 y -- print "st(0)" instead of "st" as a register.
8088 D -- print condition for SSE cmp instruction.
8089 P -- if PIC, print an @PLT suffix.
8090 X -- don't print any sort of PIC '@' suffix for a symbol.
8091 & -- print some in-use local-dynamic symbol name.
8092 H -- print a memory address offset by 8; used for sse high-parts
8096 print_operand (FILE *file
, rtx x
, int code
)
8103 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8108 assemble_name (file
, get_some_local_dynamic_name ());
8112 switch (ASSEMBLER_DIALECT
)
8119 /* Intel syntax. For absolute addresses, registers should not
8120 be surrounded by braces. */
8124 PRINT_OPERAND (file
, x
, 0);
8134 PRINT_OPERAND (file
, x
, 0);
8139 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8144 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8149 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8154 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8159 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8164 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8169 /* 387 opcodes don't get size suffixes if the operands are
8171 if (STACK_REG_P (x
))
8174 /* Likewise if using Intel opcodes. */
8175 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8178 /* This is the size of op from size of operand. */
8179 switch (GET_MODE_SIZE (GET_MODE (x
)))
8186 #ifdef HAVE_GAS_FILDS_FISTS
8192 if (GET_MODE (x
) == SFmode
)
8207 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8209 #ifdef GAS_MNEMONICS
8235 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8237 PRINT_OPERAND (file
, x
, 0);
8243 /* Little bit of braindamage here. The SSE compare instructions
8244 does use completely different names for the comparisons that the
8245 fp conditional moves. */
8246 switch (GET_CODE (x
))
8261 fputs ("unord", file
);
8265 fputs ("neq", file
);
8269 fputs ("nlt", file
);
8273 fputs ("nle", file
);
8276 fputs ("ord", file
);
8283 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8284 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8286 switch (GET_MODE (x
))
8288 case HImode
: putc ('w', file
); break;
8290 case SFmode
: putc ('l', file
); break;
8292 case DFmode
: putc ('q', file
); break;
8293 default: gcc_unreachable ();
8300 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8303 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8304 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8307 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8310 /* Like above, but reverse condition */
8312 /* Check to see if argument to %c is really a constant
8313 and not a condition code which needs to be reversed. */
8314 if (!COMPARISON_P (x
))
8316 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8319 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8322 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8323 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8326 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8330 /* It doesn't actually matter what mode we use here, as we're
8331 only going to use this for printing. */
8332 x
= adjust_address_nv (x
, DImode
, 8);
8339 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8342 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8345 int pred_val
= INTVAL (XEXP (x
, 0));
8347 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8348 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8350 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8351 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8353 /* Emit hints only in the case default branch prediction
8354 heuristics would fail. */
8355 if (taken
!= cputaken
)
8357 /* We use 3e (DS) prefix for taken branches and
8358 2e (CS) prefix for not taken branches. */
8360 fputs ("ds ; ", file
);
8362 fputs ("cs ; ", file
);
8369 output_operand_lossage ("invalid operand code '%c'", code
);
8374 print_reg (x
, code
, file
);
8378 /* No `byte ptr' prefix for call instructions. */
8379 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8382 switch (GET_MODE_SIZE (GET_MODE (x
)))
8384 case 1: size
= "BYTE"; break;
8385 case 2: size
= "WORD"; break;
8386 case 4: size
= "DWORD"; break;
8387 case 8: size
= "QWORD"; break;
8388 case 12: size
= "XWORD"; break;
8389 case 16: size
= "XMMWORD"; break;
8394 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8397 else if (code
== 'w')
8399 else if (code
== 'k')
8403 fputs (" PTR ", file
);
8407 /* Avoid (%rip) for call operands. */
8408 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8409 && !CONST_INT_P (x
))
8410 output_addr_const (file
, x
);
8411 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8412 output_operand_lossage ("invalid constraints for operand");
8417 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8422 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8423 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8425 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8427 fprintf (file
, "0x%08lx", l
);
8430 /* These float cases don't actually occur as immediate operands. */
8431 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8435 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8436 fprintf (file
, "%s", dstr
);
8439 else if (GET_CODE (x
) == CONST_DOUBLE
8440 && GET_MODE (x
) == XFmode
)
8444 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8445 fprintf (file
, "%s", dstr
);
8450 /* We have patterns that allow zero sets of memory, for instance.
8451 In 64-bit mode, we should probably support all 8-byte vectors,
8452 since we can in fact encode that into an immediate. */
8453 if (GET_CODE (x
) == CONST_VECTOR
)
8455 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8461 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8463 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8466 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8467 || GET_CODE (x
) == LABEL_REF
)
8469 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8472 fputs ("OFFSET FLAT:", file
);
8475 if (CONST_INT_P (x
))
8476 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8478 output_pic_addr_const (file
, x
, code
);
8480 output_addr_const (file
, x
);
8484 /* Print a memory operand whose address is ADDR. */
8487 print_operand_address (FILE *file
, rtx addr
)
8489 struct ix86_address parts
;
8490 rtx base
, index
, disp
;
8492 int ok
= ix86_decompose_address (addr
, &parts
);
8497 index
= parts
.index
;
8499 scale
= parts
.scale
;
8507 if (USER_LABEL_PREFIX
[0] == 0)
8509 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8515 if (!base
&& !index
)
8517 /* Displacement only requires special attention. */
8519 if (CONST_INT_P (disp
))
8521 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8523 if (USER_LABEL_PREFIX
[0] == 0)
8525 fputs ("ds:", file
);
8527 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8530 output_pic_addr_const (file
, disp
, 0);
8532 output_addr_const (file
, disp
);
8534 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8537 if (GET_CODE (disp
) == CONST
8538 && GET_CODE (XEXP (disp
, 0)) == PLUS
8539 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8540 disp
= XEXP (XEXP (disp
, 0), 0);
8541 if (GET_CODE (disp
) == LABEL_REF
8542 || (GET_CODE (disp
) == SYMBOL_REF
8543 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8544 fputs ("(%rip)", file
);
8549 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8554 output_pic_addr_const (file
, disp
, 0);
8555 else if (GET_CODE (disp
) == LABEL_REF
)
8556 output_asm_label (disp
);
8558 output_addr_const (file
, disp
);
8563 print_reg (base
, 0, file
);
8567 print_reg (index
, 0, file
);
8569 fprintf (file
, ",%d", scale
);
8575 rtx offset
= NULL_RTX
;
8579 /* Pull out the offset of a symbol; print any symbol itself. */
8580 if (GET_CODE (disp
) == CONST
8581 && GET_CODE (XEXP (disp
, 0)) == PLUS
8582 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8584 offset
= XEXP (XEXP (disp
, 0), 1);
8585 disp
= gen_rtx_CONST (VOIDmode
,
8586 XEXP (XEXP (disp
, 0), 0));
8590 output_pic_addr_const (file
, disp
, 0);
8591 else if (GET_CODE (disp
) == LABEL_REF
)
8592 output_asm_label (disp
);
8593 else if (CONST_INT_P (disp
))
8596 output_addr_const (file
, disp
);
8602 print_reg (base
, 0, file
);
8605 if (INTVAL (offset
) >= 0)
8607 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8611 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8618 print_reg (index
, 0, file
);
8620 fprintf (file
, "*%d", scale
);
8628 output_addr_const_extra (FILE *file
, rtx x
)
8632 if (GET_CODE (x
) != UNSPEC
)
8635 op
= XVECEXP (x
, 0, 0);
8636 switch (XINT (x
, 1))
8638 case UNSPEC_GOTTPOFF
:
8639 output_addr_const (file
, op
);
8640 /* FIXME: This might be @TPOFF in Sun ld. */
8641 fputs ("@GOTTPOFF", file
);
8644 output_addr_const (file
, op
);
8645 fputs ("@TPOFF", file
);
8648 output_addr_const (file
, op
);
8650 fputs ("@TPOFF", file
);
8652 fputs ("@NTPOFF", file
);
8655 output_addr_const (file
, op
);
8656 fputs ("@DTPOFF", file
);
8658 case UNSPEC_GOTNTPOFF
:
8659 output_addr_const (file
, op
);
8661 fputs ("@GOTTPOFF(%rip)", file
);
8663 fputs ("@GOTNTPOFF", file
);
8665 case UNSPEC_INDNTPOFF
:
8666 output_addr_const (file
, op
);
8667 fputs ("@INDNTPOFF", file
);
8677 /* Split one or more DImode RTL references into pairs of SImode
8678 references. The RTL can be REG, offsettable MEM, integer constant, or
8679 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8680 split and "num" is its length. lo_half and hi_half are output arrays
8681 that parallel "operands". */
8684 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8688 rtx op
= operands
[num
];
8690 /* simplify_subreg refuse to split volatile memory addresses,
8691 but we still have to handle it. */
8694 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8695 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8699 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8700 GET_MODE (op
) == VOIDmode
8701 ? DImode
: GET_MODE (op
), 0);
8702 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8703 GET_MODE (op
) == VOIDmode
8704 ? DImode
: GET_MODE (op
), 4);
8708 /* Split one or more TImode RTL references into pairs of DImode
8709 references. The RTL can be REG, offsettable MEM, integer constant, or
8710 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8711 split and "num" is its length. lo_half and hi_half are output arrays
8712 that parallel "operands". */
8715 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8719 rtx op
= operands
[num
];
8721 /* simplify_subreg refuse to split volatile memory addresses, but we
8722 still have to handle it. */
8725 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8726 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8730 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8731 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8736 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8737 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8738 is the expression of the binary operation. The output may either be
8739 emitted here, or returned to the caller, like all output_* functions.
8741 There is no guarantee that the operands are the same mode, as they
8742 might be within FLOAT or FLOAT_EXTEND expressions. */
8744 #ifndef SYSV386_COMPAT
8745 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8746 wants to fix the assemblers because that causes incompatibility
8747 with gcc. No-one wants to fix gcc because that causes
8748 incompatibility with assemblers... You can use the option of
8749 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8750 #define SYSV386_COMPAT 1
8754 output_387_binary_op (rtx insn
, rtx
*operands
)
8756 static char buf
[30];
8759 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8761 #ifdef ENABLE_CHECKING
8762 /* Even if we do not want to check the inputs, this documents input
8763 constraints. Which helps in understanding the following code. */
8764 if (STACK_REG_P (operands
[0])
8765 && ((REG_P (operands
[1])
8766 && REGNO (operands
[0]) == REGNO (operands
[1])
8767 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8768 || (REG_P (operands
[2])
8769 && REGNO (operands
[0]) == REGNO (operands
[2])
8770 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8771 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8774 gcc_assert (is_sse
);
8777 switch (GET_CODE (operands
[3]))
8780 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8781 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8789 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8790 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8798 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8799 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8807 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8808 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8822 if (GET_MODE (operands
[0]) == SFmode
)
8823 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8825 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8830 switch (GET_CODE (operands
[3]))
8834 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8836 rtx temp
= operands
[2];
8837 operands
[2] = operands
[1];
8841 /* know operands[0] == operands[1]. */
8843 if (MEM_P (operands
[2]))
8849 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8851 if (STACK_TOP_P (operands
[0]))
8852 /* How is it that we are storing to a dead operand[2]?
8853 Well, presumably operands[1] is dead too. We can't
8854 store the result to st(0) as st(0) gets popped on this
8855 instruction. Instead store to operands[2] (which I
8856 think has to be st(1)). st(1) will be popped later.
8857 gcc <= 2.8.1 didn't have this check and generated
8858 assembly code that the Unixware assembler rejected. */
8859 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8861 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8865 if (STACK_TOP_P (operands
[0]))
8866 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8868 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8873 if (MEM_P (operands
[1]))
8879 if (MEM_P (operands
[2]))
8885 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8888 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8889 derived assemblers, confusingly reverse the direction of
8890 the operation for fsub{r} and fdiv{r} when the
8891 destination register is not st(0). The Intel assembler
8892 doesn't have this brain damage. Read !SYSV386_COMPAT to
8893 figure out what the hardware really does. */
8894 if (STACK_TOP_P (operands
[0]))
8895 p
= "{p\t%0, %2|rp\t%2, %0}";
8897 p
= "{rp\t%2, %0|p\t%0, %2}";
8899 if (STACK_TOP_P (operands
[0]))
8900 /* As above for fmul/fadd, we can't store to st(0). */
8901 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8903 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8908 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8911 if (STACK_TOP_P (operands
[0]))
8912 p
= "{rp\t%0, %1|p\t%1, %0}";
8914 p
= "{p\t%1, %0|rp\t%0, %1}";
8916 if (STACK_TOP_P (operands
[0]))
8917 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8919 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8924 if (STACK_TOP_P (operands
[0]))
8926 if (STACK_TOP_P (operands
[1]))
8927 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8929 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8932 else if (STACK_TOP_P (operands
[1]))
8935 p
= "{\t%1, %0|r\t%0, %1}";
8937 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8943 p
= "{r\t%2, %0|\t%0, %2}";
8945 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8958 /* Return needed mode for entity in optimize_mode_switching pass. */
8961 ix86_mode_needed (int entity
, rtx insn
)
8963 enum attr_i387_cw mode
;
8965 /* The mode UNINITIALIZED is used to store control word after a
8966 function call or ASM pattern. The mode ANY specify that function
8967 has no requirements on the control word and make no changes in the
8968 bits we are interested in. */
8971 || (NONJUMP_INSN_P (insn
)
8972 && (asm_noperands (PATTERN (insn
)) >= 0
8973 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8974 return I387_CW_UNINITIALIZED
;
8976 if (recog_memoized (insn
) < 0)
8979 mode
= get_attr_i387_cw (insn
);
8984 if (mode
== I387_CW_TRUNC
)
8989 if (mode
== I387_CW_FLOOR
)
8994 if (mode
== I387_CW_CEIL
)
8999 if (mode
== I387_CW_MASK_PM
)
9010 /* Output code to initialize control word copies used by trunc?f?i and
9011 rounding patterns. CURRENT_MODE is set to current control word,
9012 while NEW_MODE is set to new control word. */
9015 emit_i387_cw_initialization (int mode
)
9017 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9022 rtx reg
= gen_reg_rtx (HImode
);
9024 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9025 emit_move_insn (reg
, copy_rtx (stored_mode
));
9027 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9032 /* round toward zero (truncate) */
9033 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9034 slot
= SLOT_CW_TRUNC
;
9038 /* round down toward -oo */
9039 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9040 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9041 slot
= SLOT_CW_FLOOR
;
9045 /* round up toward +oo */
9046 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9047 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9048 slot
= SLOT_CW_CEIL
;
9051 case I387_CW_MASK_PM
:
9052 /* mask precision exception for nearbyint() */
9053 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9054 slot
= SLOT_CW_MASK_PM
;
9066 /* round toward zero (truncate) */
9067 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9068 slot
= SLOT_CW_TRUNC
;
9072 /* round down toward -oo */
9073 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9074 slot
= SLOT_CW_FLOOR
;
9078 /* round up toward +oo */
9079 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9080 slot
= SLOT_CW_CEIL
;
9083 case I387_CW_MASK_PM
:
9084 /* mask precision exception for nearbyint() */
9085 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9086 slot
= SLOT_CW_MASK_PM
;
9094 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9096 new_mode
= assign_386_stack_local (HImode
, slot
);
9097 emit_move_insn (new_mode
, reg
);
9100 /* Output code for INSN to convert a float to a signed int. OPERANDS
9101 are the insn operands. The output may be [HSD]Imode and the input
9102 operand may be [SDX]Fmode. */
9105 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9107 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9108 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9109 int round_mode
= get_attr_i387_cw (insn
);
9111 /* Jump through a hoop or two for DImode, since the hardware has no
9112 non-popping instruction. We used to do this a different way, but
9113 that was somewhat fragile and broke with post-reload splitters. */
9114 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9115 output_asm_insn ("fld\t%y1", operands
);
9117 gcc_assert (STACK_TOP_P (operands
[1]));
9118 gcc_assert (MEM_P (operands
[0]));
9121 output_asm_insn ("fisttp%z0\t%0", operands
);
9124 if (round_mode
!= I387_CW_ANY
)
9125 output_asm_insn ("fldcw\t%3", operands
);
9126 if (stack_top_dies
|| dimode_p
)
9127 output_asm_insn ("fistp%z0\t%0", operands
);
9129 output_asm_insn ("fist%z0\t%0", operands
);
9130 if (round_mode
!= I387_CW_ANY
)
9131 output_asm_insn ("fldcw\t%2", operands
);
9137 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9138 have the values zero or one, indicates the ffreep insn's operand
9139 from the OPERANDS array. */
9142 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9144 if (TARGET_USE_FFREEP
)
9145 #if HAVE_AS_IX86_FFREEP
9146 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9149 static char retval
[] = ".word\t0xc_df";
9150 int regno
= REGNO (operands
[opno
]);
9152 gcc_assert (FP_REGNO_P (regno
));
9154 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9159 return opno
? "fstp\t%y1" : "fstp\t%y0";
9163 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9164 should be used. UNORDERED_P is true when fucom should be used. */
9167 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9170 rtx cmp_op0
, cmp_op1
;
9171 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9175 cmp_op0
= operands
[0];
9176 cmp_op1
= operands
[1];
9180 cmp_op0
= operands
[1];
9181 cmp_op1
= operands
[2];
9186 if (GET_MODE (operands
[0]) == SFmode
)
9188 return "ucomiss\t{%1, %0|%0, %1}";
9190 return "comiss\t{%1, %0|%0, %1}";
9193 return "ucomisd\t{%1, %0|%0, %1}";
9195 return "comisd\t{%1, %0|%0, %1}";
9198 gcc_assert (STACK_TOP_P (cmp_op0
));
9200 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9202 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9206 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9207 return output_387_ffreep (operands
, 1);
9210 return "ftst\n\tfnstsw\t%0";
9213 if (STACK_REG_P (cmp_op1
)
9215 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9216 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9218 /* If both the top of the 387 stack dies, and the other operand
9219 is also a stack register that dies, then this must be a
9220 `fcompp' float compare */
9224 /* There is no double popping fcomi variant. Fortunately,
9225 eflags is immune from the fstp's cc clobbering. */
9227 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9229 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9230 return output_387_ffreep (operands
, 0);
9235 return "fucompp\n\tfnstsw\t%0";
9237 return "fcompp\n\tfnstsw\t%0";
9242 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9244 static const char * const alt
[16] =
9246 "fcom%z2\t%y2\n\tfnstsw\t%0",
9247 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9248 "fucom%z2\t%y2\n\tfnstsw\t%0",
9249 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9251 "ficom%z2\t%y2\n\tfnstsw\t%0",
9252 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9256 "fcomi\t{%y1, %0|%0, %y1}",
9257 "fcomip\t{%y1, %0|%0, %y1}",
9258 "fucomi\t{%y1, %0|%0, %y1}",
9259 "fucomip\t{%y1, %0|%0, %y1}",
9270 mask
= eflags_p
<< 3;
9271 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9272 mask
|= unordered_p
<< 1;
9273 mask
|= stack_top_dies
;
9275 gcc_assert (mask
< 16);
9284 ix86_output_addr_vec_elt (FILE *file
, int value
)
9286 const char *directive
= ASM_LONG
;
9290 directive
= ASM_QUAD
;
9292 gcc_assert (!TARGET_64BIT
);
9295 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9299 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9302 fprintf (file
, "%s%s%d-%s%d\n",
9303 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9304 else if (HAVE_AS_GOTOFF_IN_DATA
)
9305 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9307 else if (TARGET_MACHO
)
9309 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9310 machopic_output_function_base_name (file
);
9311 fprintf(file
, "\n");
9315 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9316 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9319 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9323 ix86_expand_clear (rtx dest
)
9327 /* We play register width games, which are only valid after reload. */
9328 gcc_assert (reload_completed
);
9330 /* Avoid HImode and its attendant prefix byte. */
9331 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9332 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9334 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9336 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9337 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9339 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9340 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9346 /* X is an unchanging MEM. If it is a constant pool reference, return
9347 the constant pool rtx, else NULL. */
9350 maybe_get_pool_constant (rtx x
)
9352 x
= ix86_delegitimize_address (XEXP (x
, 0));
9354 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9355 return get_pool_constant (x
);
9361 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9363 int strict
= (reload_in_progress
|| reload_completed
);
9365 enum tls_model model
;
9370 if (GET_CODE (op1
) == SYMBOL_REF
)
9372 model
= SYMBOL_REF_TLS_MODEL (op1
);
9375 op1
= legitimize_tls_address (op1
, model
, true);
9376 op1
= force_operand (op1
, op0
);
9381 else if (GET_CODE (op1
) == CONST
9382 && GET_CODE (XEXP (op1
, 0)) == PLUS
9383 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9385 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9388 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9389 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9390 op1
= force_operand (op1
, NULL
);
9391 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9392 op0
, 1, OPTAB_DIRECT
);
9398 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9400 if (TARGET_MACHO
&& !TARGET_64BIT
)
9405 rtx temp
= ((reload_in_progress
9406 || ((op0
&& REG_P (op0
))
9408 ? op0
: gen_reg_rtx (Pmode
));
9409 op1
= machopic_indirect_data_reference (op1
, temp
);
9410 op1
= machopic_legitimize_pic_address (op1
, mode
,
9411 temp
== op1
? 0 : temp
);
9413 else if (MACHOPIC_INDIRECT
)
9414 op1
= machopic_indirect_data_reference (op1
, 0);
9422 op1
= force_reg (Pmode
, op1
);
9424 op1
= legitimize_address (op1
, op1
, Pmode
);
9430 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9431 || !push_operand (op0
, mode
))
9433 op1
= force_reg (mode
, op1
);
9435 if (push_operand (op0
, mode
)
9436 && ! general_no_elim_operand (op1
, mode
))
9437 op1
= copy_to_mode_reg (mode
, op1
);
9439 /* Force large constants in 64bit compilation into register
9440 to get them CSEed. */
9441 if (TARGET_64BIT
&& mode
== DImode
9442 && immediate_operand (op1
, mode
)
9443 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9444 && !register_operand (op0
, mode
)
9445 && optimize
&& !reload_completed
&& !reload_in_progress
)
9446 op1
= copy_to_mode_reg (mode
, op1
);
9448 if (FLOAT_MODE_P (mode
))
9450 /* If we are loading a floating point constant to a register,
9451 force the value to memory now, since we'll get better code
9452 out the back end. */
9456 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9458 op1
= validize_mem (force_const_mem (mode
, op1
));
9459 if (!register_operand (op0
, mode
))
9461 rtx temp
= gen_reg_rtx (mode
);
9462 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9463 emit_move_insn (op0
, temp
);
9470 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9474 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9476 rtx op0
= operands
[0], op1
= operands
[1];
9478 /* Force constants other than zero into memory. We do not know how
9479 the instructions used to build constants modify the upper 64 bits
9480 of the register, once we have that information we may be able
9481 to handle some of them more efficiently. */
9482 if ((reload_in_progress
| reload_completed
) == 0
9483 && register_operand (op0
, mode
)
9485 && standard_sse_constant_p (op1
) <= 0)
9486 op1
= validize_mem (force_const_mem (mode
, op1
));
9488 /* Make operand1 a register if it isn't already. */
9490 && !register_operand (op0
, mode
)
9491 && !register_operand (op1
, mode
))
9493 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9497 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9500 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9501 straight to ix86_expand_vector_move. */
9504 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9513 /* If we're optimizing for size, movups is the smallest. */
9516 op0
= gen_lowpart (V4SFmode
, op0
);
9517 op1
= gen_lowpart (V4SFmode
, op1
);
9518 emit_insn (gen_sse_movups (op0
, op1
));
9522 /* ??? If we have typed data, then it would appear that using
9523 movdqu is the only way to get unaligned data loaded with
9525 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9527 op0
= gen_lowpart (V16QImode
, op0
);
9528 op1
= gen_lowpart (V16QImode
, op1
);
9529 emit_insn (gen_sse2_movdqu (op0
, op1
));
9533 if (TARGET_SSE2
&& mode
== V2DFmode
)
9537 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9539 op0
= gen_lowpart (V2DFmode
, op0
);
9540 op1
= gen_lowpart (V2DFmode
, op1
);
9541 emit_insn (gen_sse2_movupd (op0
, op1
));
9545 /* When SSE registers are split into halves, we can avoid
9546 writing to the top half twice. */
9547 if (TARGET_SSE_SPLIT_REGS
)
9549 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9554 /* ??? Not sure about the best option for the Intel chips.
9555 The following would seem to satisfy; the register is
9556 entirely cleared, breaking the dependency chain. We
9557 then store to the upper half, with a dependency depth
9558 of one. A rumor has it that Intel recommends two movsd
9559 followed by an unpacklpd, but this is unconfirmed. And
9560 given that the dependency depth of the unpacklpd would
9561 still be one, I'm not sure why this would be better. */
9562 zero
= CONST0_RTX (V2DFmode
);
9565 m
= adjust_address (op1
, DFmode
, 0);
9566 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9567 m
= adjust_address (op1
, DFmode
, 8);
9568 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9572 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9574 op0
= gen_lowpart (V4SFmode
, op0
);
9575 op1
= gen_lowpart (V4SFmode
, op1
);
9576 emit_insn (gen_sse_movups (op0
, op1
));
9580 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9581 emit_move_insn (op0
, CONST0_RTX (mode
));
9583 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9585 if (mode
!= V4SFmode
)
9586 op0
= gen_lowpart (V4SFmode
, op0
);
9587 m
= adjust_address (op1
, V2SFmode
, 0);
9588 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9589 m
= adjust_address (op1
, V2SFmode
, 8);
9590 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9593 else if (MEM_P (op0
))
9595 /* If we're optimizing for size, movups is the smallest. */
9598 op0
= gen_lowpart (V4SFmode
, op0
);
9599 op1
= gen_lowpart (V4SFmode
, op1
);
9600 emit_insn (gen_sse_movups (op0
, op1
));
9604 /* ??? Similar to above, only less clear because of quote
9605 typeless stores unquote. */
9606 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9607 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9609 op0
= gen_lowpart (V16QImode
, op0
);
9610 op1
= gen_lowpart (V16QImode
, op1
);
9611 emit_insn (gen_sse2_movdqu (op0
, op1
));
9615 if (TARGET_SSE2
&& mode
== V2DFmode
)
9617 m
= adjust_address (op0
, DFmode
, 0);
9618 emit_insn (gen_sse2_storelpd (m
, op1
));
9619 m
= adjust_address (op0
, DFmode
, 8);
9620 emit_insn (gen_sse2_storehpd (m
, op1
));
9624 if (mode
!= V4SFmode
)
9625 op1
= gen_lowpart (V4SFmode
, op1
);
9626 m
= adjust_address (op0
, V2SFmode
, 0);
9627 emit_insn (gen_sse_storelps (m
, op1
));
9628 m
= adjust_address (op0
, V2SFmode
, 8);
9629 emit_insn (gen_sse_storehps (m
, op1
));
9636 /* Expand a push in MODE. This is some mode for which we do not support
9637 proper push instructions, at least from the registers that we expect
9638 the value to live in. */
9641 ix86_expand_push (enum machine_mode mode
, rtx x
)
9645 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9646 GEN_INT (-GET_MODE_SIZE (mode
)),
9647 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9648 if (tmp
!= stack_pointer_rtx
)
9649 emit_move_insn (stack_pointer_rtx
, tmp
);
9651 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9652 emit_move_insn (tmp
, x
);
9655 /* Helper function of ix86_fixup_binary_operands to canonicalize
9656 operand order. Returns true if the operands should be swapped. */
9659 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9662 rtx dst
= operands
[0];
9663 rtx src1
= operands
[1];
9664 rtx src2
= operands
[2];
9666 /* If the operation is not commutative, we can't do anything. */
9667 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9670 /* Highest priority is that src1 should match dst. */
9671 if (rtx_equal_p (dst
, src1
))
9673 if (rtx_equal_p (dst
, src2
))
9676 /* Next highest priority is that immediate constants come second. */
9677 if (immediate_operand (src2
, mode
))
9679 if (immediate_operand (src1
, mode
))
9682 /* Lowest priority is that memory references should come second. */
9692 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9693 destination to use for the operation. If different from the true
9694 destination in operands[0], a copy operation will be required. */
9697 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9700 rtx dst
= operands
[0];
9701 rtx src1
= operands
[1];
9702 rtx src2
= operands
[2];
9704 /* Canonicalize operand order. */
9705 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9712 /* Both source operands cannot be in memory. */
9713 if (MEM_P (src1
) && MEM_P (src2
))
9715 /* Optimization: Only read from memory once. */
9716 if (rtx_equal_p (src1
, src2
))
9718 src2
= force_reg (mode
, src2
);
9722 src2
= force_reg (mode
, src2
);
9725 /* If the destination is memory, and we do not have matching source
9726 operands, do things in registers. */
9727 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9728 dst
= gen_reg_rtx (mode
);
9730 /* Source 1 cannot be a constant. */
9731 if (CONSTANT_P (src1
))
9732 src1
= force_reg (mode
, src1
);
9734 /* Source 1 cannot be a non-matching memory. */
9735 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9736 src1
= force_reg (mode
, src1
);
9743 /* Similarly, but assume that the destination has already been
9747 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9748 enum machine_mode mode
, rtx operands
[])
9750 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9751 gcc_assert (dst
== operands
[0]);
9754 /* Attempt to expand a binary operator. Make the expansion closer to the
9755 actual machine, then just general_operand, which will allow 3 separate
9756 memory references (one output, two input) in a single insn. */
9759 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9762 rtx src1
, src2
, dst
, op
, clob
;
9764 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9768 /* Emit the instruction. */
9770 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9771 if (reload_in_progress
)
9773 /* Reload doesn't know about the flags register, and doesn't know that
9774 it doesn't want to clobber it. We can only do this with PLUS. */
9775 gcc_assert (code
== PLUS
);
9780 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9781 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9784 /* Fix up the destination if needed. */
9785 if (dst
!= operands
[0])
9786 emit_move_insn (operands
[0], dst
);
9789 /* Return TRUE or FALSE depending on whether the binary operator meets the
9790 appropriate constraints. */
9793 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9796 rtx dst
= operands
[0];
9797 rtx src1
= operands
[1];
9798 rtx src2
= operands
[2];
9800 /* Both source operands cannot be in memory. */
9801 if (MEM_P (src1
) && MEM_P (src2
))
9804 /* Canonicalize operand order for commutative operators. */
9805 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9812 /* If the destination is memory, we must have a matching source operand. */
9813 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9816 /* Source 1 cannot be a constant. */
9817 if (CONSTANT_P (src1
))
9820 /* Source 1 cannot be a non-matching memory. */
9821 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9827 /* Attempt to expand a unary operator. Make the expansion closer to the
9828 actual machine, then just general_operand, which will allow 2 separate
9829 memory references (one output, one input) in a single insn. */
9832 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9835 int matching_memory
;
9836 rtx src
, dst
, op
, clob
;
9841 /* If the destination is memory, and we do not have matching source
9842 operands, do things in registers. */
9843 matching_memory
= 0;
9846 if (rtx_equal_p (dst
, src
))
9847 matching_memory
= 1;
9849 dst
= gen_reg_rtx (mode
);
9852 /* When source operand is memory, destination must match. */
9853 if (MEM_P (src
) && !matching_memory
)
9854 src
= force_reg (mode
, src
);
9856 /* Emit the instruction. */
9858 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9859 if (reload_in_progress
|| code
== NOT
)
9861 /* Reload doesn't know about the flags register, and doesn't know that
9862 it doesn't want to clobber it. */
9863 gcc_assert (code
== NOT
);
9868 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9869 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9872 /* Fix up the destination if needed. */
9873 if (dst
!= operands
[0])
9874 emit_move_insn (operands
[0], dst
);
9877 /* Return TRUE or FALSE depending on whether the unary operator meets the
9878 appropriate constraints. */
9881 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9882 enum machine_mode mode ATTRIBUTE_UNUSED
,
9883 rtx operands
[2] ATTRIBUTE_UNUSED
)
9885 /* If one of operands is memory, source and destination must match. */
9886 if ((MEM_P (operands
[0])
9887 || MEM_P (operands
[1]))
9888 && ! rtx_equal_p (operands
[0], operands
[1]))
9893 /* Post-reload splitter for converting an SF or DFmode value in an
9894 SSE register into an unsigned SImode. */
9897 ix86_split_convert_uns_si_sse (rtx operands
[])
9899 enum machine_mode vecmode
;
9900 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
9902 large
= operands
[1];
9903 zero_or_two31
= operands
[2];
9904 input
= operands
[3];
9905 two31
= operands
[4];
9906 vecmode
= GET_MODE (large
);
9907 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
9909 /* Load up the value into the low element. We must ensure that the other
9910 elements are valid floats -- zero is the easiest such value. */
9913 if (vecmode
== V4SFmode
)
9914 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
9916 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
9920 input
= gen_rtx_REG (vecmode
, REGNO (input
));
9921 emit_move_insn (value
, CONST0_RTX (vecmode
));
9922 if (vecmode
== V4SFmode
)
9923 emit_insn (gen_sse_movss (value
, value
, input
));
9925 emit_insn (gen_sse2_movsd (value
, value
, input
));
9928 emit_move_insn (large
, two31
);
9929 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
9931 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
9932 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
9934 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
9935 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
9937 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
9938 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
9940 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
9941 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
9943 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
9944 if (vecmode
== V4SFmode
)
9945 emit_insn (gen_sse2_cvttps2dq (x
, value
));
9947 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
9950 emit_insn (gen_xorv4si3 (value
, value
, large
));
9953 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9954 Expects the 64-bit DImode to be supplied in a pair of integral
9955 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9956 -mfpmath=sse, !optimize_size only. */
9959 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
9961 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
9962 rtx int_xmm
, fp_xmm
;
9963 rtx biases
, exponents
;
9966 int_xmm
= gen_reg_rtx (V4SImode
);
9967 if (TARGET_INTER_UNIT_MOVES
)
9968 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
9969 else if (TARGET_SSE_SPLIT_REGS
)
9971 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
9972 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
9976 x
= gen_reg_rtx (V2DImode
);
9977 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
9978 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
9981 x
= gen_rtx_CONST_VECTOR (V4SImode
,
9982 gen_rtvec (4, GEN_INT (0x43300000UL
),
9983 GEN_INT (0x45300000UL
),
9984 const0_rtx
, const0_rtx
));
9985 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
9987 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9988 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
9990 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9991 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9992 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9993 (0x1.0p84 + double(fp_value_hi_xmm)).
9994 Note these exponents differ by 32. */
9996 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
9998 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9999 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10000 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10001 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10002 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10003 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10004 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10005 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10006 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10008 /* Add the upper and lower DFmode values together. */
10010 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10013 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10014 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10015 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10018 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10021 /* Convert an unsigned SImode value into a DFmode. Only currently used
10022 for SSE, but applicable anywhere. */
10025 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10027 REAL_VALUE_TYPE TWO31r
;
10030 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10031 NULL
, 1, OPTAB_DIRECT
);
10033 fp
= gen_reg_rtx (DFmode
);
10034 emit_insn (gen_floatsidf2 (fp
, x
));
10036 real_ldexp (&TWO31r
, &dconst1
, 31);
10037 x
= const_double_from_real_value (TWO31r
, DFmode
);
10039 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10041 emit_move_insn (target
, x
);
10044 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10045 32-bit mode; otherwise we have a direct convert instruction. */
10048 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10050 REAL_VALUE_TYPE TWO32r
;
10051 rtx fp_lo
, fp_hi
, x
;
10053 fp_lo
= gen_reg_rtx (DFmode
);
10054 fp_hi
= gen_reg_rtx (DFmode
);
10056 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10058 real_ldexp (&TWO32r
, &dconst1
, 32);
10059 x
= const_double_from_real_value (TWO32r
, DFmode
);
10060 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10062 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10064 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10067 emit_move_insn (target
, x
);
10070 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10071 For x86_32, -mfpmath=sse, !optimize_size only. */
10073 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10075 REAL_VALUE_TYPE ONE16r
;
10076 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10078 real_ldexp (&ONE16r
, &dconst1
, 16);
10079 x
= const_double_from_real_value (ONE16r
, SFmode
);
10080 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10081 NULL
, 0, OPTAB_DIRECT
);
10082 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10083 NULL
, 0, OPTAB_DIRECT
);
10084 fp_hi
= gen_reg_rtx (SFmode
);
10085 fp_lo
= gen_reg_rtx (SFmode
);
10086 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10087 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10088 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10090 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10092 if (!rtx_equal_p (target
, fp_hi
))
10093 emit_move_insn (target
, fp_hi
);
10096 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10097 then replicate the value for all elements of the vector
10101 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10108 v
= gen_rtvec (4, value
, value
, value
, value
);
10110 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10111 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10112 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10116 v
= gen_rtvec (2, value
, value
);
10118 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10119 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10122 gcc_unreachable ();
10126 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10127 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10128 true, then replicate the mask for all elements of the vector register.
10129 If INVERT is true, then create a mask excluding the sign bit. */
10132 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10134 enum machine_mode vec_mode
;
10135 HOST_WIDE_INT hi
, lo
;
10140 /* Find the sign bit, sign extended to 2*HWI. */
10141 if (mode
== SFmode
)
10142 lo
= 0x80000000, hi
= lo
< 0;
10143 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10144 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10146 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10149 lo
= ~lo
, hi
= ~hi
;
10151 /* Force this value into the low part of a fp vector constant. */
10152 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10153 mask
= gen_lowpart (mode
, mask
);
10155 v
= ix86_build_const_vector (mode
, vect
, mask
);
10156 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10157 return force_reg (vec_mode
, v
);
10160 /* Generate code for floating point ABS or NEG. */
10163 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10166 rtx mask
, set
, use
, clob
, dst
, src
;
10167 bool matching_memory
;
10168 bool use_sse
= false;
10169 bool vector_mode
= VECTOR_MODE_P (mode
);
10170 enum machine_mode elt_mode
= mode
;
10174 elt_mode
= GET_MODE_INNER (mode
);
10177 else if (TARGET_SSE_MATH
)
10178 use_sse
= SSE_FLOAT_MODE_P (mode
);
10180 /* NEG and ABS performed with SSE use bitwise mask operations.
10181 Create the appropriate mask now. */
10183 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10190 /* If the destination is memory, and we don't have matching source
10191 operands or we're using the x87, do things in registers. */
10192 matching_memory
= false;
10195 if (use_sse
&& rtx_equal_p (dst
, src
))
10196 matching_memory
= true;
10198 dst
= gen_reg_rtx (mode
);
10200 if (MEM_P (src
) && !matching_memory
)
10201 src
= force_reg (mode
, src
);
10205 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10206 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10211 set
= gen_rtx_fmt_e (code
, mode
, src
);
10212 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10215 use
= gen_rtx_USE (VOIDmode
, mask
);
10216 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10217 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10218 gen_rtvec (3, set
, use
, clob
)));
10224 if (dst
!= operands
[0])
10225 emit_move_insn (operands
[0], dst
);
10228 /* Expand a copysign operation. Special case operand 0 being a constant. */
10231 ix86_expand_copysign (rtx operands
[])
10233 enum machine_mode mode
, vmode
;
10234 rtx dest
, op0
, op1
, mask
, nmask
;
10236 dest
= operands
[0];
10240 mode
= GET_MODE (dest
);
10241 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10243 if (GET_CODE (op0
) == CONST_DOUBLE
)
10247 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10248 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10250 if (op0
== CONST0_RTX (mode
))
10251 op0
= CONST0_RTX (vmode
);
10254 if (mode
== SFmode
)
10255 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10256 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10258 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10259 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10262 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10264 if (mode
== SFmode
)
10265 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10267 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10271 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10272 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10274 if (mode
== SFmode
)
10275 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10277 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10281 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10282 be a constant, and so has already been expanded into a vector constant. */
10285 ix86_split_copysign_const (rtx operands
[])
10287 enum machine_mode mode
, vmode
;
10288 rtx dest
, op0
, op1
, mask
, x
;
10290 dest
= operands
[0];
10293 mask
= operands
[3];
10295 mode
= GET_MODE (dest
);
10296 vmode
= GET_MODE (mask
);
10298 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10299 x
= gen_rtx_AND (vmode
, dest
, mask
);
10300 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10302 if (op0
!= CONST0_RTX (vmode
))
10304 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10305 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10309 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10310 so we have to do two masks. */
10313 ix86_split_copysign_var (rtx operands
[])
10315 enum machine_mode mode
, vmode
;
10316 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10318 dest
= operands
[0];
10319 scratch
= operands
[1];
10322 nmask
= operands
[4];
10323 mask
= operands
[5];
10325 mode
= GET_MODE (dest
);
10326 vmode
= GET_MODE (mask
);
10328 if (rtx_equal_p (op0
, op1
))
10330 /* Shouldn't happen often (it's useless, obviously), but when it does
10331 we'd generate incorrect code if we continue below. */
10332 emit_move_insn (dest
, op0
);
10336 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10338 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10340 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10341 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10344 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10345 x
= gen_rtx_NOT (vmode
, dest
);
10346 x
= gen_rtx_AND (vmode
, x
, op0
);
10347 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10351 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10353 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10355 else /* alternative 2,4 */
10357 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10358 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10359 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10361 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10363 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10365 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10366 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10368 else /* alternative 3,4 */
10370 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10372 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10373 x
= gen_rtx_AND (vmode
, dest
, op0
);
10375 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10378 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10379 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10382 /* Return TRUE or FALSE depending on whether the first SET in INSN
10383 has source and destination with matching CC modes, and that the
10384 CC mode is at least as constrained as REQ_MODE. */
10387 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10390 enum machine_mode set_mode
;
10392 set
= PATTERN (insn
);
10393 if (GET_CODE (set
) == PARALLEL
)
10394 set
= XVECEXP (set
, 0, 0);
10395 gcc_assert (GET_CODE (set
) == SET
);
10396 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10398 set_mode
= GET_MODE (SET_DEST (set
));
10402 if (req_mode
!= CCNOmode
10403 && (req_mode
!= CCmode
10404 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10408 if (req_mode
== CCGCmode
)
10412 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10416 if (req_mode
== CCZmode
)
10423 gcc_unreachable ();
10426 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10429 /* Generate insn patterns to do an integer compare of OPERANDS. */
10432 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10434 enum machine_mode cmpmode
;
10437 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10438 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10440 /* This is very simple, but making the interface the same as in the
10441 FP case makes the rest of the code easier. */
10442 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10443 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10445 /* Return the test that should be put into the flags user, i.e.
10446 the bcc, scc, or cmov instruction. */
10447 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10450 /* Figure out whether to use ordered or unordered fp comparisons.
10451 Return the appropriate mode to use. */
10454 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10456 /* ??? In order to make all comparisons reversible, we do all comparisons
10457 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10458 all forms trapping and nontrapping comparisons, we can make inequality
10459 comparisons trapping again, since it results in better code when using
10460 FCOM based compares. */
10461 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10465 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10467 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10468 return ix86_fp_compare_mode (code
);
10471 /* Only zero flag is needed. */
10472 case EQ
: /* ZF=0 */
10473 case NE
: /* ZF!=0 */
10475 /* Codes needing carry flag. */
10476 case GEU
: /* CF=0 */
10477 case GTU
: /* CF=0 & ZF=0 */
10478 case LTU
: /* CF=1 */
10479 case LEU
: /* CF=1 | ZF=1 */
10481 /* Codes possibly doable only with sign flag when
10482 comparing against zero. */
10483 case GE
: /* SF=OF or SF=0 */
10484 case LT
: /* SF<>OF or SF=1 */
10485 if (op1
== const0_rtx
)
10488 /* For other cases Carry flag is not required. */
10490 /* Codes doable only with sign flag when comparing
10491 against zero, but we miss jump instruction for it
10492 so we need to use relational tests against overflow
10493 that thus needs to be zero. */
10494 case GT
: /* ZF=0 & SF=OF */
10495 case LE
: /* ZF=1 | SF<>OF */
10496 if (op1
== const0_rtx
)
10500 /* strcmp pattern do (use flags) and combine may ask us for proper
10505 gcc_unreachable ();
10509 /* Return the fixed registers used for condition codes. */
10512 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10519 /* If two condition code modes are compatible, return a condition code
10520 mode which is compatible with both. Otherwise, return
10523 static enum machine_mode
10524 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10529 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10532 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10533 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10539 gcc_unreachable ();
10561 /* These are only compatible with themselves, which we already
10567 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10570 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10572 enum rtx_code swapped_code
= swap_condition (code
);
10573 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10574 || (ix86_fp_comparison_cost (swapped_code
)
10575 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10578 /* Swap, force into registers, or otherwise massage the two operands
10579 to a fp comparison. The operands are updated in place; the new
10580 comparison code is returned. */
10582 static enum rtx_code
10583 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10585 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10586 rtx op0
= *pop0
, op1
= *pop1
;
10587 enum machine_mode op_mode
= GET_MODE (op0
);
10588 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10590 /* All of the unordered compare instructions only work on registers.
10591 The same is true of the fcomi compare instructions. The XFmode
10592 compare instructions require registers except when comparing
10593 against zero or when converting operand 1 from fixed point to
10597 && (fpcmp_mode
== CCFPUmode
10598 || (op_mode
== XFmode
10599 && ! (standard_80387_constant_p (op0
) == 1
10600 || standard_80387_constant_p (op1
) == 1)
10601 && GET_CODE (op1
) != FLOAT
)
10602 || ix86_use_fcomi_compare (code
)))
10604 op0
= force_reg (op_mode
, op0
);
10605 op1
= force_reg (op_mode
, op1
);
10609 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10610 things around if they appear profitable, otherwise force op0
10611 into a register. */
10613 if (standard_80387_constant_p (op0
) == 0
10615 && ! (standard_80387_constant_p (op1
) == 0
10619 tmp
= op0
, op0
= op1
, op1
= tmp
;
10620 code
= swap_condition (code
);
10624 op0
= force_reg (op_mode
, op0
);
10626 if (CONSTANT_P (op1
))
10628 int tmp
= standard_80387_constant_p (op1
);
10630 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10634 op1
= force_reg (op_mode
, op1
);
10637 op1
= force_reg (op_mode
, op1
);
10641 /* Try to rearrange the comparison to make it cheaper. */
10642 if (ix86_fp_comparison_cost (code
)
10643 > ix86_fp_comparison_cost (swap_condition (code
))
10644 && (REG_P (op1
) || !no_new_pseudos
))
10647 tmp
= op0
, op0
= op1
, op1
= tmp
;
10648 code
= swap_condition (code
);
10650 op0
= force_reg (op_mode
, op0
);
10658 /* Convert comparison codes we use to represent FP comparison to integer
10659 code that will result in proper branch. Return UNKNOWN if no such code
10663 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10692 /* Split comparison code CODE into comparisons we can do using branch
10693 instructions. BYPASS_CODE is comparison code for branch that will
10694 branch around FIRST_CODE and SECOND_CODE. If some of branches
10695 is not required, set value to UNKNOWN.
10696 We never require more than two branches. */
10699 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10700 enum rtx_code
*first_code
,
10701 enum rtx_code
*second_code
)
10703 *first_code
= code
;
10704 *bypass_code
= UNKNOWN
;
10705 *second_code
= UNKNOWN
;
10707 /* The fcomi comparison sets flags as follows:
10717 case GT
: /* GTU - CF=0 & ZF=0 */
10718 case GE
: /* GEU - CF=0 */
10719 case ORDERED
: /* PF=0 */
10720 case UNORDERED
: /* PF=1 */
10721 case UNEQ
: /* EQ - ZF=1 */
10722 case UNLT
: /* LTU - CF=1 */
10723 case UNLE
: /* LEU - CF=1 | ZF=1 */
10724 case LTGT
: /* EQ - ZF=0 */
10726 case LT
: /* LTU - CF=1 - fails on unordered */
10727 *first_code
= UNLT
;
10728 *bypass_code
= UNORDERED
;
10730 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10731 *first_code
= UNLE
;
10732 *bypass_code
= UNORDERED
;
10734 case EQ
: /* EQ - ZF=1 - fails on unordered */
10735 *first_code
= UNEQ
;
10736 *bypass_code
= UNORDERED
;
10738 case NE
: /* NE - ZF=0 - fails on unordered */
10739 *first_code
= LTGT
;
10740 *second_code
= UNORDERED
;
10742 case UNGE
: /* GEU - CF=0 - fails on unordered */
10744 *second_code
= UNORDERED
;
10746 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10748 *second_code
= UNORDERED
;
10751 gcc_unreachable ();
10753 if (!TARGET_IEEE_FP
)
10755 *second_code
= UNKNOWN
;
10756 *bypass_code
= UNKNOWN
;
10760 /* Return cost of comparison done fcom + arithmetics operations on AX.
10761 All following functions do use number of instructions as a cost metrics.
10762 In future this should be tweaked to compute bytes for optimize_size and
10763 take into account performance of various instructions on various CPUs. */
10765 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10767 if (!TARGET_IEEE_FP
)
10769 /* The cost of code output by ix86_expand_fp_compare. */
10793 gcc_unreachable ();
10797 /* Return cost of comparison done using fcomi operation.
10798 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10800 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10802 enum rtx_code bypass_code
, first_code
, second_code
;
10803 /* Return arbitrarily high cost when instruction is not supported - this
10804 prevents gcc from using it. */
10807 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10808 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10811 /* Return cost of comparison done using sahf operation.
10812 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10814 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10816 enum rtx_code bypass_code
, first_code
, second_code
;
10817 /* Return arbitrarily high cost when instruction is not preferred - this
10818 avoids gcc from using it. */
10819 if (!TARGET_USE_SAHF
&& !optimize_size
)
10821 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10822 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10825 /* Compute cost of the comparison done using any method.
10826 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10828 ix86_fp_comparison_cost (enum rtx_code code
)
10830 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10833 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10834 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10836 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10837 if (min
> sahf_cost
)
10839 if (min
> fcomi_cost
)
10844 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10847 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10848 rtx
*second_test
, rtx
*bypass_test
)
10850 enum machine_mode fpcmp_mode
, intcmp_mode
;
10852 int cost
= ix86_fp_comparison_cost (code
);
10853 enum rtx_code bypass_code
, first_code
, second_code
;
10855 fpcmp_mode
= ix86_fp_compare_mode (code
);
10856 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10859 *second_test
= NULL_RTX
;
10861 *bypass_test
= NULL_RTX
;
10863 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10865 /* Do fcomi/sahf based test when profitable. */
10866 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10867 && (second_code
== UNKNOWN
|| second_test
)
10868 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10872 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10873 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10879 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10880 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10882 scratch
= gen_reg_rtx (HImode
);
10883 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10884 emit_insn (gen_x86_sahf_1 (scratch
));
10887 /* The FP codes work out to act like unsigned. */
10888 intcmp_mode
= fpcmp_mode
;
10890 if (bypass_code
!= UNKNOWN
)
10891 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10892 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10894 if (second_code
!= UNKNOWN
)
10895 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10896 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10901 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10902 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10903 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10905 scratch
= gen_reg_rtx (HImode
);
10906 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10908 /* In the unordered case, we have to check C2 for NaN's, which
10909 doesn't happen to work out to anything nice combination-wise.
10910 So do some bit twiddling on the value we've got in AH to come
10911 up with an appropriate set of condition codes. */
10913 intcmp_mode
= CCNOmode
;
10918 if (code
== GT
|| !TARGET_IEEE_FP
)
10920 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10925 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10926 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10927 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10928 intcmp_mode
= CCmode
;
10934 if (code
== LT
&& TARGET_IEEE_FP
)
10936 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10937 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10938 intcmp_mode
= CCmode
;
10943 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10949 if (code
== GE
|| !TARGET_IEEE_FP
)
10951 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10956 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10957 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10964 if (code
== LE
&& TARGET_IEEE_FP
)
10966 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10967 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10968 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10969 intcmp_mode
= CCmode
;
10974 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10980 if (code
== EQ
&& TARGET_IEEE_FP
)
10982 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10983 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10984 intcmp_mode
= CCmode
;
10989 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10996 if (code
== NE
&& TARGET_IEEE_FP
)
10998 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10999 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11005 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11011 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11015 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11020 gcc_unreachable ();
11024 /* Return the test that should be put into the flags user, i.e.
11025 the bcc, scc, or cmov instruction. */
11026 return gen_rtx_fmt_ee (code
, VOIDmode
,
11027 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11032 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11035 op0
= ix86_compare_op0
;
11036 op1
= ix86_compare_op1
;
11039 *second_test
= NULL_RTX
;
11041 *bypass_test
= NULL_RTX
;
11043 if (ix86_compare_emitted
)
11045 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11046 ix86_compare_emitted
= NULL_RTX
;
11048 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11049 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11050 second_test
, bypass_test
);
11052 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11057 /* Return true if the CODE will result in nontrivial jump sequence. */
11059 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11061 enum rtx_code bypass_code
, first_code
, second_code
;
11064 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11065 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11069 ix86_expand_branch (enum rtx_code code
, rtx label
)
11073 /* If we have emitted a compare insn, go straight to simple.
11074 ix86_expand_compare won't emit anything if ix86_compare_emitted
11076 if (ix86_compare_emitted
)
11079 switch (GET_MODE (ix86_compare_op0
))
11085 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11086 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11087 gen_rtx_LABEL_REF (VOIDmode
, label
),
11089 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11098 enum rtx_code bypass_code
, first_code
, second_code
;
11100 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11101 &ix86_compare_op1
);
11103 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11105 /* Check whether we will use the natural sequence with one jump. If
11106 so, we can expand jump early. Otherwise delay expansion by
11107 creating compound insn to not confuse optimizers. */
11108 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11111 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11112 gen_rtx_LABEL_REF (VOIDmode
, label
),
11113 pc_rtx
, NULL_RTX
, NULL_RTX
);
11117 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11118 ix86_compare_op0
, ix86_compare_op1
);
11119 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11120 gen_rtx_LABEL_REF (VOIDmode
, label
),
11122 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11124 use_fcomi
= ix86_use_fcomi_compare (code
);
11125 vec
= rtvec_alloc (3 + !use_fcomi
);
11126 RTVEC_ELT (vec
, 0) = tmp
;
11128 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11130 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11133 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11135 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11144 /* Expand DImode branch into multiple compare+branch. */
11146 rtx lo
[2], hi
[2], label2
;
11147 enum rtx_code code1
, code2
, code3
;
11148 enum machine_mode submode
;
11150 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11152 tmp
= ix86_compare_op0
;
11153 ix86_compare_op0
= ix86_compare_op1
;
11154 ix86_compare_op1
= tmp
;
11155 code
= swap_condition (code
);
11157 if (GET_MODE (ix86_compare_op0
) == DImode
)
11159 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11160 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11165 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11166 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11170 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11171 avoid two branches. This costs one extra insn, so disable when
11172 optimizing for size. */
11174 if ((code
== EQ
|| code
== NE
)
11176 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11181 if (hi
[1] != const0_rtx
)
11182 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11183 NULL_RTX
, 0, OPTAB_WIDEN
);
11186 if (lo
[1] != const0_rtx
)
11187 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11188 NULL_RTX
, 0, OPTAB_WIDEN
);
11190 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11191 NULL_RTX
, 0, OPTAB_WIDEN
);
11193 ix86_compare_op0
= tmp
;
11194 ix86_compare_op1
= const0_rtx
;
11195 ix86_expand_branch (code
, label
);
11199 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11200 op1 is a constant and the low word is zero, then we can just
11201 examine the high word. */
11203 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11206 case LT
: case LTU
: case GE
: case GEU
:
11207 ix86_compare_op0
= hi
[0];
11208 ix86_compare_op1
= hi
[1];
11209 ix86_expand_branch (code
, label
);
11215 /* Otherwise, we need two or three jumps. */
11217 label2
= gen_label_rtx ();
11220 code2
= swap_condition (code
);
11221 code3
= unsigned_condition (code
);
11225 case LT
: case GT
: case LTU
: case GTU
:
11228 case LE
: code1
= LT
; code2
= GT
; break;
11229 case GE
: code1
= GT
; code2
= LT
; break;
11230 case LEU
: code1
= LTU
; code2
= GTU
; break;
11231 case GEU
: code1
= GTU
; code2
= LTU
; break;
11233 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11234 case NE
: code2
= UNKNOWN
; break;
11237 gcc_unreachable ();
11242 * if (hi(a) < hi(b)) goto true;
11243 * if (hi(a) > hi(b)) goto false;
11244 * if (lo(a) < lo(b)) goto true;
11248 ix86_compare_op0
= hi
[0];
11249 ix86_compare_op1
= hi
[1];
11251 if (code1
!= UNKNOWN
)
11252 ix86_expand_branch (code1
, label
);
11253 if (code2
!= UNKNOWN
)
11254 ix86_expand_branch (code2
, label2
);
11256 ix86_compare_op0
= lo
[0];
11257 ix86_compare_op1
= lo
[1];
11258 ix86_expand_branch (code3
, label
);
11260 if (code2
!= UNKNOWN
)
11261 emit_label (label2
);
11266 gcc_unreachable ();
11270 /* Split branch based on floating point condition. */
11272 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11273 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11275 rtx second
, bypass
;
11276 rtx label
= NULL_RTX
;
11278 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11281 if (target2
!= pc_rtx
)
11284 code
= reverse_condition_maybe_unordered (code
);
11289 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11290 tmp
, &second
, &bypass
);
11292 /* Remove pushed operand from stack. */
11294 ix86_free_from_memory (GET_MODE (pushed
));
11296 if (split_branch_probability
>= 0)
11298 /* Distribute the probabilities across the jumps.
11299 Assume the BYPASS and SECOND to be always test
11301 probability
= split_branch_probability
;
11303 /* Value of 1 is low enough to make no need for probability
11304 to be updated. Later we may run some experiments and see
11305 if unordered values are more frequent in practice. */
11307 bypass_probability
= 1;
11309 second_probability
= 1;
11311 if (bypass
!= NULL_RTX
)
11313 label
= gen_label_rtx ();
11314 i
= emit_jump_insn (gen_rtx_SET
11316 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11318 gen_rtx_LABEL_REF (VOIDmode
,
11321 if (bypass_probability
>= 0)
11323 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11324 GEN_INT (bypass_probability
),
11327 i
= emit_jump_insn (gen_rtx_SET
11329 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11330 condition
, target1
, target2
)));
11331 if (probability
>= 0)
11333 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11334 GEN_INT (probability
),
11336 if (second
!= NULL_RTX
)
11338 i
= emit_jump_insn (gen_rtx_SET
11340 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11342 if (second_probability
>= 0)
11344 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11345 GEN_INT (second_probability
),
11348 if (label
!= NULL_RTX
)
11349 emit_label (label
);
11353 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11355 rtx ret
, tmp
, tmpreg
, equiv
;
11356 rtx second_test
, bypass_test
;
11358 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11359 return 0; /* FAIL */
11361 gcc_assert (GET_MODE (dest
) == QImode
);
11363 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11364 PUT_MODE (ret
, QImode
);
11369 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11370 if (bypass_test
|| second_test
)
11372 rtx test
= second_test
;
11374 rtx tmp2
= gen_reg_rtx (QImode
);
11377 gcc_assert (!second_test
);
11378 test
= bypass_test
;
11380 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11382 PUT_MODE (test
, QImode
);
11383 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11386 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11388 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11391 /* Attach a REG_EQUAL note describing the comparison result. */
11392 if (ix86_compare_op0
&& ix86_compare_op1
)
11394 equiv
= simplify_gen_relational (code
, QImode
,
11395 GET_MODE (ix86_compare_op0
),
11396 ix86_compare_op0
, ix86_compare_op1
);
11397 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11400 return 1; /* DONE */
11403 /* Expand comparison setting or clearing carry flag. Return true when
11404 successful and set pop for the operation. */
11406 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11408 enum machine_mode mode
=
11409 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11411 /* Do not handle DImode compares that go through special path. Also we can't
11412 deal with FP compares yet. This is possible to add. */
11413 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11415 if (FLOAT_MODE_P (mode
))
11417 rtx second_test
= NULL
, bypass_test
= NULL
;
11418 rtx compare_op
, compare_seq
;
11420 /* Shortcut: following common codes never translate into carry flag compares. */
11421 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11422 || code
== ORDERED
|| code
== UNORDERED
)
11425 /* These comparisons require zero flag; swap operands so they won't. */
11426 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11427 && !TARGET_IEEE_FP
)
11432 code
= swap_condition (code
);
11435 /* Try to expand the comparison and verify that we end up with carry flag
11436 based comparison. This is fails to be true only when we decide to expand
11437 comparison using arithmetic that is not too common scenario. */
11439 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11440 &second_test
, &bypass_test
);
11441 compare_seq
= get_insns ();
11444 if (second_test
|| bypass_test
)
11446 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11447 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11448 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11450 code
= GET_CODE (compare_op
);
11451 if (code
!= LTU
&& code
!= GEU
)
11453 emit_insn (compare_seq
);
11457 if (!INTEGRAL_MODE_P (mode
))
11465 /* Convert a==0 into (unsigned)a<1. */
11468 if (op1
!= const0_rtx
)
11471 code
= (code
== EQ
? LTU
: GEU
);
11474 /* Convert a>b into b<a or a>=b-1. */
11477 if (CONST_INT_P (op1
))
11479 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11480 /* Bail out on overflow. We still can swap operands but that
11481 would force loading of the constant into register. */
11482 if (op1
== const0_rtx
11483 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11485 code
= (code
== GTU
? GEU
: LTU
);
11492 code
= (code
== GTU
? LTU
: GEU
);
11496 /* Convert a>=0 into (unsigned)a<0x80000000. */
11499 if (mode
== DImode
|| op1
!= const0_rtx
)
11501 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11502 code
= (code
== LT
? GEU
: LTU
);
11506 if (mode
== DImode
|| op1
!= constm1_rtx
)
11508 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11509 code
= (code
== LE
? GEU
: LTU
);
11515 /* Swapping operands may cause constant to appear as first operand. */
11516 if (!nonimmediate_operand (op0
, VOIDmode
))
11518 if (no_new_pseudos
)
11520 op0
= force_reg (mode
, op0
);
11522 ix86_compare_op0
= op0
;
11523 ix86_compare_op1
= op1
;
11524 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11525 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11530 ix86_expand_int_movcc (rtx operands
[])
11532 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11533 rtx compare_seq
, compare_op
;
11534 rtx second_test
, bypass_test
;
11535 enum machine_mode mode
= GET_MODE (operands
[0]);
11536 bool sign_bit_compare_p
= false;;
11539 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11540 compare_seq
= get_insns ();
11543 compare_code
= GET_CODE (compare_op
);
11545 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11546 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11547 sign_bit_compare_p
= true;
11549 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11550 HImode insns, we'd be swallowed in word prefix ops. */
11552 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11553 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11554 && CONST_INT_P (operands
[2])
11555 && CONST_INT_P (operands
[3]))
11557 rtx out
= operands
[0];
11558 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11559 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11560 HOST_WIDE_INT diff
;
11563 /* Sign bit compares are better done using shifts than we do by using
11565 if (sign_bit_compare_p
11566 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11567 ix86_compare_op1
, &compare_op
))
11569 /* Detect overlap between destination and compare sources. */
11572 if (!sign_bit_compare_p
)
11574 bool fpcmp
= false;
11576 compare_code
= GET_CODE (compare_op
);
11578 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11579 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11582 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11585 /* To simplify rest of code, restrict to the GEU case. */
11586 if (compare_code
== LTU
)
11588 HOST_WIDE_INT tmp
= ct
;
11591 compare_code
= reverse_condition (compare_code
);
11592 code
= reverse_condition (code
);
11597 PUT_CODE (compare_op
,
11598 reverse_condition_maybe_unordered
11599 (GET_CODE (compare_op
)));
11601 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11605 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11606 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11607 tmp
= gen_reg_rtx (mode
);
11609 if (mode
== DImode
)
11610 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11612 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11616 if (code
== GT
|| code
== GE
)
11617 code
= reverse_condition (code
);
11620 HOST_WIDE_INT tmp
= ct
;
11625 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11626 ix86_compare_op1
, VOIDmode
, 0, -1);
11639 tmp
= expand_simple_binop (mode
, PLUS
,
11641 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11652 tmp
= expand_simple_binop (mode
, IOR
,
11654 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11656 else if (diff
== -1 && ct
)
11666 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11668 tmp
= expand_simple_binop (mode
, PLUS
,
11669 copy_rtx (tmp
), GEN_INT (cf
),
11670 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11678 * andl cf - ct, dest
11688 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11691 tmp
= expand_simple_binop (mode
, AND
,
11693 gen_int_mode (cf
- ct
, mode
),
11694 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11696 tmp
= expand_simple_binop (mode
, PLUS
,
11697 copy_rtx (tmp
), GEN_INT (ct
),
11698 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11701 if (!rtx_equal_p (tmp
, out
))
11702 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11704 return 1; /* DONE */
11710 tmp
= ct
, ct
= cf
, cf
= tmp
;
11712 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11714 /* We may be reversing unordered compare to normal compare, that
11715 is not valid in general (we may convert non-trapping condition
11716 to trapping one), however on i386 we currently emit all
11717 comparisons unordered. */
11718 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11719 code
= reverse_condition_maybe_unordered (code
);
11723 compare_code
= reverse_condition (compare_code
);
11724 code
= reverse_condition (code
);
11728 compare_code
= UNKNOWN
;
11729 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11730 && CONST_INT_P (ix86_compare_op1
))
11732 if (ix86_compare_op1
== const0_rtx
11733 && (code
== LT
|| code
== GE
))
11734 compare_code
= code
;
11735 else if (ix86_compare_op1
== constm1_rtx
)
11739 else if (code
== GT
)
11744 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11745 if (compare_code
!= UNKNOWN
11746 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11747 && (cf
== -1 || ct
== -1))
11749 /* If lea code below could be used, only optimize
11750 if it results in a 2 insn sequence. */
11752 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11753 || diff
== 3 || diff
== 5 || diff
== 9)
11754 || (compare_code
== LT
&& ct
== -1)
11755 || (compare_code
== GE
&& cf
== -1))
11758 * notl op1 (if necessary)
11766 code
= reverse_condition (code
);
11769 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11770 ix86_compare_op1
, VOIDmode
, 0, -1);
11772 out
= expand_simple_binop (mode
, IOR
,
11774 out
, 1, OPTAB_DIRECT
);
11775 if (out
!= operands
[0])
11776 emit_move_insn (operands
[0], out
);
11778 return 1; /* DONE */
11783 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11784 || diff
== 3 || diff
== 5 || diff
== 9)
11785 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11787 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11793 * lea cf(dest*(ct-cf)),dest
11797 * This also catches the degenerate setcc-only case.
11803 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11804 ix86_compare_op1
, VOIDmode
, 0, 1);
11807 /* On x86_64 the lea instruction operates on Pmode, so we need
11808 to get arithmetics done in proper mode to match. */
11810 tmp
= copy_rtx (out
);
11814 out1
= copy_rtx (out
);
11815 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11819 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11825 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11828 if (!rtx_equal_p (tmp
, out
))
11831 out
= force_operand (tmp
, copy_rtx (out
));
11833 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11835 if (!rtx_equal_p (out
, operands
[0]))
11836 emit_move_insn (operands
[0], copy_rtx (out
));
11838 return 1; /* DONE */
11842 * General case: Jumpful:
11843 * xorl dest,dest cmpl op1, op2
11844 * cmpl op1, op2 movl ct, dest
11845 * setcc dest jcc 1f
11846 * decl dest movl cf, dest
11847 * andl (cf-ct),dest 1:
11850 * Size 20. Size 14.
11852 * This is reasonably steep, but branch mispredict costs are
11853 * high on modern cpus, so consider failing only if optimizing
11857 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11858 && BRANCH_COST
>= 2)
11864 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11865 /* We may be reversing unordered compare to normal compare,
11866 that is not valid in general (we may convert non-trapping
11867 condition to trapping one), however on i386 we currently
11868 emit all comparisons unordered. */
11869 code
= reverse_condition_maybe_unordered (code
);
11872 code
= reverse_condition (code
);
11873 if (compare_code
!= UNKNOWN
)
11874 compare_code
= reverse_condition (compare_code
);
11878 if (compare_code
!= UNKNOWN
)
11880 /* notl op1 (if needed)
11885 For x < 0 (resp. x <= -1) there will be no notl,
11886 so if possible swap the constants to get rid of the
11888 True/false will be -1/0 while code below (store flag
11889 followed by decrement) is 0/-1, so the constants need
11890 to be exchanged once more. */
11892 if (compare_code
== GE
|| !cf
)
11894 code
= reverse_condition (code
);
11899 HOST_WIDE_INT tmp
= cf
;
11904 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11905 ix86_compare_op1
, VOIDmode
, 0, -1);
11909 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11910 ix86_compare_op1
, VOIDmode
, 0, 1);
11912 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11913 copy_rtx (out
), 1, OPTAB_DIRECT
);
11916 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11917 gen_int_mode (cf
- ct
, mode
),
11918 copy_rtx (out
), 1, OPTAB_DIRECT
);
11920 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11921 copy_rtx (out
), 1, OPTAB_DIRECT
);
11922 if (!rtx_equal_p (out
, operands
[0]))
11923 emit_move_insn (operands
[0], copy_rtx (out
));
11925 return 1; /* DONE */
11929 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11931 /* Try a few things more with specific constants and a variable. */
11934 rtx var
, orig_out
, out
, tmp
;
11936 if (BRANCH_COST
<= 2)
11937 return 0; /* FAIL */
11939 /* If one of the two operands is an interesting constant, load a
11940 constant with the above and mask it in with a logical operation. */
11942 if (CONST_INT_P (operands
[2]))
11945 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11946 operands
[3] = constm1_rtx
, op
= and_optab
;
11947 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11948 operands
[3] = const0_rtx
, op
= ior_optab
;
11950 return 0; /* FAIL */
11952 else if (CONST_INT_P (operands
[3]))
11955 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11956 operands
[2] = constm1_rtx
, op
= and_optab
;
11957 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11958 operands
[2] = const0_rtx
, op
= ior_optab
;
11960 return 0; /* FAIL */
11963 return 0; /* FAIL */
11965 orig_out
= operands
[0];
11966 tmp
= gen_reg_rtx (mode
);
11969 /* Recurse to get the constant loaded. */
11970 if (ix86_expand_int_movcc (operands
) == 0)
11971 return 0; /* FAIL */
11973 /* Mask in the interesting variable. */
11974 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11976 if (!rtx_equal_p (out
, orig_out
))
11977 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11979 return 1; /* DONE */
11983 * For comparison with above,
11993 if (! nonimmediate_operand (operands
[2], mode
))
11994 operands
[2] = force_reg (mode
, operands
[2]);
11995 if (! nonimmediate_operand (operands
[3], mode
))
11996 operands
[3] = force_reg (mode
, operands
[3]);
11998 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12000 rtx tmp
= gen_reg_rtx (mode
);
12001 emit_move_insn (tmp
, operands
[3]);
12004 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12006 rtx tmp
= gen_reg_rtx (mode
);
12007 emit_move_insn (tmp
, operands
[2]);
12011 if (! register_operand (operands
[2], VOIDmode
)
12013 || ! register_operand (operands
[3], VOIDmode
)))
12014 operands
[2] = force_reg (mode
, operands
[2]);
12017 && ! register_operand (operands
[3], VOIDmode
))
12018 operands
[3] = force_reg (mode
, operands
[3]);
12020 emit_insn (compare_seq
);
12021 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12022 gen_rtx_IF_THEN_ELSE (mode
,
12023 compare_op
, operands
[2],
12026 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12027 gen_rtx_IF_THEN_ELSE (mode
,
12029 copy_rtx (operands
[3]),
12030 copy_rtx (operands
[0]))));
12032 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12033 gen_rtx_IF_THEN_ELSE (mode
,
12035 copy_rtx (operands
[2]),
12036 copy_rtx (operands
[0]))));
12038 return 1; /* DONE */
12041 /* Swap, force into registers, or otherwise massage the two operands
12042 to an sse comparison with a mask result. Thus we differ a bit from
12043 ix86_prepare_fp_compare_args which expects to produce a flags result.
12045 The DEST operand exists to help determine whether to commute commutative
12046 operators. The POP0/POP1 operands are updated in place. The new
12047 comparison code is returned, or UNKNOWN if not implementable. */
12049 static enum rtx_code
12050 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12051 rtx
*pop0
, rtx
*pop1
)
12059 /* We have no LTGT as an operator. We could implement it with
12060 NE & ORDERED, but this requires an extra temporary. It's
12061 not clear that it's worth it. */
12068 /* These are supported directly. */
12075 /* For commutative operators, try to canonicalize the destination
12076 operand to be first in the comparison - this helps reload to
12077 avoid extra moves. */
12078 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12086 /* These are not supported directly. Swap the comparison operands
12087 to transform into something that is supported. */
12091 code
= swap_condition (code
);
12095 gcc_unreachable ();
12101 /* Detect conditional moves that exactly match min/max operational
12102 semantics. Note that this is IEEE safe, as long as we don't
12103 interchange the operands.
12105 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12106 and TRUE if the operation is successful and instructions are emitted. */
12109 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12110 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12112 enum machine_mode mode
;
12118 else if (code
== UNGE
)
12121 if_true
= if_false
;
12127 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12129 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12134 mode
= GET_MODE (dest
);
12136 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12137 but MODE may be a vector mode and thus not appropriate. */
12138 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12140 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12143 if_true
= force_reg (mode
, if_true
);
12144 v
= gen_rtvec (2, if_true
, if_false
);
12145 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12149 code
= is_min
? SMIN
: SMAX
;
12150 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12153 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12157 /* Expand an sse vector comparison. Return the register with the result. */
12160 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12161 rtx op_true
, rtx op_false
)
12163 enum machine_mode mode
= GET_MODE (dest
);
12166 cmp_op0
= force_reg (mode
, cmp_op0
);
12167 if (!nonimmediate_operand (cmp_op1
, mode
))
12168 cmp_op1
= force_reg (mode
, cmp_op1
);
12171 || reg_overlap_mentioned_p (dest
, op_true
)
12172 || reg_overlap_mentioned_p (dest
, op_false
))
12173 dest
= gen_reg_rtx (mode
);
12175 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12176 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12181 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12182 operations. This is used for both scalar and vector conditional moves. */
12185 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12187 enum machine_mode mode
= GET_MODE (dest
);
12190 if (op_false
== CONST0_RTX (mode
))
12192 op_true
= force_reg (mode
, op_true
);
12193 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12194 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12196 else if (op_true
== CONST0_RTX (mode
))
12198 op_false
= force_reg (mode
, op_false
);
12199 x
= gen_rtx_NOT (mode
, cmp
);
12200 x
= gen_rtx_AND (mode
, x
, op_false
);
12201 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12205 op_true
= force_reg (mode
, op_true
);
12206 op_false
= force_reg (mode
, op_false
);
12208 t2
= gen_reg_rtx (mode
);
12210 t3
= gen_reg_rtx (mode
);
12214 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12215 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12217 x
= gen_rtx_NOT (mode
, cmp
);
12218 x
= gen_rtx_AND (mode
, x
, op_false
);
12219 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12221 x
= gen_rtx_IOR (mode
, t3
, t2
);
12222 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12226 /* Expand a floating-point conditional move. Return true if successful. */
12229 ix86_expand_fp_movcc (rtx operands
[])
12231 enum machine_mode mode
= GET_MODE (operands
[0]);
12232 enum rtx_code code
= GET_CODE (operands
[1]);
12233 rtx tmp
, compare_op
, second_test
, bypass_test
;
12235 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12237 enum machine_mode cmode
;
12239 /* Since we've no cmove for sse registers, don't force bad register
12240 allocation just to gain access to it. Deny movcc when the
12241 comparison mode doesn't match the move mode. */
12242 cmode
= GET_MODE (ix86_compare_op0
);
12243 if (cmode
== VOIDmode
)
12244 cmode
= GET_MODE (ix86_compare_op1
);
12248 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12250 &ix86_compare_op1
);
12251 if (code
== UNKNOWN
)
12254 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12255 ix86_compare_op1
, operands
[2],
12259 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12260 ix86_compare_op1
, operands
[2], operands
[3]);
12261 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12265 /* The floating point conditional move instructions don't directly
12266 support conditions resulting from a signed integer comparison. */
12268 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12270 /* The floating point conditional move instructions don't directly
12271 support signed integer comparisons. */
12273 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12275 gcc_assert (!second_test
&& !bypass_test
);
12276 tmp
= gen_reg_rtx (QImode
);
12277 ix86_expand_setcc (code
, tmp
);
12279 ix86_compare_op0
= tmp
;
12280 ix86_compare_op1
= const0_rtx
;
12281 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12283 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12285 tmp
= gen_reg_rtx (mode
);
12286 emit_move_insn (tmp
, operands
[3]);
12289 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12291 tmp
= gen_reg_rtx (mode
);
12292 emit_move_insn (tmp
, operands
[2]);
12296 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12297 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12298 operands
[2], operands
[3])));
12300 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12301 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12302 operands
[3], operands
[0])));
12304 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12305 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12306 operands
[2], operands
[0])));
12311 /* Expand a floating-point vector conditional move; a vcond operation
12312 rather than a movcc operation. */
12315 ix86_expand_fp_vcond (rtx operands
[])
12317 enum rtx_code code
= GET_CODE (operands
[3]);
12320 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12321 &operands
[4], &operands
[5]);
12322 if (code
== UNKNOWN
)
12325 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12326 operands
[5], operands
[1], operands
[2]))
12329 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12330 operands
[1], operands
[2]);
12331 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12335 /* Expand a signed integral vector conditional move. */
12338 ix86_expand_int_vcond (rtx operands
[])
12340 enum machine_mode mode
= GET_MODE (operands
[0]);
12341 enum rtx_code code
= GET_CODE (operands
[3]);
12342 bool negate
= false;
12345 cop0
= operands
[4];
12346 cop1
= operands
[5];
12348 /* Canonicalize the comparison to EQ, GT, GTU. */
12359 code
= reverse_condition (code
);
12365 code
= reverse_condition (code
);
12371 code
= swap_condition (code
);
12372 x
= cop0
, cop0
= cop1
, cop1
= x
;
12376 gcc_unreachable ();
12379 /* Unsigned parallel compare is not supported by the hardware. Play some
12380 tricks to turn this into a signed comparison against 0. */
12383 cop0
= force_reg (mode
, cop0
);
12391 /* Perform a parallel modulo subtraction. */
12392 t1
= gen_reg_rtx (mode
);
12393 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12395 /* Extract the original sign bit of op0. */
12396 mask
= GEN_INT (-0x80000000);
12397 mask
= gen_rtx_CONST_VECTOR (mode
,
12398 gen_rtvec (4, mask
, mask
, mask
, mask
));
12399 mask
= force_reg (mode
, mask
);
12400 t2
= gen_reg_rtx (mode
);
12401 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12403 /* XOR it back into the result of the subtraction. This results
12404 in the sign bit set iff we saw unsigned underflow. */
12405 x
= gen_reg_rtx (mode
);
12406 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12414 /* Perform a parallel unsigned saturating subtraction. */
12415 x
= gen_reg_rtx (mode
);
12416 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12417 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12424 gcc_unreachable ();
12428 cop1
= CONST0_RTX (mode
);
12431 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12432 operands
[1+negate
], operands
[2-negate
]);
12434 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12435 operands
[2-negate
]);
12439 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12440 true if we should do zero extension, else sign extension. HIGH_P is
12441 true if we want the N/2 high elements, else the low elements. */
12444 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12446 enum machine_mode imode
= GET_MODE (operands
[1]);
12447 rtx (*unpack
)(rtx
, rtx
, rtx
);
12454 unpack
= gen_vec_interleave_highv16qi
;
12456 unpack
= gen_vec_interleave_lowv16qi
;
12460 unpack
= gen_vec_interleave_highv8hi
;
12462 unpack
= gen_vec_interleave_lowv8hi
;
12466 unpack
= gen_vec_interleave_highv4si
;
12468 unpack
= gen_vec_interleave_lowv4si
;
12471 gcc_unreachable ();
12474 dest
= gen_lowpart (imode
, operands
[0]);
12477 se
= force_reg (imode
, CONST0_RTX (imode
));
12479 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12480 operands
[1], pc_rtx
, pc_rtx
);
12482 emit_insn (unpack (dest
, operands
[1], se
));
12485 /* Expand conditional increment or decrement using adb/sbb instructions.
12486 The default case using setcc followed by the conditional move can be
12487 done by generic code. */
12489 ix86_expand_int_addcc (rtx operands
[])
12491 enum rtx_code code
= GET_CODE (operands
[1]);
12493 rtx val
= const0_rtx
;
12494 bool fpcmp
= false;
12495 enum machine_mode mode
= GET_MODE (operands
[0]);
12497 if (operands
[3] != const1_rtx
12498 && operands
[3] != constm1_rtx
)
12500 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12501 ix86_compare_op1
, &compare_op
))
12503 code
= GET_CODE (compare_op
);
12505 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12506 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12509 code
= ix86_fp_compare_code_to_integer (code
);
12516 PUT_CODE (compare_op
,
12517 reverse_condition_maybe_unordered
12518 (GET_CODE (compare_op
)));
12520 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12522 PUT_MODE (compare_op
, mode
);
12524 /* Construct either adc or sbb insn. */
12525 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12527 switch (GET_MODE (operands
[0]))
12530 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12533 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12536 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12539 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12542 gcc_unreachable ();
12547 switch (GET_MODE (operands
[0]))
12550 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12553 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12556 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12559 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12562 gcc_unreachable ();
12565 return 1; /* DONE */
12569 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12570 works for floating pointer parameters and nonoffsetable memories.
12571 For pushes, it returns just stack offsets; the values will be saved
12572 in the right order. Maximally three parts are generated. */
12575 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12580 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12582 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12584 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12585 gcc_assert (size
>= 2 && size
<= 3);
12587 /* Optimize constant pool reference to immediates. This is used by fp
12588 moves, that force all constants to memory to allow combining. */
12589 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12591 rtx tmp
= maybe_get_pool_constant (operand
);
12596 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12598 /* The only non-offsetable memories we handle are pushes. */
12599 int ok
= push_operand (operand
, VOIDmode
);
12603 operand
= copy_rtx (operand
);
12604 PUT_MODE (operand
, Pmode
);
12605 parts
[0] = parts
[1] = parts
[2] = operand
;
12609 if (GET_CODE (operand
) == CONST_VECTOR
)
12611 enum machine_mode imode
= int_mode_for_mode (mode
);
12612 /* Caution: if we looked through a constant pool memory above,
12613 the operand may actually have a different mode now. That's
12614 ok, since we want to pun this all the way back to an integer. */
12615 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12616 gcc_assert (operand
!= NULL
);
12622 if (mode
== DImode
)
12623 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12626 if (REG_P (operand
))
12628 gcc_assert (reload_completed
);
12629 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12630 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12632 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12634 else if (offsettable_memref_p (operand
))
12636 operand
= adjust_address (operand
, SImode
, 0);
12637 parts
[0] = operand
;
12638 parts
[1] = adjust_address (operand
, SImode
, 4);
12640 parts
[2] = adjust_address (operand
, SImode
, 8);
12642 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12647 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12651 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12652 parts
[2] = gen_int_mode (l
[2], SImode
);
12655 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12658 gcc_unreachable ();
12660 parts
[1] = gen_int_mode (l
[1], SImode
);
12661 parts
[0] = gen_int_mode (l
[0], SImode
);
12664 gcc_unreachable ();
12669 if (mode
== TImode
)
12670 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12671 if (mode
== XFmode
|| mode
== TFmode
)
12673 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12674 if (REG_P (operand
))
12676 gcc_assert (reload_completed
);
12677 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12678 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12680 else if (offsettable_memref_p (operand
))
12682 operand
= adjust_address (operand
, DImode
, 0);
12683 parts
[0] = operand
;
12684 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12686 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12691 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12692 real_to_target (l
, &r
, mode
);
12694 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12695 if (HOST_BITS_PER_WIDE_INT
>= 64)
12698 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12699 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12702 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12704 if (upper_mode
== SImode
)
12705 parts
[1] = gen_int_mode (l
[2], SImode
);
12706 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12709 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12710 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12713 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12716 gcc_unreachable ();
12723 /* Emit insns to perform a move or push of DI, DF, and XF values.
12724 Return false when normal moves are needed; true when all required
12725 insns have been emitted. Operands 2-4 contain the input values
12726 int the correct order; operands 5-7 contain the output values. */
12729 ix86_split_long_move (rtx operands
[])
12734 int collisions
= 0;
12735 enum machine_mode mode
= GET_MODE (operands
[0]);
12737 /* The DFmode expanders may ask us to move double.
12738 For 64bit target this is single move. By hiding the fact
12739 here we simplify i386.md splitters. */
12740 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12742 /* Optimize constant pool reference to immediates. This is used by
12743 fp moves, that force all constants to memory to allow combining. */
12745 if (MEM_P (operands
[1])
12746 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12747 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12748 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12749 if (push_operand (operands
[0], VOIDmode
))
12751 operands
[0] = copy_rtx (operands
[0]);
12752 PUT_MODE (operands
[0], Pmode
);
12755 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12756 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12757 emit_move_insn (operands
[0], operands
[1]);
12761 /* The only non-offsettable memory we handle is push. */
12762 if (push_operand (operands
[0], VOIDmode
))
12765 gcc_assert (!MEM_P (operands
[0])
12766 || offsettable_memref_p (operands
[0]));
12768 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12769 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12771 /* When emitting push, take care for source operands on the stack. */
12772 if (push
&& MEM_P (operands
[1])
12773 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12776 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12777 XEXP (part
[1][2], 0));
12778 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12779 XEXP (part
[1][1], 0));
12782 /* We need to do copy in the right order in case an address register
12783 of the source overlaps the destination. */
12784 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12786 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12788 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12791 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12794 /* Collision in the middle part can be handled by reordering. */
12795 if (collisions
== 1 && nparts
== 3
12796 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12799 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12800 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12803 /* If there are more collisions, we can't handle it by reordering.
12804 Do an lea to the last part and use only one colliding move. */
12805 else if (collisions
> 1)
12811 base
= part
[0][nparts
- 1];
12813 /* Handle the case when the last part isn't valid for lea.
12814 Happens in 64-bit mode storing the 12-byte XFmode. */
12815 if (GET_MODE (base
) != Pmode
)
12816 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12818 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12819 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12820 part
[1][1] = replace_equiv_address (part
[1][1],
12821 plus_constant (base
, UNITS_PER_WORD
));
12823 part
[1][2] = replace_equiv_address (part
[1][2],
12824 plus_constant (base
, 8));
12834 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12835 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12836 emit_move_insn (part
[0][2], part
[1][2]);
12841 /* In 64bit mode we don't have 32bit push available. In case this is
12842 register, it is OK - we will just use larger counterpart. We also
12843 retype memory - these comes from attempt to avoid REX prefix on
12844 moving of second half of TFmode value. */
12845 if (GET_MODE (part
[1][1]) == SImode
)
12847 switch (GET_CODE (part
[1][1]))
12850 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12854 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12858 gcc_unreachable ();
12861 if (GET_MODE (part
[1][0]) == SImode
)
12862 part
[1][0] = part
[1][1];
12865 emit_move_insn (part
[0][1], part
[1][1]);
12866 emit_move_insn (part
[0][0], part
[1][0]);
12870 /* Choose correct order to not overwrite the source before it is copied. */
12871 if ((REG_P (part
[0][0])
12872 && REG_P (part
[1][1])
12873 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12875 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12877 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12881 operands
[2] = part
[0][2];
12882 operands
[3] = part
[0][1];
12883 operands
[4] = part
[0][0];
12884 operands
[5] = part
[1][2];
12885 operands
[6] = part
[1][1];
12886 operands
[7] = part
[1][0];
12890 operands
[2] = part
[0][1];
12891 operands
[3] = part
[0][0];
12892 operands
[5] = part
[1][1];
12893 operands
[6] = part
[1][0];
12900 operands
[2] = part
[0][0];
12901 operands
[3] = part
[0][1];
12902 operands
[4] = part
[0][2];
12903 operands
[5] = part
[1][0];
12904 operands
[6] = part
[1][1];
12905 operands
[7] = part
[1][2];
12909 operands
[2] = part
[0][0];
12910 operands
[3] = part
[0][1];
12911 operands
[5] = part
[1][0];
12912 operands
[6] = part
[1][1];
12916 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12919 if (CONST_INT_P (operands
[5])
12920 && operands
[5] != const0_rtx
12921 && REG_P (operands
[2]))
12923 if (CONST_INT_P (operands
[6])
12924 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12925 operands
[6] = operands
[2];
12928 && CONST_INT_P (operands
[7])
12929 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12930 operands
[7] = operands
[2];
12934 && CONST_INT_P (operands
[6])
12935 && operands
[6] != const0_rtx
12936 && REG_P (operands
[3])
12937 && CONST_INT_P (operands
[7])
12938 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12939 operands
[7] = operands
[3];
12942 emit_move_insn (operands
[2], operands
[5]);
12943 emit_move_insn (operands
[3], operands
[6]);
12945 emit_move_insn (operands
[4], operands
[7]);
12950 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12951 left shift by a constant, either using a single shift or
12952 a sequence of add instructions. */
12955 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12959 emit_insn ((mode
== DImode
12961 : gen_adddi3
) (operand
, operand
, operand
));
12963 else if (!optimize_size
12964 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12967 for (i
=0; i
<count
; i
++)
12969 emit_insn ((mode
== DImode
12971 : gen_adddi3
) (operand
, operand
, operand
));
12975 emit_insn ((mode
== DImode
12977 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12981 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12983 rtx low
[2], high
[2];
12985 const int single_width
= mode
== DImode
? 32 : 64;
12987 if (CONST_INT_P (operands
[2]))
12989 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12990 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12992 if (count
>= single_width
)
12994 emit_move_insn (high
[0], low
[1]);
12995 emit_move_insn (low
[0], const0_rtx
);
12997 if (count
> single_width
)
12998 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13002 if (!rtx_equal_p (operands
[0], operands
[1]))
13003 emit_move_insn (operands
[0], operands
[1]);
13004 emit_insn ((mode
== DImode
13006 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13007 ix86_expand_ashl_const (low
[0], count
, mode
);
13012 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13014 if (operands
[1] == const1_rtx
)
13016 /* Assuming we've chosen a QImode capable registers, then 1 << N
13017 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13018 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13020 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13022 ix86_expand_clear (low
[0]);
13023 ix86_expand_clear (high
[0]);
13024 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13026 d
= gen_lowpart (QImode
, low
[0]);
13027 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13028 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13029 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13031 d
= gen_lowpart (QImode
, high
[0]);
13032 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13033 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13034 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13037 /* Otherwise, we can get the same results by manually performing
13038 a bit extract operation on bit 5/6, and then performing the two
13039 shifts. The two methods of getting 0/1 into low/high are exactly
13040 the same size. Avoiding the shift in the bit extract case helps
13041 pentium4 a bit; no one else seems to care much either way. */
13046 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13047 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13049 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13050 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13052 emit_insn ((mode
== DImode
13054 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13055 emit_insn ((mode
== DImode
13057 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13058 emit_move_insn (low
[0], high
[0]);
13059 emit_insn ((mode
== DImode
13061 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13064 emit_insn ((mode
== DImode
13066 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13067 emit_insn ((mode
== DImode
13069 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13073 if (operands
[1] == constm1_rtx
)
13075 /* For -1 << N, we can avoid the shld instruction, because we
13076 know that we're shifting 0...31/63 ones into a -1. */
13077 emit_move_insn (low
[0], constm1_rtx
);
13079 emit_move_insn (high
[0], low
[0]);
13081 emit_move_insn (high
[0], constm1_rtx
);
13085 if (!rtx_equal_p (operands
[0], operands
[1]))
13086 emit_move_insn (operands
[0], operands
[1]);
13088 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13089 emit_insn ((mode
== DImode
13091 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13094 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13096 if (TARGET_CMOVE
&& scratch
)
13098 ix86_expand_clear (scratch
);
13099 emit_insn ((mode
== DImode
13100 ? gen_x86_shift_adj_1
13101 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13104 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13108 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13110 rtx low
[2], high
[2];
13112 const int single_width
= mode
== DImode
? 32 : 64;
13114 if (CONST_INT_P (operands
[2]))
13116 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13117 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13119 if (count
== single_width
* 2 - 1)
13121 emit_move_insn (high
[0], high
[1]);
13122 emit_insn ((mode
== DImode
13124 : gen_ashrdi3
) (high
[0], high
[0],
13125 GEN_INT (single_width
- 1)));
13126 emit_move_insn (low
[0], high
[0]);
13129 else if (count
>= single_width
)
13131 emit_move_insn (low
[0], high
[1]);
13132 emit_move_insn (high
[0], low
[0]);
13133 emit_insn ((mode
== DImode
13135 : gen_ashrdi3
) (high
[0], high
[0],
13136 GEN_INT (single_width
- 1)));
13137 if (count
> single_width
)
13138 emit_insn ((mode
== DImode
13140 : gen_ashrdi3
) (low
[0], low
[0],
13141 GEN_INT (count
- single_width
)));
13145 if (!rtx_equal_p (operands
[0], operands
[1]))
13146 emit_move_insn (operands
[0], operands
[1]);
13147 emit_insn ((mode
== DImode
13149 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13150 emit_insn ((mode
== DImode
13152 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13157 if (!rtx_equal_p (operands
[0], operands
[1]))
13158 emit_move_insn (operands
[0], operands
[1]);
13160 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13162 emit_insn ((mode
== DImode
13164 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13165 emit_insn ((mode
== DImode
13167 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13169 if (TARGET_CMOVE
&& scratch
)
13171 emit_move_insn (scratch
, high
[0]);
13172 emit_insn ((mode
== DImode
13174 : gen_ashrdi3
) (scratch
, scratch
,
13175 GEN_INT (single_width
- 1)));
13176 emit_insn ((mode
== DImode
13177 ? gen_x86_shift_adj_1
13178 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13182 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13187 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13189 rtx low
[2], high
[2];
13191 const int single_width
= mode
== DImode
? 32 : 64;
13193 if (CONST_INT_P (operands
[2]))
13195 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13196 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13198 if (count
>= single_width
)
13200 emit_move_insn (low
[0], high
[1]);
13201 ix86_expand_clear (high
[0]);
13203 if (count
> single_width
)
13204 emit_insn ((mode
== DImode
13206 : gen_lshrdi3
) (low
[0], low
[0],
13207 GEN_INT (count
- single_width
)));
13211 if (!rtx_equal_p (operands
[0], operands
[1]))
13212 emit_move_insn (operands
[0], operands
[1]);
13213 emit_insn ((mode
== DImode
13215 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13216 emit_insn ((mode
== DImode
13218 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13223 if (!rtx_equal_p (operands
[0], operands
[1]))
13224 emit_move_insn (operands
[0], operands
[1]);
13226 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13228 emit_insn ((mode
== DImode
13230 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13231 emit_insn ((mode
== DImode
13233 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13235 /* Heh. By reversing the arguments, we can reuse this pattern. */
13236 if (TARGET_CMOVE
&& scratch
)
13238 ix86_expand_clear (scratch
);
13239 emit_insn ((mode
== DImode
13240 ? gen_x86_shift_adj_1
13241 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13245 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13249 /* Predict just emitted jump instruction to be taken with probability PROB. */
13251 predict_jump (int prob
)
13253 rtx insn
= get_last_insn ();
13254 gcc_assert (JUMP_P (insn
));
13256 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13261 /* Helper function for the string operations below. Dest VARIABLE whether
13262 it is aligned to VALUE bytes. If true, jump to the label. */
13264 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13266 rtx label
= gen_label_rtx ();
13267 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13268 if (GET_MODE (variable
) == DImode
)
13269 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13271 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13272 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13275 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13277 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13281 /* Adjust COUNTER by the VALUE. */
13283 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13285 if (GET_MODE (countreg
) == DImode
)
13286 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13288 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13291 /* Zero extend possibly SImode EXP to Pmode register. */
13293 ix86_zero_extend_to_Pmode (rtx exp
)
13296 if (GET_MODE (exp
) == VOIDmode
)
13297 return force_reg (Pmode
, exp
);
13298 if (GET_MODE (exp
) == Pmode
)
13299 return copy_to_mode_reg (Pmode
, exp
);
13300 r
= gen_reg_rtx (Pmode
);
13301 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13305 /* Divide COUNTREG by SCALE. */
13307 scale_counter (rtx countreg
, int scale
)
13310 rtx piece_size_mask
;
13314 if (CONST_INT_P (countreg
))
13315 return GEN_INT (INTVAL (countreg
) / scale
);
13316 gcc_assert (REG_P (countreg
));
13318 piece_size_mask
= GEN_INT (scale
- 1);
13319 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13320 GEN_INT (exact_log2 (scale
)),
13321 NULL
, 1, OPTAB_DIRECT
);
13325 /* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
13326 for constant loop counts. */
13328 static enum machine_mode
13329 counter_mode (rtx count_exp
)
13331 if (GET_MODE (count_exp
) != VOIDmode
)
13332 return GET_MODE (count_exp
);
13333 if (GET_CODE (count_exp
) != CONST_INT
)
13335 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13340 /* When SRCPTR is non-NULL, output simple loop to move memory
13341 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13342 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13343 equivalent loop to set memory by VALUE (supposed to be in MODE).
13345 The size is rounded down to whole number of chunk size moved at once.
13346 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13350 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13351 rtx destptr
, rtx srcptr
, rtx value
,
13352 rtx count
, enum machine_mode mode
, int unroll
,
13355 rtx out_label
, top_label
, iter
, tmp
;
13356 enum machine_mode iter_mode
= counter_mode (count
);
13357 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13358 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13364 top_label
= gen_label_rtx ();
13365 out_label
= gen_label_rtx ();
13366 iter
= gen_reg_rtx (iter_mode
);
13368 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13369 NULL
, 1, OPTAB_DIRECT
);
13370 /* Those two should combine. */
13371 if (piece_size
== const1_rtx
)
13373 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13375 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13377 emit_move_insn (iter
, const0_rtx
);
13379 emit_label (top_label
);
13381 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13382 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13383 destmem
= change_address (destmem
, mode
, x_addr
);
13387 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13388 srcmem
= change_address (srcmem
, mode
, y_addr
);
13390 /* When unrolling for chips that reorder memory reads and writes,
13391 we can save registers by using single temporary.
13392 Also using 4 temporaries is overkill in 32bit mode. */
13393 if (!TARGET_64BIT
&& 0)
13395 for (i
= 0; i
< unroll
; i
++)
13400 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13402 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13404 emit_move_insn (destmem
, srcmem
);
13410 gcc_assert (unroll
<= 4);
13411 for (i
= 0; i
< unroll
; i
++)
13413 tmpreg
[i
] = gen_reg_rtx (mode
);
13417 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13419 emit_move_insn (tmpreg
[i
], srcmem
);
13421 for (i
= 0; i
< unroll
; i
++)
13426 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13428 emit_move_insn (destmem
, tmpreg
[i
]);
13433 for (i
= 0; i
< unroll
; i
++)
13437 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13438 emit_move_insn (destmem
, value
);
13441 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13442 true, OPTAB_LIB_WIDEN
);
13444 emit_move_insn (iter
, tmp
);
13446 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13448 if (expected_size
!= -1)
13450 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13451 if (expected_size
== 0)
13453 else if (expected_size
> REG_BR_PROB_BASE
)
13454 predict_jump (REG_BR_PROB_BASE
- 1);
13456 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13459 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13460 iter
= ix86_zero_extend_to_Pmode (iter
);
13461 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13462 true, OPTAB_LIB_WIDEN
);
13463 if (tmp
!= destptr
)
13464 emit_move_insn (destptr
, tmp
);
13467 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13468 true, OPTAB_LIB_WIDEN
);
13470 emit_move_insn (srcptr
, tmp
);
13472 emit_label (out_label
);
13475 /* Output "rep; mov" instruction.
13476 Arguments have same meaning as for previous function */
13478 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13479 rtx destptr
, rtx srcptr
,
13481 enum machine_mode mode
)
13487 /* If the size is known, it is shorter to use rep movs. */
13488 if (mode
== QImode
&& CONST_INT_P (count
)
13489 && !(INTVAL (count
) & 3))
13492 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13493 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13494 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13495 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13496 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13497 if (mode
!= QImode
)
13499 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13500 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13501 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13502 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13503 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13504 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13508 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13509 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13511 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13515 /* Output "rep; stos" instruction.
13516 Arguments have same meaning as for previous function */
13518 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13520 enum machine_mode mode
)
13525 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13526 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13527 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13528 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13529 if (mode
!= QImode
)
13531 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13532 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13533 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13536 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13537 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13541 emit_strmov (rtx destmem
, rtx srcmem
,
13542 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13544 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13545 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13546 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13549 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13551 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13552 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13555 if (CONST_INT_P (count
))
13557 HOST_WIDE_INT countval
= INTVAL (count
);
13560 if ((countval
& 0x10) && max_size
> 16)
13564 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13565 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13568 gcc_unreachable ();
13571 if ((countval
& 0x08) && max_size
> 8)
13574 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13577 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13578 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13582 if ((countval
& 0x04) && max_size
> 4)
13584 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13587 if ((countval
& 0x02) && max_size
> 2)
13589 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13592 if ((countval
& 0x01) && max_size
> 1)
13594 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13601 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13602 count
, 1, OPTAB_DIRECT
);
13603 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13604 count
, QImode
, 1, 4);
13608 /* When there are stringops, we can cheaply increase dest and src pointers.
13609 Otherwise we save code size by maintaining offset (zero is readily
13610 available from preceding rep operation) and using x86 addressing modes.
13612 if (TARGET_SINGLE_STRINGOP
)
13616 rtx label
= ix86_expand_aligntest (count
, 4, true);
13617 src
= change_address (srcmem
, SImode
, srcptr
);
13618 dest
= change_address (destmem
, SImode
, destptr
);
13619 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13620 emit_label (label
);
13621 LABEL_NUSES (label
) = 1;
13625 rtx label
= ix86_expand_aligntest (count
, 2, true);
13626 src
= change_address (srcmem
, HImode
, srcptr
);
13627 dest
= change_address (destmem
, HImode
, destptr
);
13628 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13629 emit_label (label
);
13630 LABEL_NUSES (label
) = 1;
13634 rtx label
= ix86_expand_aligntest (count
, 1, true);
13635 src
= change_address (srcmem
, QImode
, srcptr
);
13636 dest
= change_address (destmem
, QImode
, destptr
);
13637 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13638 emit_label (label
);
13639 LABEL_NUSES (label
) = 1;
13644 rtx offset
= force_reg (Pmode
, const0_rtx
);
13649 rtx label
= ix86_expand_aligntest (count
, 4, true);
13650 src
= change_address (srcmem
, SImode
, srcptr
);
13651 dest
= change_address (destmem
, SImode
, destptr
);
13652 emit_move_insn (dest
, src
);
13653 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13654 true, OPTAB_LIB_WIDEN
);
13656 emit_move_insn (offset
, tmp
);
13657 emit_label (label
);
13658 LABEL_NUSES (label
) = 1;
13662 rtx label
= ix86_expand_aligntest (count
, 2, true);
13663 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13664 src
= change_address (srcmem
, HImode
, tmp
);
13665 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13666 dest
= change_address (destmem
, HImode
, tmp
);
13667 emit_move_insn (dest
, src
);
13668 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13669 true, OPTAB_LIB_WIDEN
);
13671 emit_move_insn (offset
, tmp
);
13672 emit_label (label
);
13673 LABEL_NUSES (label
) = 1;
13677 rtx label
= ix86_expand_aligntest (count
, 1, true);
13678 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13679 src
= change_address (srcmem
, QImode
, tmp
);
13680 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13681 dest
= change_address (destmem
, QImode
, tmp
);
13682 emit_move_insn (dest
, src
);
13683 emit_label (label
);
13684 LABEL_NUSES (label
) = 1;
13689 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13691 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13692 rtx count
, int max_size
)
13695 expand_simple_binop (counter_mode (count
), AND
, count
,
13696 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13697 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13698 gen_lowpart (QImode
, value
), count
, QImode
,
13702 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13704 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13708 if (CONST_INT_P (count
))
13710 HOST_WIDE_INT countval
= INTVAL (count
);
13713 if ((countval
& 0x10) && max_size
> 16)
13717 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13718 emit_insn (gen_strset (destptr
, dest
, value
));
13719 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13720 emit_insn (gen_strset (destptr
, dest
, value
));
13723 gcc_unreachable ();
13726 if ((countval
& 0x08) && max_size
> 8)
13730 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13731 emit_insn (gen_strset (destptr
, dest
, value
));
13735 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13736 emit_insn (gen_strset (destptr
, dest
, value
));
13737 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13738 emit_insn (gen_strset (destptr
, dest
, value
));
13742 if ((countval
& 0x04) && max_size
> 4)
13744 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13745 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13748 if ((countval
& 0x02) && max_size
> 2)
13750 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13751 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13754 if ((countval
& 0x01) && max_size
> 1)
13756 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13757 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13764 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13769 rtx label
= ix86_expand_aligntest (count
, 16, true);
13772 dest
= change_address (destmem
, DImode
, destptr
);
13773 emit_insn (gen_strset (destptr
, dest
, value
));
13774 emit_insn (gen_strset (destptr
, dest
, value
));
13778 dest
= change_address (destmem
, SImode
, destptr
);
13779 emit_insn (gen_strset (destptr
, dest
, value
));
13780 emit_insn (gen_strset (destptr
, dest
, value
));
13781 emit_insn (gen_strset (destptr
, dest
, value
));
13782 emit_insn (gen_strset (destptr
, dest
, value
));
13784 emit_label (label
);
13785 LABEL_NUSES (label
) = 1;
13789 rtx label
= ix86_expand_aligntest (count
, 8, true);
13792 dest
= change_address (destmem
, DImode
, destptr
);
13793 emit_insn (gen_strset (destptr
, dest
, value
));
13797 dest
= change_address (destmem
, SImode
, destptr
);
13798 emit_insn (gen_strset (destptr
, dest
, value
));
13799 emit_insn (gen_strset (destptr
, dest
, value
));
13801 emit_label (label
);
13802 LABEL_NUSES (label
) = 1;
13806 rtx label
= ix86_expand_aligntest (count
, 4, true);
13807 dest
= change_address (destmem
, SImode
, destptr
);
13808 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13809 emit_label (label
);
13810 LABEL_NUSES (label
) = 1;
13814 rtx label
= ix86_expand_aligntest (count
, 2, true);
13815 dest
= change_address (destmem
, HImode
, destptr
);
13816 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13817 emit_label (label
);
13818 LABEL_NUSES (label
) = 1;
13822 rtx label
= ix86_expand_aligntest (count
, 1, true);
13823 dest
= change_address (destmem
, QImode
, destptr
);
13824 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13825 emit_label (label
);
13826 LABEL_NUSES (label
) = 1;
13830 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13831 DESIRED_ALIGNMENT. */
13833 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13834 rtx destptr
, rtx srcptr
, rtx count
,
13835 int align
, int desired_alignment
)
13837 if (align
<= 1 && desired_alignment
> 1)
13839 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13840 srcmem
= change_address (srcmem
, QImode
, srcptr
);
13841 destmem
= change_address (destmem
, QImode
, destptr
);
13842 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13843 ix86_adjust_counter (count
, 1);
13844 emit_label (label
);
13845 LABEL_NUSES (label
) = 1;
13847 if (align
<= 2 && desired_alignment
> 2)
13849 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13850 srcmem
= change_address (srcmem
, HImode
, srcptr
);
13851 destmem
= change_address (destmem
, HImode
, destptr
);
13852 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13853 ix86_adjust_counter (count
, 2);
13854 emit_label (label
);
13855 LABEL_NUSES (label
) = 1;
13857 if (align
<= 4 && desired_alignment
> 4)
13859 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13860 srcmem
= change_address (srcmem
, SImode
, srcptr
);
13861 destmem
= change_address (destmem
, SImode
, destptr
);
13862 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
13863 ix86_adjust_counter (count
, 4);
13864 emit_label (label
);
13865 LABEL_NUSES (label
) = 1;
13867 gcc_assert (desired_alignment
<= 8);
13870 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13871 DESIRED_ALIGNMENT. */
13873 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
13874 int align
, int desired_alignment
)
13876 if (align
<= 1 && desired_alignment
> 1)
13878 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
13879 destmem
= change_address (destmem
, QImode
, destptr
);
13880 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
13881 ix86_adjust_counter (count
, 1);
13882 emit_label (label
);
13883 LABEL_NUSES (label
) = 1;
13885 if (align
<= 2 && desired_alignment
> 2)
13887 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
13888 destmem
= change_address (destmem
, HImode
, destptr
);
13889 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
13890 ix86_adjust_counter (count
, 2);
13891 emit_label (label
);
13892 LABEL_NUSES (label
) = 1;
13894 if (align
<= 4 && desired_alignment
> 4)
13896 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
13897 destmem
= change_address (destmem
, SImode
, destptr
);
13898 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
13899 ix86_adjust_counter (count
, 4);
13900 emit_label (label
);
13901 LABEL_NUSES (label
) = 1;
13903 gcc_assert (desired_alignment
<= 8);
13906 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13907 static enum stringop_alg
13908 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
13909 int *dynamic_check
)
13911 const struct stringop_algs
* algs
;
13913 *dynamic_check
= -1;
13915 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
13917 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
13918 if (stringop_alg
!= no_stringop
)
13919 return stringop_alg
;
13920 /* rep; movq or rep; movl is the smallest variant. */
13921 else if (optimize_size
)
13923 if (!count
|| (count
& 3))
13924 return rep_prefix_1_byte
;
13926 return rep_prefix_4_byte
;
13928 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13930 else if (expected_size
!= -1 && expected_size
< 4)
13931 return loop_1_byte
;
13932 else if (expected_size
!= -1)
13935 enum stringop_alg alg
= libcall
;
13936 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13938 gcc_assert (algs
->size
[i
].max
);
13939 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
13941 if (algs
->size
[i
].alg
!= libcall
)
13942 alg
= algs
->size
[i
].alg
;
13943 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13944 last non-libcall inline algorithm. */
13945 if (TARGET_INLINE_ALL_STRINGOPS
)
13947 /* When the current size is best to be copied by a libcall,
13948 but we are still forced to inline, run the heuristic bellow
13949 that will pick code for medium sized blocks. */
13950 if (alg
!= libcall
)
13955 return algs
->size
[i
].alg
;
13958 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
13960 /* When asked to inline the call anyway, try to pick meaningful choice.
13961 We look for maximal size of block that is faster to copy by hand and
13962 take blocks of at most of that size guessing that average size will
13963 be roughly half of the block.
13965 If this turns out to be bad, we might simply specify the preferred
13966 choice in ix86_costs. */
13967 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13968 && algs
->unknown_size
== libcall
)
13971 enum stringop_alg alg
;
13974 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
13975 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
13976 max
= algs
->size
[i
].max
;
13979 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
13980 gcc_assert (*dynamic_check
== -1);
13981 gcc_assert (alg
!= libcall
);
13982 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
13983 *dynamic_check
= max
;
13986 return algs
->unknown_size
;
13989 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13990 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13992 decide_alignment (int align
,
13993 enum stringop_alg alg
,
13996 int desired_align
= 0;
14000 gcc_unreachable ();
14002 case unrolled_loop
:
14003 desired_align
= GET_MODE_SIZE (Pmode
);
14005 case rep_prefix_8_byte
:
14008 case rep_prefix_4_byte
:
14009 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14010 copying whole cacheline at once. */
14011 if (TARGET_PENTIUMPRO
)
14016 case rep_prefix_1_byte
:
14017 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14018 copying whole cacheline at once. */
14019 if (TARGET_PENTIUMPRO
)
14033 if (desired_align
< align
)
14034 desired_align
= align
;
14035 if (expected_size
!= -1 && expected_size
< 4)
14036 desired_align
= align
;
14037 return desired_align
;
14040 /* Return the smallest power of 2 greater than VAL. */
14042 smallest_pow2_greater_than (int val
)
14050 /* Expand string move (memcpy) operation. Use i386 string operations when
14051 profitable. expand_clrmem contains similar code. The code depends upon
14052 architecture, block size and alignment, but always has the same
14055 1) Prologue guard: Conditional that jumps up to epilogues for small
14056 blocks that can be handled by epilogue alone. This is faster but
14057 also needed for correctness, since prologue assume the block is larger
14058 than the desired alignment.
14060 Optional dynamic check for size and libcall for large
14061 blocks is emitted here too, with -minline-stringops-dynamically.
14063 2) Prologue: copy first few bytes in order to get destination aligned
14064 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14065 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14066 We emit either a jump tree on power of two sized blocks, or a byte loop.
14068 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14069 with specified algorithm.
14071 4) Epilogue: code copying tail of the block that is too small to be
14072 handled by main body (or up to size guarded by prologue guard). */
14075 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14076 rtx expected_align_exp
, rtx expected_size_exp
)
14082 rtx jump_around_label
= NULL
;
14083 HOST_WIDE_INT align
= 1;
14084 unsigned HOST_WIDE_INT count
= 0;
14085 HOST_WIDE_INT expected_size
= -1;
14086 int size_needed
= 0, epilogue_size_needed
;
14087 int desired_align
= 0;
14088 enum stringop_alg alg
;
14091 if (CONST_INT_P (align_exp
))
14092 align
= INTVAL (align_exp
);
14093 /* i386 can do misaligned access on reasonably increased cost. */
14094 if (CONST_INT_P (expected_align_exp
)
14095 && INTVAL (expected_align_exp
) > align
)
14096 align
= INTVAL (expected_align_exp
);
14097 if (CONST_INT_P (count_exp
))
14098 count
= expected_size
= INTVAL (count_exp
);
14099 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14100 expected_size
= INTVAL (expected_size_exp
);
14102 /* Step 0: Decide on preferred algorithm, desired alignment and
14103 size of chunks to be copied by main loop. */
14105 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14106 desired_align
= decide_alignment (align
, alg
, expected_size
);
14108 if (!TARGET_ALIGN_STRINGOPS
)
14109 align
= desired_align
;
14111 if (alg
== libcall
)
14113 gcc_assert (alg
!= no_stringop
);
14115 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14116 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14117 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14122 gcc_unreachable ();
14124 size_needed
= GET_MODE_SIZE (Pmode
);
14126 case unrolled_loop
:
14127 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14129 case rep_prefix_8_byte
:
14132 case rep_prefix_4_byte
:
14135 case rep_prefix_1_byte
:
14141 epilogue_size_needed
= size_needed
;
14143 /* Step 1: Prologue guard. */
14145 /* Alignment code needs count to be in register. */
14146 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14148 enum machine_mode mode
= SImode
;
14149 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14151 count_exp
= force_reg (mode
, count_exp
);
14153 gcc_assert (desired_align
>= 1 && align
>= 1);
14155 /* Ensure that alignment prologue won't copy past end of block. */
14156 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14158 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14159 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14160 Make sure it is power of 2. */
14161 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14163 label
= gen_label_rtx ();
14164 emit_cmp_and_jump_insns (count_exp
,
14165 GEN_INT (epilogue_size_needed
),
14166 LTU
, 0, counter_mode (count_exp
), 1, label
);
14167 if (GET_CODE (count_exp
) == CONST_INT
)
14169 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14170 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14172 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14174 /* Emit code to decide on runtime whether library call or inline should be
14176 if (dynamic_check
!= -1)
14178 rtx hot_label
= gen_label_rtx ();
14179 jump_around_label
= gen_label_rtx ();
14180 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14181 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14182 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14183 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14184 emit_jump (jump_around_label
);
14185 emit_label (hot_label
);
14188 /* Step 2: Alignment prologue. */
14190 if (desired_align
> align
)
14192 /* Except for the first move in epilogue, we no longer know
14193 constant offset in aliasing info. It don't seems to worth
14194 the pain to maintain it for the first move, so throw away
14196 src
= change_address (src
, BLKmode
, srcreg
);
14197 dst
= change_address (dst
, BLKmode
, destreg
);
14198 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14201 if (label
&& size_needed
== 1)
14203 emit_label (label
);
14204 LABEL_NUSES (label
) = 1;
14208 /* Step 3: Main loop. */
14214 gcc_unreachable ();
14216 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14217 count_exp
, QImode
, 1, expected_size
);
14220 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14221 count_exp
, Pmode
, 1, expected_size
);
14223 case unrolled_loop
:
14224 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14225 registers for 4 temporaries anyway. */
14226 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14227 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14230 case rep_prefix_8_byte
:
14231 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14234 case rep_prefix_4_byte
:
14235 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14238 case rep_prefix_1_byte
:
14239 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14243 /* Adjust properly the offset of src and dest memory for aliasing. */
14244 if (CONST_INT_P (count_exp
))
14246 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14247 (count
/ size_needed
) * size_needed
);
14248 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14249 (count
/ size_needed
) * size_needed
);
14253 src
= change_address (src
, BLKmode
, srcreg
);
14254 dst
= change_address (dst
, BLKmode
, destreg
);
14257 /* Step 4: Epilogue to copy the remaining bytes. */
14261 /* When the main loop is done, COUNT_EXP might hold original count,
14262 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14263 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14264 bytes. Compensate if needed. */
14266 if (size_needed
< epilogue_size_needed
)
14269 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14270 GEN_INT (size_needed
- 1), count_exp
, 1,
14272 if (tmp
!= count_exp
)
14273 emit_move_insn (count_exp
, tmp
);
14275 emit_label (label
);
14276 LABEL_NUSES (label
) = 1;
14279 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14280 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14281 epilogue_size_needed
);
14282 if (jump_around_label
)
14283 emit_label (jump_around_label
);
14287 /* Helper function for memcpy. For QImode value 0xXY produce
14288 0xXYXYXYXY of wide specified by MODE. This is essentially
14289 a * 0x10101010, but we can do slightly better than
14290 synth_mult by unwinding the sequence by hand on CPUs with
14293 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14295 enum machine_mode valmode
= GET_MODE (val
);
14297 int nops
= mode
== DImode
? 3 : 2;
14299 gcc_assert (mode
== SImode
|| mode
== DImode
);
14300 if (val
== const0_rtx
)
14301 return copy_to_mode_reg (mode
, const0_rtx
);
14302 if (CONST_INT_P (val
))
14304 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14308 if (mode
== DImode
)
14309 v
|= (v
<< 16) << 16;
14310 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14313 if (valmode
== VOIDmode
)
14315 if (valmode
!= QImode
)
14316 val
= gen_lowpart (QImode
, val
);
14317 if (mode
== QImode
)
14319 if (!TARGET_PARTIAL_REG_STALL
)
14321 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14322 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14323 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14324 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14326 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14327 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14328 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14333 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14335 if (!TARGET_PARTIAL_REG_STALL
)
14336 if (mode
== SImode
)
14337 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14339 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14342 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14343 NULL
, 1, OPTAB_DIRECT
);
14345 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14347 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14348 NULL
, 1, OPTAB_DIRECT
);
14349 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14350 if (mode
== SImode
)
14352 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14353 NULL
, 1, OPTAB_DIRECT
);
14354 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14359 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14360 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14361 alignment from ALIGN to DESIRED_ALIGN. */
14363 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14368 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14369 promoted_val
= promote_duplicated_reg (DImode
, val
);
14370 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14371 promoted_val
= promote_duplicated_reg (SImode
, val
);
14372 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14373 promoted_val
= promote_duplicated_reg (HImode
, val
);
14375 promoted_val
= val
;
14377 return promoted_val
;
14380 /* Expand string clear operation (bzero). Use i386 string operations when
14381 profitable. See expand_movmem comment for explanation of individual
14382 steps performed. */
14384 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14385 rtx expected_align_exp
, rtx expected_size_exp
)
14390 rtx jump_around_label
= NULL
;
14391 HOST_WIDE_INT align
= 1;
14392 unsigned HOST_WIDE_INT count
= 0;
14393 HOST_WIDE_INT expected_size
= -1;
14394 int size_needed
= 0, epilogue_size_needed
;
14395 int desired_align
= 0;
14396 enum stringop_alg alg
;
14397 rtx promoted_val
= NULL
;
14398 bool force_loopy_epilogue
= false;
14401 if (CONST_INT_P (align_exp
))
14402 align
= INTVAL (align_exp
);
14403 /* i386 can do misaligned access on reasonably increased cost. */
14404 if (CONST_INT_P (expected_align_exp
)
14405 && INTVAL (expected_align_exp
) > align
)
14406 align
= INTVAL (expected_align_exp
);
14407 if (CONST_INT_P (count_exp
))
14408 count
= expected_size
= INTVAL (count_exp
);
14409 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14410 expected_size
= INTVAL (expected_size_exp
);
14412 /* Step 0: Decide on preferred algorithm, desired alignment and
14413 size of chunks to be copied by main loop. */
14415 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14416 desired_align
= decide_alignment (align
, alg
, expected_size
);
14418 if (!TARGET_ALIGN_STRINGOPS
)
14419 align
= desired_align
;
14421 if (alg
== libcall
)
14423 gcc_assert (alg
!= no_stringop
);
14425 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14426 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14431 gcc_unreachable ();
14433 size_needed
= GET_MODE_SIZE (Pmode
);
14435 case unrolled_loop
:
14436 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14438 case rep_prefix_8_byte
:
14441 case rep_prefix_4_byte
:
14444 case rep_prefix_1_byte
:
14449 epilogue_size_needed
= size_needed
;
14451 /* Step 1: Prologue guard. */
14453 /* Alignment code needs count to be in register. */
14454 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14456 enum machine_mode mode
= SImode
;
14457 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14459 count_exp
= force_reg (mode
, count_exp
);
14461 /* Do the cheap promotion to allow better CSE across the
14462 main loop and epilogue (ie one load of the big constant in the
14463 front of all code. */
14464 if (CONST_INT_P (val_exp
))
14465 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14466 desired_align
, align
);
14467 /* Ensure that alignment prologue won't copy past end of block. */
14468 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14470 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14471 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14472 Make sure it is power of 2. */
14473 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14475 /* To improve performance of small blocks, we jump around the VAL
14476 promoting mode. This mean that if the promoted VAL is not constant,
14477 we might not use it in the epilogue and have to use byte
14479 if (epilogue_size_needed
> 2 && !promoted_val
)
14480 force_loopy_epilogue
= true;
14481 label
= gen_label_rtx ();
14482 emit_cmp_and_jump_insns (count_exp
,
14483 GEN_INT (epilogue_size_needed
),
14484 LTU
, 0, counter_mode (count_exp
), 1, label
);
14485 if (GET_CODE (count_exp
) == CONST_INT
)
14487 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14488 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14490 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14492 if (dynamic_check
!= -1)
14494 rtx hot_label
= gen_label_rtx ();
14495 jump_around_label
= gen_label_rtx ();
14496 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14497 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14498 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14499 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14500 emit_jump (jump_around_label
);
14501 emit_label (hot_label
);
14504 /* Step 2: Alignment prologue. */
14506 /* Do the expensive promotion once we branched off the small blocks. */
14508 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14509 desired_align
, align
);
14510 gcc_assert (desired_align
>= 1 && align
>= 1);
14512 if (desired_align
> align
)
14514 /* Except for the first move in epilogue, we no longer know
14515 constant offset in aliasing info. It don't seems to worth
14516 the pain to maintain it for the first move, so throw away
14518 dst
= change_address (dst
, BLKmode
, destreg
);
14519 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14522 if (label
&& size_needed
== 1)
14524 emit_label (label
);
14525 LABEL_NUSES (label
) = 1;
14529 /* Step 3: Main loop. */
14535 gcc_unreachable ();
14537 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14538 count_exp
, QImode
, 1, expected_size
);
14541 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14542 count_exp
, Pmode
, 1, expected_size
);
14544 case unrolled_loop
:
14545 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14546 count_exp
, Pmode
, 4, expected_size
);
14548 case rep_prefix_8_byte
:
14549 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14552 case rep_prefix_4_byte
:
14553 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14556 case rep_prefix_1_byte
:
14557 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14561 /* Adjust properly the offset of src and dest memory for aliasing. */
14562 if (CONST_INT_P (count_exp
))
14563 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14564 (count
/ size_needed
) * size_needed
);
14566 dst
= change_address (dst
, BLKmode
, destreg
);
14568 /* Step 4: Epilogue to copy the remaining bytes. */
14572 /* When the main loop is done, COUNT_EXP might hold original count,
14573 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14574 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14575 bytes. Compensate if needed. */
14577 if (size_needed
< desired_align
- align
)
14580 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14581 GEN_INT (size_needed
- 1), count_exp
, 1,
14583 size_needed
= desired_align
- align
+ 1;
14584 if (tmp
!= count_exp
)
14585 emit_move_insn (count_exp
, tmp
);
14587 emit_label (label
);
14588 LABEL_NUSES (label
) = 1;
14590 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14592 if (force_loopy_epilogue
)
14593 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14596 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14599 if (jump_around_label
)
14600 emit_label (jump_around_label
);
14604 /* Expand strlen. */
14606 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14608 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14610 /* The generic case of strlen expander is long. Avoid it's
14611 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14613 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14614 && !TARGET_INLINE_ALL_STRINGOPS
14616 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14619 addr
= force_reg (Pmode
, XEXP (src
, 0));
14620 scratch1
= gen_reg_rtx (Pmode
);
14622 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14625 /* Well it seems that some optimizer does not combine a call like
14626 foo(strlen(bar), strlen(bar));
14627 when the move and the subtraction is done here. It does calculate
14628 the length just once when these instructions are done inside of
14629 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14630 often used and I use one fewer register for the lifetime of
14631 output_strlen_unroll() this is better. */
14633 emit_move_insn (out
, addr
);
14635 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14637 /* strlensi_unroll_1 returns the address of the zero at the end of
14638 the string, like memchr(), so compute the length by subtracting
14639 the start address. */
14641 emit_insn (gen_subdi3 (out
, out
, addr
));
14643 emit_insn (gen_subsi3 (out
, out
, addr
));
14648 scratch2
= gen_reg_rtx (Pmode
);
14649 scratch3
= gen_reg_rtx (Pmode
);
14650 scratch4
= force_reg (Pmode
, constm1_rtx
);
14652 emit_move_insn (scratch3
, addr
);
14653 eoschar
= force_reg (QImode
, eoschar
);
14655 src
= replace_equiv_address_nv (src
, scratch3
);
14657 /* If .md starts supporting :P, this can be done in .md. */
14658 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14659 scratch4
), UNSPEC_SCAS
);
14660 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14663 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14664 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14668 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14669 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14675 /* Expand the appropriate insns for doing strlen if not just doing
14678 out = result, initialized with the start address
14679 align_rtx = alignment of the address.
14680 scratch = scratch register, initialized with the startaddress when
14681 not aligned, otherwise undefined
14683 This is just the body. It needs the initializations mentioned above and
14684 some address computing at the end. These things are done in i386.md. */
14687 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14691 rtx align_2_label
= NULL_RTX
;
14692 rtx align_3_label
= NULL_RTX
;
14693 rtx align_4_label
= gen_label_rtx ();
14694 rtx end_0_label
= gen_label_rtx ();
14696 rtx tmpreg
= gen_reg_rtx (SImode
);
14697 rtx scratch
= gen_reg_rtx (SImode
);
14701 if (CONST_INT_P (align_rtx
))
14702 align
= INTVAL (align_rtx
);
14704 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14706 /* Is there a known alignment and is it less than 4? */
14709 rtx scratch1
= gen_reg_rtx (Pmode
);
14710 emit_move_insn (scratch1
, out
);
14711 /* Is there a known alignment and is it not 2? */
14714 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14715 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14717 /* Leave just the 3 lower bits. */
14718 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14719 NULL_RTX
, 0, OPTAB_WIDEN
);
14721 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14722 Pmode
, 1, align_4_label
);
14723 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14724 Pmode
, 1, align_2_label
);
14725 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14726 Pmode
, 1, align_3_label
);
14730 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14731 check if is aligned to 4 - byte. */
14733 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14734 NULL_RTX
, 0, OPTAB_WIDEN
);
14736 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14737 Pmode
, 1, align_4_label
);
14740 mem
= change_address (src
, QImode
, out
);
14742 /* Now compare the bytes. */
14744 /* Compare the first n unaligned byte on a byte per byte basis. */
14745 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14746 QImode
, 1, end_0_label
);
14748 /* Increment the address. */
14750 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14752 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14754 /* Not needed with an alignment of 2 */
14757 emit_label (align_2_label
);
14759 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14763 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14765 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14767 emit_label (align_3_label
);
14770 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14774 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14776 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14779 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14780 align this loop. It gives only huge programs, but does not help to
14782 emit_label (align_4_label
);
14784 mem
= change_address (src
, SImode
, out
);
14785 emit_move_insn (scratch
, mem
);
14787 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14789 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14791 /* This formula yields a nonzero result iff one of the bytes is zero.
14792 This saves three branches inside loop and many cycles. */
14794 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14795 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14796 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14797 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14798 gen_int_mode (0x80808080, SImode
)));
14799 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14804 rtx reg
= gen_reg_rtx (SImode
);
14805 rtx reg2
= gen_reg_rtx (Pmode
);
14806 emit_move_insn (reg
, tmpreg
);
14807 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14809 /* If zero is not in the first two bytes, move two bytes forward. */
14810 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14811 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14812 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14813 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14814 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14817 /* Emit lea manually to avoid clobbering of flags. */
14818 emit_insn (gen_rtx_SET (SImode
, reg2
,
14819 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14821 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14822 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14823 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14824 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
14831 rtx end_2_label
= gen_label_rtx ();
14832 /* Is zero in the first two bytes? */
14834 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14835 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14836 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
14837 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
14838 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
14840 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
14841 JUMP_LABEL (tmp
) = end_2_label
;
14843 /* Not in the first two. Move two bytes forward. */
14844 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
14846 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
14848 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
14850 emit_label (end_2_label
);
14854 /* Avoid branch in fixing the byte. */
14855 tmpreg
= gen_lowpart (QImode
, tmpreg
);
14856 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
14857 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
14859 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
14861 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
14863 emit_label (end_0_label
);
14867 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
14868 rtx callarg2 ATTRIBUTE_UNUSED
,
14869 rtx pop
, int sibcall
)
14871 rtx use
= NULL
, call
;
14873 if (pop
== const0_rtx
)
14875 gcc_assert (!TARGET_64BIT
|| !pop
);
14877 if (TARGET_MACHO
&& !TARGET_64BIT
)
14880 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
14881 fnaddr
= machopic_indirect_call_target (fnaddr
);
14886 /* Static functions and indirect calls don't need the pic register. */
14887 if (! TARGET_64BIT
&& flag_pic
14888 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
14889 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
14890 use_reg (&use
, pic_offset_table_rtx
);
14893 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
14895 rtx al
= gen_rtx_REG (QImode
, 0);
14896 emit_move_insn (al
, callarg2
);
14897 use_reg (&use
, al
);
14900 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
14902 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14903 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14905 if (sibcall
&& TARGET_64BIT
14906 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
14909 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
14910 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
14911 emit_move_insn (fnaddr
, addr
);
14912 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
14915 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
14917 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
14920 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
14921 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
14922 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
14925 call
= emit_call_insn (call
);
14927 CALL_INSN_FUNCTION_USAGE (call
) = use
;
14931 /* Clear stack slot assignments remembered from previous functions.
14932 This is called from INIT_EXPANDERS once before RTL is emitted for each
14935 static struct machine_function
*
14936 ix86_init_machine_status (void)
14938 struct machine_function
*f
;
14940 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
14941 f
->use_fast_prologue_epilogue_nregs
= -1;
14942 f
->tls_descriptor_call_expanded_p
= 0;
14947 /* Return a MEM corresponding to a stack slot with mode MODE.
14948 Allocate a new slot if necessary.
14950 The RTL for a function can have several slots available: N is
14951 which slot to use. */
14954 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
14956 struct stack_local_entry
*s
;
14958 gcc_assert (n
< MAX_386_STACK_LOCALS
);
14960 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
14961 if (s
->mode
== mode
&& s
->n
== n
)
14962 return copy_rtx (s
->rtl
);
14964 s
= (struct stack_local_entry
*)
14965 ggc_alloc (sizeof (struct stack_local_entry
));
14968 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
14970 s
->next
= ix86_stack_locals
;
14971 ix86_stack_locals
= s
;
14975 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14977 static GTY(()) rtx ix86_tls_symbol
;
14979 ix86_tls_get_addr (void)
14982 if (!ix86_tls_symbol
)
14984 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
14985 (TARGET_ANY_GNU_TLS
14987 ? "___tls_get_addr"
14988 : "__tls_get_addr");
14991 return ix86_tls_symbol
;
14994 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14996 static GTY(()) rtx ix86_tls_module_base_symbol
;
14998 ix86_tls_module_base (void)
15001 if (!ix86_tls_module_base_symbol
)
15003 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15004 "_TLS_MODULE_BASE_");
15005 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15006 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15009 return ix86_tls_module_base_symbol
;
15012 /* Calculate the length of the memory address in the instruction
15013 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15016 memory_address_length (rtx addr
)
15018 struct ix86_address parts
;
15019 rtx base
, index
, disp
;
15023 if (GET_CODE (addr
) == PRE_DEC
15024 || GET_CODE (addr
) == POST_INC
15025 || GET_CODE (addr
) == PRE_MODIFY
15026 || GET_CODE (addr
) == POST_MODIFY
)
15029 ok
= ix86_decompose_address (addr
, &parts
);
15032 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15033 parts
.base
= SUBREG_REG (parts
.base
);
15034 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15035 parts
.index
= SUBREG_REG (parts
.index
);
15038 index
= parts
.index
;
15043 - esp as the base always wants an index,
15044 - ebp as the base always wants a displacement. */
15046 /* Register Indirect. */
15047 if (base
&& !index
&& !disp
)
15049 /* esp (for its index) and ebp (for its displacement) need
15050 the two-byte modrm form. */
15051 if (addr
== stack_pointer_rtx
15052 || addr
== arg_pointer_rtx
15053 || addr
== frame_pointer_rtx
15054 || addr
== hard_frame_pointer_rtx
)
15058 /* Direct Addressing. */
15059 else if (disp
&& !base
&& !index
)
15064 /* Find the length of the displacement constant. */
15067 if (base
&& satisfies_constraint_K (disp
))
15072 /* ebp always wants a displacement. */
15073 else if (base
== hard_frame_pointer_rtx
)
15076 /* An index requires the two-byte modrm form.... */
15078 /* ...like esp, which always wants an index. */
15079 || base
== stack_pointer_rtx
15080 || base
== arg_pointer_rtx
15081 || base
== frame_pointer_rtx
)
15088 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15089 is set, expect that insn have 8bit immediate alternative. */
15091 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15095 extract_insn_cached (insn
);
15096 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15097 if (CONSTANT_P (recog_data
.operand
[i
]))
15100 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15104 switch (get_attr_mode (insn
))
15115 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15120 fatal_insn ("unknown insn mode", insn
);
15126 /* Compute default value for "length_address" attribute. */
15128 ix86_attr_length_address_default (rtx insn
)
15132 if (get_attr_type (insn
) == TYPE_LEA
)
15134 rtx set
= PATTERN (insn
);
15136 if (GET_CODE (set
) == PARALLEL
)
15137 set
= XVECEXP (set
, 0, 0);
15139 gcc_assert (GET_CODE (set
) == SET
);
15141 return memory_address_length (SET_SRC (set
));
15144 extract_insn_cached (insn
);
15145 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15146 if (MEM_P (recog_data
.operand
[i
]))
15148 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15154 /* Return the maximum number of instructions a cpu can issue. */
15157 ix86_issue_rate (void)
15161 case PROCESSOR_PENTIUM
:
15165 case PROCESSOR_PENTIUMPRO
:
15166 case PROCESSOR_PENTIUM4
:
15167 case PROCESSOR_ATHLON
:
15169 case PROCESSOR_AMDFAM10
:
15170 case PROCESSOR_NOCONA
:
15171 case PROCESSOR_GENERIC32
:
15172 case PROCESSOR_GENERIC64
:
15175 case PROCESSOR_CORE2
:
15183 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15184 by DEP_INSN and nothing set by DEP_INSN. */
15187 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15191 /* Simplify the test for uninteresting insns. */
15192 if (insn_type
!= TYPE_SETCC
15193 && insn_type
!= TYPE_ICMOV
15194 && insn_type
!= TYPE_FCMOV
15195 && insn_type
!= TYPE_IBR
)
15198 if ((set
= single_set (dep_insn
)) != 0)
15200 set
= SET_DEST (set
);
15203 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15204 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15205 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15206 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15208 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15209 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15214 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15217 /* This test is true if the dependent insn reads the flags but
15218 not any other potentially set register. */
15219 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15222 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15228 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15229 address with operands set by DEP_INSN. */
15232 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15236 if (insn_type
== TYPE_LEA
15239 addr
= PATTERN (insn
);
15241 if (GET_CODE (addr
) == PARALLEL
)
15242 addr
= XVECEXP (addr
, 0, 0);
15244 gcc_assert (GET_CODE (addr
) == SET
);
15246 addr
= SET_SRC (addr
);
15251 extract_insn_cached (insn
);
15252 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15253 if (MEM_P (recog_data
.operand
[i
]))
15255 addr
= XEXP (recog_data
.operand
[i
], 0);
15262 return modified_in_p (addr
, dep_insn
);
15266 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15268 enum attr_type insn_type
, dep_insn_type
;
15269 enum attr_memory memory
;
15271 int dep_insn_code_number
;
15273 /* Anti and output dependencies have zero cost on all CPUs. */
15274 if (REG_NOTE_KIND (link
) != 0)
15277 dep_insn_code_number
= recog_memoized (dep_insn
);
15279 /* If we can't recognize the insns, we can't really do anything. */
15280 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15283 insn_type
= get_attr_type (insn
);
15284 dep_insn_type
= get_attr_type (dep_insn
);
15288 case PROCESSOR_PENTIUM
:
15289 /* Address Generation Interlock adds a cycle of latency. */
15290 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15293 /* ??? Compares pair with jump/setcc. */
15294 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15297 /* Floating point stores require value to be ready one cycle earlier. */
15298 if (insn_type
== TYPE_FMOV
15299 && get_attr_memory (insn
) == MEMORY_STORE
15300 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15304 case PROCESSOR_PENTIUMPRO
:
15305 memory
= get_attr_memory (insn
);
15307 /* INT->FP conversion is expensive. */
15308 if (get_attr_fp_int_src (dep_insn
))
15311 /* There is one cycle extra latency between an FP op and a store. */
15312 if (insn_type
== TYPE_FMOV
15313 && (set
= single_set (dep_insn
)) != NULL_RTX
15314 && (set2
= single_set (insn
)) != NULL_RTX
15315 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15316 && MEM_P (SET_DEST (set2
)))
15319 /* Show ability of reorder buffer to hide latency of load by executing
15320 in parallel with previous instruction in case
15321 previous instruction is not needed to compute the address. */
15322 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15323 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15325 /* Claim moves to take one cycle, as core can issue one load
15326 at time and the next load can start cycle later. */
15327 if (dep_insn_type
== TYPE_IMOV
15328 || dep_insn_type
== TYPE_FMOV
)
15336 memory
= get_attr_memory (insn
);
15338 /* The esp dependency is resolved before the instruction is really
15340 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15341 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15344 /* INT->FP conversion is expensive. */
15345 if (get_attr_fp_int_src (dep_insn
))
15348 /* Show ability of reorder buffer to hide latency of load by executing
15349 in parallel with previous instruction in case
15350 previous instruction is not needed to compute the address. */
15351 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15352 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15354 /* Claim moves to take one cycle, as core can issue one load
15355 at time and the next load can start cycle later. */
15356 if (dep_insn_type
== TYPE_IMOV
15357 || dep_insn_type
== TYPE_FMOV
)
15366 case PROCESSOR_ATHLON
:
15368 case PROCESSOR_AMDFAM10
:
15369 case PROCESSOR_GENERIC32
:
15370 case PROCESSOR_GENERIC64
:
15371 memory
= get_attr_memory (insn
);
15373 /* Show ability of reorder buffer to hide latency of load by executing
15374 in parallel with previous instruction in case
15375 previous instruction is not needed to compute the address. */
15376 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15377 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15379 enum attr_unit unit
= get_attr_unit (insn
);
15382 /* Because of the difference between the length of integer and
15383 floating unit pipeline preparation stages, the memory operands
15384 for floating point are cheaper.
15386 ??? For Athlon it the difference is most probably 2. */
15387 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15390 loadcost
= TARGET_ATHLON
? 2 : 0;
15392 if (cost
>= loadcost
)
15405 /* How many alternative schedules to try. This should be as wide as the
15406 scheduling freedom in the DFA, but no wider. Making this value too
15407 large results extra work for the scheduler. */
15410 ia32_multipass_dfa_lookahead (void)
15412 if (ix86_tune
== PROCESSOR_PENTIUM
)
15415 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15416 || ix86_tune
== PROCESSOR_K6
)
15424 /* Compute the alignment given to a constant that is being placed in memory.
15425 EXP is the constant and ALIGN is the alignment that the object would
15427 The value of this function is used instead of that alignment to align
15431 ix86_constant_alignment (tree exp
, int align
)
15433 if (TREE_CODE (exp
) == REAL_CST
)
15435 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15437 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15440 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15441 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15442 return BITS_PER_WORD
;
15447 /* Compute the alignment for a static variable.
15448 TYPE is the data type, and ALIGN is the alignment that
15449 the object would ordinarily have. The value of this function is used
15450 instead of that alignment to align the object. */
15453 ix86_data_alignment (tree type
, int align
)
15455 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15457 if (AGGREGATE_TYPE_P (type
)
15458 && TYPE_SIZE (type
)
15459 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15460 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15461 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15462 && align
< max_align
)
15465 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15466 to 16byte boundary. */
15469 if (AGGREGATE_TYPE_P (type
)
15470 && TYPE_SIZE (type
)
15471 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15472 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15473 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15477 if (TREE_CODE (type
) == ARRAY_TYPE
)
15479 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15481 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15484 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15487 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15489 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15492 else if ((TREE_CODE (type
) == RECORD_TYPE
15493 || TREE_CODE (type
) == UNION_TYPE
15494 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15495 && TYPE_FIELDS (type
))
15497 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15499 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15502 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15503 || TREE_CODE (type
) == INTEGER_TYPE
)
15505 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15507 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15514 /* Compute the alignment for a local variable.
15515 TYPE is the data type, and ALIGN is the alignment that
15516 the object would ordinarily have. The value of this macro is used
15517 instead of that alignment to align the object. */
15520 ix86_local_alignment (tree type
, int align
)
15522 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15523 to 16byte boundary. */
15526 if (AGGREGATE_TYPE_P (type
)
15527 && TYPE_SIZE (type
)
15528 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15529 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15530 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15533 if (TREE_CODE (type
) == ARRAY_TYPE
)
15535 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15537 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15540 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15542 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15544 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15547 else if ((TREE_CODE (type
) == RECORD_TYPE
15548 || TREE_CODE (type
) == UNION_TYPE
15549 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15550 && TYPE_FIELDS (type
))
15552 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15554 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15557 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15558 || TREE_CODE (type
) == INTEGER_TYPE
)
15561 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15563 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15569 /* Emit RTL insns to initialize the variable parts of a trampoline.
15570 FNADDR is an RTX for the address of the function's pure code.
15571 CXT is an RTX for the static chain value for the function. */
15573 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15577 /* Compute offset from the end of the jmp to the target function. */
15578 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15579 plus_constant (tramp
, 10),
15580 NULL_RTX
, 1, OPTAB_DIRECT
);
15581 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15582 gen_int_mode (0xb9, QImode
));
15583 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15584 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15585 gen_int_mode (0xe9, QImode
));
15586 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15591 /* Try to load address using shorter movl instead of movabs.
15592 We may want to support movq for kernel mode, but kernel does not use
15593 trampolines at the moment. */
15594 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15596 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15597 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15598 gen_int_mode (0xbb41, HImode
));
15599 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15600 gen_lowpart (SImode
, fnaddr
));
15605 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15606 gen_int_mode (0xbb49, HImode
));
15607 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15611 /* Load static chain using movabs to r10. */
15612 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15613 gen_int_mode (0xba49, HImode
));
15614 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15617 /* Jump to the r11 */
15618 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15619 gen_int_mode (0xff49, HImode
));
15620 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15621 gen_int_mode (0xe3, QImode
));
15623 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15626 #ifdef ENABLE_EXECUTE_STACK
15627 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15628 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Codes for all the SSE/MMX builtins.
   NOTE(review): reconstructed from a garbled extraction.  The enum opener,
   the closing IX86_BUILTIN_MAX, and a few enumerators that are referenced
   elsewhere in this file (PAND, POR, PXOR, ORPS, EMMS, ORPD) were missing
   from the visible text and have been restored -- confirm their presence
   and ordering against the original, since enumerator order fixes the
   numeric codes.  */
enum ix86_builtins
{
  /* SSE arithmetic.  */
  IX86_BUILTIN_ADDPS, IX86_BUILTIN_ADDSS, IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS, IX86_BUILTIN_MULPS, IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS, IX86_BUILTIN_SUBSS,

  /* SSE packed and scalar comparisons.  */
  IX86_BUILTIN_CMPEQPS, IX86_BUILTIN_CMPLTPS, IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS, IX86_BUILTIN_CMPGEPS, IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS, IX86_BUILTIN_CMPNLEPS, IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS, IX86_BUILTIN_CMPORDPS, IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS, IX86_BUILTIN_CMPLTSS, IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS, IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  /* SSE COMISS/UCOMISS.  */
  IX86_BUILTIN_COMIEQSS, IX86_BUILTIN_COMILTSS, IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS, IX86_BUILTIN_COMIGESS, IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS, IX86_BUILTIN_UCOMILTSS, IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS, IX86_BUILTIN_UCOMIGESS, IX86_BUILTIN_UCOMINEQSS,

  /* SSE conversions.  */
  IX86_BUILTIN_CVTPI2PS, IX86_BUILTIN_CVTPS2PI, IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS, IX86_BUILTIN_CVTSS2SI, IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI, IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS, IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS, IX86_BUILTIN_STOREUPS, IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS, IX86_BUILTIN_MOVLHPS, IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS, IX86_BUILTIN_STOREHPS, IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ, IX86_BUILTIN_MOVMSKPS, IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS, IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU, IX86_BUILTIN_STOREDQU,

  /* MMX pack/arithmetic.  */
  IX86_BUILTIN_PACKSSWB, IX86_BUILTIN_PACKSSDW, IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB, IX86_BUILTIN_PADDW, IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ, IX86_BUILTIN_PADDSB, IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB, IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB, IX86_BUILTIN_PSUBW, IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ, IX86_BUILTIN_PSUBSB, IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB, IX86_BUILTIN_PSUBUSW,

  /* PAND/POR/PXOR restored (referenced by bdesc_2arg below).  */
  IX86_BUILTIN_PAND, IX86_BUILTIN_PANDN, IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB, IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB, IX86_BUILTIN_PCMPEQW, IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB, IX86_BUILTIN_PCMPGTW, IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW, IX86_BUILTIN_PMAXUB, IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW, IX86_BUILTIN_PMULHW, IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW, IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW, IX86_BUILTIN_PSLLD, IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW, IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW, IX86_BUILTIN_PSRLD, IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI, IX86_BUILTIN_PSLLDI, IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI, IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI, IX86_BUILTIN_PSRLDI, IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW, IX86_BUILTIN_PUNPCKHWD, IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW, IX86_BUILTIN_PUNPCKLWD, IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS, IX86_BUILTIN_RCPSS, IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS, IX86_BUILTIN_SQRTPS, IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS, IX86_BUILTIN_UNPCKLPS,

  /* ORPS restored (referenced by bdesc_2arg below).  */
  IX86_BUILTIN_ANDPS, IX86_BUILTIN_ANDNPS, IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  /* EMMS restored -- TODO confirm against original.  */
  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR, IX86_BUILTIN_STMXCSR, IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS, IX86_BUILTIN_PAVGUSB, IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC, IX86_BUILTIN_PFADD, IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE, IX86_BUILTIN_PFCMPGT, IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN, IX86_BUILTIN_PFMUL, IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1, IX86_BUILTIN_PFRCPIT2, IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT, IX86_BUILTIN_PFSUB, IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD, IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW, IX86_BUILTIN_PFNACC, IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW, IX86_BUILTIN_PSWAPDSI, IX86_BUILTIN_PSWAPDSF,

  /* SSE2 arithmetic.  */
  IX86_BUILTIN_ADDPD, IX86_BUILTIN_ADDSD, IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD, IX86_BUILTIN_MULPD, IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD, IX86_BUILTIN_SUBSD,

  /* SSE2 comparisons.  */
  IX86_BUILTIN_CMPEQPD, IX86_BUILTIN_CMPLTPD, IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD, IX86_BUILTIN_CMPGEPD, IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD, IX86_BUILTIN_CMPNLEPD, IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD, IX86_BUILTIN_CMPORDPD, IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD, IX86_BUILTIN_CMPLTSD, IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD, IX86_BUILTIN_CMPNLTSD, IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD, IX86_BUILTIN_CMPUNORDSD, IX86_BUILTIN_CMPNESD,

  /* SSE2 COMISD/UCOMISD.  */
  IX86_BUILTIN_COMIEQSD, IX86_BUILTIN_COMILTSD, IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD, IX86_BUILTIN_COMIGESD, IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD, IX86_BUILTIN_UCOMILTSD, IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD, IX86_BUILTIN_UCOMIGESD, IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD, IX86_BUILTIN_MAXSD, IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  /* ORPD restored (referenced by bdesc_2arg below).  */
  IX86_BUILTIN_ANDPD, IX86_BUILTIN_ANDNPD, IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD, IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD, IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD, IX86_BUILTIN_STOREUPD, IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD, IX86_BUILTIN_LOADLPD,

  /* SSE2 conversions.  */
  IX86_BUILTIN_CVTDQ2PD, IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ, IX86_BUILTIN_CVTPD2PI, IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ, IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD, IX86_BUILTIN_CVTSI2SD, IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI, IX86_BUILTIN_CVTSD2SI64, IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD, IX86_BUILTIN_CVTTSD2SI, IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ, IX86_BUILTIN_CVTPS2PD, IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI, IX86_BUILTIN_MOVNTPD, IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MASKMOVDQU, IX86_BUILTIN_MOVMSKPD, IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128, IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128, IX86_BUILTIN_PADDW128, IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128, IX86_BUILTIN_PADDSB128, IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128, IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128, IX86_BUILTIN_PSUBW128, IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128, IX86_BUILTIN_PSUBSB128, IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128, IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128, IX86_BUILTIN_PANDN128, IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128, IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128, IX86_BUILTIN_PCMPEQW128, IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128, IX86_BUILTIN_PCMPGTW128, IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128, IX86_BUILTIN_PMAXUB128, IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ, IX86_BUILTIN_PMULUDQ128, IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128, IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128, IX86_BUILTIN_PSHUFHW, IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128, IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128, IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128, IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128, IX86_BUILTIN_PSLLWI128, IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128, IX86_BUILTIN_PSRAWI128, IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128, IX86_BUILTIN_PSRLWI128, IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128, IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128, IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128, IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH, IX86_BUILTIN_MFENCE, IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS, IX86_BUILTIN_HADDPS, IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP, IX86_BUILTIN_MOVSLDUP, IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD, IX86_BUILTIN_HSUBPD, IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD, IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD, IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW, IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128, IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128, IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128, IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128, IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128, IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128, IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD, IX86_BUILTIN_MOVNTSS, IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ, IX86_BUILTIN_INSERTQI, IX86_BUILTIN_INSERTQ,

  /* Generic vector init/extract/set helpers.  */
  IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI, IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI, IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI, IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI, IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI, IX86_BUILTIN_VEC_SET_V4HI,

  /* One-past-the-last code; sizes the ix86_builtins decl table.  */
  IX86_BUILTIN_MAX
};
16105 /* Table for the ix86 builtin decls. */
16106 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16108 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16109 * if the target_flags include one of MASK. Stores the function decl
16110 * in the ix86_builtins array.
16111 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16114 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16116 tree decl
= NULL_TREE
;
16118 if (mask
& target_flags
16119 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16121 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16123 ix86_builtins
[(int) code
] = decl
;
16129 /* Like def_builtin, but also marks the function decl "const". */
16132 def_builtin_const (int mask
, const char *name
, tree type
,
16133 enum ix86_builtins code
)
16135 tree decl
= def_builtin (mask
, name
, type
, code
);
16137 TREE_READONLY (decl
) = 1;
16141 /* Bits for builtin_description.flag. */
16143 /* Set when we don't support the comparison natively, and should
16144 swap_comparison in order to support it. */
16145 #define BUILTIN_DESC_SWAP_OPERANDS 1
16147 struct builtin_description
16149 const unsigned int mask
;
16150 const enum insn_code icode
;
16151 const char *const name
;
16152 const enum ix86_builtins code
;
16153 const enum rtx_code comparison
;
16154 const unsigned int flag
;
16157 static const struct builtin_description bdesc_comi
[] =
16159 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16160 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16161 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16162 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16163 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16164 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16165 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16166 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16167 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16168 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16169 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16170 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16171 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16172 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16173 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16174 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16175 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16176 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16177 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16178 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16179 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16180 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16181 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16182 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16185 static const struct builtin_description bdesc_2arg
[] =
16188 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16189 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16190 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16191 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16192 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16193 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16194 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16195 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16197 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16198 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16199 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16200 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16201 BUILTIN_DESC_SWAP_OPERANDS
},
16202 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16203 BUILTIN_DESC_SWAP_OPERANDS
},
16204 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16205 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16206 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16207 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16208 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16209 BUILTIN_DESC_SWAP_OPERANDS
},
16210 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16211 BUILTIN_DESC_SWAP_OPERANDS
},
16212 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16213 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16214 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16215 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16216 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16217 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16218 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16219 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16220 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16221 BUILTIN_DESC_SWAP_OPERANDS
},
16222 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16223 BUILTIN_DESC_SWAP_OPERANDS
},
16224 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16226 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16227 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16228 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16229 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16231 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16232 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16233 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16234 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16236 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16237 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16238 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16239 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16240 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16243 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16244 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16245 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16246 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16247 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16248 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16249 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16250 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16252 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16253 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16254 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16255 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16256 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16257 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16258 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16259 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16261 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16262 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16263 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16265 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16266 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16267 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16268 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16270 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16271 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16273 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16274 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16275 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16276 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16277 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16278 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16280 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16281 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16282 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16283 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16285 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16286 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16287 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16288 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16289 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16290 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16293 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16294 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16295 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16297 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16298 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16299 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16301 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16302 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16303 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16304 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16305 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16306 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16308 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16309 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16310 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16311 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16312 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16313 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16315 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16316 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16317 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16318 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16320 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16321 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16324 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16325 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16326 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16327 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16328 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16329 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16330 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16331 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16333 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16334 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16335 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16336 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16337 BUILTIN_DESC_SWAP_OPERANDS
},
16338 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16339 BUILTIN_DESC_SWAP_OPERANDS
},
16340 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16341 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16342 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16343 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16344 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16345 BUILTIN_DESC_SWAP_OPERANDS
},
16346 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16347 BUILTIN_DESC_SWAP_OPERANDS
},
16348 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16349 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16350 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16351 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16352 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16353 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16354 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16355 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16356 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16358 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16359 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16360 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16361 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16363 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16364 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16365 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16366 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16368 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16369 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16370 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16373 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16374 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16375 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16376 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16377 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16378 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16379 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16380 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16382 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16383 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16384 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16385 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16386 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16387 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16388 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16389 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16391 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16392 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16394 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16395 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16396 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16397 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16399 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16400 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16402 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16403 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16404 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16405 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16406 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16407 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16409 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16410 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16411 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16412 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16414 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16415 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16416 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16417 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16418 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16419 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16420 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16421 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16423 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16424 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16425 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16427 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16428 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16430 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16431 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16433 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16434 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16435 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16437 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16438 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16439 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16441 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16442 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16444 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16446 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16447 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16448 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16449 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16452 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16453 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16454 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16455 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16456 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16457 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16460 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16461 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16462 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16463 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16464 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16465 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16466 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16467 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16468 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16469 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16470 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16471 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16472 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16473 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16474 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16475 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16476 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16477 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16478 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16479 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16480 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16481 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16482 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16483 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16486 static const struct builtin_description bdesc_1arg
[] =
16488 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16489 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16491 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16492 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16493 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16495 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16496 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16497 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16498 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16499 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16500 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16502 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16503 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16505 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16507 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16508 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16510 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16511 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16512 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16513 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16514 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16516 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16518 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16519 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16520 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16521 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16523 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16524 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16525 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16528 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16529 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16532 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16533 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16534 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16535 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16536 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16537 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16541 ix86_init_builtins (void)
16544 ix86_init_mmx_sse_builtins ();
16547 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16548 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16551 ix86_init_mmx_sse_builtins (void)
16553 const struct builtin_description
* d
;
16556 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16557 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16558 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16559 tree V2DI_type_node
16560 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16561 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16562 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16563 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16564 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16565 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16566 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16568 tree pchar_type_node
= build_pointer_type (char_type_node
);
16569 tree pcchar_type_node
= build_pointer_type (
16570 build_type_variant (char_type_node
, 1, 0));
16571 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16572 tree pcfloat_type_node
= build_pointer_type (
16573 build_type_variant (float_type_node
, 1, 0));
16574 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16575 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16576 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16579 tree int_ftype_v4sf_v4sf
16580 = build_function_type_list (integer_type_node
,
16581 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16582 tree v4si_ftype_v4sf_v4sf
16583 = build_function_type_list (V4SI_type_node
,
16584 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16585 /* MMX/SSE/integer conversions. */
16586 tree int_ftype_v4sf
16587 = build_function_type_list (integer_type_node
,
16588 V4SF_type_node
, NULL_TREE
);
16589 tree int64_ftype_v4sf
16590 = build_function_type_list (long_long_integer_type_node
,
16591 V4SF_type_node
, NULL_TREE
);
16592 tree int_ftype_v8qi
16593 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16594 tree v4sf_ftype_v4sf_int
16595 = build_function_type_list (V4SF_type_node
,
16596 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16597 tree v4sf_ftype_v4sf_int64
16598 = build_function_type_list (V4SF_type_node
,
16599 V4SF_type_node
, long_long_integer_type_node
,
16601 tree v4sf_ftype_v4sf_v2si
16602 = build_function_type_list (V4SF_type_node
,
16603 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16605 /* Miscellaneous. */
16606 tree v8qi_ftype_v4hi_v4hi
16607 = build_function_type_list (V8QI_type_node
,
16608 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16609 tree v4hi_ftype_v2si_v2si
16610 = build_function_type_list (V4HI_type_node
,
16611 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16612 tree v4sf_ftype_v4sf_v4sf_int
16613 = build_function_type_list (V4SF_type_node
,
16614 V4SF_type_node
, V4SF_type_node
,
16615 integer_type_node
, NULL_TREE
);
16616 tree v2si_ftype_v4hi_v4hi
16617 = build_function_type_list (V2SI_type_node
,
16618 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16619 tree v4hi_ftype_v4hi_int
16620 = build_function_type_list (V4HI_type_node
,
16621 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16622 tree v4hi_ftype_v4hi_di
16623 = build_function_type_list (V4HI_type_node
,
16624 V4HI_type_node
, long_long_unsigned_type_node
,
16626 tree v2si_ftype_v2si_di
16627 = build_function_type_list (V2SI_type_node
,
16628 V2SI_type_node
, long_long_unsigned_type_node
,
16630 tree void_ftype_void
16631 = build_function_type (void_type_node
, void_list_node
);
16632 tree void_ftype_unsigned
16633 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16634 tree void_ftype_unsigned_unsigned
16635 = build_function_type_list (void_type_node
, unsigned_type_node
,
16636 unsigned_type_node
, NULL_TREE
);
16637 tree void_ftype_pcvoid_unsigned_unsigned
16638 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16639 unsigned_type_node
, unsigned_type_node
,
16641 tree unsigned_ftype_void
16642 = build_function_type (unsigned_type_node
, void_list_node
);
16643 tree v2si_ftype_v4sf
16644 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16645 /* Loads/stores. */
16646 tree void_ftype_v8qi_v8qi_pchar
16647 = build_function_type_list (void_type_node
,
16648 V8QI_type_node
, V8QI_type_node
,
16649 pchar_type_node
, NULL_TREE
);
16650 tree v4sf_ftype_pcfloat
16651 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16652 /* @@@ the type is bogus */
16653 tree v4sf_ftype_v4sf_pv2si
16654 = build_function_type_list (V4SF_type_node
,
16655 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16656 tree void_ftype_pv2si_v4sf
16657 = build_function_type_list (void_type_node
,
16658 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16659 tree void_ftype_pfloat_v4sf
16660 = build_function_type_list (void_type_node
,
16661 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16662 tree void_ftype_pdi_di
16663 = build_function_type_list (void_type_node
,
16664 pdi_type_node
, long_long_unsigned_type_node
,
16666 tree void_ftype_pv2di_v2di
16667 = build_function_type_list (void_type_node
,
16668 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16669 /* Normal vector unops. */
16670 tree v4sf_ftype_v4sf
16671 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16672 tree v16qi_ftype_v16qi
16673 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16674 tree v8hi_ftype_v8hi
16675 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16676 tree v4si_ftype_v4si
16677 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16678 tree v8qi_ftype_v8qi
16679 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16680 tree v4hi_ftype_v4hi
16681 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16683 /* Normal vector binops. */
16684 tree v4sf_ftype_v4sf_v4sf
16685 = build_function_type_list (V4SF_type_node
,
16686 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16687 tree v8qi_ftype_v8qi_v8qi
16688 = build_function_type_list (V8QI_type_node
,
16689 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16690 tree v4hi_ftype_v4hi_v4hi
16691 = build_function_type_list (V4HI_type_node
,
16692 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16693 tree v2si_ftype_v2si_v2si
16694 = build_function_type_list (V2SI_type_node
,
16695 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16696 tree di_ftype_di_di
16697 = build_function_type_list (long_long_unsigned_type_node
,
16698 long_long_unsigned_type_node
,
16699 long_long_unsigned_type_node
, NULL_TREE
);
16701 tree di_ftype_di_di_int
16702 = build_function_type_list (long_long_unsigned_type_node
,
16703 long_long_unsigned_type_node
,
16704 long_long_unsigned_type_node
,
16705 integer_type_node
, NULL_TREE
);
16707 tree v2si_ftype_v2sf
16708 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16709 tree v2sf_ftype_v2si
16710 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16711 tree v2si_ftype_v2si
16712 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16713 tree v2sf_ftype_v2sf
16714 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16715 tree v2sf_ftype_v2sf_v2sf
16716 = build_function_type_list (V2SF_type_node
,
16717 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16718 tree v2si_ftype_v2sf_v2sf
16719 = build_function_type_list (V2SI_type_node
,
16720 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16721 tree pint_type_node
= build_pointer_type (integer_type_node
);
16722 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16723 tree pcdouble_type_node
= build_pointer_type (
16724 build_type_variant (double_type_node
, 1, 0));
16725 tree int_ftype_v2df_v2df
16726 = build_function_type_list (integer_type_node
,
16727 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16729 tree void_ftype_pcvoid
16730 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16731 tree v4sf_ftype_v4si
16732 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16733 tree v4si_ftype_v4sf
16734 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16735 tree v2df_ftype_v4si
16736 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16737 tree v4si_ftype_v2df
16738 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16739 tree v2si_ftype_v2df
16740 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16741 tree v4sf_ftype_v2df
16742 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16743 tree v2df_ftype_v2si
16744 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16745 tree v2df_ftype_v4sf
16746 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16747 tree int_ftype_v2df
16748 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16749 tree int64_ftype_v2df
16750 = build_function_type_list (long_long_integer_type_node
,
16751 V2DF_type_node
, NULL_TREE
);
16752 tree v2df_ftype_v2df_int
16753 = build_function_type_list (V2DF_type_node
,
16754 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16755 tree v2df_ftype_v2df_int64
16756 = build_function_type_list (V2DF_type_node
,
16757 V2DF_type_node
, long_long_integer_type_node
,
16759 tree v4sf_ftype_v4sf_v2df
16760 = build_function_type_list (V4SF_type_node
,
16761 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16762 tree v2df_ftype_v2df_v4sf
16763 = build_function_type_list (V2DF_type_node
,
16764 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16765 tree v2df_ftype_v2df_v2df_int
16766 = build_function_type_list (V2DF_type_node
,
16767 V2DF_type_node
, V2DF_type_node
,
16770 tree v2df_ftype_v2df_pcdouble
16771 = build_function_type_list (V2DF_type_node
,
16772 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16773 tree void_ftype_pdouble_v2df
16774 = build_function_type_list (void_type_node
,
16775 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16776 tree void_ftype_pint_int
16777 = build_function_type_list (void_type_node
,
16778 pint_type_node
, integer_type_node
, NULL_TREE
);
16779 tree void_ftype_v16qi_v16qi_pchar
16780 = build_function_type_list (void_type_node
,
16781 V16QI_type_node
, V16QI_type_node
,
16782 pchar_type_node
, NULL_TREE
);
16783 tree v2df_ftype_pcdouble
16784 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16785 tree v2df_ftype_v2df_v2df
16786 = build_function_type_list (V2DF_type_node
,
16787 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16788 tree v16qi_ftype_v16qi_v16qi
16789 = build_function_type_list (V16QI_type_node
,
16790 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16791 tree v8hi_ftype_v8hi_v8hi
16792 = build_function_type_list (V8HI_type_node
,
16793 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16794 tree v4si_ftype_v4si_v4si
16795 = build_function_type_list (V4SI_type_node
,
16796 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16797 tree v2di_ftype_v2di_v2di
16798 = build_function_type_list (V2DI_type_node
,
16799 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16800 tree v2di_ftype_v2df_v2df
16801 = build_function_type_list (V2DI_type_node
,
16802 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16803 tree v2df_ftype_v2df
16804 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16805 tree v2di_ftype_v2di_int
16806 = build_function_type_list (V2DI_type_node
,
16807 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16808 tree v2di_ftype_v2di_v2di_int
16809 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16810 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16811 tree v4si_ftype_v4si_int
16812 = build_function_type_list (V4SI_type_node
,
16813 V4SI_type_node
, integer_type_node
, NULL_TREE
);
16814 tree v8hi_ftype_v8hi_int
16815 = build_function_type_list (V8HI_type_node
,
16816 V8HI_type_node
, integer_type_node
, NULL_TREE
);
16817 tree v8hi_ftype_v8hi_v2di
16818 = build_function_type_list (V8HI_type_node
,
16819 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
16820 tree v4si_ftype_v4si_v2di
16821 = build_function_type_list (V4SI_type_node
,
16822 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
16823 tree v4si_ftype_v8hi_v8hi
16824 = build_function_type_list (V4SI_type_node
,
16825 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16826 tree di_ftype_v8qi_v8qi
16827 = build_function_type_list (long_long_unsigned_type_node
,
16828 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16829 tree di_ftype_v2si_v2si
16830 = build_function_type_list (long_long_unsigned_type_node
,
16831 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16832 tree v2di_ftype_v16qi_v16qi
16833 = build_function_type_list (V2DI_type_node
,
16834 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16835 tree v2di_ftype_v4si_v4si
16836 = build_function_type_list (V2DI_type_node
,
16837 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16838 tree int_ftype_v16qi
16839 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
16840 tree v16qi_ftype_pcchar
16841 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
16842 tree void_ftype_pchar_v16qi
16843 = build_function_type_list (void_type_node
,
16844 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
16846 tree v2di_ftype_v2di_unsigned_unsigned
16847 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
16848 unsigned_type_node
, unsigned_type_node
,
16850 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16851 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
16852 unsigned_type_node
, unsigned_type_node
,
16854 tree v2di_ftype_v2di_v16qi
16855 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
16859 tree float128_type
;
16862 /* The __float80 type. */
16863 if (TYPE_MODE (long_double_type_node
) == XFmode
)
16864 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
16868 /* The __float80 type. */
16869 float80_type
= make_node (REAL_TYPE
);
16870 TYPE_PRECISION (float80_type
) = 80;
16871 layout_type (float80_type
);
16872 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
16877 float128_type
= make_node (REAL_TYPE
);
16878 TYPE_PRECISION (float128_type
) = 128;
16879 layout_type (float128_type
);
16880 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
16883 /* Add all builtins that are more or less simple operations on two
16885 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16887 /* Use one of the operands; the target can have a different mode for
16888 mask-generating compares. */
16889 enum machine_mode mode
;
16894 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16899 type
= v16qi_ftype_v16qi_v16qi
;
16902 type
= v8hi_ftype_v8hi_v8hi
;
16905 type
= v4si_ftype_v4si_v4si
;
16908 type
= v2di_ftype_v2di_v2di
;
16911 type
= v2df_ftype_v2df_v2df
;
16914 type
= v4sf_ftype_v4sf_v4sf
;
16917 type
= v8qi_ftype_v8qi_v8qi
;
16920 type
= v4hi_ftype_v4hi_v4hi
;
16923 type
= v2si_ftype_v2si_v2si
;
16926 type
= di_ftype_di_di
;
16930 gcc_unreachable ();
16933 /* Override for comparisons. */
16934 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16935 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
16936 type
= v4si_ftype_v4sf_v4sf
;
16938 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16939 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16940 type
= v2di_ftype_v2df_v2df
;
16942 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16945 /* Add all builtins that are more or less simple operations on 1 operand. */
16946 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16948 enum machine_mode mode
;
16953 mode
= insn_data
[d
->icode
].operand
[1].mode
;
16958 type
= v16qi_ftype_v16qi
;
16961 type
= v8hi_ftype_v8hi
;
16964 type
= v4si_ftype_v4si
;
16967 type
= v2df_ftype_v2df
;
16970 type
= v4sf_ftype_v4sf
;
16973 type
= v8qi_ftype_v8qi
;
16976 type
= v4hi_ftype_v4hi
;
16979 type
= v2si_ftype_v2si
;
16986 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
16989 /* Add the remaining MMX insns with somewhat more complicated types. */
16990 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
16991 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
16992 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
16993 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
16995 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
16996 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
16997 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
16999 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17000 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17002 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17003 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17005 /* comi/ucomi insns. */
17006 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17007 if (d
->mask
== MASK_SSE2
)
17008 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17010 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17012 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17013 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17014 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17016 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17017 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17018 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17019 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17020 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17021 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17022 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17023 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17024 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17025 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17026 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17028 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17030 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17031 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17033 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17034 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17035 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17036 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17038 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17039 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17040 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17041 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17043 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17045 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17047 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17048 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17049 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17050 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17051 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17052 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17054 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17056 /* Original 3DNow! */
17057 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17058 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17059 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17060 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17061 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17062 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17063 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17064 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17065 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17066 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17067 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17068 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17069 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17070 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17071 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17072 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17073 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17074 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17075 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17076 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17078 /* 3DNow! extension as used in the Athlon CPU. */
17079 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17080 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17081 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17082 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17083 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17084 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17087 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17089 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17090 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17092 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17093 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17095 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17096 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17097 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17098 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17099 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17101 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17102 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17103 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17104 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17106 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17107 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17109 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17111 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17112 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17114 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17115 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17116 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17117 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17118 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17120 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17122 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17123 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17124 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17125 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17127 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17128 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17129 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17131 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17132 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17133 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17134 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17136 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17137 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17138 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17140 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17141 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17143 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17144 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17146 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17147 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17148 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17150 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17151 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17152 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17154 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17155 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17157 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17158 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17159 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17160 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17162 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17163 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17164 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17165 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17167 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17168 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17170 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17172 /* Prescott New Instructions. */
17173 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17174 void_ftype_pcvoid_unsigned_unsigned
,
17175 IX86_BUILTIN_MONITOR
);
17176 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17177 void_ftype_unsigned_unsigned
,
17178 IX86_BUILTIN_MWAIT
);
17179 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17180 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17183 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17184 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17185 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17186 IX86_BUILTIN_PALIGNR
);
17188 /* AMDFAM10 SSE4A New built-ins */
17189 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17190 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17191 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17192 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17193 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17194 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17195 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17196 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17197 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17198 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17199 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17200 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17202 /* Access to the vec_init patterns. */
17203 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17204 integer_type_node
, NULL_TREE
);
17205 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17206 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17208 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17209 short_integer_type_node
,
17210 short_integer_type_node
,
17211 short_integer_type_node
, NULL_TREE
);
17212 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17213 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17215 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17216 char_type_node
, char_type_node
,
17217 char_type_node
, char_type_node
,
17218 char_type_node
, char_type_node
,
17219 char_type_node
, NULL_TREE
);
17220 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17221 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17223 /* Access to the vec_extract patterns. */
17224 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17225 integer_type_node
, NULL_TREE
);
17226 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17227 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17229 ftype
= build_function_type_list (long_long_integer_type_node
,
17230 V2DI_type_node
, integer_type_node
,
17232 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17233 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17235 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17236 integer_type_node
, NULL_TREE
);
17237 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17238 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17240 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17241 integer_type_node
, NULL_TREE
);
17242 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17243 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17245 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17246 integer_type_node
, NULL_TREE
);
17247 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17248 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17250 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17251 integer_type_node
, NULL_TREE
);
17252 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17253 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17255 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17256 integer_type_node
, NULL_TREE
);
17257 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17258 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17260 /* Access to the vec_set patterns. */
17261 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17263 integer_type_node
, NULL_TREE
);
17264 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17265 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17267 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17269 integer_type_node
, NULL_TREE
);
17270 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17271 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17274 /* Errors in the source file can cause expand_expr to return const0_rtx
17275 where we expect a vector. To avoid crashing, use one of the vector
17276 clear instructions. */
17278 safe_vector_operand (rtx x
, enum machine_mode mode
)
17280 if (x
== const0_rtx
)
17281 x
= CONST0_RTX (mode
);
17285 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17288 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17291 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17292 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17293 rtx op0
= expand_normal (arg0
);
17294 rtx op1
= expand_normal (arg1
);
17295 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17296 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17297 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17299 if (VECTOR_MODE_P (mode0
))
17300 op0
= safe_vector_operand (op0
, mode0
);
17301 if (VECTOR_MODE_P (mode1
))
17302 op1
= safe_vector_operand (op1
, mode1
);
17304 if (optimize
|| !target
17305 || GET_MODE (target
) != tmode
17306 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17307 target
= gen_reg_rtx (tmode
);
17309 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17311 rtx x
= gen_reg_rtx (V4SImode
);
17312 emit_insn (gen_sse2_loadd (x
, op1
));
17313 op1
= gen_lowpart (TImode
, x
);
17316 /* The insn must want input operands in the same modes as the
17318 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17319 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17321 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17322 op0
= copy_to_mode_reg (mode0
, op0
);
17323 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17324 op1
= copy_to_mode_reg (mode1
, op1
);
17326 /* ??? Using ix86_fixup_binary_operands is problematic when
17327 we've got mismatched modes. Fake it. */
17333 if (tmode
== mode0
&& tmode
== mode1
)
17335 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17339 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17341 op0
= force_reg (mode0
, op0
);
17342 op1
= force_reg (mode1
, op1
);
17343 target
= gen_reg_rtx (tmode
);
17346 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17353 /* Subroutine of ix86_expand_builtin to take care of stores. */
17356 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17359 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17360 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17361 rtx op0
= expand_normal (arg0
);
17362 rtx op1
= expand_normal (arg1
);
17363 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17364 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17366 if (VECTOR_MODE_P (mode1
))
17367 op1
= safe_vector_operand (op1
, mode1
);
17369 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17370 op1
= copy_to_mode_reg (mode1
, op1
);
17372 pat
= GEN_FCN (icode
) (op0
, op1
);
17378 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17381 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17382 rtx target
, int do_load
)
17385 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17386 rtx op0
= expand_normal (arg0
);
17387 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17388 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17390 if (optimize
|| !target
17391 || GET_MODE (target
) != tmode
17392 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17393 target
= gen_reg_rtx (tmode
);
17395 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17398 if (VECTOR_MODE_P (mode0
))
17399 op0
= safe_vector_operand (op0
, mode0
);
17401 if ((optimize
&& !register_operand (op0
, mode0
))
17402 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17403 op0
= copy_to_mode_reg (mode0
, op0
);
17406 pat
= GEN_FCN (icode
) (target
, op0
);
17413 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17414 sqrtss, rsqrtss, rcpss. */
17417 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17420 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17421 rtx op1
, op0
= expand_normal (arg0
);
17422 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17423 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17425 if (optimize
|| !target
17426 || GET_MODE (target
) != tmode
17427 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17428 target
= gen_reg_rtx (tmode
);
17430 if (VECTOR_MODE_P (mode0
))
17431 op0
= safe_vector_operand (op0
, mode0
);
17433 if ((optimize
&& !register_operand (op0
, mode0
))
17434 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17435 op0
= copy_to_mode_reg (mode0
, op0
);
17438 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17439 op1
= copy_to_mode_reg (mode0
, op1
);
17441 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17448 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17451 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17455 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17456 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17457 rtx op0
= expand_normal (arg0
);
17458 rtx op1
= expand_normal (arg1
);
17460 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17461 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17462 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17463 enum rtx_code comparison
= d
->comparison
;
17465 if (VECTOR_MODE_P (mode0
))
17466 op0
= safe_vector_operand (op0
, mode0
);
17467 if (VECTOR_MODE_P (mode1
))
17468 op1
= safe_vector_operand (op1
, mode1
);
17470 /* Swap operands if we have a comparison that isn't available in
17472 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17474 rtx tmp
= gen_reg_rtx (mode1
);
17475 emit_move_insn (tmp
, op1
);
17480 if (optimize
|| !target
17481 || GET_MODE (target
) != tmode
17482 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17483 target
= gen_reg_rtx (tmode
);
17485 if ((optimize
&& !register_operand (op0
, mode0
))
17486 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17487 op0
= copy_to_mode_reg (mode0
, op0
);
17488 if ((optimize
&& !register_operand (op1
, mode1
))
17489 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17490 op1
= copy_to_mode_reg (mode1
, op1
);
17492 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17493 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17500 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17503 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17507 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17508 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17509 rtx op0
= expand_normal (arg0
);
17510 rtx op1
= expand_normal (arg1
);
17512 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17513 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17514 enum rtx_code comparison
= d
->comparison
;
17516 if (VECTOR_MODE_P (mode0
))
17517 op0
= safe_vector_operand (op0
, mode0
);
17518 if (VECTOR_MODE_P (mode1
))
17519 op1
= safe_vector_operand (op1
, mode1
);
17521 /* Swap operands if we have a comparison that isn't available in
17523 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17530 target
= gen_reg_rtx (SImode
);
17531 emit_move_insn (target
, const0_rtx
);
17532 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17534 if ((optimize
&& !register_operand (op0
, mode0
))
17535 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17536 op0
= copy_to_mode_reg (mode0
, op0
);
17537 if ((optimize
&& !register_operand (op1
, mode1
))
17538 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17539 op1
= copy_to_mode_reg (mode1
, op1
);
17541 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17542 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17546 emit_insn (gen_rtx_SET (VOIDmode
,
17547 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17548 gen_rtx_fmt_ee (comparison
, QImode
,
17552 return SUBREG_REG (target
);
17555 /* Return the integer constant in ARG. Constrain it to be in the range
17556 of the subparts of VEC_TYPE; issue an error if not. */
17559 get_element_number (tree vec_type
, tree arg
)
17561 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17563 if (!host_integerp (arg
, 1)
17564 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17566 error ("selector must be an integer constant in the range 0..%wi", max
);
17573 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17574 ix86_expand_vector_init. We DO have language-level syntax for this, in
17575 the form of (type){ init-list }. Except that since we can't place emms
17576 instructions from inside the compiler, we can't allow the use of MMX
17577 registers unless the user explicitly asks for it. So we do *not* define
17578 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17579 we have builtins invoked by mmintrin.h that gives us license to emit
17580 these sorts of instructions. */
17583 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17585 enum machine_mode tmode
= TYPE_MODE (type
);
17586 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17587 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17588 rtvec v
= rtvec_alloc (n_elt
);
17590 gcc_assert (VECTOR_MODE_P (tmode
));
17591 gcc_assert (call_expr_nargs (exp
) == n_elt
);
17593 for (i
= 0; i
< n_elt
; ++i
)
17595 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17596 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17599 if (!target
|| !register_operand (target
, tmode
))
17600 target
= gen_reg_rtx (tmode
);
17602 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17606 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17607 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17608 had a language-level syntax for referencing vector elements. */
17611 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17613 enum machine_mode tmode
, mode0
;
17618 arg0
= CALL_EXPR_ARG (exp
, 0);
17619 arg1
= CALL_EXPR_ARG (exp
, 1);
17621 op0
= expand_normal (arg0
);
17622 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17624 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17625 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17626 gcc_assert (VECTOR_MODE_P (mode0
));
17628 op0
= force_reg (mode0
, op0
);
17630 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17631 target
= gen_reg_rtx (tmode
);
17633 ix86_expand_vector_extract (true, target
, op0
, elt
);
17638 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17639 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17640 a language-level syntax for referencing vector elements. */
17643 ix86_expand_vec_set_builtin (tree exp
)
17645 enum machine_mode tmode
, mode1
;
17646 tree arg0
, arg1
, arg2
;
17650 arg0
= CALL_EXPR_ARG (exp
, 0);
17651 arg1
= CALL_EXPR_ARG (exp
, 1);
17652 arg2
= CALL_EXPR_ARG (exp
, 2);
17654 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17655 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17656 gcc_assert (VECTOR_MODE_P (tmode
));
17658 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17659 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17660 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17662 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17663 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17665 op0
= force_reg (tmode
, op0
);
17666 op1
= force_reg (mode1
, op1
);
17668 ix86_expand_vector_set (true, op0
, op1
, elt
);
17673 /* Expand an expression EXP that calls a built-in function,
17674 with result going to TARGET if that's convenient
17675 (and in mode MODE if that's convenient).
17676 SUBTARGET may be used as the target for computing one of EXP's operands.
17677 IGNORE is nonzero if the value is to be ignored. */
17680 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17681 enum machine_mode mode ATTRIBUTE_UNUSED
,
17682 int ignore ATTRIBUTE_UNUSED
)
17684 const struct builtin_description
*d
;
17686 enum insn_code icode
;
17687 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17688 tree arg0
, arg1
, arg2
, arg3
;
17689 rtx op0
, op1
, op2
, op3
, pat
;
17690 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17691 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17695 case IX86_BUILTIN_EMMS
:
17696 emit_insn (gen_mmx_emms ());
17699 case IX86_BUILTIN_SFENCE
:
17700 emit_insn (gen_sse_sfence ());
17703 case IX86_BUILTIN_MASKMOVQ
:
17704 case IX86_BUILTIN_MASKMOVDQU
:
17705 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17706 ? CODE_FOR_mmx_maskmovq
17707 : CODE_FOR_sse2_maskmovdqu
);
17708 /* Note the arg order is different from the operand order. */
17709 arg1
= CALL_EXPR_ARG (exp
, 0);
17710 arg2
= CALL_EXPR_ARG (exp
, 1);
17711 arg0
= CALL_EXPR_ARG (exp
, 2);
17712 op0
= expand_normal (arg0
);
17713 op1
= expand_normal (arg1
);
17714 op2
= expand_normal (arg2
);
17715 mode0
= insn_data
[icode
].operand
[0].mode
;
17716 mode1
= insn_data
[icode
].operand
[1].mode
;
17717 mode2
= insn_data
[icode
].operand
[2].mode
;
17719 op0
= force_reg (Pmode
, op0
);
17720 op0
= gen_rtx_MEM (mode1
, op0
);
17722 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17723 op0
= copy_to_mode_reg (mode0
, op0
);
17724 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17725 op1
= copy_to_mode_reg (mode1
, op1
);
17726 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17727 op2
= copy_to_mode_reg (mode2
, op2
);
17728 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17734 case IX86_BUILTIN_SQRTSS
:
17735 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17736 case IX86_BUILTIN_RSQRTSS
:
17737 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17738 case IX86_BUILTIN_RCPSS
:
17739 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17741 case IX86_BUILTIN_LOADUPS
:
17742 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17744 case IX86_BUILTIN_STOREUPS
:
17745 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17747 case IX86_BUILTIN_LOADHPS
:
17748 case IX86_BUILTIN_LOADLPS
:
17749 case IX86_BUILTIN_LOADHPD
:
17750 case IX86_BUILTIN_LOADLPD
:
17751 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17752 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17753 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17754 : CODE_FOR_sse2_loadlpd
);
17755 arg0
= CALL_EXPR_ARG (exp
, 0);
17756 arg1
= CALL_EXPR_ARG (exp
, 1);
17757 op0
= expand_normal (arg0
);
17758 op1
= expand_normal (arg1
);
17759 tmode
= insn_data
[icode
].operand
[0].mode
;
17760 mode0
= insn_data
[icode
].operand
[1].mode
;
17761 mode1
= insn_data
[icode
].operand
[2].mode
;
17763 op0
= force_reg (mode0
, op0
);
17764 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17765 if (optimize
|| target
== 0
17766 || GET_MODE (target
) != tmode
17767 || !register_operand (target
, tmode
))
17768 target
= gen_reg_rtx (tmode
);
17769 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17775 case IX86_BUILTIN_STOREHPS
:
17776 case IX86_BUILTIN_STORELPS
:
17777 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17778 : CODE_FOR_sse_storelps
);
17779 arg0
= CALL_EXPR_ARG (exp
, 0);
17780 arg1
= CALL_EXPR_ARG (exp
, 1);
17781 op0
= expand_normal (arg0
);
17782 op1
= expand_normal (arg1
);
17783 mode0
= insn_data
[icode
].operand
[0].mode
;
17784 mode1
= insn_data
[icode
].operand
[1].mode
;
17786 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17787 op1
= force_reg (mode1
, op1
);
17789 pat
= GEN_FCN (icode
) (op0
, op1
);
17795 case IX86_BUILTIN_MOVNTPS
:
17796 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
17797 case IX86_BUILTIN_MOVNTQ
:
17798 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
17800 case IX86_BUILTIN_LDMXCSR
:
17801 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
17802 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17803 emit_move_insn (target
, op0
);
17804 emit_insn (gen_sse_ldmxcsr (target
));
17807 case IX86_BUILTIN_STMXCSR
:
17808 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17809 emit_insn (gen_sse_stmxcsr (target
));
17810 return copy_to_mode_reg (SImode
, target
);
17812 case IX86_BUILTIN_SHUFPS
:
17813 case IX86_BUILTIN_SHUFPD
:
17814 icode
= (fcode
== IX86_BUILTIN_SHUFPS
17815 ? CODE_FOR_sse_shufps
17816 : CODE_FOR_sse2_shufpd
);
17817 arg0
= CALL_EXPR_ARG (exp
, 0);
17818 arg1
= CALL_EXPR_ARG (exp
, 1);
17819 arg2
= CALL_EXPR_ARG (exp
, 2);
17820 op0
= expand_normal (arg0
);
17821 op1
= expand_normal (arg1
);
17822 op2
= expand_normal (arg2
);
17823 tmode
= insn_data
[icode
].operand
[0].mode
;
17824 mode0
= insn_data
[icode
].operand
[1].mode
;
17825 mode1
= insn_data
[icode
].operand
[2].mode
;
17826 mode2
= insn_data
[icode
].operand
[3].mode
;
17828 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17829 op0
= copy_to_mode_reg (mode0
, op0
);
17830 if ((optimize
&& !register_operand (op1
, mode1
))
17831 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17832 op1
= copy_to_mode_reg (mode1
, op1
);
17833 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17835 /* @@@ better error message */
17836 error ("mask must be an immediate");
17837 return gen_reg_rtx (tmode
);
17839 if (optimize
|| target
== 0
17840 || GET_MODE (target
) != tmode
17841 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17842 target
= gen_reg_rtx (tmode
);
17843 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
17849 case IX86_BUILTIN_PSHUFW
:
17850 case IX86_BUILTIN_PSHUFD
:
17851 case IX86_BUILTIN_PSHUFHW
:
17852 case IX86_BUILTIN_PSHUFLW
:
17853 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
17854 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
17855 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
17856 : CODE_FOR_mmx_pshufw
);
17857 arg0
= CALL_EXPR_ARG (exp
, 0);
17858 arg1
= CALL_EXPR_ARG (exp
, 1);
17859 op0
= expand_normal (arg0
);
17860 op1
= expand_normal (arg1
);
17861 tmode
= insn_data
[icode
].operand
[0].mode
;
17862 mode1
= insn_data
[icode
].operand
[1].mode
;
17863 mode2
= insn_data
[icode
].operand
[2].mode
;
17865 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17866 op0
= copy_to_mode_reg (mode1
, op0
);
17867 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17869 /* @@@ better error message */
17870 error ("mask must be an immediate");
17874 || GET_MODE (target
) != tmode
17875 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17876 target
= gen_reg_rtx (tmode
);
17877 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17883 case IX86_BUILTIN_PSLLDQI128
:
17884 case IX86_BUILTIN_PSRLDQI128
:
17885 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
17886 : CODE_FOR_sse2_lshrti3
);
17887 arg0
= CALL_EXPR_ARG (exp
, 0);
17888 arg1
= CALL_EXPR_ARG (exp
, 1);
17889 op0
= expand_normal (arg0
);
17890 op1
= expand_normal (arg1
);
17891 tmode
= insn_data
[icode
].operand
[0].mode
;
17892 mode1
= insn_data
[icode
].operand
[1].mode
;
17893 mode2
= insn_data
[icode
].operand
[2].mode
;
17895 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
17897 op0
= copy_to_reg (op0
);
17898 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
17900 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
17902 error ("shift must be an immediate");
17905 target
= gen_reg_rtx (V2DImode
);
17906 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
17912 case IX86_BUILTIN_FEMMS
:
17913 emit_insn (gen_mmx_femms ());
17916 case IX86_BUILTIN_PAVGUSB
:
17917 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
17919 case IX86_BUILTIN_PF2ID
:
17920 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
17922 case IX86_BUILTIN_PFACC
:
17923 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
17925 case IX86_BUILTIN_PFADD
:
17926 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
17928 case IX86_BUILTIN_PFCMPEQ
:
17929 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
17931 case IX86_BUILTIN_PFCMPGE
:
17932 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
17934 case IX86_BUILTIN_PFCMPGT
:
17935 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
17937 case IX86_BUILTIN_PFMAX
:
17938 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
17940 case IX86_BUILTIN_PFMIN
:
17941 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
17943 case IX86_BUILTIN_PFMUL
:
17944 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
17946 case IX86_BUILTIN_PFRCP
:
17947 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
17949 case IX86_BUILTIN_PFRCPIT1
:
17950 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
17952 case IX86_BUILTIN_PFRCPIT2
:
17953 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
17955 case IX86_BUILTIN_PFRSQIT1
:
17956 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
17958 case IX86_BUILTIN_PFRSQRT
:
17959 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
17961 case IX86_BUILTIN_PFSUB
:
17962 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
17964 case IX86_BUILTIN_PFSUBR
:
17965 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
17967 case IX86_BUILTIN_PI2FD
:
17968 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
17970 case IX86_BUILTIN_PMULHRW
:
17971 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
17973 case IX86_BUILTIN_PF2IW
:
17974 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
17976 case IX86_BUILTIN_PFNACC
:
17977 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
17979 case IX86_BUILTIN_PFPNACC
:
17980 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
17982 case IX86_BUILTIN_PI2FW
:
17983 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
17985 case IX86_BUILTIN_PSWAPDSI
:
17986 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
17988 case IX86_BUILTIN_PSWAPDSF
:
17989 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
17991 case IX86_BUILTIN_SQRTSD
:
17992 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
17993 case IX86_BUILTIN_LOADUPD
:
17994 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
17995 case IX86_BUILTIN_STOREUPD
:
17996 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
17998 case IX86_BUILTIN_MFENCE
:
17999 emit_insn (gen_sse2_mfence ());
18001 case IX86_BUILTIN_LFENCE
:
18002 emit_insn (gen_sse2_lfence ());
18005 case IX86_BUILTIN_CLFLUSH
:
18006 arg0
= CALL_EXPR_ARG (exp
, 0);
18007 op0
= expand_normal (arg0
);
18008 icode
= CODE_FOR_sse2_clflush
;
18009 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18010 op0
= copy_to_mode_reg (Pmode
, op0
);
18012 emit_insn (gen_sse2_clflush (op0
));
18015 case IX86_BUILTIN_MOVNTPD
:
18016 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18017 case IX86_BUILTIN_MOVNTDQ
:
18018 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18019 case IX86_BUILTIN_MOVNTI
:
18020 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18022 case IX86_BUILTIN_LOADDQU
:
18023 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18024 case IX86_BUILTIN_STOREDQU
:
18025 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18027 case IX86_BUILTIN_MONITOR
:
18028 arg0
= CALL_EXPR_ARG (exp
, 0);
18029 arg1
= CALL_EXPR_ARG (exp
, 1);
18030 arg2
= CALL_EXPR_ARG (exp
, 2);
18031 op0
= expand_normal (arg0
);
18032 op1
= expand_normal (arg1
);
18033 op2
= expand_normal (arg2
);
18035 op0
= copy_to_mode_reg (Pmode
, op0
);
18037 op1
= copy_to_mode_reg (SImode
, op1
);
18039 op2
= copy_to_mode_reg (SImode
, op2
);
18041 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18043 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18046 case IX86_BUILTIN_MWAIT
:
18047 arg0
= CALL_EXPR_ARG (exp
, 0);
18048 arg1
= CALL_EXPR_ARG (exp
, 1);
18049 op0
= expand_normal (arg0
);
18050 op1
= expand_normal (arg1
);
18052 op0
= copy_to_mode_reg (SImode
, op0
);
18054 op1
= copy_to_mode_reg (SImode
, op1
);
18055 emit_insn (gen_sse3_mwait (op0
, op1
));
18058 case IX86_BUILTIN_LDDQU
:
18059 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18062 case IX86_BUILTIN_PALIGNR
:
18063 case IX86_BUILTIN_PALIGNR128
:
18064 if (fcode
== IX86_BUILTIN_PALIGNR
)
18066 icode
= CODE_FOR_ssse3_palignrdi
;
18071 icode
= CODE_FOR_ssse3_palignrti
;
18074 arg0
= CALL_EXPR_ARG (exp
, 0);
18075 arg1
= CALL_EXPR_ARG (exp
, 1);
18076 arg2
= CALL_EXPR_ARG (exp
, 2);
18077 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18078 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18079 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18080 tmode
= insn_data
[icode
].operand
[0].mode
;
18081 mode1
= insn_data
[icode
].operand
[1].mode
;
18082 mode2
= insn_data
[icode
].operand
[2].mode
;
18083 mode3
= insn_data
[icode
].operand
[3].mode
;
18085 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18087 op0
= copy_to_reg (op0
);
18088 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18090 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18092 op1
= copy_to_reg (op1
);
18093 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18095 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18097 error ("shift must be an immediate");
18100 target
= gen_reg_rtx (mode
);
18101 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18108 case IX86_BUILTIN_MOVNTSD
:
18109 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18111 case IX86_BUILTIN_MOVNTSS
:
18112 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18114 case IX86_BUILTIN_INSERTQ
:
18115 case IX86_BUILTIN_EXTRQ
:
18116 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18117 ? CODE_FOR_sse4a_extrq
18118 : CODE_FOR_sse4a_insertq
);
18119 arg0
= CALL_EXPR_ARG (exp
, 0);
18120 arg1
= CALL_EXPR_ARG (exp
, 1);
18121 op0
= expand_normal (arg0
);
18122 op1
= expand_normal (arg1
);
18123 tmode
= insn_data
[icode
].operand
[0].mode
;
18124 mode1
= insn_data
[icode
].operand
[1].mode
;
18125 mode2
= insn_data
[icode
].operand
[2].mode
;
18126 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18127 op0
= copy_to_mode_reg (mode1
, op0
);
18128 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18129 op1
= copy_to_mode_reg (mode2
, op1
);
18130 if (optimize
|| target
== 0
18131 || GET_MODE (target
) != tmode
18132 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18133 target
= gen_reg_rtx (tmode
);
18134 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18140 case IX86_BUILTIN_EXTRQI
:
18141 icode
= CODE_FOR_sse4a_extrqi
;
18142 arg0
= CALL_EXPR_ARG (exp
, 0);
18143 arg1
= CALL_EXPR_ARG (exp
, 1);
18144 arg2
= CALL_EXPR_ARG (exp
, 2);
18145 op0
= expand_normal (arg0
);
18146 op1
= expand_normal (arg1
);
18147 op2
= expand_normal (arg2
);
18148 tmode
= insn_data
[icode
].operand
[0].mode
;
18149 mode1
= insn_data
[icode
].operand
[1].mode
;
18150 mode2
= insn_data
[icode
].operand
[2].mode
;
18151 mode3
= insn_data
[icode
].operand
[3].mode
;
18152 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18153 op0
= copy_to_mode_reg (mode1
, op0
);
18154 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18156 error ("index mask must be an immediate");
18157 return gen_reg_rtx (tmode
);
18159 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18161 error ("length mask must be an immediate");
18162 return gen_reg_rtx (tmode
);
18164 if (optimize
|| target
== 0
18165 || GET_MODE (target
) != tmode
18166 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18167 target
= gen_reg_rtx (tmode
);
18168 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18174 case IX86_BUILTIN_INSERTQI
:
18175 icode
= CODE_FOR_sse4a_insertqi
;
18176 arg0
= CALL_EXPR_ARG (exp
, 0);
18177 arg1
= CALL_EXPR_ARG (exp
, 1);
18178 arg2
= CALL_EXPR_ARG (exp
, 2);
18179 arg3
= CALL_EXPR_ARG (exp
, 3);
18180 op0
= expand_normal (arg0
);
18181 op1
= expand_normal (arg1
);
18182 op2
= expand_normal (arg2
);
18183 op3
= expand_normal (arg3
);
18184 tmode
= insn_data
[icode
].operand
[0].mode
;
18185 mode1
= insn_data
[icode
].operand
[1].mode
;
18186 mode2
= insn_data
[icode
].operand
[2].mode
;
18187 mode3
= insn_data
[icode
].operand
[3].mode
;
18188 mode4
= insn_data
[icode
].operand
[4].mode
;
18190 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18191 op0
= copy_to_mode_reg (mode1
, op0
);
18193 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18194 op1
= copy_to_mode_reg (mode2
, op1
);
18196 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18198 error ("index mask must be an immediate");
18199 return gen_reg_rtx (tmode
);
18201 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18203 error ("length mask must be an immediate");
18204 return gen_reg_rtx (tmode
);
18206 if (optimize
|| target
== 0
18207 || GET_MODE (target
) != tmode
18208 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18209 target
= gen_reg_rtx (tmode
);
18210 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18216 case IX86_BUILTIN_VEC_INIT_V2SI
:
18217 case IX86_BUILTIN_VEC_INIT_V4HI
:
18218 case IX86_BUILTIN_VEC_INIT_V8QI
:
18219 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18221 case IX86_BUILTIN_VEC_EXT_V2DF
:
18222 case IX86_BUILTIN_VEC_EXT_V2DI
:
18223 case IX86_BUILTIN_VEC_EXT_V4SF
:
18224 case IX86_BUILTIN_VEC_EXT_V4SI
:
18225 case IX86_BUILTIN_VEC_EXT_V8HI
:
18226 case IX86_BUILTIN_VEC_EXT_V2SI
:
18227 case IX86_BUILTIN_VEC_EXT_V4HI
:
18228 return ix86_expand_vec_ext_builtin (exp
, target
);
18230 case IX86_BUILTIN_VEC_SET_V8HI
:
18231 case IX86_BUILTIN_VEC_SET_V4HI
:
18232 return ix86_expand_vec_set_builtin (exp
);
18238 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18239 if (d
->code
== fcode
)
18241 /* Compares are treated specially. */
18242 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18243 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18244 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18245 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18246 return ix86_expand_sse_compare (d
, exp
, target
);
18248 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18251 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18252 if (d
->code
== fcode
)
18253 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18255 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18256 if (d
->code
== fcode
)
18257 return ix86_expand_sse_comi (d
, exp
, target
);
18259 gcc_unreachable ();
18262 /* Returns a function decl for a vectorized version of the builtin function
18263 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18264 if it is not available. */
/* NOTE(review): this extraction is lossy -- the return type, the TYPE_IN
   parameter continuation, the `switch (fn)' opener, braces, `break;'s and
   the final NULL_TREE return are all missing (gaps in the original line
   numbering).  Consult the pristine i386.c before modifying.  */
18267 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18270 enum machine_mode in_mode
, out_mode
;
/* Only vector-to-vector mappings are provided; reject scalar types.  */
18273 if (TREE_CODE (type_out
) != VECTOR_TYPE
18274 || TREE_CODE (type_in
) != VECTOR_TYPE
)
/* Element modes and lane counts of the result and input vector types.  */
18277 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18278 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18279 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18280 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
/* sqrt on 2 doubles maps to the SSE2 SQRTPD builtin.  */
18284 case BUILT_IN_SQRT
:
18285 if (out_mode
== DFmode
&& out_n
== 2
18286 && in_mode
== DFmode
&& in_n
== 2)
18287 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
/* sqrtf on 4 floats maps to the SSE SQRTPS builtin.  */
18290 case BUILT_IN_SQRTF
:
18291 if (out_mode
== SFmode
&& out_n
== 4
18292 && in_mode
== SFmode
&& in_n
== 4)
18293 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
/* lrintf of 4 floats to 4 ints maps to CVTPS2DQ.  */
18296 case BUILT_IN_LRINTF
:
18297 if (out_mode
== SImode
&& out_n
== 4
18298 && in_mode
== SFmode
&& in_n
== 4)
18299 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18309 /* Returns a decl of a function that implements conversion of the
18310 input vector of type TYPE, or NULL_TREE if it is not available. */
/* NOTE(review): lossy extraction -- the enclosing `switch (code)' with its
   FLOAT_EXPR case label, the inner case labels (presumably V4SImode /
   V4SFmode), braces and default returns are not visible here.  */
18313 ix86_builtin_conversion (enum tree_code code
, tree type
)
/* Only vector types have vectorized conversion builtins.  */
18315 if (TREE_CODE (type
) != VECTOR_TYPE
)
/* int->float conversion: CVTDQ2PS.  */
18321 switch (TYPE_MODE (type
))
18324 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
/* Truncating float->int conversion: CVTTPS2DQ.  */
18329 case FIX_TRUNC_EXPR
:
18330 switch (TYPE_MODE (type
))
18333 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18343 /* Store OPERAND to the memory after reload is completed. This means
18344 that we can't easily use assign_stack_local. */
/* NOTE(review): lossy extraction -- the return type, declarations of
   `result' and `operands[2]', braces, the mode `switch', `emit_insn'
   wrappers around the gen_rtx_SET push sequences and the final
   `return result;' are missing.  Edit only against the pristine file.  */
18346 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
/* Only legal after reload: we address relative to the hard stack pointer.  */
18350 gcc_assert (reload_completed
);
/* With a red zone we may store below the stack pointer without
   adjusting it.  */
18351 if (TARGET_RED_ZONE
)
18353 result
= gen_rtx_MEM (mode
,
18354 gen_rtx_PLUS (Pmode
,
18356 GEN_INT (-RED_ZONE_SIZE
)));
18357 emit_move_insn (result
, operand
);
/* No red zone on 64-bit: push the value (widened to DImode) instead.  */
18359 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18365 operand
= gen_lowpart (DImode
, operand
);
/* Push via a pre-decrement store through the stack pointer.  */
18369 gen_rtx_SET (VOIDmode
,
18370 gen_rtx_MEM (DImode
,
18371 gen_rtx_PRE_DEC (DImode
,
18372 stack_pointer_rtx
)),
18376 gcc_unreachable ();
18378 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* 32-bit DImode path: split into two SImode words and push both halves.  */
18387 split_di (&operand
, 1, operands
, operands
+ 1);
18389 gen_rtx_SET (VOIDmode
,
18390 gen_rtx_MEM (SImode
,
18391 gen_rtx_PRE_DEC (Pmode
,
18392 stack_pointer_rtx
)),
18395 gen_rtx_SET (VOIDmode
,
18396 gen_rtx_MEM (SImode
,
18397 gen_rtx_PRE_DEC (Pmode
,
18398 stack_pointer_rtx
)),
18403 /* Store HImodes as SImodes. */
18404 operand
= gen_lowpart (SImode
, operand
);
18408 gen_rtx_SET (VOIDmode
,
18409 gen_rtx_MEM (GET_MODE (operand
),
18410 gen_rtx_PRE_DEC (SImode
,
18411 stack_pointer_rtx
)),
18415 gcc_unreachable ();
/* The pushed slot at the (new) top of stack is the result location.  */
18417 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18422 /* Free operand from the memory. */
/* NOTE(review): lossy extraction -- the return type, `int size'
   computation, braces and the closing of the emit_insn call are
   missing here.  Pairs with ix86_force_to_memory above.  */
18424 ix86_free_from_memory (enum machine_mode mode
)
/* With a red zone nothing was pushed, so there is nothing to pop.  */
18426 if (!TARGET_RED_ZONE
)
/* Slot size presumably depends on MODE (DImode/64-bit uses a larger
   slot) -- the actual size computation is not visible here.  */
18430 if (mode
== DImode
|| TARGET_64BIT
)
18434 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18435 to pop or add instruction if registers are available. */
18436 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18437 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18442 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18443 QImode must go into class Q_REGS.
18444 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18445 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): lossy extraction -- the `enum reg_class' return type,
   braces and several `return' lines (e.g. the NO_REGS / class fall-through
   returns) are missing between the visible lines.  */
18447 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18449 enum machine_mode mode
= GET_MODE (x
);
18451 /* We're only allowed to return a subclass of CLASS. Many of the
18452 following checks fail for NO_REGS, so eliminate that early. */
18453 if (class == NO_REGS
)
18456 /* All classes can load zeros. */
18457 if (x
== CONST0_RTX (mode
))
18460 /* Force constants into memory if we are loading a (nonzero) constant into
18461 an MMX or SSE register. This is because there are no MMX/SSE instructions
18462 to load from a constant. */
18464 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18467 /* Prefer SSE regs only, if we can use them for math. */
18468 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18469 return SSE_CLASS_P (class) ? class : NO_REGS
;
18471 /* Floating-point constants need more complex checks. */
18472 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18474 /* General regs can load everything. */
18475 if (reg_class_subset_p (class, GENERAL_REGS
))
18478 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18479 zero above. We only want to wind up preferring 80387 registers if
18480 we plan on doing computation with them. */
18482 && standard_80387_constant_p (x
))
18484 /* Limit class to non-sse. */
18485 if (class == FLOAT_SSE_REGS
)
18487 if (class == FP_TOP_SSE_REGS
)
18489 if (class == FP_SECOND_SSE_REGS
)
18490 return FP_SECOND_REG
;
18491 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18498 /* Generally when we see PLUS here, it's the function invariant
18499 (plus soft-fp const_int). Which can only be computed into general
18501 if (GET_CODE (x
) == PLUS
)
18502 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18504 /* QImode constants are easy to load, but non-constant QImode data
18505 must go into Q_REGS. */
18506 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18508 if (reg_class_subset_p (class, Q_REGS
))
18510 if (reg_class_subset_p (Q_REGS
, class))
18518 /* Discourage putting floating-point values in SSE registers unless
18519 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): lossy extraction -- the return type, braces and
   a couple of `return' lines are missing between the visible lines.  */
18521 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18523 enum machine_mode mode
= GET_MODE (x
);
18525 /* Restrict the output reload class to the register bank that we are doing
18526 math on. If we would like not to return a subset of CLASS, reject this
18527 alternative: if reload cannot do this, it will still use its choice. */
/* Redundant with the initializer above -- MODE is reassigned to the
   same value; preserved as-is in this doc-only pass.  */
18528 mode
= GET_MODE (x
);
/* When SSE math is enabled, float values belong in SSE registers only.  */
18529 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18530 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
/* Otherwise scalar floats go to the 387 stack; strip the SSE part off
   mixed classes before returning.  */
18532 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18534 if (class == FP_TOP_SSE_REGS
)
18536 else if (class == FP_SECOND_SSE_REGS
)
18537 return FP_SECOND_REG
;
18539 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18545 /* If we are copying between general and FP registers, we need a memory
18546 location. The same is true for SSE and MMX registers.
18548 The macro can't work reliably when one of the CLASSES is class containing
18549 registers from multiple units (SSE, MMX, integer). We avoid this by never
18550 combining those units in single alternative in the machine description.
18551 Ensure that this constraint holds to avoid unexpected surprises.
18553 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18554 enforce these sanity checks. */
/* NOTE(review): lossy extraction -- the `int' return type, braces and the
   `return true/false' lines after each test are missing here.  */
18557 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18558 enum machine_mode mode
, int strict
)
/* Sanity check: neither class may mix units (see head comment).  */
18560 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18561 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18562 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18563 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18564 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18565 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18567 gcc_assert (!strict
);
/* x87 <-> anything else always goes through memory.  */
18571 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18574 /* ??? This is a lie. We do have moves between mmx/general, and for
18575 mmx/sse2. But by saying we need secondary memory we discourage the
18576 register allocator from using the mmx registers unless needed. */
18577 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18580 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18582 /* SSE1 doesn't have any direct moves from other classes. */
18586 /* If the target says that inter-unit moves are more expensive
18587 than moving through memory, then don't generate them. */
18588 if (!TARGET_INTER_UNIT_MOVES
)
18591 /* Between SSE and general, we have moves no larger than word size. */
18592 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18599 /* Return true if the registers in CLASS cannot represent the change from
18600 modes FROM to TO. */
/* NOTE(review): lossy extraction -- the `bool' return type, braces and the
   `return true/false' lines under each condition are missing here.  */
18603 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18604 enum reg_class
class)
18609 /* x87 registers can't do subreg at all, as all values are reformatted
18610 to extended precision. */
18611 if (MAYBE_FLOAT_CLASS_P (class))
18614 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18616 /* Vector registers do not support QI or HImode loads. If we don't
18617 disallow a change to these modes, reload will assume it's ok to
18618 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18619 the vec_dupv4hi pattern. */
18620 if (GET_MODE_SIZE (from
) < 4)
18623 /* Vector registers do not support subreg with nonzero offsets, which
18624 are otherwise valid for integer registers. Since we can't see
18625 whether we have a nonzero offset from here, prohibit all
18626 nonparadoxical subregs changing size. */
18627 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18634 /* Return the cost of moving data from a register in class CLASS1 to
18635 one in class CLASS2.
18637 It is not required that the cost always equal 2 when FROM is the same as TO;
18638 on some machines it is expensive to move between registers if they are not
18639 general registers. */
/* NOTE(review): lossy extraction -- the `int' return type, the declaration
   and initialization of `cost', braces, the `return cost;'/constant
   returns (incl. the "cost of 20" case) and the final default return
   are missing here.  */
18642 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18643 enum reg_class class2
)
18645 /* In case we require secondary memory, compute cost of the store followed
18646 by load. In order to avoid bad register allocation choices, we need
18647 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18649 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* Round-trip through memory: store into CLASS1's bank, load back into
   CLASS2's bank; take the worse direction of each.  */
18653 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18654 MEMORY_MOVE_COST (mode
, class1
, 1));
18655 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18656 MEMORY_MOVE_COST (mode
, class2
, 1));
18658 /* In case of copying from general_purpose_register we may emit multiple
18659 stores followed by single load causing memory size mismatch stall.
18660 Count this as arbitrarily high cost of 20. */
18661 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18664 /* In the case of FP/MMX moves, the registers actually overlap, and we
18665 have to switch modes in order to treat them differently. */
18666 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18667 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18673 /* Moves between SSE/MMX and integer unit are expensive. */
18674 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18675 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18676 return ix86_cost
->mmxsse_to_integer
;
/* Intra-bank moves: cost from the per-CPU tuning table.  */
18677 if (MAYBE_FLOAT_CLASS_P (class1
))
18678 return ix86_cost
->fp_move
;
18679 if (MAYBE_SSE_CLASS_P (class1
))
18680 return ix86_cost
->sse_move
;
18681 if (MAYBE_MMX_CLASS_P (class1
))
18682 return ix86_cost
->mmx_move
;
18686 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): lossy extraction -- the `int' return type, braces and
   several `return 0/1;' lines between the visible conditions are
   missing here.  */
18689 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18691 /* Flags and only flags can only hold CCmode values. */
18692 if (CC_REGNO_P (regno
))
18693 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Conversely, no other register may hold CC/random/partial-int modes.  */
18694 if (GET_MODE_CLASS (mode
) == MODE_CC
18695 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18696 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
/* x87 stack registers.  */
18698 if (FP_REGNO_P (regno
))
18699 return VALID_FP_MODE_P (mode
);
18700 if (SSE_REGNO_P (regno
))
18702 /* We implement the move patterns for all vector modes into and
18703 out of SSE registers, even when no operation instructions
18705 return (VALID_SSE_REG_MODE (mode
)
18706 || VALID_SSE2_REG_MODE (mode
)
18707 || VALID_MMX_REG_MODE (mode
)
18708 || VALID_MMX_REG_MODE_3DNOW (mode
));
18710 if (MMX_REGNO_P (regno
))
18712 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18713 so if the register is available at all, then we can move data of
18714 the given mode into or out of it. */
18715 return (VALID_MMX_REG_MODE (mode
)
18716 || VALID_MMX_REG_MODE_3DNOW (mode
));
/* Remaining registers are general purpose.  */
18719 if (mode
== QImode
)
18721 /* Take care for QImode values - they can be in non-QI regs,
18722 but then they do cause partial register stalls. */
18723 if (regno
< 4 || TARGET_64BIT
)
18725 if (!TARGET_PARTIAL_REG_STALL
)
/* During/after reload we tolerate QImode in high regs despite the
   stall penalty.  */
18727 return reload_in_progress
|| reload_completed
;
18729 /* We handle both integer and floats in the general purpose registers. */
18730 else if (VALID_INT_MODE_P (mode
))
18732 else if (VALID_FP_MODE_P (mode
))
18734 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18735 on to use that value in smaller contexts, this can easily force a
18736 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18737 supporting DImode, allow it. */
18738 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18744 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18745 tieable integer mode. */
/* NOTE(review): lossy extraction -- the return type, the mode `switch'
   with its case labels and the default return are missing; only two
   of the case bodies are visible (presumably the HImode/QImode case
   and the SImode-and-wider case -- confirm against the original).  */
18748 ix86_tieable_integer_mode_p (enum machine_mode mode
)
/* Sub-word modes tie freely only when partial-register stalls are
   not a concern (or in 64-bit mode).  */
18757 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
/* This mode (presumably DImode) ties only in 64-bit mode.  */
18760 return TARGET_64BIT
;
18767 /* Return true if MODE1 is accessible in a register that can hold MODE2
18768 without copying. That is, all register classes that can hold MODE2
18769 can also hold MODE1. */
/* NOTE(review): lossy extraction -- the return type, braces and the
   `return true;'/`return false;' lines after several conditions are
   missing here.  */
18772 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
/* Identical modes always tie.  */
18774 if (mode1
== mode2
)
/* Two tieable integer modes tie with each other.  */
18777 if (ix86_tieable_integer_mode_p (mode1
)
18778 && ix86_tieable_integer_mode_p (mode2
))
18781 /* MODE2 being XFmode implies fp stack or general regs, which means we
18782 can tie any smaller floating point modes to it. Note that we do not
18783 tie this with TFmode. */
18784 if (mode2
== XFmode
)
18785 return mode1
== SFmode
|| mode1
== DFmode
;
18787 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18788 that we can tie it with SFmode. */
18789 if (mode2
== DFmode
)
18790 return mode1
== SFmode
;
18792 /* If MODE2 is only appropriate for an SSE register, then tie with
18793 any other mode acceptable to SSE registers. */
18794 if (GET_MODE_SIZE (mode2
) == 16
18795 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18796 return (GET_MODE_SIZE (mode1
) == 16
18797 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18799 /* If MODE2 is appropriate for an MMX register, then tie
18800 with any other mode acceptable to MMX registers. */
18801 if (GET_MODE_SIZE (mode2
) == 8
18802 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18803 return (GET_MODE_SIZE (mode1
) == 8
18804 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
18809 /* Return the cost of moving data of mode M between a
18810 register and memory. A value of 2 is the default; this cost is
18811 relative to those in `REGISTER_MOVE_COST'.
18813 If moving between registers and memory is more expensive than
18814 between two registers, you should define this macro to express the
18817 Model also increased moving costs of QImode registers in non
/* NOTE(review): lossy extraction -- the `int' return type, the
   declaration and computation of `index' (which maps mode size to a
   cost-table slot), braces, the switch case labels and default
   branches are all missing between the visible lines.  */
18821 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
/* x87 classes: cost from the per-CPU fp load/store tables,
   indexed by mode; IN selects load vs. store direction.  */
18823 if (FLOAT_CLASS_P (class))
18840 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE classes: table slot chosen by the mode's byte size.  */
18842 if (SSE_CLASS_P (class))
18845 switch (GET_MODE_SIZE (mode
))
18859 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX classes likewise.  */
18861 if (MMX_CLASS_P (class))
18864 switch (GET_MODE_SIZE (mode
))
18875 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* General-purpose registers; byte-sized data costs extra outside
   Q_REGS (movzbl load, penalized store).  */
18877 switch (GET_MODE_SIZE (mode
))
18881 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
18882 : ix86_cost
->movzbl_load
);
18884 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
18885 : ix86_cost
->int_store
[0] + 4);
18888 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18890 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18891 if (mode
== TFmode
)
/* Wide modes: word-sized move cost times the number of words,
   rounded up.  */
18893 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
18894 * (((int) GET_MODE_SIZE (mode
)
18895 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
18899 /* Compute a (partial) cost for rtx X. Return true if the complete
18900 cost has been computed, and false if subexpressions should be
18901 scanned. In either case, *TOTAL contains the cost result. */
18904 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
18906 enum machine_mode mode
= GET_MODE (x
);
18914 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
18916 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
18918 else if (flag_pic
&& SYMBOLIC_CONST (x
)
18920 || (!GET_CODE (x
) != LABEL_REF
18921 && (GET_CODE (x
) != SYMBOL_REF
18922 || !SYMBOL_REF_LOCAL_P (x
)))))
18929 if (mode
== VOIDmode
)
18932 switch (standard_80387_constant_p (x
))
18937 default: /* Other constants */
18942 /* Start with (MEM (SYMBOL_REF)), since that's where
18943 it'll probably end up. Add a penalty for size. */
18944 *total
= (COSTS_N_INSNS (1)
18945 + (flag_pic
!= 0 && !TARGET_64BIT
)
18946 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
18952 /* The zero extensions is often completely free on x86_64, so make
18953 it as cheap as possible. */
18954 if (TARGET_64BIT
&& mode
== DImode
18955 && GET_MODE (XEXP (x
, 0)) == SImode
)
18957 else if (TARGET_ZERO_EXTEND_WITH_AND
)
18958 *total
= ix86_cost
->add
;
18960 *total
= ix86_cost
->movzx
;
18964 *total
= ix86_cost
->movsx
;
18968 if (CONST_INT_P (XEXP (x
, 1))
18969 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
18971 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
18974 *total
= ix86_cost
->add
;
18977 if ((value
== 2 || value
== 3)
18978 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
18980 *total
= ix86_cost
->lea
;
18990 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
18992 if (CONST_INT_P (XEXP (x
, 1)))
18994 if (INTVAL (XEXP (x
, 1)) > 32)
18995 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
18997 *total
= ix86_cost
->shift_const
* 2;
19001 if (GET_CODE (XEXP (x
, 1)) == AND
)
19002 *total
= ix86_cost
->shift_var
* 2;
19004 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19009 if (CONST_INT_P (XEXP (x
, 1)))
19010 *total
= ix86_cost
->shift_const
;
19012 *total
= ix86_cost
->shift_var
;
19017 if (FLOAT_MODE_P (mode
))
19019 *total
= ix86_cost
->fmul
;
19024 rtx op0
= XEXP (x
, 0);
19025 rtx op1
= XEXP (x
, 1);
19027 if (CONST_INT_P (XEXP (x
, 1)))
19029 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19030 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19034 /* This is arbitrary. */
19037 /* Compute costs correctly for widening multiplication. */
19038 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19039 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19040 == GET_MODE_SIZE (mode
))
19042 int is_mulwiden
= 0;
19043 enum machine_mode inner_mode
= GET_MODE (op0
);
19045 if (GET_CODE (op0
) == GET_CODE (op1
))
19046 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19047 else if (CONST_INT_P (op1
))
19049 if (GET_CODE (op0
) == SIGN_EXTEND
)
19050 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19053 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19057 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19060 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19061 + nbits
* ix86_cost
->mult_bit
19062 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19071 if (FLOAT_MODE_P (mode
))
19072 *total
= ix86_cost
->fdiv
;
19074 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19078 if (FLOAT_MODE_P (mode
))
19079 *total
= ix86_cost
->fadd
;
19080 else if (GET_MODE_CLASS (mode
) == MODE_INT
19081 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19083 if (GET_CODE (XEXP (x
, 0)) == PLUS
19084 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19085 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19086 && CONSTANT_P (XEXP (x
, 1)))
19088 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19089 if (val
== 2 || val
== 4 || val
== 8)
19091 *total
= ix86_cost
->lea
;
19092 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19093 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19095 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19099 else if (GET_CODE (XEXP (x
, 0)) == MULT
19100 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19102 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19103 if (val
== 2 || val
== 4 || val
== 8)
19105 *total
= ix86_cost
->lea
;
19106 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19107 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19111 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19113 *total
= ix86_cost
->lea
;
19114 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19115 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19116 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19123 if (FLOAT_MODE_P (mode
))
19125 *total
= ix86_cost
->fadd
;
19133 if (!TARGET_64BIT
&& mode
== DImode
)
19135 *total
= (ix86_cost
->add
* 2
19136 + (rtx_cost (XEXP (x
, 0), outer_code
)
19137 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19138 + (rtx_cost (XEXP (x
, 1), outer_code
)
19139 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19145 if (FLOAT_MODE_P (mode
))
19147 *total
= ix86_cost
->fchs
;
19153 if (!TARGET_64BIT
&& mode
== DImode
)
19154 *total
= ix86_cost
->add
* 2;
19156 *total
= ix86_cost
->add
;
19160 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19161 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19162 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19163 && XEXP (x
, 1) == const0_rtx
)
19165 /* This kind of construct is implemented using test[bwl].
19166 Treat it as if we had an AND. */
19167 *total
= (ix86_cost
->add
19168 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19169 + rtx_cost (const1_rtx
, outer_code
));
19175 if (!TARGET_SSE_MATH
19177 || (mode
== DFmode
&& !TARGET_SSE2
))
19182 if (FLOAT_MODE_P (mode
))
19183 *total
= ix86_cost
->fabs
;
19187 if (FLOAT_MODE_P (mode
))
19188 *total
= ix86_cost
->fsqrt
;
19192 if (XINT (x
, 1) == UNSPEC_TP
)
19203 static int current_machopic_label_num
;
19205 /* Given a symbol name and its associated stub, write out the
19206 definition of the stub. */
19209 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19211 unsigned int length
;
19212 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19213 int label
= ++current_machopic_label_num
;
19215 /* For 64-bit we shouldn't get here. */
19216 gcc_assert (!TARGET_64BIT
);
19218 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19219 symb
= (*targetm
.strip_name_encoding
) (symb
);
19221 length
= strlen (stub
);
19222 binder_name
= alloca (length
+ 32);
19223 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19225 length
= strlen (symb
);
19226 symbol_name
= alloca (length
+ 32);
19227 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19229 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19232 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19234 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19236 fprintf (file
, "%s:\n", stub
);
19237 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19241 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19242 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19243 fprintf (file
, "\tjmp\t*%%edx\n");
19246 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19248 fprintf (file
, "%s:\n", binder_name
);
19252 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19253 fprintf (file
, "\tpushl\t%%eax\n");
19256 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19258 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19260 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19261 fprintf (file
, "%s:\n", lazy_ptr_name
);
19262 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19263 fprintf (file
, "\t.long %s\n", binder_name
);
19267 darwin_x86_file_end (void)
19269 darwin_file_end ();
19272 #endif /* TARGET_MACHO */
19274 /* Order the registers for register allocator. */
19277 x86_order_regs_for_local_alloc (void)
19282 /* First allocate the local general purpose registers. */
19283 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19284 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19285 reg_alloc_order
[pos
++] = i
;
19287 /* Global general purpose registers. */
19288 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19289 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19290 reg_alloc_order
[pos
++] = i
;
19292 /* x87 registers come first in case we are doing FP math
19294 if (!TARGET_SSE_MATH
)
19295 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19296 reg_alloc_order
[pos
++] = i
;
19298 /* SSE registers. */
19299 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19300 reg_alloc_order
[pos
++] = i
;
19301 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19302 reg_alloc_order
[pos
++] = i
;
19304 /* x87 registers. */
19305 if (TARGET_SSE_MATH
)
19306 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19307 reg_alloc_order
[pos
++] = i
;
19309 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19310 reg_alloc_order
[pos
++] = i
;
19312 /* Initialize the rest of array as we do not allocate some registers
19314 while (pos
< FIRST_PSEUDO_REGISTER
)
19315 reg_alloc_order
[pos
++] = 0;
19318 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19319 struct attribute_spec.handler. */
19321 ix86_handle_struct_attribute (tree
*node
, tree name
,
19322 tree args ATTRIBUTE_UNUSED
,
19323 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19326 if (DECL_P (*node
))
19328 if (TREE_CODE (*node
) == TYPE_DECL
)
19329 type
= &TREE_TYPE (*node
);
19334 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19335 || TREE_CODE (*type
) == UNION_TYPE
)))
19337 warning (OPT_Wattributes
, "%qs attribute ignored",
19338 IDENTIFIER_POINTER (name
));
19339 *no_add_attrs
= true;
19342 else if ((is_attribute_p ("ms_struct", name
)
19343 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19344 || ((is_attribute_p ("gcc_struct", name
)
19345 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19347 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19348 IDENTIFIER_POINTER (name
));
19349 *no_add_attrs
= true;
19356 ix86_ms_bitfield_layout_p (tree record_type
)
19358 return (TARGET_MS_BITFIELD_LAYOUT
&&
19359 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19360 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19363 /* Returns an expression indicating where the this parameter is
19364 located on entry to the FUNCTION. */
19367 x86_this_parameter (tree function
)
19369 tree type
= TREE_TYPE (function
);
19373 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19374 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19377 if (ix86_function_regparm (type
, function
) > 0)
19381 parm
= TYPE_ARG_TYPES (type
);
19382 /* Figure out whether or not the function has a variable number of
19384 for (; parm
; parm
= TREE_CHAIN (parm
))
19385 if (TREE_VALUE (parm
) == void_type_node
)
19387 /* If not, the this parameter is in the first argument. */
19391 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19393 return gen_rtx_REG (SImode
, regno
);
19397 if (aggregate_value_p (TREE_TYPE (type
), type
))
19398 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19400 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19403 /* Determine whether x86_output_mi_thunk can succeed. */
19406 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19407 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19408 HOST_WIDE_INT vcall_offset
, tree function
)
19410 /* 64-bit can handle anything. */
19414 /* For 32-bit, everything's fine if we have one free register. */
19415 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19418 /* Need a free register for vcall_offset. */
19422 /* Need a free register for GOT references. */
19423 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19426 /* Otherwise ok. */
19430 /* Output the assembler code for a thunk function. THUNK_DECL is the
19431 declaration for the thunk function itself, FUNCTION is the decl for
19432 the target function. DELTA is an immediate constant offset to be
19433 added to THIS. If VCALL_OFFSET is nonzero, the word at
19434 *(*this + vcall_offset) should be added to THIS. */
19437 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19438 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19439 HOST_WIDE_INT vcall_offset
, tree function
)
19442 rtx
this = x86_this_parameter (function
);
19445 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19446 pull it in now and let DELTA benefit. */
19449 else if (vcall_offset
)
19451 /* Put the this parameter into %eax. */
19453 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19454 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19457 this_reg
= NULL_RTX
;
19459 /* Adjust the this parameter by a fixed constant. */
19462 xops
[0] = GEN_INT (delta
);
19463 xops
[1] = this_reg
? this_reg
: this;
19466 if (!x86_64_general_operand (xops
[0], DImode
))
19468 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19470 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19474 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19477 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19480 /* Adjust the this parameter by a value stored in the vtable. */
19484 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19487 int tmp_regno
= 2 /* ECX */;
19488 if (lookup_attribute ("fastcall",
19489 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19490 tmp_regno
= 0 /* EAX */;
19491 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19494 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19497 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19499 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19501 /* Adjust the this parameter. */
19502 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19503 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19505 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19506 xops
[0] = GEN_INT (vcall_offset
);
19508 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19509 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19511 xops
[1] = this_reg
;
19513 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19515 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19518 /* If necessary, drop THIS back to its stack slot. */
19519 if (this_reg
&& this_reg
!= this)
19521 xops
[0] = this_reg
;
19523 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19526 xops
[0] = XEXP (DECL_RTL (function
), 0);
19529 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19530 output_asm_insn ("jmp\t%P0", xops
);
19533 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19534 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19535 tmp
= gen_rtx_MEM (QImode
, tmp
);
19537 output_asm_insn ("jmp\t%A0", xops
);
19542 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19543 output_asm_insn ("jmp\t%P0", xops
);
19548 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19549 tmp
= (gen_rtx_SYMBOL_REF
19551 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19552 tmp
= gen_rtx_MEM (QImode
, tmp
);
19554 output_asm_insn ("jmp\t%0", xops
);
19557 #endif /* TARGET_MACHO */
19559 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19560 output_set_got (tmp
, NULL_RTX
);
19563 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19564 output_asm_insn ("jmp\t{*}%1", xops
);
19570 x86_file_start (void)
19572 default_file_start ();
19574 darwin_file_start ();
19576 if (X86_FILE_START_VERSION_DIRECTIVE
)
19577 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19578 if (X86_FILE_START_FLTUSED
)
19579 fputs ("\t.global\t__fltused\n", asm_out_file
);
19580 if (ix86_asm_dialect
== ASM_INTEL
)
19581 fputs ("\t.intel_syntax\n", asm_out_file
);
19585 x86_field_alignment (tree field
, int computed
)
19587 enum machine_mode mode
;
19588 tree type
= TREE_TYPE (field
);
19590 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19592 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19593 ? get_inner_array_type (type
) : type
);
19594 if (mode
== DFmode
|| mode
== DCmode
19595 || GET_MODE_CLASS (mode
) == MODE_INT
19596 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19597 return MIN (32, computed
);
19601 /* Output assembler code to FILE to increment profiler label # LABELNO
19602 for profiling a function entry. */
19604 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19609 #ifndef NO_PROFILE_COUNTERS
19610 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19612 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19616 #ifndef NO_PROFILE_COUNTERS
19617 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19619 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19623 #ifndef NO_PROFILE_COUNTERS
19624 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19625 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19627 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19631 #ifndef NO_PROFILE_COUNTERS
19632 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19633 PROFILE_COUNT_REGISTER
);
19635 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19639 /* We don't have exact information about the insn sizes, but we may assume
19640 quite safely that we are informed about all 1 byte insns and memory
19641 address sizes. This is enough to eliminate unnecessary padding in
19645 min_insn_size (rtx insn
)
19649 if (!INSN_P (insn
) || !active_insn_p (insn
))
19652 /* Discard alignments we've emit and jump instructions. */
19653 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19654 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19657 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19658 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19661 /* Important case - calls are always 5 bytes.
19662 It is common to have many calls in the row. */
19664 && symbolic_reference_mentioned_p (PATTERN (insn
))
19665 && !SIBLING_CALL_P (insn
))
19667 if (get_attr_length (insn
) <= 1)
19670 /* For normal instructions we may rely on the sizes of addresses
19671 and the presence of symbol to require 4 bytes of encoding.
19672 This is not the case for jumps where references are PC relative. */
19673 if (!JUMP_P (insn
))
19675 l
= get_attr_length_address (insn
);
19676 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19685 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19689 ix86_avoid_jump_misspredicts (void)
19691 rtx insn
, start
= get_insns ();
19692 int nbytes
= 0, njumps
= 0;
19695 /* Look for all minimal intervals of instructions containing 4 jumps.
19696 The intervals are bounded by START and INSN. NBYTES is the total
19697 size of instructions in the interval including INSN and not including
19698 START. When the NBYTES is smaller than 16 bytes, it is possible
19699 that the end of START and INSN ends up in the same 16byte page.
19701 The smallest offset in the page INSN can start is the case where START
19702 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19703 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19705 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19708 nbytes
+= min_insn_size (insn
);
19710 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19711 INSN_UID (insn
), min_insn_size (insn
));
19713 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19714 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19722 start
= NEXT_INSN (start
);
19723 if ((JUMP_P (start
)
19724 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19725 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19727 njumps
--, isjump
= 1;
19730 nbytes
-= min_insn_size (start
);
19732 gcc_assert (njumps
>= 0);
19734 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19735 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19737 if (njumps
== 3 && isjump
&& nbytes
< 16)
19739 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19742 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19743 INSN_UID (insn
), padsize
);
19744 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19749 /* AMD Athlon works faster
19750 when RET is not destination of conditional jump or directly preceded
19751 by other jump instruction. We avoid the penalty by inserting NOP just
19752 before the RET instructions in such cases. */
19754 ix86_pad_returns (void)
19759 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19761 basic_block bb
= e
->src
;
19762 rtx ret
= BB_END (bb
);
19764 bool replace
= false;
19766 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19767 || !maybe_hot_bb_p (bb
))
19769 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19770 if (active_insn_p (prev
) || LABEL_P (prev
))
19772 if (prev
&& LABEL_P (prev
))
19777 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19778 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19779 && !(e
->flags
& EDGE_FALLTHRU
))
19784 prev
= prev_active_insn (ret
);
19786 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19789 /* Empty functions get branch mispredict even when the jump destination
19790 is not visible to us. */
19791 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19796 emit_insn_before (gen_return_internal_long (), ret
);
19802 /* Implement machine specific optimizations. We implement padding of returns
19803 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19807 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19808 ix86_pad_returns ();
19809 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
19810 ix86_avoid_jump_misspredicts ();
19813 /* Return nonzero when QImode register that must be represented via REX prefix
19816 x86_extended_QIreg_mentioned_p (rtx insn
)
19819 extract_insn_cached (insn
);
19820 for (i
= 0; i
< recog_data
.n_operands
; i
++)
19821 if (REG_P (recog_data
.operand
[i
])
19822 && REGNO (recog_data
.operand
[i
]) >= 4)
19827 /* Return nonzero when P points to register encoded via REX prefix.
19828 Called via for_each_rtx. */
19830 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
19832 unsigned int regno
;
19835 regno
= REGNO (*p
);
19836 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
19839 /* Return true when INSN mentions register that must be encoded using REX
19842 x86_extended_reg_mentioned_p (rtx insn
)
19844 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
19847 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19848 optabs would emit if we didn't have TFmode patterns. */
19851 x86_emit_floatuns (rtx operands
[2])
19853 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
19854 enum machine_mode mode
, inmode
;
19856 inmode
= GET_MODE (operands
[1]);
19857 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
19860 in
= force_reg (inmode
, operands
[1]);
19861 mode
= GET_MODE (out
);
19862 neglab
= gen_label_rtx ();
19863 donelab
= gen_label_rtx ();
19864 f0
= gen_reg_rtx (mode
);
19866 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
19868 expand_float (out
, in
, 0);
19870 emit_jump_insn (gen_jump (donelab
));
19873 emit_label (neglab
);
19875 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
19877 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
19879 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
19881 expand_float (f0
, i0
, 0);
19883 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
19885 emit_label (donelab
);
19888 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19889 with all elements equal to VAR. Return true if successful. */
19892 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
19893 rtx target
, rtx val
)
19895 enum machine_mode smode
, wsmode
, wvmode
;
19910 val
= force_reg (GET_MODE_INNER (mode
), val
);
19911 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19912 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19918 if (TARGET_SSE
|| TARGET_3DNOW_A
)
19920 val
= gen_lowpart (SImode
, val
);
19921 x
= gen_rtx_TRUNCATE (HImode
, val
);
19922 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
19923 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
19945 /* Extend HImode to SImode using a paradoxical SUBREG. */
19946 tmp1
= gen_reg_rtx (SImode
);
19947 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19948 /* Insert the SImode value as low element of V4SImode vector. */
19949 tmp2
= gen_reg_rtx (V4SImode
);
19950 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19951 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19952 CONST0_RTX (V4SImode
),
19954 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19955 /* Cast the V4SImode vector back to a V8HImode vector. */
19956 tmp1
= gen_reg_rtx (V8HImode
);
19957 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
19958 /* Duplicate the low short through the whole low SImode word. */
19959 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
19960 /* Cast the V8HImode vector back to a V4SImode vector. */
19961 tmp2
= gen_reg_rtx (V4SImode
);
19962 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19963 /* Replicate the low element of the V4SImode vector. */
19964 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19965 /* Cast the V2SImode back to V8HImode, and store in target. */
19966 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
19977 /* Extend QImode to SImode using a paradoxical SUBREG. */
19978 tmp1
= gen_reg_rtx (SImode
);
19979 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
19980 /* Insert the SImode value as low element of V4SImode vector. */
19981 tmp2
= gen_reg_rtx (V4SImode
);
19982 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
19983 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
19984 CONST0_RTX (V4SImode
),
19986 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
19987 /* Cast the V4SImode vector back to a V16QImode vector. */
19988 tmp1
= gen_reg_rtx (V16QImode
);
19989 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
19990 /* Duplicate the low byte through the whole low SImode word. */
19991 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19992 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
19993 /* Cast the V16QImode vector back to a V4SImode vector. */
19994 tmp2
= gen_reg_rtx (V4SImode
);
19995 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
19996 /* Replicate the low element of the V4SImode vector. */
19997 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
19998 /* Cast the V2SImode back to V16QImode, and store in target. */
19999 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20007 /* Replicate the value once into the next wider mode and recurse. */
20008 val
= convert_modes (wsmode
, smode
, val
, true);
20009 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20010 GEN_INT (GET_MODE_BITSIZE (smode
)),
20011 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20012 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20014 x
= gen_reg_rtx (wvmode
);
20015 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20016 gcc_unreachable ();
20017 emit_move_insn (target
, gen_lowpart (mode
, x
));
20025 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20026 whose ONE_VAR element is VAR, and other elements are zero. Return true
20030 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20031 rtx target
, rtx var
, int one_var
)
20033 enum machine_mode vsimode
;
20049 var
= force_reg (GET_MODE_INNER (mode
), var
);
20050 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20051 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20056 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20057 new_target
= gen_reg_rtx (mode
);
20059 new_target
= target
;
20060 var
= force_reg (GET_MODE_INNER (mode
), var
);
20061 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20062 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20063 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20066 /* We need to shuffle the value to the correct position, so
20067 create a new pseudo to store the intermediate result. */
20069 /* With SSE2, we can use the integer shuffle insns. */
20070 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20072 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20074 GEN_INT (one_var
== 1 ? 0 : 1),
20075 GEN_INT (one_var
== 2 ? 0 : 1),
20076 GEN_INT (one_var
== 3 ? 0 : 1)));
20077 if (target
!= new_target
)
20078 emit_move_insn (target
, new_target
);
20082 /* Otherwise convert the intermediate result to V4SFmode and
20083 use the SSE1 shuffle instructions. */
20084 if (mode
!= V4SFmode
)
20086 tmp
= gen_reg_rtx (V4SFmode
);
20087 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20092 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20094 GEN_INT (one_var
== 1 ? 0 : 1),
20095 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20096 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20098 if (mode
!= V4SFmode
)
20099 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20100 else if (tmp
!= target
)
20101 emit_move_insn (target
, tmp
);
20103 else if (target
!= new_target
)
20104 emit_move_insn (target
, new_target
);
20109 vsimode
= V4SImode
;
20115 vsimode
= V2SImode
;
20121 /* Zero extend the variable element to SImode and recurse. */
20122 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20124 x
= gen_reg_rtx (vsimode
);
20125 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20127 gcc_unreachable ();
20129 emit_move_insn (target
, gen_lowpart (mode
, x
));
20137 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20138 consisting of the values in VALS. It is known that all elements
20139 except ONE_VAR are constants. Return true if successful. */
20142 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20143 rtx target
, rtx vals
, int one_var
)
20145 rtx var
= XVECEXP (vals
, 0, one_var
);
20146 enum machine_mode wmode
;
20149 const_vec
= copy_rtx (vals
);
20150 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20151 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20159 /* For the two element vectors, it's just as easy to use
20160 the general case. */
20176 /* There's no way to set one QImode entry easily. Combine
20177 the variable value with its adjacent constant value, and
20178 promote to an HImode set. */
20179 x
= XVECEXP (vals
, 0, one_var
^ 1);
20182 var
= convert_modes (HImode
, QImode
, var
, true);
20183 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20184 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20185 x
= GEN_INT (INTVAL (x
) & 0xff);
20189 var
= convert_modes (HImode
, QImode
, var
, true);
20190 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20192 if (x
!= const0_rtx
)
20193 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20194 1, OPTAB_LIB_WIDEN
);
20196 x
= gen_reg_rtx (wmode
);
20197 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20198 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20200 emit_move_insn (target
, gen_lowpart (mode
, x
));
20207 emit_move_insn (target
, const_vec
);
20208 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20212 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20213 all values variable, and none identical. */
20216 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20217 rtx target
, rtx vals
)
20219 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20220 rtx op0
= NULL
, op1
= NULL
;
20221 bool use_vec_concat
= false;
20227 if (!mmx_ok
&& !TARGET_SSE
)
20233 /* For the two element vectors, we always implement VEC_CONCAT. */
20234 op0
= XVECEXP (vals
, 0, 0);
20235 op1
= XVECEXP (vals
, 0, 1);
20236 use_vec_concat
= true;
20240 half_mode
= V2SFmode
;
20243 half_mode
= V2SImode
;
20249 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20250 Recurse to load the two halves. */
20252 op0
= gen_reg_rtx (half_mode
);
20253 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20254 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20256 op1
= gen_reg_rtx (half_mode
);
20257 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20258 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20260 use_vec_concat
= true;
20271 gcc_unreachable ();
20274 if (use_vec_concat
)
20276 if (!register_operand (op0
, half_mode
))
20277 op0
= force_reg (half_mode
, op0
);
20278 if (!register_operand (op1
, half_mode
))
20279 op1
= force_reg (half_mode
, op1
);
20281 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20282 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20286 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20287 enum machine_mode inner_mode
;
20288 rtx words
[4], shift
;
20290 inner_mode
= GET_MODE_INNER (mode
);
20291 n_elts
= GET_MODE_NUNITS (mode
);
20292 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20293 n_elt_per_word
= n_elts
/ n_words
;
20294 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20296 for (i
= 0; i
< n_words
; ++i
)
20298 rtx word
= NULL_RTX
;
20300 for (j
= 0; j
< n_elt_per_word
; ++j
)
20302 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20303 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20309 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20310 word
, 1, OPTAB_LIB_WIDEN
);
20311 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20312 word
, 1, OPTAB_LIB_WIDEN
);
20320 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20321 else if (n_words
== 2)
20323 rtx tmp
= gen_reg_rtx (mode
);
20324 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20325 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20326 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20327 emit_move_insn (target
, tmp
);
20329 else if (n_words
== 4)
20331 rtx tmp
= gen_reg_rtx (V4SImode
);
20332 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20333 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20334 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20337 gcc_unreachable ();
20341 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20342 instructions unless MMX_OK is true. */
20345 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20347 enum machine_mode mode
= GET_MODE (target
);
20348 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20349 int n_elts
= GET_MODE_NUNITS (mode
);
20350 int n_var
= 0, one_var
= -1;
20351 bool all_same
= true, all_const_zero
= true;
20355 for (i
= 0; i
< n_elts
; ++i
)
20357 x
= XVECEXP (vals
, 0, i
);
20358 if (!CONSTANT_P (x
))
20359 n_var
++, one_var
= i
;
20360 else if (x
!= CONST0_RTX (inner_mode
))
20361 all_const_zero
= false;
20362 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20366 /* Constants are best loaded from the constant pool. */
20369 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20373 /* If all values are identical, broadcast the value. */
20375 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20376 XVECEXP (vals
, 0, 0)))
20379 /* Values where only one field is non-constant are best loaded from
20380 the pool and overwritten via move later. */
20384 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20385 XVECEXP (vals
, 0, one_var
),
20389 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20393 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20397 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20399 enum machine_mode mode
= GET_MODE (target
);
20400 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20401 bool use_vec_merge
= false;
20410 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20411 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20413 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20415 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20416 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20426 /* For the two element vectors, we implement a VEC_CONCAT with
20427 the extraction of the other element. */
20429 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20430 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20433 op0
= val
, op1
= tmp
;
20435 op0
= tmp
, op1
= val
;
20437 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20438 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20446 use_vec_merge
= true;
20450 /* tmp = target = A B C D */
20451 tmp
= copy_to_reg (target
);
20452 /* target = A A B B */
20453 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20454 /* target = X A B B */
20455 ix86_expand_vector_set (false, target
, val
, 0);
20456 /* target = A X C D */
20457 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20458 GEN_INT (1), GEN_INT (0),
20459 GEN_INT (2+4), GEN_INT (3+4)));
20463 /* tmp = target = A B C D */
20464 tmp
= copy_to_reg (target
);
20465 /* tmp = X B C D */
20466 ix86_expand_vector_set (false, tmp
, val
, 0);
20467 /* target = A B X D */
20468 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20469 GEN_INT (0), GEN_INT (1),
20470 GEN_INT (0+4), GEN_INT (3+4)));
20474 /* tmp = target = A B C D */
20475 tmp
= copy_to_reg (target
);
20476 /* tmp = X B C D */
20477 ix86_expand_vector_set (false, tmp
, val
, 0);
20478 /* target = A B X D */
20479 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20480 GEN_INT (0), GEN_INT (1),
20481 GEN_INT (2+4), GEN_INT (0+4)));
20485 gcc_unreachable ();
20490 /* Element 0 handled by vec_merge below. */
20493 use_vec_merge
= true;
20499 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20500 store into element 0, then shuffle them back. */
20504 order
[0] = GEN_INT (elt
);
20505 order
[1] = const1_rtx
;
20506 order
[2] = const2_rtx
;
20507 order
[3] = GEN_INT (3);
20508 order
[elt
] = const0_rtx
;
20510 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20511 order
[1], order
[2], order
[3]));
20513 ix86_expand_vector_set (false, target
, val
, 0);
20515 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20516 order
[1], order
[2], order
[3]));
20520 /* For SSE1, we have to reuse the V4SF code. */
20521 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20522 gen_lowpart (SFmode
, val
), elt
);
20527 use_vec_merge
= TARGET_SSE2
;
20530 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20541 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20542 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20543 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20547 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20549 emit_move_insn (mem
, target
);
20551 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20552 emit_move_insn (tmp
, val
);
20554 emit_move_insn (target
, mem
);
20559 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20561 enum machine_mode mode
= GET_MODE (vec
);
20562 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20563 bool use_vec_extr
= false;
20576 use_vec_extr
= true;
20588 tmp
= gen_reg_rtx (mode
);
20589 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20590 GEN_INT (elt
), GEN_INT (elt
),
20591 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20595 tmp
= gen_reg_rtx (mode
);
20596 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20600 gcc_unreachable ();
20603 use_vec_extr
= true;
20618 tmp
= gen_reg_rtx (mode
);
20619 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20620 GEN_INT (elt
), GEN_INT (elt
),
20621 GEN_INT (elt
), GEN_INT (elt
)));
20625 tmp
= gen_reg_rtx (mode
);
20626 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20630 gcc_unreachable ();
20633 use_vec_extr
= true;
20638 /* For SSE1, we have to reuse the V4SF code. */
20639 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20640 gen_lowpart (V4SFmode
, vec
), elt
);
20646 use_vec_extr
= TARGET_SSE2
;
20649 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20654 /* ??? Could extract the appropriate HImode element and shift. */
20661 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20662 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20664 /* Let the rtl optimizers know about the zero extension performed. */
20665 if (inner_mode
== HImode
)
20667 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20668 target
= gen_lowpart (SImode
, target
);
20671 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20675 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20677 emit_move_insn (mem
, vec
);
20679 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20680 emit_move_insn (target
, tmp
);
20684 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20685 pattern to reduce; DEST is the destination; IN is the input vector. */
20688 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20690 rtx tmp1
, tmp2
, tmp3
;
20692 tmp1
= gen_reg_rtx (V4SFmode
);
20693 tmp2
= gen_reg_rtx (V4SFmode
);
20694 tmp3
= gen_reg_rtx (V4SFmode
);
20696 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20697 emit_insn (fn (tmp2
, tmp1
, in
));
20699 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20700 GEN_INT (1), GEN_INT (1),
20701 GEN_INT (1+4), GEN_INT (1+4)));
20702 emit_insn (fn (dest
, tmp2
, tmp3
));
20705 /* Target hook for scalar_mode_supported_p. */
20707 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20709 if (DECIMAL_FLOAT_MODE_P (mode
))
20712 return default_scalar_mode_supported_p (mode
);
20715 /* Implements target hook vector_mode_supported_p. */
20717 ix86_vector_mode_supported_p (enum machine_mode mode
)
20719 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20721 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20723 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20725 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20730 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20732 We do this in the new i386 backend to maintain source compatibility
20733 with the old cc0-based compiler. */
20736 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20737 tree inputs ATTRIBUTE_UNUSED
,
20740 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20742 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20747 /* Return true if this goes in small data/bss. */
20750 ix86_in_large_data_p (tree exp
)
20752 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20755 /* Functions are never large data. */
20756 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20759 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20761 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20762 if (strcmp (section
, ".ldata") == 0
20763 || strcmp (section
, ".lbss") == 0)
20769 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20771 /* If this is an incomplete type with size 0, then we can't put it
20772 in data because it might be too big when completed. */
20773 if (!size
|| size
> ix86_section_threshold
)
20780 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20782 default_encode_section_info (decl
, rtl
, first
);
20784 if (TREE_CODE (decl
) == VAR_DECL
20785 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20786 && ix86_in_large_data_p (decl
))
20787 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20790 /* Worker function for REVERSE_CONDITION. */
20793 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20795 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20796 ? reverse_condition (code
)
20797 : reverse_condition_maybe_unordered (code
));
20800 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20804 output_387_reg_move (rtx insn
, rtx
*operands
)
20806 if (REG_P (operands
[1])
20807 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20809 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20810 return output_387_ffreep (operands
, 0);
20811 return "fstp\t%y0";
20813 if (STACK_TOP_P (operands
[0]))
20814 return "fld%z1\t%y1";
20818 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20819 FP status register is set. */
20822 ix86_emit_fp_unordered_jump (rtx label
)
20824 rtx reg
= gen_reg_rtx (HImode
);
20827 emit_insn (gen_x86_fnstsw_1 (reg
));
20829 if (TARGET_USE_SAHF
)
20831 emit_insn (gen_x86_sahf_1 (reg
));
20833 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
20834 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
20838 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
20840 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
20841 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
20844 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
20845 gen_rtx_LABEL_REF (VOIDmode
, label
),
20847 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
20848 emit_jump_insn (temp
);
20851 /* Output code to perform a log1p XFmode calculation. */
20853 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
20855 rtx label1
= gen_label_rtx ();
20856 rtx label2
= gen_label_rtx ();
20858 rtx tmp
= gen_reg_rtx (XFmode
);
20859 rtx tmp2
= gen_reg_rtx (XFmode
);
20861 emit_insn (gen_absxf2 (tmp
, op1
));
20862 emit_insn (gen_cmpxf (tmp
,
20863 CONST_DOUBLE_FROM_REAL_VALUE (
20864 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
20866 emit_jump_insn (gen_bge (label1
));
20868 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20869 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
20870 emit_jump (label2
);
20872 emit_label (label1
);
20873 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
20874 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
20875 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
20876 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
20878 emit_label (label2
);
20881 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20884 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20887 /* With Binutils 2.15, the "@unwind" marker must be specified on
20888 every occurrence of the ".eh_frame" section, not just the first
20891 && strcmp (name
, ".eh_frame") == 0)
20893 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20894 flags
& SECTION_WRITE
? "aw" : "a");
20897 default_elf_asm_named_section (name
, flags
, decl
);
20900 /* Return the mangling of TYPE if it is an extended fundamental type. */
20902 static const char *
20903 ix86_mangle_fundamental_type (tree type
)
20905 switch (TYPE_MODE (type
))
20908 /* __float128 is "g". */
20911 /* "long double" or __float80 is "e". */
20918 /* For 32-bit code we can save PIC register setup by using
20919 __stack_chk_fail_local hidden function instead of calling
20920 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20921 register, so it is better to call __stack_chk_fail directly. */
20924 ix86_stack_protect_fail (void)
20926 return TARGET_64BIT
20927 ? default_external_stack_protect_fail ()
20928 : default_hidden_stack_protect_fail ();
20931 /* Select a format to encode pointers in exception handling data. CODE
20932 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20933 true if the symbol may be affected by dynamic relocations.
20935 ??? All x86 object file formats are capable of representing this.
20936 After all, the relocation needed is the same as for the call insn.
20937 Whether or not a particular assembler allows us to enter such, I
20938 guess we'll have to see. */
20940 asm_preferred_eh_data_format (int code
, int global
)
20944 int type
= DW_EH_PE_sdata8
;
20946 || ix86_cmodel
== CM_SMALL_PIC
20947 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
20948 type
= DW_EH_PE_sdata4
;
20949 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
20951 if (ix86_cmodel
== CM_SMALL
20952 || (ix86_cmodel
== CM_MEDIUM
&& code
))
20953 return DW_EH_PE_udata4
;
20954 return DW_EH_PE_absptr
;
20957 /* Expand copysign from SIGN to the positive value ABS_VALUE
20958 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20961 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
20963 enum machine_mode mode
= GET_MODE (sign
);
20964 rtx sgn
= gen_reg_rtx (mode
);
20965 if (mask
== NULL_RTX
)
20967 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
20968 if (!VECTOR_MODE_P (mode
))
20970 /* We need to generate a scalar mode mask in this case. */
20971 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
20972 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
20973 mask
= gen_reg_rtx (mode
);
20974 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
20978 mask
= gen_rtx_NOT (mode
, mask
);
20979 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
20980 gen_rtx_AND (mode
, mask
, sign
)));
20981 emit_insn (gen_rtx_SET (VOIDmode
, result
,
20982 gen_rtx_IOR (mode
, abs_value
, sgn
)));
20985 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20986 mask for masking out the sign-bit is stored in *SMASK, if that is
20989 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
20991 enum machine_mode mode
= GET_MODE (op0
);
20994 xa
= gen_reg_rtx (mode
);
20995 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
20996 if (!VECTOR_MODE_P (mode
))
20998 /* We need to generate a scalar mode mask in this case. */
20999 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21000 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21001 mask
= gen_reg_rtx (mode
);
21002 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21004 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21005 gen_rtx_AND (mode
, op0
, mask
)));
21013 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21014 swapping the operands if SWAP_OPERANDS is true. The expanded
21015 code is a forward jump to a newly created label in case the
21016 comparison is true. The generated label rtx is returned. */
21018 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21019 bool swap_operands
)
21030 label
= gen_label_rtx ();
21031 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21032 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21033 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21034 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21035 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21036 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21037 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21038 JUMP_LABEL (tmp
) = label
;
21043 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21044 using comparison code CODE. Operands are swapped for the comparison if
21045 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21047 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21048 bool swap_operands
)
21050 enum machine_mode mode
= GET_MODE (op0
);
21051 rtx mask
= gen_reg_rtx (mode
);
21060 if (mode
== DFmode
)
21061 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21062 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21064 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21065 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21070 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21071 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21073 ix86_gen_TWO52 (enum machine_mode mode
)
21075 REAL_VALUE_TYPE TWO52r
;
21078 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21079 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21080 TWO52
= force_reg (mode
, TWO52
);
21085 /* Expand SSE sequence for computing lround from OP1 storing
21088 ix86_expand_lround (rtx op0
, rtx op1
)
21090 /* C code for the stuff we're doing below:
21091 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21094 enum machine_mode mode
= GET_MODE (op1
);
21095 const struct real_format
*fmt
;
21096 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21099 /* load nextafter (0.5, 0.0) */
21100 fmt
= REAL_MODE_FORMAT (mode
);
21101 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21102 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21104 /* adj = copysign (0.5, op1) */
21105 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21106 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21108 /* adj = op1 + adj */
21109 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21111 /* op0 = (imode)adj */
21112 expand_fix (op0
, adj
, 0);
21115 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21118 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21120 /* C code for the stuff we're doing below (for do_floor):
21122 xi -= (double)xi > op1 ? 1 : 0;
21125 enum machine_mode fmode
= GET_MODE (op1
);
21126 enum machine_mode imode
= GET_MODE (op0
);
21127 rtx ireg
, freg
, label
, tmp
;
21129 /* reg = (long)op1 */
21130 ireg
= gen_reg_rtx (imode
);
21131 expand_fix (ireg
, op1
, 0);
21133 /* freg = (double)reg */
21134 freg
= gen_reg_rtx (fmode
);
21135 expand_float (freg
, ireg
, 0);
21137 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21138 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21139 freg
, op1
, !do_floor
);
21140 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21141 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21142 emit_move_insn (ireg
, tmp
);
21144 emit_label (label
);
21145 LABEL_NUSES (label
) = 1;
21147 emit_move_insn (op0
, ireg
);
21150 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21151 result in OPERAND0. */
21153 ix86_expand_rint (rtx operand0
, rtx operand1
)
21155 /* C code for the stuff we're doing below:
21156 xa = fabs (operand1);
21157 if (!isless (xa, 2**52))
21159 xa = xa + 2**52 - 2**52;
21160 return copysign (xa, operand1);
21162 enum machine_mode mode
= GET_MODE (operand0
);
21163 rtx res
, xa
, label
, TWO52
, mask
;
21165 res
= gen_reg_rtx (mode
);
21166 emit_move_insn (res
, operand1
);
21168 /* xa = abs (operand1) */
21169 xa
= ix86_expand_sse_fabs (res
, &mask
);
21171 /* if (!isless (xa, TWO52)) goto label; */
21172 TWO52
= ix86_gen_TWO52 (mode
);
21173 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21175 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21176 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21178 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21180 emit_label (label
);
21181 LABEL_NUSES (label
) = 1;
21183 emit_move_insn (operand0
, res
);
21186 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21189 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21191 /* C code for the stuff we expand below.
21192 double xa = fabs (x), x2;
21193 if (!isless (xa, TWO52))
21195 xa = xa + TWO52 - TWO52;
21196 x2 = copysign (xa, x);
21205 enum machine_mode mode
= GET_MODE (operand0
);
21206 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21208 TWO52
= ix86_gen_TWO52 (mode
);
21210 /* Temporary for holding the result, initialized to the input
21211 operand to ease control flow. */
21212 res
= gen_reg_rtx (mode
);
21213 emit_move_insn (res
, operand1
);
21215 /* xa = abs (operand1) */
21216 xa
= ix86_expand_sse_fabs (res
, &mask
);
21218 /* if (!isless (xa, TWO52)) goto label; */
21219 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21221 /* xa = xa + TWO52 - TWO52; */
21222 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21223 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21225 /* xa = copysign (xa, operand1) */
21226 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21228 /* generate 1.0 or -1.0 */
21229 one
= force_reg (mode
,
21230 const_double_from_real_value (do_floor
21231 ? dconst1
: dconstm1
, mode
));
21233 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21234 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21235 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21236 gen_rtx_AND (mode
, one
, tmp
)));
21237 /* We always need to subtract here to preserve signed zero. */
21238 tmp
= expand_simple_binop (mode
, MINUS
,
21239 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21240 emit_move_insn (res
, tmp
);
21242 emit_label (label
);
21243 LABEL_NUSES (label
) = 1;
21245 emit_move_insn (operand0
, res
);
21248 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21251 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21253 /* C code for the stuff we expand below.
21254 double xa = fabs (x), x2;
21255 if (!isless (xa, TWO52))
21257 x2 = (double)(long)x;
21264 if (HONOR_SIGNED_ZEROS (mode))
21265 return copysign (x2, x);
21268 enum machine_mode mode
= GET_MODE (operand0
);
21269 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21271 TWO52
= ix86_gen_TWO52 (mode
);
21273 /* Temporary for holding the result, initialized to the input
21274 operand to ease control flow. */
21275 res
= gen_reg_rtx (mode
);
21276 emit_move_insn (res
, operand1
);
21278 /* xa = abs (operand1) */
21279 xa
= ix86_expand_sse_fabs (res
, &mask
);
21281 /* if (!isless (xa, TWO52)) goto label; */
21282 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21284 /* xa = (double)(long)x */
21285 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21286 expand_fix (xi
, res
, 0);
21287 expand_float (xa
, xi
, 0);
21290 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21292 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21293 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21294 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21295 gen_rtx_AND (mode
, one
, tmp
)));
21296 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21297 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21298 emit_move_insn (res
, tmp
);
21300 if (HONOR_SIGNED_ZEROS (mode
))
21301 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21303 emit_label (label
);
21304 LABEL_NUSES (label
) = 1;
21306 emit_move_insn (operand0
, res
);
21309 /* Expand SSE sequence for computing round from OPERAND1 storing
21310 into OPERAND0. Sequence that works without relying on DImode truncation
21311 via cvttsd2siq that is only available on 64bit targets. */
21313 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21315 /* C code for the stuff we expand below.
21316 double xa = fabs (x), xa2, x2;
21317 if (!isless (xa, TWO52))
21319 Using the absolute value and copying back sign makes
21320 -0.0 -> -0.0 correct.
21321 xa2 = xa + TWO52 - TWO52;
21326 else if (dxa > 0.5)
21328 x2 = copysign (xa2, x);
21331 enum machine_mode mode
= GET_MODE (operand0
);
21332 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21334 TWO52
= ix86_gen_TWO52 (mode
);
21336 /* Temporary for holding the result, initialized to the input
21337 operand to ease control flow. */
21338 res
= gen_reg_rtx (mode
);
21339 emit_move_insn (res
, operand1
);
21341 /* xa = abs (operand1) */
21342 xa
= ix86_expand_sse_fabs (res
, &mask
);
21344 /* if (!isless (xa, TWO52)) goto label; */
21345 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21347 /* xa2 = xa + TWO52 - TWO52; */
21348 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21349 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21351 /* dxa = xa2 - xa; */
21352 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21354 /* generate 0.5, 1.0 and -0.5 */
21355 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21356 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21357 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21361 tmp
= gen_reg_rtx (mode
);
21362 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21363 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21364 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21365 gen_rtx_AND (mode
, one
, tmp
)));
21366 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21367 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21368 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21369 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21370 gen_rtx_AND (mode
, one
, tmp
)));
21371 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21373 /* res = copysign (xa2, operand1) */
21374 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21376 emit_label (label
);
21377 LABEL_NUSES (label
) = 1;
21379 emit_move_insn (operand0
, res
);
21382 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21385 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21387 /* C code for SSE variant we expand below.
21388 double xa = fabs (x), x2;
21389 if (!isless (xa, TWO52))
21391 x2 = (double)(long)x;
21392 if (HONOR_SIGNED_ZEROS (mode))
21393 return copysign (x2, x);
21396 enum machine_mode mode
= GET_MODE (operand0
);
21397 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21399 TWO52
= ix86_gen_TWO52 (mode
);
21401 /* Temporary for holding the result, initialized to the input
21402 operand to ease control flow. */
21403 res
= gen_reg_rtx (mode
);
21404 emit_move_insn (res
, operand1
);
21406 /* xa = abs (operand1) */
21407 xa
= ix86_expand_sse_fabs (res
, &mask
);
21409 /* if (!isless (xa, TWO52)) goto label; */
21410 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21412 /* x = (double)(long)x */
21413 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21414 expand_fix (xi
, res
, 0);
21415 expand_float (res
, xi
, 0);
21417 if (HONOR_SIGNED_ZEROS (mode
))
21418 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21420 emit_label (label
);
21421 LABEL_NUSES (label
) = 1;
21423 emit_move_insn (operand0
, res
);
21426 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21429 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21431 enum machine_mode mode
= GET_MODE (operand0
);
21432 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21434 /* C code for SSE variant we expand below.
21435 double xa = fabs (x), x2;
21436 if (!isless (xa, TWO52))
21438 xa2 = xa + TWO52 - TWO52;
21442 x2 = copysign (xa2, x);
21446 TWO52
= ix86_gen_TWO52 (mode
);
21448 /* Temporary for holding the result, initialized to the input
21449 operand to ease control flow. */
21450 res
= gen_reg_rtx (mode
);
21451 emit_move_insn (res
, operand1
);
21453 /* xa = abs (operand1) */
21454 xa
= ix86_expand_sse_fabs (res
, &smask
);
21456 /* if (!isless (xa, TWO52)) goto label; */
21457 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21459 /* res = xa + TWO52 - TWO52; */
21460 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21461 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21462 emit_move_insn (res
, tmp
);
21465 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21467 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21468 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21469 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21470 gen_rtx_AND (mode
, mask
, one
)));
21471 tmp
= expand_simple_binop (mode
, MINUS
,
21472 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21473 emit_move_insn (res
, tmp
);
21475 /* res = copysign (res, operand1) */
21476 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21478 emit_label (label
);
21479 LABEL_NUSES (label
) = 1;
21481 emit_move_insn (operand0
, res
);
21484 /* Expand SSE sequence for computing round from OPERAND1 storing
21487 ix86_expand_round (rtx operand0
, rtx operand1
)
21489 /* C code for the stuff we're doing below:
21490 double xa = fabs (x);
21491 if (!isless (xa, TWO52))
21493 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21494 return copysign (xa, x);
21496 enum machine_mode mode
= GET_MODE (operand0
);
21497 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21498 const struct real_format
*fmt
;
21499 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21501 /* Temporary for holding the result, initialized to the input
21502 operand to ease control flow. */
21503 res
= gen_reg_rtx (mode
);
21504 emit_move_insn (res
, operand1
);
21506 TWO52
= ix86_gen_TWO52 (mode
);
21507 xa
= ix86_expand_sse_fabs (res
, &mask
);
21508 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21510 /* load nextafter (0.5, 0.0) */
21511 fmt
= REAL_MODE_FORMAT (mode
);
21512 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21513 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21515 /* xa = xa + 0.5 */
21516 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21517 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21519 /* xa = (double)(int64_t)xa */
21520 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21521 expand_fix (xi
, xa
, 0);
21522 expand_float (xa
, xi
, 0);
21524 /* res = copysign (xa, operand1) */
21525 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21527 emit_label (label
);
21528 LABEL_NUSES (label
) = 1;
21530 emit_move_insn (operand0
, res
);
21533 #include "gt-i386.h"