1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit: -1 is the sentinel for "no limit" when the
   target configuration does not provide its own CHECK_STACK_LIMIT.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DI map to 0..3; any other mode falls through to slot 4
   ("other").  The trailing ": 4" default was lost in the damaged copy;
   without it the conditional expression is unterminated.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a cost in code bytes on the same
   scale, for the size-tuning cost table.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of l1 cache */
116 0, /* size of l2 cache */
117 0, /* size of prefetch block */
118 0, /* number of parallel prefetches */
120 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
121 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
122 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
123 COSTS_N_BYTES (2), /* cost of FABS instruction. */
124 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
125 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
128 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
129 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
132 /* Processor costs (relative to an add) */
134 struct processor_costs i386_cost
= { /* 386 specific costs */
135 COSTS_N_INSNS (1), /* cost of an add instruction */
136 COSTS_N_INSNS (1), /* cost of a lea instruction */
137 COSTS_N_INSNS (3), /* variable shift costs */
138 COSTS_N_INSNS (2), /* constant shift costs */
139 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
140 COSTS_N_INSNS (6), /* HI */
141 COSTS_N_INSNS (6), /* SI */
142 COSTS_N_INSNS (6), /* DI */
143 COSTS_N_INSNS (6)}, /* other */
144 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
145 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
146 COSTS_N_INSNS (23), /* HI */
147 COSTS_N_INSNS (23), /* SI */
148 COSTS_N_INSNS (23), /* DI */
149 COSTS_N_INSNS (23)}, /* other */
150 COSTS_N_INSNS (3), /* cost of movsx */
151 COSTS_N_INSNS (2), /* cost of movzx */
152 15, /* "large" insn */
154 4, /* cost for loading QImode using movzbl */
155 {2, 4, 2}, /* cost of loading integer registers
156 in QImode, HImode and SImode.
157 Relative to reg-reg move (2). */
158 {2, 4, 2}, /* cost of storing integer registers */
159 2, /* cost of reg,reg fld/fst */
160 {8, 8, 8}, /* cost of loading fp registers
161 in SFmode, DFmode and XFmode */
162 {8, 8, 8}, /* cost of storing fp registers
163 in SFmode, DFmode and XFmode */
164 2, /* cost of moving MMX register */
165 {4, 8}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {4, 8}, /* cost of storing MMX registers
168 in SImode and DImode */
169 2, /* cost of moving SSE register */
170 {4, 8, 16}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {4, 8, 16}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3, /* MMX or SSE register to integer */
175 0, /* size of l1 cache */
176 0, /* size of l2 cache */
177 0, /* size of prefetch block */
178 0, /* number of parallel prefetches */
180 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
181 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
182 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
183 COSTS_N_INSNS (22), /* cost of FABS instruction. */
184 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
185 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
186 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
187 DUMMY_STRINGOP_ALGS
},
188 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
189 DUMMY_STRINGOP_ALGS
},
193 struct processor_costs i486_cost
= { /* 486 specific costs */
194 COSTS_N_INSNS (1), /* cost of an add instruction */
195 COSTS_N_INSNS (1), /* cost of a lea instruction */
196 COSTS_N_INSNS (3), /* variable shift costs */
197 COSTS_N_INSNS (2), /* constant shift costs */
198 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
199 COSTS_N_INSNS (12), /* HI */
200 COSTS_N_INSNS (12), /* SI */
201 COSTS_N_INSNS (12), /* DI */
202 COSTS_N_INSNS (12)}, /* other */
203 1, /* cost of multiply per each bit set */
204 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
205 COSTS_N_INSNS (40), /* HI */
206 COSTS_N_INSNS (40), /* SI */
207 COSTS_N_INSNS (40), /* DI */
208 COSTS_N_INSNS (40)}, /* other */
209 COSTS_N_INSNS (3), /* cost of movsx */
210 COSTS_N_INSNS (2), /* cost of movzx */
211 15, /* "large" insn */
213 4, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {8, 8, 8}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {8, 8, 8}, /* cost of storing fp registers
222 in SFmode, DFmode and XFmode */
223 2, /* cost of moving MMX register */
224 {4, 8}, /* cost of loading MMX registers
225 in SImode and DImode */
226 {4, 8}, /* cost of storing MMX registers
227 in SImode and DImode */
228 2, /* cost of moving SSE register */
229 {4, 8, 16}, /* cost of loading SSE registers
230 in SImode, DImode and TImode */
231 {4, 8, 16}, /* cost of storing SSE registers
232 in SImode, DImode and TImode */
233 3, /* MMX or SSE register to integer */
234 4, /* size of l1 cache. 486 has 8kB cache
235 shared for code and data, so 4kB is
236 not really precise. */
237 4, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
241 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (3), /* cost of FABS instruction. */
245 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
247 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
248 DUMMY_STRINGOP_ALGS
},
249 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
254 struct processor_costs pentium_cost
= {
255 COSTS_N_INSNS (1), /* cost of an add instruction */
256 COSTS_N_INSNS (1), /* cost of a lea instruction */
257 COSTS_N_INSNS (4), /* variable shift costs */
258 COSTS_N_INSNS (1), /* constant shift costs */
259 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
260 COSTS_N_INSNS (11), /* HI */
261 COSTS_N_INSNS (11), /* SI */
262 COSTS_N_INSNS (11), /* DI */
263 COSTS_N_INSNS (11)}, /* other */
264 0, /* cost of multiply per each bit set */
265 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
266 COSTS_N_INSNS (25), /* HI */
267 COSTS_N_INSNS (25), /* SI */
268 COSTS_N_INSNS (25), /* DI */
269 COSTS_N_INSNS (25)}, /* other */
270 COSTS_N_INSNS (3), /* cost of movsx */
271 COSTS_N_INSNS (2), /* cost of movzx */
272 8, /* "large" insn */
274 6, /* cost for loading QImode using movzbl */
275 {2, 4, 2}, /* cost of loading integer registers
276 in QImode, HImode and SImode.
277 Relative to reg-reg move (2). */
278 {2, 4, 2}, /* cost of storing integer registers */
279 2, /* cost of reg,reg fld/fst */
280 {2, 2, 6}, /* cost of loading fp registers
281 in SFmode, DFmode and XFmode */
282 {4, 4, 6}, /* cost of storing fp registers
283 in SFmode, DFmode and XFmode */
284 8, /* cost of moving MMX register */
285 {8, 8}, /* cost of loading MMX registers
286 in SImode and DImode */
287 {8, 8}, /* cost of storing MMX registers
288 in SImode and DImode */
289 2, /* cost of moving SSE register */
290 {4, 8, 16}, /* cost of loading SSE registers
291 in SImode, DImode and TImode */
292 {4, 8, 16}, /* cost of storing SSE registers
293 in SImode, DImode and TImode */
294 3, /* MMX or SSE register to integer */
295 8, /* size of l1 cache. */
296 8, /* size of l2 cache */
297 0, /* size of prefetch block */
298 0, /* number of parallel prefetches */
300 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
301 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
302 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
303 COSTS_N_INSNS (1), /* cost of FABS instruction. */
304 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
305 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
306 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
307 DUMMY_STRINGOP_ALGS
},
308 {{libcall
, {{-1, rep_prefix_4_byte
}}},
313 struct processor_costs pentiumpro_cost
= {
314 COSTS_N_INSNS (1), /* cost of an add instruction */
315 COSTS_N_INSNS (1), /* cost of a lea instruction */
316 COSTS_N_INSNS (1), /* variable shift costs */
317 COSTS_N_INSNS (1), /* constant shift costs */
318 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
319 COSTS_N_INSNS (4), /* HI */
320 COSTS_N_INSNS (4), /* SI */
321 COSTS_N_INSNS (4), /* DI */
322 COSTS_N_INSNS (4)}, /* other */
323 0, /* cost of multiply per each bit set */
324 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
325 COSTS_N_INSNS (17), /* HI */
326 COSTS_N_INSNS (17), /* SI */
327 COSTS_N_INSNS (17), /* DI */
328 COSTS_N_INSNS (17)}, /* other */
329 COSTS_N_INSNS (1), /* cost of movsx */
330 COSTS_N_INSNS (1), /* cost of movzx */
331 8, /* "large" insn */
333 2, /* cost for loading QImode using movzbl */
334 {4, 4, 4}, /* cost of loading integer registers
335 in QImode, HImode and SImode.
336 Relative to reg-reg move (2). */
337 {2, 2, 2}, /* cost of storing integer registers */
338 2, /* cost of reg,reg fld/fst */
339 {2, 2, 6}, /* cost of loading fp registers
340 in SFmode, DFmode and XFmode */
341 {4, 4, 6}, /* cost of storing fp registers
342 in SFmode, DFmode and XFmode */
343 2, /* cost of moving MMX register */
344 {2, 2}, /* cost of loading MMX registers
345 in SImode and DImode */
346 {2, 2}, /* cost of storing MMX registers
347 in SImode and DImode */
348 2, /* cost of moving SSE register */
349 {2, 2, 8}, /* cost of loading SSE registers
350 in SImode, DImode and TImode */
351 {2, 2, 8}, /* cost of storing SSE registers
352 in SImode, DImode and TImode */
353 3, /* MMX or SSE register to integer */
354 8, /* size of l1 cache. */
355 256, /* size of l2 cache */
356 32, /* size of prefetch block */
357 6, /* number of parallel prefetches */
359 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
360 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
361 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
362 COSTS_N_INSNS (2), /* cost of FABS instruction. */
363 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
364 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
365 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
366 the alignment). For small blocks inline loop is still a noticeable win, for bigger
367 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
368 more expensive startup time in CPU, but after 4K the difference is down in the noise.
370 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
371 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
372 DUMMY_STRINGOP_ALGS
},
373 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
374 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
379 struct processor_costs geode_cost
= {
380 COSTS_N_INSNS (1), /* cost of an add instruction */
381 COSTS_N_INSNS (1), /* cost of a lea instruction */
382 COSTS_N_INSNS (2), /* variable shift costs */
383 COSTS_N_INSNS (1), /* constant shift costs */
384 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
385 COSTS_N_INSNS (4), /* HI */
386 COSTS_N_INSNS (7), /* SI */
387 COSTS_N_INSNS (7), /* DI */
388 COSTS_N_INSNS (7)}, /* other */
389 0, /* cost of multiply per each bit set */
390 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
391 COSTS_N_INSNS (23), /* HI */
392 COSTS_N_INSNS (39), /* SI */
393 COSTS_N_INSNS (39), /* DI */
394 COSTS_N_INSNS (39)}, /* other */
395 COSTS_N_INSNS (1), /* cost of movsx */
396 COSTS_N_INSNS (1), /* cost of movzx */
397 8, /* "large" insn */
399 1, /* cost for loading QImode using movzbl */
400 {1, 1, 1}, /* cost of loading integer registers
401 in QImode, HImode and SImode.
402 Relative to reg-reg move (2). */
403 {1, 1, 1}, /* cost of storing integer registers */
404 1, /* cost of reg,reg fld/fst */
405 {1, 1, 1}, /* cost of loading fp registers
406 in SFmode, DFmode and XFmode */
407 {4, 6, 6}, /* cost of storing fp registers
408 in SFmode, DFmode and XFmode */
410 1, /* cost of moving MMX register */
411 {1, 1}, /* cost of loading MMX registers
412 in SImode and DImode */
413 {1, 1}, /* cost of storing MMX registers
414 in SImode and DImode */
415 1, /* cost of moving SSE register */
416 {1, 1, 1}, /* cost of loading SSE registers
417 in SImode, DImode and TImode */
418 {1, 1, 1}, /* cost of storing SSE registers
419 in SImode, DImode and TImode */
420 1, /* MMX or SSE register to integer */
421 64, /* size of l1 cache. */
422 128, /* size of l2 cache. */
423 32, /* size of prefetch block */
424 1, /* number of parallel prefetches */
426 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (1), /* cost of FABS instruction. */
430 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
432 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
433 DUMMY_STRINGOP_ALGS
},
434 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
439 struct processor_costs k6_cost
= {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (2), /* cost of a lea instruction */
442 COSTS_N_INSNS (1), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (3), /* HI */
446 COSTS_N_INSNS (3), /* SI */
447 COSTS_N_INSNS (3), /* DI */
448 COSTS_N_INSNS (3)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (18), /* HI */
452 COSTS_N_INSNS (18), /* SI */
453 COSTS_N_INSNS (18), /* DI */
454 COSTS_N_INSNS (18)}, /* other */
455 COSTS_N_INSNS (2), /* cost of movsx */
456 COSTS_N_INSNS (2), /* cost of movzx */
457 8, /* "large" insn */
459 3, /* cost for loading QImode using movzbl */
460 {4, 5, 4}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {2, 3, 2}, /* cost of storing integer registers */
464 4, /* cost of reg,reg fld/fst */
465 {6, 6, 6}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 4, 4}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
469 2, /* cost of moving MMX register */
470 {2, 2}, /* cost of loading MMX registers
471 in SImode and DImode */
472 {2, 2}, /* cost of storing MMX registers
473 in SImode and DImode */
474 2, /* cost of moving SSE register */
475 {2, 2, 8}, /* cost of loading SSE registers
476 in SImode, DImode and TImode */
477 {2, 2, 8}, /* cost of storing SSE registers
478 in SImode, DImode and TImode */
479 6, /* MMX or SSE register to integer */
480 32, /* size of l1 cache. */
481 32, /* size of l2 cache. Some models
482 have integrated l2 cache, but
483 optimizing for k6 is not important
484 enough to worry about that. */
485 32, /* size of prefetch block */
486 1, /* number of parallel prefetches */
488 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
494 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
495 DUMMY_STRINGOP_ALGS
},
496 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
501 struct processor_costs athlon_cost
= {
502 COSTS_N_INSNS (1), /* cost of an add instruction */
503 COSTS_N_INSNS (2), /* cost of a lea instruction */
504 COSTS_N_INSNS (1), /* variable shift costs */
505 COSTS_N_INSNS (1), /* constant shift costs */
506 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
507 COSTS_N_INSNS (5), /* HI */
508 COSTS_N_INSNS (5), /* SI */
509 COSTS_N_INSNS (5), /* DI */
510 COSTS_N_INSNS (5)}, /* other */
511 0, /* cost of multiply per each bit set */
512 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
513 COSTS_N_INSNS (26), /* HI */
514 COSTS_N_INSNS (42), /* SI */
515 COSTS_N_INSNS (74), /* DI */
516 COSTS_N_INSNS (74)}, /* other */
517 COSTS_N_INSNS (1), /* cost of movsx */
518 COSTS_N_INSNS (1), /* cost of movzx */
519 8, /* "large" insn */
521 4, /* cost for loading QImode using movzbl */
522 {3, 4, 3}, /* cost of loading integer registers
523 in QImode, HImode and SImode.
524 Relative to reg-reg move (2). */
525 {3, 4, 3}, /* cost of storing integer registers */
526 4, /* cost of reg,reg fld/fst */
527 {4, 4, 12}, /* cost of loading fp registers
528 in SFmode, DFmode and XFmode */
529 {6, 6, 8}, /* cost of storing fp registers
530 in SFmode, DFmode and XFmode */
531 2, /* cost of moving MMX register */
532 {4, 4}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {4, 4}, /* cost of storing MMX registers
535 in SImode and DImode */
536 2, /* cost of moving SSE register */
537 {4, 4, 6}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {4, 4, 5}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 5, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 256, /* size of l2 cache. */
544 64, /* size of prefetch block */
545 6, /* number of parallel prefetches */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 /* For some reason, Athlon deals better with REP prefix (relative to loops)
554 compared to K8. Alignment becomes important after 8 bytes for memcpy and
555 128 bytes for memset. */
556 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
557 DUMMY_STRINGOP_ALGS
},
558 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
563 struct processor_costs k8_cost
= {
564 COSTS_N_INSNS (1), /* cost of an add instruction */
565 COSTS_N_INSNS (2), /* cost of a lea instruction */
566 COSTS_N_INSNS (1), /* variable shift costs */
567 COSTS_N_INSNS (1), /* constant shift costs */
568 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
569 COSTS_N_INSNS (4), /* HI */
570 COSTS_N_INSNS (3), /* SI */
571 COSTS_N_INSNS (4), /* DI */
572 COSTS_N_INSNS (5)}, /* other */
573 0, /* cost of multiply per each bit set */
574 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
575 COSTS_N_INSNS (26), /* HI */
576 COSTS_N_INSNS (42), /* SI */
577 COSTS_N_INSNS (74), /* DI */
578 COSTS_N_INSNS (74)}, /* other */
579 COSTS_N_INSNS (1), /* cost of movsx */
580 COSTS_N_INSNS (1), /* cost of movzx */
581 8, /* "large" insn */
583 4, /* cost for loading QImode using movzbl */
584 {3, 4, 3}, /* cost of loading integer registers
585 in QImode, HImode and SImode.
586 Relative to reg-reg move (2). */
587 {3, 4, 3}, /* cost of storing integer registers */
588 4, /* cost of reg,reg fld/fst */
589 {4, 4, 12}, /* cost of loading fp registers
590 in SFmode, DFmode and XFmode */
591 {6, 6, 8}, /* cost of storing fp registers
592 in SFmode, DFmode and XFmode */
593 2, /* cost of moving MMX register */
594 {3, 3}, /* cost of loading MMX registers
595 in SImode and DImode */
596 {4, 4}, /* cost of storing MMX registers
597 in SImode and DImode */
598 2, /* cost of moving SSE register */
599 {4, 3, 6}, /* cost of loading SSE registers
600 in SImode, DImode and TImode */
601 {4, 4, 5}, /* cost of storing SSE registers
602 in SImode, DImode and TImode */
603 5, /* MMX or SSE register to integer */
604 64, /* size of l1 cache. */
605 512, /* size of l2 cache. */
606 64, /* size of prefetch block */
607 /* New AMD processors never drop prefetches; if they cannot be performed
608 immediately, they are queued. We set number of simultaneous prefetches
609 to a large constant to reflect this (it probably is not a good idea not
610 to limit number of prefetches at all, as their execution also takes some
612 100, /* number of parallel prefetches */
614 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (2), /* cost of FABS instruction. */
618 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
620 /* K8 has optimized REP instruction for medium sized blocks, but for very small
621 blocks it is better to use loop. For large blocks, libcall can do
622 nontemporary accesses and beat inline considerably. */
623 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
624 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
625 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
626 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
627 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
630 struct processor_costs amdfam10_cost
= {
631 COSTS_N_INSNS (1), /* cost of an add instruction */
632 COSTS_N_INSNS (2), /* cost of a lea instruction */
633 COSTS_N_INSNS (1), /* variable shift costs */
634 COSTS_N_INSNS (1), /* constant shift costs */
635 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
636 COSTS_N_INSNS (4), /* HI */
637 COSTS_N_INSNS (3), /* SI */
638 COSTS_N_INSNS (4), /* DI */
639 COSTS_N_INSNS (5)}, /* other */
640 0, /* cost of multiply per each bit set */
641 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
642 COSTS_N_INSNS (35), /* HI */
643 COSTS_N_INSNS (51), /* SI */
644 COSTS_N_INSNS (83), /* DI */
645 COSTS_N_INSNS (83)}, /* other */
646 COSTS_N_INSNS (1), /* cost of movsx */
647 COSTS_N_INSNS (1), /* cost of movzx */
648 8, /* "large" insn */
650 4, /* cost for loading QImode using movzbl */
651 {3, 4, 3}, /* cost of loading integer registers
652 in QImode, HImode and SImode.
653 Relative to reg-reg move (2). */
654 {3, 4, 3}, /* cost of storing integer registers */
655 4, /* cost of reg,reg fld/fst */
656 {4, 4, 12}, /* cost of loading fp registers
657 in SFmode, DFmode and XFmode */
658 {6, 6, 8}, /* cost of storing fp registers
659 in SFmode, DFmode and XFmode */
660 2, /* cost of moving MMX register */
661 {3, 3}, /* cost of loading MMX registers
662 in SImode and DImode */
663 {4, 4}, /* cost of storing MMX registers
664 in SImode and DImode */
665 2, /* cost of moving SSE register */
666 {4, 4, 3}, /* cost of loading SSE registers
667 in SImode, DImode and TImode */
668 {4, 4, 5}, /* cost of storing SSE registers
669 in SImode, DImode and TImode */
670 3, /* MMX or SSE register to integer */
672 MOVD reg64, xmmreg Double FSTORE 4
673 MOVD reg32, xmmreg Double FSTORE 4
675 MOVD reg64, xmmreg Double FADD 3
677 MOVD reg32, xmmreg Double FADD 3
679 64, /* size of l1 cache. */
680 512, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 /* New AMD processors never drop prefetches; if they cannot be performed
683 immediately, they are queued. We set number of simultaneous prefetches
684 to a large constant to reflect this (it probably is not a good idea not
685 to limit number of prefetches at all, as their execution also takes some
687 100, /* number of parallel prefetches */
689 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
690 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
691 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
692 COSTS_N_INSNS (2), /* cost of FABS instruction. */
693 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
694 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
696 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
697 very small blocks it is better to use loop. For large blocks, libcall can
698 do nontemporary accesses and beat inline considerably. */
699 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
700 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
701 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
702 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
703 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
707 struct processor_costs pentium4_cost
= {
708 COSTS_N_INSNS (1), /* cost of an add instruction */
709 COSTS_N_INSNS (3), /* cost of a lea instruction */
710 COSTS_N_INSNS (4), /* variable shift costs */
711 COSTS_N_INSNS (4), /* constant shift costs */
712 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
713 COSTS_N_INSNS (15), /* HI */
714 COSTS_N_INSNS (15), /* SI */
715 COSTS_N_INSNS (15), /* DI */
716 COSTS_N_INSNS (15)}, /* other */
717 0, /* cost of multiply per each bit set */
718 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
719 COSTS_N_INSNS (56), /* HI */
720 COSTS_N_INSNS (56), /* SI */
721 COSTS_N_INSNS (56), /* DI */
722 COSTS_N_INSNS (56)}, /* other */
723 COSTS_N_INSNS (1), /* cost of movsx */
724 COSTS_N_INSNS (1), /* cost of movzx */
725 16, /* "large" insn */
727 2, /* cost for loading QImode using movzbl */
728 {4, 5, 4}, /* cost of loading integer registers
729 in QImode, HImode and SImode.
730 Relative to reg-reg move (2). */
731 {2, 3, 2}, /* cost of storing integer registers */
732 2, /* cost of reg,reg fld/fst */
733 {2, 2, 6}, /* cost of loading fp registers
734 in SFmode, DFmode and XFmode */
735 {4, 4, 6}, /* cost of storing fp registers
736 in SFmode, DFmode and XFmode */
737 2, /* cost of moving MMX register */
738 {2, 2}, /* cost of loading MMX registers
739 in SImode and DImode */
740 {2, 2}, /* cost of storing MMX registers
741 in SImode and DImode */
742 12, /* cost of moving SSE register */
743 {12, 12, 12}, /* cost of loading SSE registers
744 in SImode, DImode and TImode */
745 {2, 2, 8}, /* cost of storing SSE registers
746 in SImode, DImode and TImode */
747 10, /* MMX or SSE register to integer */
748 8, /* size of l1 cache. */
749 256, /* size of l2 cache. */
750 64, /* size of prefetch block */
751 6, /* number of parallel prefetches */
753 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
754 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
755 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
756 COSTS_N_INSNS (2), /* cost of FABS instruction. */
757 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
758 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
759 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
760 DUMMY_STRINGOP_ALGS
},
761 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
763 DUMMY_STRINGOP_ALGS
},
767 struct processor_costs nocona_cost
= {
768 COSTS_N_INSNS (1), /* cost of an add instruction */
769 COSTS_N_INSNS (1), /* cost of a lea instruction */
770 COSTS_N_INSNS (1), /* variable shift costs */
771 COSTS_N_INSNS (1), /* constant shift costs */
772 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
773 COSTS_N_INSNS (10), /* HI */
774 COSTS_N_INSNS (10), /* SI */
775 COSTS_N_INSNS (10), /* DI */
776 COSTS_N_INSNS (10)}, /* other */
777 0, /* cost of multiply per each bit set */
778 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
779 COSTS_N_INSNS (66), /* HI */
780 COSTS_N_INSNS (66), /* SI */
781 COSTS_N_INSNS (66), /* DI */
782 COSTS_N_INSNS (66)}, /* other */
783 COSTS_N_INSNS (1), /* cost of movsx */
784 COSTS_N_INSNS (1), /* cost of movzx */
785 16, /* "large" insn */
787 4, /* cost for loading QImode using movzbl */
788 {4, 4, 4}, /* cost of loading integer registers
789 in QImode, HImode and SImode.
790 Relative to reg-reg move (2). */
791 {4, 4, 4}, /* cost of storing integer registers */
792 3, /* cost of reg,reg fld/fst */
793 {12, 12, 12}, /* cost of loading fp registers
794 in SFmode, DFmode and XFmode */
795 {4, 4, 4}, /* cost of storing fp registers
796 in SFmode, DFmode and XFmode */
797 6, /* cost of moving MMX register */
798 {12, 12}, /* cost of loading MMX registers
799 in SImode and DImode */
800 {12, 12}, /* cost of storing MMX registers
801 in SImode and DImode */
802 6, /* cost of moving SSE register */
803 {12, 12, 12}, /* cost of loading SSE registers
804 in SImode, DImode and TImode */
805 {12, 12, 12}, /* cost of storing SSE registers
806 in SImode, DImode and TImode */
807 8, /* MMX or SSE register to integer */
808 8, /* size of l1 cache. */
809 1024, /* size of l2 cache. */
810 128, /* size of prefetch block */
811 8, /* number of parallel prefetches */
813 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
814 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
815 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
816 COSTS_N_INSNS (3), /* cost of FABS instruction. */
817 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
818 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
819 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
820 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
821 {100000, unrolled_loop
}, {-1, libcall
}}}},
822 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
824 {libcall
, {{24, loop
}, {64, unrolled_loop
},
825 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
829 struct processor_costs core2_cost
= {
830 COSTS_N_INSNS (1), /* cost of an add instruction */
831 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
832 COSTS_N_INSNS (1), /* variable shift costs */
833 COSTS_N_INSNS (1), /* constant shift costs */
834 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
835 COSTS_N_INSNS (3), /* HI */
836 COSTS_N_INSNS (3), /* SI */
837 COSTS_N_INSNS (3), /* DI */
838 COSTS_N_INSNS (3)}, /* other */
839 0, /* cost of multiply per each bit set */
840 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
841 COSTS_N_INSNS (22), /* HI */
842 COSTS_N_INSNS (22), /* SI */
843 COSTS_N_INSNS (22), /* DI */
844 COSTS_N_INSNS (22)}, /* other */
845 COSTS_N_INSNS (1), /* cost of movsx */
846 COSTS_N_INSNS (1), /* cost of movzx */
847 8, /* "large" insn */
849 2, /* cost for loading QImode using movzbl */
850 {6, 6, 6}, /* cost of loading integer registers
851 in QImode, HImode and SImode.
852 Relative to reg-reg move (2). */
853 {4, 4, 4}, /* cost of storing integer registers */
854 2, /* cost of reg,reg fld/fst */
855 {6, 6, 6}, /* cost of loading fp registers
856 in SFmode, DFmode and XFmode */
857 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
858 2, /* cost of moving MMX register */
859 {6, 6}, /* cost of loading MMX registers
860 in SImode and DImode */
861 {4, 4}, /* cost of storing MMX registers
862 in SImode and DImode */
863 2, /* cost of moving SSE register */
864 {6, 6, 6}, /* cost of loading SSE registers
865 in SImode, DImode and TImode */
866 {4, 4, 4}, /* cost of storing SSE registers
867 in SImode, DImode and TImode */
868 2, /* MMX or SSE register to integer */
869 32, /* size of l1 cache. */
870 2048, /* size of l2 cache. */
871 128, /* size of prefetch block */
872 8, /* number of parallel prefetches */
874 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
875 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
876 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
877 COSTS_N_INSNS (1), /* cost of FABS instruction. */
878 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
879 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
880 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
881 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
882 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
883 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
884 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
885 {libcall
, {{24, loop
}, {32, unrolled_loop
},
886 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
889 /* Generic64 should produce code tuned for Nocona and K8. */
891 struct processor_costs generic64_cost
= {
892 COSTS_N_INSNS (1), /* cost of an add instruction */
893 /* On all chips taken into consideration lea is 2 cycles and more. With
894 this cost however our current implementation of synth_mult results in
895 use of unnecessary temporary registers causing regression on several
896 SPECfp benchmarks. */
897 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
898 COSTS_N_INSNS (1), /* variable shift costs */
899 COSTS_N_INSNS (1), /* constant shift costs */
900 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
901 COSTS_N_INSNS (4), /* HI */
902 COSTS_N_INSNS (3), /* SI */
903 COSTS_N_INSNS (4), /* DI */
904 COSTS_N_INSNS (2)}, /* other */
905 0, /* cost of multiply per each bit set */
906 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
907 COSTS_N_INSNS (26), /* HI */
908 COSTS_N_INSNS (42), /* SI */
909 COSTS_N_INSNS (74), /* DI */
910 COSTS_N_INSNS (74)}, /* other */
911 COSTS_N_INSNS (1), /* cost of movsx */
912 COSTS_N_INSNS (1), /* cost of movzx */
913 8, /* "large" insn */
915 4, /* cost for loading QImode using movzbl */
916 {4, 4, 4}, /* cost of loading integer registers
917 in QImode, HImode and SImode.
918 Relative to reg-reg move (2). */
919 {4, 4, 4}, /* cost of storing integer registers */
920 4, /* cost of reg,reg fld/fst */
921 {12, 12, 12}, /* cost of loading fp registers
922 in SFmode, DFmode and XFmode */
923 {6, 6, 8}, /* cost of storing fp registers
924 in SFmode, DFmode and XFmode */
925 2, /* cost of moving MMX register */
926 {8, 8}, /* cost of loading MMX registers
927 in SImode and DImode */
928 {8, 8}, /* cost of storing MMX registers
929 in SImode and DImode */
930 2, /* cost of moving SSE register */
931 {8, 8, 8}, /* cost of loading SSE registers
932 in SImode, DImode and TImode */
933 {8, 8, 8}, /* cost of storing SSE registers
934 in SImode, DImode and TImode */
935 5, /* MMX or SSE register to integer */
936 32, /* size of l1 cache. */
937 512, /* size of l2 cache. */
938 64, /* size of prefetch block */
939 6, /* number of parallel prefetches */
940 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
941 is increased to perhaps more appropriate value of 5. */
943 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
944 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
945 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
946 COSTS_N_INSNS (8), /* cost of FABS instruction. */
947 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
948 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
949 {DUMMY_STRINGOP_ALGS
,
950 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
951 {DUMMY_STRINGOP_ALGS
,
952 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
955 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
957 struct processor_costs generic32_cost
= {
958 COSTS_N_INSNS (1), /* cost of an add instruction */
959 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
960 COSTS_N_INSNS (1), /* variable shift costs */
961 COSTS_N_INSNS (1), /* constant shift costs */
962 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
963 COSTS_N_INSNS (4), /* HI */
964 COSTS_N_INSNS (3), /* SI */
965 COSTS_N_INSNS (4), /* DI */
966 COSTS_N_INSNS (2)}, /* other */
967 0, /* cost of multiply per each bit set */
968 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
969 COSTS_N_INSNS (26), /* HI */
970 COSTS_N_INSNS (42), /* SI */
971 COSTS_N_INSNS (74), /* DI */
972 COSTS_N_INSNS (74)}, /* other */
973 COSTS_N_INSNS (1), /* cost of movsx */
974 COSTS_N_INSNS (1), /* cost of movzx */
975 8, /* "large" insn */
977 4, /* cost for loading QImode using movzbl */
978 {4, 4, 4}, /* cost of loading integer registers
979 in QImode, HImode and SImode.
980 Relative to reg-reg move (2). */
981 {4, 4, 4}, /* cost of storing integer registers */
982 4, /* cost of reg,reg fld/fst */
983 {12, 12, 12}, /* cost of loading fp registers
984 in SFmode, DFmode and XFmode */
985 {6, 6, 8}, /* cost of storing fp registers
986 in SFmode, DFmode and XFmode */
987 2, /* cost of moving MMX register */
988 {8, 8}, /* cost of loading MMX registers
989 in SImode and DImode */
990 {8, 8}, /* cost of storing MMX registers
991 in SImode and DImode */
992 2, /* cost of moving SSE register */
993 {8, 8, 8}, /* cost of loading SSE registers
994 in SImode, DImode and TImode */
995 {8, 8, 8}, /* cost of storing SSE registers
996 in SImode, DImode and TImode */
997 5, /* MMX or SSE register to integer */
998 32, /* size of l1 cache. */
999 256, /* size of l2 cache. */
1000 64, /* size of prefetch block */
1001 6, /* number of parallel prefetches */
1002 3, /* Branch cost */
1003 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1004 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1005 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1006 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1007 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1008 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1009 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1010 DUMMY_STRINGOP_ALGS
},
1011 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1012 DUMMY_STRINGOP_ALGS
},
/* Cost table currently in effect for rtx_costs and friends.  Defaults to
   Pentium costs; presumably reassigned during option processing from
   processor_target_table based on -mtune — confirm in override_options.  */
1015 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1017 /* Processor feature/optimization bitmasks. */
1018 #define m_386 (1<<PROCESSOR_I386)
1019 #define m_486 (1<<PROCESSOR_I486)
1020 #define m_PENT (1<<PROCESSOR_PENTIUM)
1021 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1022 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1023 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1024 #define m_CORE2 (1<<PROCESSOR_CORE2)
1026 #define m_GEODE (1<<PROCESSOR_GEODE)
1027 #define m_K6 (1<<PROCESSOR_K6)
1028 #define m_K6_GEODE (m_K6 | m_GEODE)
1029 #define m_K8 (1<<PROCESSOR_K8)
1030 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1031 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1032 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1033 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1035 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1036 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1038 /* Generic instruction choice should be common subset of supported CPUs
1039 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1040 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1042 /* Feature tests against the various tunings. */
1043 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1044 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1045 negatively, so enabling for Generic64 seems like good code size
1046 tradeoff. We can't enable it for 32bit generic because it does not
1047 work well with PPro base chips. */
1048 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1050 /* X86_TUNE_PUSH_MEMORY */
1051 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1052 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1054 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1057 /* X86_TUNE_USE_BIT_TEST */
1060 /* X86_TUNE_UNROLL_STRLEN */
1061 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1063 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1064 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_GENERIC
,
1066 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1067 on simulation result. But after P4 was made, no performance benefit
1068 was observed with branch hints. It also increases the code size.
1069 As a result, icc never generates branch hints. */
1072 /* X86_TUNE_DOUBLE_WITH_ADD */
1075 /* X86_TUNE_USE_SAHF */
1076 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1077 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1079 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1080 partial dependencies. */
1081 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1082 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1084 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1085 register stalls on Generic32 compilation setting as well. However
1086 in current implementation the partial register stalls are not eliminated
1087 very well - they can be introduced via subregs synthesized by combine
1088 and can happen in caller/callee saving sequences. Because this option
1089 pays back little on PPro based chips and is in conflict with partial reg
1090 dependencies used by Athlon/P4 based chips, it is better to leave it off
1091 for generic32 for now. */
1094 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1095 m_CORE2
| m_GENERIC
,
1097 /* X86_TUNE_USE_HIMODE_FIOP */
1098 m_386
| m_486
| m_K6_GEODE
,
1100 /* X86_TUNE_USE_SIMODE_FIOP */
1101 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1103 /* X86_TUNE_USE_MOV0 */
1106 /* X86_TUNE_USE_CLTD */
1107 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1109 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1112 /* X86_TUNE_SPLIT_LONG_MOVES */
1115 /* X86_TUNE_READ_MODIFY_WRITE */
1118 /* X86_TUNE_READ_MODIFY */
1121 /* X86_TUNE_PROMOTE_QIMODE */
1122 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1123 | m_GENERIC
/* | m_PENT4 ? */,
1125 /* X86_TUNE_FAST_PREFIX */
1126 ~(m_PENT
| m_486
| m_386
),
1128 /* X86_TUNE_SINGLE_STRINGOP */
1129 m_386
| m_PENT4
| m_NOCONA
,
1131 /* X86_TUNE_QIMODE_MATH */
1134 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1135 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1136 might be considered for Generic32 if our scheme for avoiding partial
1137 stalls was more effective. */
1140 /* X86_TUNE_PROMOTE_QI_REGS */
1143 /* X86_TUNE_PROMOTE_HI_REGS */
1146 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1147 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1149 /* X86_TUNE_ADD_ESP_8 */
1150 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1151 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1153 /* X86_TUNE_SUB_ESP_4 */
1154 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1156 /* X86_TUNE_SUB_ESP_8 */
1157 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1158 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1160 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1161 for DFmode copies */
1162 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1163 | m_GENERIC
| m_GEODE
),
1165 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1166 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1168 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1169 conflict here between PPro/Pentium4 based chips that treat 128bit
1170 SSE registers as single units versus K8 based chips that divide SSE
1171 registers to two 64bit halves. This knob promotes all store destinations
1172 to be 128bit to allow register renaming on 128bit SSE units, but usually
1173 results in one extra microop on 64bit SSE units. Experimental results
1174 shows that disabling this option on P4 brings over 20% SPECfp regression,
1175 while enabling it on K8 brings roughly 2.4% regression that can be partly
1176 masked by careful scheduling of moves. */
1177 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1179 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1182 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1183 are resolved on SSE register parts instead of whole registers, so we may
1184 maintain just lower part of scalar values in proper format leaving the
1185 upper part undefined. */
1188 /* X86_TUNE_SSE_TYPELESS_STORES */
1189 m_ATHLON_K8_AMDFAM10
,
1191 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1192 m_PPRO
| m_PENT4
| m_NOCONA
,
1194 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1195 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1197 /* X86_TUNE_PROLOGUE_USING_MOVE */
1198 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1200 /* X86_TUNE_EPILOGUE_USING_MOVE */
1201 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1203 /* X86_TUNE_SHIFT1 */
1206 /* X86_TUNE_USE_FFREEP */
1207 m_ATHLON_K8_AMDFAM10
,
1209 /* X86_TUNE_INTER_UNIT_MOVES */
1210 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1212 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1213 than 4 branch instructions in the 16 byte window. */
1214 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1216 /* X86_TUNE_SCHEDULE */
1217 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1219 /* X86_TUNE_USE_BT */
1220 m_ATHLON_K8_AMDFAM10
,
1222 /* X86_TUNE_USE_INCDEC */
1223 ~(m_PENT4
| m_NOCONA
| m_GENERIC
),
1225 /* X86_TUNE_PAD_RETURNS */
1226 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1228 /* X86_TUNE_EXT_80387_CONSTANTS */
1229 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1231 /* X86_TUNE_SHORTEN_X87_SSE */
1234 /* X86_TUNE_AVOID_VECTOR_DECODE */
1237 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1238 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1241 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1242 vector path on AMD machines. */
1243 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1245 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1247 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1249 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1253 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1254 but one byte longer. */
1257 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1258 operand that cannot be represented using a modRM byte. The XOR
1259 replacement is long decoded, so this split helps here as well. */
1262 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1263 from integer to FP. */
1267 /* Feature tests against the various architecture variations. */
1268 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1269 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1270 ~(m_386
| m_486
| m_PENT
| m_K6
),
1272 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1275 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1278 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1281 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Bitmask of processors (m_* tuning masks) for which accumulating outgoing
   call arguments is preferred; presumably used to default
   -maccumulate-outgoing-args — confirm at the use site.  */
1285 static const unsigned int x86_accumulate_outgoing_args
1286 = m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
/* Bitmask of processors on which the x87 transcendental built-ins
   (fancy math 387 instructions) are always usable; NOTE(review): exact
   consumer not visible in this chunk — confirm where this mask is tested.  */
1288 static const unsigned int x86_arch_always_fancy_math_387
1289 = m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1290 | m_NOCONA
| m_CORE2
| m_GENERIC
;
/* User-forced string-operation strategy; no_stringop means no override is
   in effect (presumably set from a -mstringop-strategy style option —
   confirm in option handling).  */
1292 static enum stringop_alg stringop_alg
= no_stringop
;
1294 /* In case the average insn count for single function invocation is
1295 lower than this constant, emit fast (but longer) prologue and
1297 #define FAST_PROLOGUE_INSN_COUNT 20
1299 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES macros are presumably defined in i386.h —
   confirm; these tables are indexed by hard register number.  */
1300 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1301 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1302 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1304 /* Array of the smallest class containing reg number REGNO, indexed by
1305 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1307 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1309 /* ax, dx, cx, bx */
1310 AREG
, DREG
, CREG
, BREG
,
1311 /* si, di, bp, sp */
1312 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1314 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1315 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1318 /* flags, fpsr, fpcr, frame */
1319 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1321 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1324 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1327 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1328 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1329 /* SSE REX registers */
1330 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1334 /* The "default" register map used in 32bit mode. */
1336 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1338 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1339 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1340 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1347 static int const x86_64_int_parameter_registers
[6] =
1349 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1350 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1353 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1355 2 /*RCX*/, 1 /*RDX*/,
1356 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1359 static int const x86_64_int_return_registers
[4] =
1361 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1364 /* The "default" register map used in 64bit mode. */
1365 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1367 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1368 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1369 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1370 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1371 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1372 8,9,10,11,12,13,14,15, /* extended integer registers */
1373 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1376 /* Define the register numbers to be used in Dwarf debugging information.
1377 The SVR4 reference port C compiler uses the following register numbers
1378 in its Dwarf output code:
1379 0 for %eax (gcc regno = 0)
1380 1 for %ecx (gcc regno = 2)
1381 2 for %edx (gcc regno = 1)
1382 3 for %ebx (gcc regno = 3)
1383 4 for %esp (gcc regno = 7)
1384 5 for %ebp (gcc regno = 6)
1385 6 for %esi (gcc regno = 4)
1386 7 for %edi (gcc regno = 5)
1387 The following three DWARF register numbers are never generated by
1388 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1389 believes these numbers have these meanings.
1390 8 for %eip (no gcc equivalent)
1391 9 for %eflags (gcc regno = 17)
1392 10 for %trapno (no gcc equivalent)
1393 It is not at all clear how we should number the FP stack registers
1394 for the x86 architecture. If the version of SDB on x86/svr4 were
1395 a bit less brain dead with respect to floating-point then we would
1396 have a precedent to follow with respect to DWARF register numbers
1397 for x86 FP registers, but the SDB on x86/svr4 is so completely
1398 broken with respect to FP registers that it is hardly worth thinking
1399 of it as something to strive for compatibility with.
1400 The version of x86/svr4 SDB I have at the moment does (partially)
1401 seem to believe that DWARF register number 11 is associated with
1402 the x86 register %st(0), but that's about all. Higher DWARF
1403 register numbers don't seem to be associated with anything in
1404 particular, and even for DWARF regno 11, SDB only seems to under-
1405 stand that it should say that a variable lives in %st(0) (when
1406 asked via an `=' command) if we said it was in DWARF regno 11,
1407 but SDB still prints garbage when asked for the value of the
1408 variable in question (via a `/' command).
1409 (Also note that the labels SDB prints for various FP stack regs
1410 when doing an `x' command are all wrong.)
1411 Note that these problems generally don't affect the native SVR4
1412 C compiler because it doesn't allow the use of -O with -g and
1413 because when it is *not* optimizing, it allocates a memory
1414 location for each floating-point variable, and the memory
1415 location is what gets described in the DWARF AT_location
1416 attribute for the variable in question.
1417 Regardless of the severe mental illness of the x86/svr4 SDB, we
1418 do something sensible here and we use the following DWARF
1419 register numbers. Note that these are all stack-top-relative
1421 11 for %st(0) (gcc regno = 8)
1422 12 for %st(1) (gcc regno = 9)
1423 13 for %st(2) (gcc regno = 10)
1424 14 for %st(3) (gcc regno = 11)
1425 15 for %st(4) (gcc regno = 12)
1426 16 for %st(5) (gcc regno = 13)
1427 17 for %st(6) (gcc regno = 14)
1428 18 for %st(7) (gcc regno = 15)
1430 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1432 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1433 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1434 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1435 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1436 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1437 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1438 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1441 /* Test and compare insns in i386.md store the information needed to
1442 generate branch and scc insns here.  All three start out as NULL_RTX
     and are filled in by the expanders before the branch/scc is emitted. */
1444 rtx ix86_compare_op0
= NULL_RTX
;
1445 rtx ix86_compare_op1
= NULL_RTX
;
1446 rtx ix86_compare_emitted
= NULL_RTX
;
1448 /* Size of the register save area. */
1449 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1451 /* Define the structure for the machine field in struct function. */
1453 struct stack_local_entry
GTY(())
1455 unsigned short mode
;
1458 struct stack_local_entry
*next
;
1461 /* Structure describing stack frame layout.
1462 Stack grows downward:
1468 saved frame pointer if frame_pointer_needed
1469 <- HARD_FRAME_POINTER
1474 [va_arg registers] (
1475 > to_allocate <- FRAME_POINTER
1485 HOST_WIDE_INT frame
;
1487 int outgoing_arguments_size
;
1490 HOST_WIDE_INT to_allocate
;
1491 /* The offsets relative to ARG_POINTER. */
1492 HOST_WIDE_INT frame_pointer_offset
;
1493 HOST_WIDE_INT hard_frame_pointer_offset
;
1494 HOST_WIDE_INT stack_pointer_offset
;
1496 /* When save_regs_using_mov is set, emit prologue using
1497 move instead of push instructions. */
1498 bool save_regs_using_mov
;
1501 /* Code model option. */
1502 enum cmodel ix86_cmodel
;
/* Assembler dialect in use; AT&T syntax by default. */
1504 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* TLS access model; GNU dialect by default. */
1506 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1508 /* Which unit we are generating floating point math for. */
1509 enum fpmath_unit ix86_fpmath
;
1511 /* Which cpu are we scheduling for. */
1512 enum processor_type ix86_tune
;
1514 /* Which instruction set architecture to use. */
1515 enum processor_type ix86_arch
;
1517 /* true if sse prefetch instruction is not NOOP. */
1518 int x86_prefetch_sse
;
1520 /* ix86_regparm_string as a number */
1521 static int ix86_regparm
;
1523 /* -mstackrealign option */
1524 extern int ix86_force_align_arg_pointer
;
/* Attribute name string used to mark functions needing argument-pointer
   realignment. */
1525 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1527 /* Preferred alignment for stack boundary in bits. */
1528 unsigned int ix86_preferred_stack_boundary
;
1530 /* Values 1-5: see jump.c */
1531 int ix86_branch_cost
;
1533 /* Variables which are this size or smaller are put in the data/bss
1534 or ldata/lbss sections. */
1536 int ix86_section_threshold
= 65536;
1538 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1539 char internal_label_prefix
[16];
/* Length of internal_label_prefix, cached to avoid repeated strlen. */
1540 int internal_label_prefix_len
;
1542 /* Fence to use after loop using movnt. */
1545 /* Register class used for passing given 64bit part of the argument.
1546 These represent classes as documented by the PS ABI, with the exception
1547 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1548 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1550 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1551 whenever possible (upper half does contain padding). */
1552 enum x86_64_reg_class
1555 X86_64_INTEGER_CLASS
,
1556 X86_64_INTEGERSI_CLASS
,
1563 X86_64_COMPLEX_X87_CLASS
,
1566 static const char * const x86_64_reg_class_name
[] =
1568 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1569 "sseup", "x87", "x87up", "cplx87", "no"
1572 #define MAX_CLASSES 4
1574 /* Table of constants used by fldpi, fldln2, etc....  Lazily filled in;
     ext_80387_constants_init records whether the table has been built. */
1575 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1576 static bool ext_80387_constants_init
= 0;
/* Forward declarations of static helpers defined later in this file. */
1579 static struct machine_function
* ix86_init_machine_status (void);
1580 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
1581 static int ix86_function_regparm (const_tree
, const_tree
);
1582 static void ix86_compute_frame_layout (struct ix86_frame
*);
1583 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1587 /* The svr4 ABI for the i386 says that records and unions are returned
1589 #ifndef DEFAULT_PCC_STRUCT_RETURN
1590 #define DEFAULT_PCC_STRUCT_RETURN 1
1593 /* Bit flags that specify the ISA we are compiling for.  Seeded from the
     target's configure-time defaults. */
1594 int ix86_isa_flags
= TARGET_64BIT_DEFAULT
| TARGET_SUBTARGET_ISA_DEFAULT
;
1596 /* A mask of ix86_isa_flags that includes bit X if X
1597 was set or cleared on the command line. */
1598 static int ix86_isa_flags_explicit
;
1600 /* Define a set of ISAs which aren't available for a given ISA. MMX
1601 and SSE ISAs are handled separately. */
1603 #define OPTION_MASK_ISA_MMX_UNSET \
1604 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1605 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1607 #define OPTION_MASK_ISA_SSE_UNSET \
1608 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1609 #define OPTION_MASK_ISA_SSE2_UNSET \
1610 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1611 #define OPTION_MASK_ISA_SSE3_UNSET \
1612 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1613 #define OPTION_MASK_ISA_SSSE3_UNSET \
1614 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1615 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1616 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1617 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1619 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1620 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1621 #define OPTION_MASK_ISA_SSE4 \
1622 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1623 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1625 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1627 /* Vectorization library interface and handlers.  When non-NULL, the
     handler maps a scalar built-in to a vectorized library routine. */
1628 tree (*ix86_veclib_handler
)(enum built_in_function
, tree
, tree
) = NULL
;
/* Handler for the ACML vector math library. */
1629 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
1631 /* Implement TARGET_HANDLE_OPTION. */
1634 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1639 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX
;
1642 ix86_isa_flags
&= ~OPTION_MASK_ISA_MMX_UNSET
;
1643 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_UNSET
;
1648 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW
;
1651 ix86_isa_flags
&= ~OPTION_MASK_ISA_3DNOW_UNSET
;
1652 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_UNSET
;
1660 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE
;
1663 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE_UNSET
;
1664 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_UNSET
;
1669 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2
;
1672 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE2_UNSET
;
1673 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_UNSET
;
1678 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3
;
1681 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE3_UNSET
;
1682 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_UNSET
;
1687 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3
;
1690 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSSE3_UNSET
;
1691 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_UNSET
;
1696 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1
;
1699 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_1_UNSET
;
1700 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_UNSET
;
1705 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2
;
1708 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_2_UNSET
;
1709 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_UNSET
;
1714 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4
;
1715 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4
;
1719 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_UNSET
;
1720 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_UNSET
;
1724 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A
;
1727 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4A_UNSET
;
1728 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_UNSET
;
1737 /* Sometimes certain combinations of command options do not make
1738 sense on a particular target machine. You can define a macro
1739 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1740 defined, is executed once just after all the command options have
1743 Don't use this macro to turn on various extra optimizations for
1744 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1747 override_options (void)
1750 int ix86_tune_defaulted
= 0;
1751 int ix86_arch_specified
= 0;
1752 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1754 /* Comes from final.c -- no real reason to change it. */
1755 #define MAX_CODE_ALIGN 16
1759 const struct processor_costs
*cost
; /* Processor costs */
1760 const int align_loop
; /* Default alignments. */
1761 const int align_loop_max_skip
;
1762 const int align_jump
;
1763 const int align_jump_max_skip
;
1764 const int align_func
;
1766 const processor_target_table
[PROCESSOR_max
] =
1768 {&i386_cost
, 4, 3, 4, 3, 4},
1769 {&i486_cost
, 16, 15, 16, 15, 16},
1770 {&pentium_cost
, 16, 7, 16, 7, 16},
1771 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
1772 {&geode_cost
, 0, 0, 0, 0, 0},
1773 {&k6_cost
, 32, 7, 32, 7, 32},
1774 {&athlon_cost
, 16, 7, 16, 7, 16},
1775 {&pentium4_cost
, 0, 0, 0, 0, 0},
1776 {&k8_cost
, 16, 7, 16, 7, 16},
1777 {&nocona_cost
, 0, 0, 0, 0, 0},
1778 {&core2_cost
, 16, 10, 16, 10, 16},
1779 {&generic32_cost
, 16, 7, 16, 7, 16},
1780 {&generic64_cost
, 16, 10, 16, 10, 16},
1781 {&amdfam10_cost
, 32, 24, 32, 7, 32}
1784 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1791 PTA_PREFETCH_SSE
= 1 << 4,
1793 PTA_3DNOW_A
= 1 << 6,
1797 PTA_POPCNT
= 1 << 10,
1799 PTA_SSE4A
= 1 << 12,
1800 PTA_NO_SAHF
= 1 << 13,
1801 PTA_SSE4_1
= 1 << 14,
1802 PTA_SSE4_2
= 1 << 15
1807 const char *const name
; /* processor name or nickname. */
1808 const enum processor_type processor
;
1809 const unsigned /*enum pta_flags*/ flags
;
1811 const processor_alias_table
[] =
1813 {"i386", PROCESSOR_I386
, 0},
1814 {"i486", PROCESSOR_I486
, 0},
1815 {"i586", PROCESSOR_PENTIUM
, 0},
1816 {"pentium", PROCESSOR_PENTIUM
, 0},
1817 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1818 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1819 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1820 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1821 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1822 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1823 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1824 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1825 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1826 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1827 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
1828 {"pentium4", PROCESSOR_PENTIUM4
, PTA_MMX
|PTA_SSE
| PTA_SSE2
},
1829 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
1830 {"prescott", PROCESSOR_NOCONA
, PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
1831 {"nocona", PROCESSOR_NOCONA
, (PTA_64BIT
1832 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
1833 | PTA_CX16
| PTA_NO_SAHF
)},
1834 {"core2", PROCESSOR_CORE2
, (PTA_64BIT
1835 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
1838 {"geode", PROCESSOR_GEODE
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1839 |PTA_PREFETCH_SSE
)},
1840 {"k6", PROCESSOR_K6
, PTA_MMX
},
1841 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1842 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1843 {"athlon", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1844 | PTA_PREFETCH_SSE
)},
1845 {"athlon-tbird", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1846 | PTA_PREFETCH_SSE
)},
1847 {"athlon-4", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1849 {"athlon-xp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1851 {"athlon-mp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1853 {"x86-64", PROCESSOR_K8
, (PTA_64BIT
1854 | PTA_MMX
| PTA_SSE
| PTA_SSE2
1856 {"k8", PROCESSOR_K8
, (PTA_64BIT
1857 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1858 | PTA_SSE
| PTA_SSE2
1860 {"k8-sse3", PROCESSOR_K8
, (PTA_64BIT
1861 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1862 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1864 {"opteron", PROCESSOR_K8
, (PTA_64BIT
1865 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1866 | PTA_SSE
| PTA_SSE2
1868 {"opteron-sse3", PROCESSOR_K8
, (PTA_64BIT
1869 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1870 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1872 {"athlon64", PROCESSOR_K8
, (PTA_64BIT
1873 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1874 | PTA_SSE
| PTA_SSE2
1876 {"athlon64-sse3", PROCESSOR_K8
, (PTA_64BIT
1877 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1878 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1880 {"athlon-fx", PROCESSOR_K8
, (PTA_64BIT
1881 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1882 | PTA_SSE
| PTA_SSE2
1884 {"amdfam10", PROCESSOR_AMDFAM10
, (PTA_64BIT
1885 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1886 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1888 | PTA_CX16
| PTA_ABM
)},
1889 {"barcelona", PROCESSOR_AMDFAM10
, (PTA_64BIT
1890 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1891 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1893 | PTA_CX16
| PTA_ABM
)},
1894 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1895 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1898 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1900 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1901 SUBTARGET_OVERRIDE_OPTIONS
;
1904 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1905 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1908 /* -fPIC is the default for x86_64. */
1909 if (TARGET_MACHO
&& TARGET_64BIT
)
1912 /* Set the default values for switches whose default depends on TARGET_64BIT
1913 in case they weren't overwritten by command line options. */
1916 /* Mach-O doesn't support omitting the frame pointer for now. */
1917 if (flag_omit_frame_pointer
== 2)
1918 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1919 if (flag_asynchronous_unwind_tables
== 2)
1920 flag_asynchronous_unwind_tables
= 1;
1921 if (flag_pcc_struct_return
== 2)
1922 flag_pcc_struct_return
= 0;
1926 if (flag_omit_frame_pointer
== 2)
1927 flag_omit_frame_pointer
= 0;
1928 if (flag_asynchronous_unwind_tables
== 2)
1929 flag_asynchronous_unwind_tables
= 0;
1930 if (flag_pcc_struct_return
== 2)
1931 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1934 /* Need to check -mtune=generic first. */
1935 if (ix86_tune_string
)
1937 if (!strcmp (ix86_tune_string
, "generic")
1938 || !strcmp (ix86_tune_string
, "i686")
1939 /* As special support for cross compilers we read -mtune=native
1940 as -mtune=generic. With native compilers we won't see the
1941 -mtune=native, as it was changed by the driver. */
1942 || !strcmp (ix86_tune_string
, "native"))
1945 ix86_tune_string
= "generic64";
1947 ix86_tune_string
= "generic32";
1949 else if (!strncmp (ix86_tune_string
, "generic", 7))
1950 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1954 if (ix86_arch_string
)
1955 ix86_tune_string
= ix86_arch_string
;
1956 if (!ix86_tune_string
)
1958 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1959 ix86_tune_defaulted
= 1;
1962 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1963 need to use a sensible tune option. */
1964 if (!strcmp (ix86_tune_string
, "generic")
1965 || !strcmp (ix86_tune_string
, "x86-64")
1966 || !strcmp (ix86_tune_string
, "i686"))
1969 ix86_tune_string
= "generic64";
1971 ix86_tune_string
= "generic32";
1974 if (ix86_stringop_string
)
1976 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1977 stringop_alg
= rep_prefix_1_byte
;
1978 else if (!strcmp (ix86_stringop_string
, "libcall"))
1979 stringop_alg
= libcall
;
1980 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
1981 stringop_alg
= rep_prefix_4_byte
;
1982 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
1983 stringop_alg
= rep_prefix_8_byte
;
1984 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
1985 stringop_alg
= loop_1_byte
;
1986 else if (!strcmp (ix86_stringop_string
, "loop"))
1987 stringop_alg
= loop
;
1988 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
1989 stringop_alg
= unrolled_loop
;
1991 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
1993 if (!strcmp (ix86_tune_string
, "x86-64"))
1994 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1995 "-mtune=generic instead as appropriate.");
1997 if (!ix86_arch_string
)
1998 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2000 ix86_arch_specified
= 1;
2002 if (!strcmp (ix86_arch_string
, "generic"))
2003 error ("generic CPU can be used only for -mtune= switch");
2004 if (!strncmp (ix86_arch_string
, "generic", 7))
2005 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2007 if (ix86_cmodel_string
!= 0)
2009 if (!strcmp (ix86_cmodel_string
, "small"))
2010 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2011 else if (!strcmp (ix86_cmodel_string
, "medium"))
2012 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2013 else if (!strcmp (ix86_cmodel_string
, "large"))
2014 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2016 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2017 else if (!strcmp (ix86_cmodel_string
, "32"))
2018 ix86_cmodel
= CM_32
;
2019 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2020 ix86_cmodel
= CM_KERNEL
;
2022 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2026 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2027 use of rip-relative addressing. This eliminates fixups that
2028 would otherwise be needed if this object is to be placed in a
2029 DLL, and is essentially just as efficient as direct addressing. */
2030 if (TARGET_64BIT_MS_ABI
)
2031 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
2032 else if (TARGET_64BIT
)
2033 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2035 ix86_cmodel
= CM_32
;
2037 if (ix86_asm_string
!= 0)
2040 && !strcmp (ix86_asm_string
, "intel"))
2041 ix86_asm_dialect
= ASM_INTEL
;
2042 else if (!strcmp (ix86_asm_string
, "att"))
2043 ix86_asm_dialect
= ASM_ATT
;
2045 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2047 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2048 error ("code model %qs not supported in the %s bit mode",
2049 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2050 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
2051 sorry ("%i-bit mode not compiled in",
2052 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
2054 for (i
= 0; i
< pta_size
; i
++)
2055 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2057 ix86_arch
= processor_alias_table
[i
].processor
;
2058 /* Default cpu tuning to the architecture. */
2059 ix86_tune
= ix86_arch
;
2061 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2062 error ("CPU you selected does not support x86-64 "
2065 if (processor_alias_table
[i
].flags
& PTA_MMX
2066 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
2067 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2068 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2069 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
2070 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
2071 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2072 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
2073 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
2074 if (processor_alias_table
[i
].flags
& PTA_SSE
2075 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
2076 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2077 if (processor_alias_table
[i
].flags
& PTA_SSE2
2078 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
2079 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2080 if (processor_alias_table
[i
].flags
& PTA_SSE3
2081 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
2082 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2083 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2084 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
2085 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2086 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
2087 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
2088 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2089 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
2090 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
2091 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
2092 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2093 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
2094 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2096 if (processor_alias_table
[i
].flags
& PTA_ABM
)
2098 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2099 x86_cmpxchg16b
= true;
2100 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
))
2102 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
2103 x86_prefetch_sse
= true;
2104 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2111 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2113 ix86_arch_mask
= 1u << ix86_arch
;
2114 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2115 ix86_arch_features
[i
] &= ix86_arch_mask
;
2117 for (i
= 0; i
< pta_size
; i
++)
2118 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2120 ix86_tune
= processor_alias_table
[i
].processor
;
2121 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2123 if (ix86_tune_defaulted
)
2125 ix86_tune_string
= "x86-64";
2126 for (i
= 0; i
< pta_size
; i
++)
2127 if (! strcmp (ix86_tune_string
,
2128 processor_alias_table
[i
].name
))
2130 ix86_tune
= processor_alias_table
[i
].processor
;
2133 error ("CPU you selected does not support x86-64 "
2136 /* Intel CPUs have always interpreted SSE prefetch instructions as
2137 NOPs; so, we can enable SSE prefetch instructions even when
2138 -mtune (rather than -march) points us to a processor that has them.
2139 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2140 higher processors. */
2142 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
2143 x86_prefetch_sse
= true;
2147 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2149 ix86_tune_mask
= 1u << ix86_tune
;
2150 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2151 ix86_tune_features
[i
] &= ix86_tune_mask
;
2154 ix86_cost
= &size_cost
;
2156 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2158 /* Arrange to set up i386_stack_locals for all functions. */
2159 init_machine_status
= ix86_init_machine_status
;
2161 /* Validate -mregparm= value. */
2162 if (ix86_regparm_string
)
2165 warning (0, "-mregparm is ignored in 64-bit mode");
2166 i
= atoi (ix86_regparm_string
);
2167 if (i
< 0 || i
> REGPARM_MAX
)
2168 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2173 ix86_regparm
= REGPARM_MAX
;
2175 /* If the user has provided any of the -malign-* options,
2176 warn and use that value only if -falign-* is not set.
2177 Remove this code in GCC 3.2 or later. */
2178 if (ix86_align_loops_string
)
2180 warning (0, "-malign-loops is obsolete, use -falign-loops");
2181 if (align_loops
== 0)
2183 i
= atoi (ix86_align_loops_string
);
2184 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2185 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2187 align_loops
= 1 << i
;
2191 if (ix86_align_jumps_string
)
2193 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2194 if (align_jumps
== 0)
2196 i
= atoi (ix86_align_jumps_string
);
2197 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2198 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2200 align_jumps
= 1 << i
;
2204 if (ix86_align_funcs_string
)
2206 warning (0, "-malign-functions is obsolete, use -falign-functions");
2207 if (align_functions
== 0)
2209 i
= atoi (ix86_align_funcs_string
);
2210 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2211 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2213 align_functions
= 1 << i
;
2217 /* Default align_* from the processor table. */
2218 if (align_loops
== 0)
2220 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2221 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2223 if (align_jumps
== 0)
2225 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2226 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2228 if (align_functions
== 0)
2230 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2233 /* Validate -mbranch-cost= value, or provide default. */
2234 ix86_branch_cost
= ix86_cost
->branch_cost
;
2235 if (ix86_branch_cost_string
)
2237 i
= atoi (ix86_branch_cost_string
);
2239 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2241 ix86_branch_cost
= i
;
2243 if (ix86_section_threshold_string
)
2245 i
= atoi (ix86_section_threshold_string
);
2247 error ("-mlarge-data-threshold=%d is negative", i
);
2249 ix86_section_threshold
= i
;
2252 if (ix86_tls_dialect_string
)
2254 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2255 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2256 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2257 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2258 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2259 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2261 error ("bad value (%s) for -mtls-dialect= switch",
2262 ix86_tls_dialect_string
);
2265 if (ix87_precision_string
)
2267 i
= atoi (ix87_precision_string
);
2268 if (i
!= 32 && i
!= 64 && i
!= 80)
2269 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2274 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
2276 /* Enable by default the SSE and MMX builtins. Do allow the user to
2277 explicitly disable any of these. In particular, disabling SSE and
2278 MMX for kernel code is extremely useful. */
2279 if (!ix86_arch_specified
)
2281 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
2282 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
2285 warning (0, "-mrtd is ignored in 64bit mode");
2289 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
2291 if (!ix86_arch_specified
)
2293 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
2295 /* i386 ABI does not specify red zone. It still makes sense to use it
2296 when programmer takes care to stack from being destroyed. */
2297 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2298 target_flags
|= MASK_NO_RED_ZONE
;
2301 /* Keep nonleaf frame pointers. */
2302 if (flag_omit_frame_pointer
)
2303 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2304 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2305 flag_omit_frame_pointer
= 1;
2307 /* If we're doing fast math, we don't care about comparison order
2308 wrt NaNs. This lets us use a shorter comparison sequence. */
2309 if (flag_finite_math_only
)
2310 target_flags
&= ~MASK_IEEE_FP
;
2312 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2313 since the insns won't need emulation. */
2314 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2315 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2317 /* Likewise, if the target doesn't have a 387, or we've specified
2318 software floating point, don't use 387 inline intrinsics. */
2320 target_flags
|= MASK_NO_FANCY_MATH_387
;
2322 /* Turn on SSE4.1 builtins for -msse4.2. */
2324 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2326 /* Turn on SSSE3 builtins for -msse4.1. */
2328 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2330 /* Turn on SSE3 builtins for -mssse3. */
2332 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2334 /* Turn on SSE3 builtins for -msse4a. */
2336 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2338 /* Turn on SSE2 builtins for -msse3. */
2340 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2342 /* Turn on SSE builtins for -msse2. */
2344 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2346 /* Turn on MMX builtins for -msse. */
2349 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
2350 x86_prefetch_sse
= true;
2353 /* Turn on MMX builtins for 3Dnow. */
2355 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2357 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2358 if (TARGET_SSE4_2
|| TARGET_ABM
)
2361 /* Validate -mpreferred-stack-boundary= value, or provide default.
2362 The default of 128 bits is for Pentium III's SSE __m128. We can't
2363 change it because of optimize_size. Otherwise, we can't mix object
2364 files compiled with -Os and -On. */
2365 ix86_preferred_stack_boundary
= 128;
2366 if (ix86_preferred_stack_boundary_string
)
2368 i
= atoi (ix86_preferred_stack_boundary_string
);
2369 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2370 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2371 TARGET_64BIT
? 4 : 2);
2373 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2376 /* Accept -msseregparm only if at least SSE support is enabled. */
2377 if (TARGET_SSEREGPARM
2379 error ("-msseregparm used without SSE enabled");
2381 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2382 if (ix86_fpmath_string
!= 0)
2384 if (! strcmp (ix86_fpmath_string
, "387"))
2385 ix86_fpmath
= FPMATH_387
;
2386 else if (! strcmp (ix86_fpmath_string
, "sse"))
2390 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2391 ix86_fpmath
= FPMATH_387
;
2394 ix86_fpmath
= FPMATH_SSE
;
2396 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2397 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2401 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2402 ix86_fpmath
= FPMATH_387
;
2404 else if (!TARGET_80387
)
2406 warning (0, "387 instruction set disabled, using SSE arithmetics");
2407 ix86_fpmath
= FPMATH_SSE
;
2410 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
2413 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2416 /* If the i387 is disabled, then do not return values in it. */
2418 target_flags
&= ~MASK_FLOAT_RETURNS
;
2420 /* Use external vectorized library in vectorizing intrinsics. */
2421 if (ix86_veclibabi_string
)
2423 if (strcmp (ix86_veclibabi_string
, "acml") == 0)
2424 ix86_veclib_handler
= ix86_veclibabi_acml
;
2426 error ("unknown vectorization library ABI type (%s) for "
2427 "-mveclibabi= switch", ix86_veclibabi_string
);
2430 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2431 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2433 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2435 /* ??? Unwind info is not correct around the CFG unless either a frame
2436 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2437 unwind info generation to be aware of the CFG and propagating states
2439 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2440 || flag_exceptions
|| flag_non_call_exceptions
)
2441 && flag_omit_frame_pointer
2442 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2444 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2445 warning (0, "unwind tables currently require either a frame pointer "
2446 "or -maccumulate-outgoing-args for correctness");
2447 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2450 /* For sane SSE instruction set generation we need fcomi instruction.
2451 It is safe to enable all CMOVE instructions. */
2455 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2458 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2459 p
= strchr (internal_label_prefix
, 'X');
2460 internal_label_prefix_len
= p
- internal_label_prefix
;
2464 /* When scheduling description is not available, disable scheduler pass
2465 so it won't slow down the compilation and make x87 code slower. */
2466 if (!TARGET_SCHEDULE
)
2467 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2469 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2470 set_param_value ("simultaneous-prefetches",
2471 ix86_cost
->simultaneous_prefetches
);
2472 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2473 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2474 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE
))
2475 set_param_value ("l1-cache-size", ix86_cost
->l1_cache_size
);
2476 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE
))
2477 set_param_value ("l2-cache-size", ix86_cost
->l2_cache_size
);
2480 /* Return true if this goes in large data/bss. */
2483 ix86_in_large_data_p (tree exp
)
2485 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2488 /* Functions are never large data. */
2489 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2492 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2494 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2495 if (strcmp (section
, ".ldata") == 0
2496 || strcmp (section
, ".lbss") == 0)
2502 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2504 /* If this is an incomplete type with size 0, then we can't put it
2505 in data because it might be too big when completed. */
2506 if (!size
|| size
> ix86_section_threshold
)
2513 /* Switch to the appropriate section for output of DECL.
2514 DECL is either a `VAR_DECL' node or a constant of some sort.
2515 RELOC indicates whether forming the initial value of DECL requires
2516 link-time relocations. */
2518 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2522 x86_64_elf_select_section (tree decl
, int reloc
,
2523 unsigned HOST_WIDE_INT align
)
2525 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2526 && ix86_in_large_data_p (decl
))
2528 const char *sname
= NULL
;
2529 unsigned int flags
= SECTION_WRITE
;
2530 switch (categorize_decl_for_section (decl
, reloc
))
2535 case SECCAT_DATA_REL
:
2536 sname
= ".ldata.rel";
2538 case SECCAT_DATA_REL_LOCAL
:
2539 sname
= ".ldata.rel.local";
2541 case SECCAT_DATA_REL_RO
:
2542 sname
= ".ldata.rel.ro";
2544 case SECCAT_DATA_REL_RO_LOCAL
:
2545 sname
= ".ldata.rel.ro.local";
2549 flags
|= SECTION_BSS
;
2552 case SECCAT_RODATA_MERGE_STR
:
2553 case SECCAT_RODATA_MERGE_STR_INIT
:
2554 case SECCAT_RODATA_MERGE_CONST
:
2558 case SECCAT_SRODATA
:
2565 /* We don't split these for medium model. Place them into
2566 default sections and hope for best. */
2571 /* We might get called with string constants, but get_named_section
2572 doesn't like them as they are not DECLs. Also, we need to set
2573 flags in that case. */
2575 return get_section (sname
, flags
, NULL
);
2576 return get_named_section (decl
, sname
, reloc
);
2579 return default_elf_select_section (decl
, reloc
, align
);
2582 /* Build up a unique section name, expressed as a
2583 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2584 RELOC indicates whether the initial value of EXP requires
2585 link-time relocations. */
2587 static void ATTRIBUTE_UNUSED
2588 x86_64_elf_unique_section (tree decl
, int reloc
)
2590 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2591 && ix86_in_large_data_p (decl
))
2593 const char *prefix
= NULL
;
2594 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2595 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2597 switch (categorize_decl_for_section (decl
, reloc
))
2600 case SECCAT_DATA_REL
:
2601 case SECCAT_DATA_REL_LOCAL
:
2602 case SECCAT_DATA_REL_RO
:
2603 case SECCAT_DATA_REL_RO_LOCAL
:
2604 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2607 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2610 case SECCAT_RODATA_MERGE_STR
:
2611 case SECCAT_RODATA_MERGE_STR_INIT
:
2612 case SECCAT_RODATA_MERGE_CONST
:
2613 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2615 case SECCAT_SRODATA
:
2622 /* We don't split these for medium model. Place them into
2623 default sections and hope for best. */
2631 plen
= strlen (prefix
);
2633 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2634 name
= targetm
.strip_name_encoding (name
);
2635 nlen
= strlen (name
);
2637 string
= (char *) alloca (nlen
+ plen
+ 1);
2638 memcpy (string
, prefix
, plen
);
2639 memcpy (string
+ plen
, name
, nlen
+ 1);
2641 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2645 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.
   NOTE(review): reconstructed from a damaged source dump (dropped
   `else` and `#endif` restored); verify against upstream GCC i386.c.  */

void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects past the large-data threshold go out via .largecomm so the
     linker places them in the large data section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2670 /* Utility function for targets to use in implementing
2671 ASM_OUTPUT_ALIGNED_BSS. */
2674 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2675 const char *name
, unsigned HOST_WIDE_INT size
,
2678 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2679 && size
> (unsigned int)ix86_section_threshold
)
2680 switch_to_section (get_named_section (decl
, ".lbss", 0));
2682 switch_to_section (bss_section
);
2683 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2684 #ifdef ASM_DECLARE_OBJECT_NAME
2685 last_assemble_variable_decl
= decl
;
2686 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2688 /* Standard thing is just output label for the object. */
2689 ASM_OUTPUT_LABEL (file
, name
);
2690 #endif /* ASM_DECLARE_OBJECT_NAME */
2691 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2695 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2697 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2698 make the problem with not enough registers even worse. */
2699 #ifdef INSN_SCHEDULING
2701 flag_schedule_insns
= 0;
2705 /* The Darwin libraries never set errno, so we might as well
2706 avoid calling them when that's the only reason we would. */
2707 flag_errno_math
= 0;
2709 /* The default values of these switches depend on the TARGET_64BIT
2710 that is not known at this moment. Mark these values with 2 and
2711 let user the to override these. In case there is no command line option
2712 specifying them, we will set the defaults in override_options. */
2714 flag_omit_frame_pointer
= 2;
2715 flag_pcc_struct_return
= 2;
2716 flag_asynchronous_unwind_tables
= 2;
2717 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2718 SUBTARGET_OPTIMIZATION_OPTIONS
;
2722 /* Decide whether we can make a sibling call to a function. DECL is the
2723 declaration of the function being targeted by the call and EXP is the
2724 CALL_EXPR representing the call. */
2727 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2732 /* If we are generating position-independent code, we cannot sibcall
2733 optimize any indirect call, or a direct call to a global function,
2734 as the PLT requires %ebx be live. */
2735 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2742 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2743 if (POINTER_TYPE_P (func
))
2744 func
= TREE_TYPE (func
);
2747 /* Check that the return value locations are the same. Like
2748 if we are returning floats on the 80387 register stack, we cannot
2749 make a sibcall from a function that doesn't return a float to a
2750 function that does or, conversely, from a function that does return
2751 a float to a function that doesn't; the necessary stack adjustment
2752 would not be executed. This is also the place we notice
2753 differences in the return value ABI. Note that it is ok for one
2754 of the functions to have void return type as long as the return
2755 value of the other is passed in a register. */
2756 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2757 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2759 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2761 if (!rtx_equal_p (a
, b
))
2764 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2766 else if (!rtx_equal_p (a
, b
))
2769 /* If this call is indirect, we'll need to be able to use a call-clobbered
2770 register for the address of the target function. Make sure that all
2771 such registers are not used for passing parameters. */
2772 if (!decl
&& !TARGET_64BIT
)
2776 /* We're looking at the CALL_EXPR, we need the type of the function. */
2777 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2778 type
= TREE_TYPE (type
); /* pointer type */
2779 type
= TREE_TYPE (type
); /* function type */
2781 if (ix86_function_regparm (type
, NULL
) >= 3)
2783 /* ??? Need to count the actual number of registers to be used,
2784 not the possible number of registers. Fix later. */
2789 /* Dllimport'd functions are also called indirectly. */
2790 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2791 && decl
&& DECL_DLLIMPORT_P (decl
)
2792 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2795 /* If we forced aligned the stack, then sibcalling would unalign the
2796 stack, which may break the called function. */
2797 if (cfun
->machine
->force_align_arg_pointer
)
2800 /* Otherwise okay. That also includes certain types of indirect calls. */
/* attribute_spec.handler for the i386 calling-convention attributes.
   Validates attribute placement and argument, and rejects mutually
   exclusive combinations (fastcall vs. cdecl/stdcall/regparm, etc.).
   NOTE(review): this extraction is lossy -- the embedded upstream line
   numbers skip (2806->2809, 2821->2825, ...), so braces, returns and
   some statements are missing here; verify control flow against
   upstream gcc/config/i386/i386.c before editing.  */
2804 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2805 calling convention attributes;
2806 arguments as in struct attribute_spec.handler. */
2809 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2811 int flags ATTRIBUTE_UNUSED
,
/* These attributes only make sense on functions (or declarations of
   function type): anything else gets a warning and the attribute is
   dropped via *no_add_attrs.  */
2814 if (TREE_CODE (*node
) != FUNCTION_TYPE
2815 && TREE_CODE (*node
) != METHOD_TYPE
2816 && TREE_CODE (*node
) != FIELD_DECL
2817 && TREE_CODE (*node
) != TYPE_DECL
)
2819 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2820 IDENTIFIER_POINTER (name
));
2821 *no_add_attrs
= true;
2825 /* Can combine regparm with all attributes but fastcall. */
2826 if (is_attribute_p ("regparm", name
))
2830 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2832 error ("fastcall and regparm attributes are not compatible");
/* regparm takes a single integer argument; reject non-constants and
   values above REGPARM_MAX.  */
2835 cst
= TREE_VALUE (args
);
2836 if (TREE_CODE (cst
) != INTEGER_CST
)
2838 warning (OPT_Wattributes
,
2839 "%qs attribute requires an integer constant argument",
2840 IDENTIFIER_POINTER (name
));
2841 *no_add_attrs
= true;
2843 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2845 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2846 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2847 *no_add_attrs
= true;
/* Self-realigning functions reserve one register (presumably %ecx --
   TODO confirm against the prologue code), so regparm is limited to
   REGPARM_MAX-1 for them.  */
2851 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2852 TYPE_ATTRIBUTES (*node
))
2853 && compare_tree_int (cst
, REGPARM_MAX
-1))
2855 error ("%s functions limited to %d register parameters",
2856 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
/* NOTE(review): the branch below appears to handle 64-bit targets,
   where these 32-bit conventions are ignored; the guarding condition
   was lost in extraction.  */
2864 /* Do not warn when emulating the MS ABI. */
2865 if (!TARGET_64BIT_MS_ABI
)
2866 warning (OPT_Wattributes
, "%qs attribute ignored",
2867 IDENTIFIER_POINTER (name
));
2868 *no_add_attrs
= true;
2872 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2873 if (is_attribute_p ("fastcall", name
))
2875 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2877 error ("fastcall and cdecl attributes are not compatible");
2879 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2881 error ("fastcall and stdcall attributes are not compatible");
2883 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2885 error ("fastcall and regparm attributes are not compatible");
2889 /* Can combine stdcall with fastcall (redundant), regparm and
2891 else if (is_attribute_p ("stdcall", name
))
2893 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2895 error ("stdcall and cdecl attributes are not compatible");
2897 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2899 error ("stdcall and fastcall attributes are not compatible");
2903 /* Can combine cdecl with regparm and sseregparm. */
2904 else if (is_attribute_p ("cdecl", name
))
2906 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2908 error ("stdcall and cdecl attributes are not compatible");
2910 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2912 error ("fastcall and cdecl attributes are not compatible");
2916 /* Can combine sseregparm with all attributes. */
/* TARGET_COMP_TYPE_ATTRIBUTES hook: compares the calling-convention
   attributes of two function types.  Any mismatch in fastcall,
   regparm count, sseregparm, or the effective return convention
   (cdecl vs stdcall, flipped by -mrtd) makes the types incompatible.
   NOTE(review): lossy extraction -- the return statements between the
   checks were dropped; see upstream i386.c for the full body.  */
2921 /* Return 0 if the attributes for two types are incompatible, 1 if they
2922 are compatible, and 2 if they are nearly compatible (which causes a
2923 warning to be generated). */
2926 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
2928 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default is stdcall, so the "non-default" attribute
   to look for is cdecl; otherwise it is stdcall.  */
2929 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2931 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2934 /* Check for mismatched fastcall/regparm types. */
/* The double negation (!lookup_attribute ... != !lookup_attribute ...)
   normalizes both sides to 0/1 before comparing presence.  */
2935 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2936 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2937 || (ix86_function_regparm (type1
, NULL
)
2938 != ix86_function_regparm (type2
, NULL
)))
2941 /* Check for mismatched sseregparm types. */
2942 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2943 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2946 /* Check for mismatched return types (cdecl vs stdcall). */
2947 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2948 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
/* Computes how many integer registers are used for argument passing for
   a given function type/decl: an explicit regparm attribute wins,
   fastcall implies a fixed count, and purely local functions may be
   auto-upgraded to register passing.
   NOTE(review): lossy extraction -- several returns/braces are missing
   (embedded numbers skip 2969->2971, 2988->2991, ...); verify against
   upstream i386.c.  */
2954 /* Return the regparm value for a function with the indicated TYPE and DECL.
2955 DECL may be NULL when calling function indirectly
2956 or considering a libcall. */
2959 ix86_function_regparm (const_tree type
, const_tree decl
)
/* Start from the -mregparm= command-line default.  */
2962 int regparm
= ix86_regparm
;
/* An explicit regparm(N) attribute overrides everything else.  */
2967 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2969 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2971 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2974 /* Use register calling convention for local functions when possible. */
2975 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
2976 && flag_unit_at_a_time
&& !profile_flag
)
2978 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
2979 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
2982 int local_regparm
, globals
= 0, regno
;
2985 /* Make sure no regparm register is taken by a
2986 global register variable. */
/* Registers 0..2 (eax/edx/ecx) are the candidate regparm registers;
   stop at the first one claimed as a global register variable.  */
2987 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2988 if (global_regs
[local_regparm
])
2991 /* We can't use regparm(3) for nested functions as these use
2992 static chain pointer in third argument. */
2993 if (local_regparm
== 3
2994 && (decl_function_context (decl
)
2995 || ix86_force_align_arg_pointer
)
2996 && !DECL_NO_STATIC_CHAIN (decl
))
2999 /* If the function realigns its stackpointer, the prologue will
3000 clobber %ecx. If we've already generated code for the callee,
3001 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3002 scanning the attributes for the self-realigning property. */
3003 f
= DECL_STRUCT_FUNCTION (decl
);
3004 if (local_regparm
== 3
3005 && (f
? !!f
->machine
->force_align_arg_pointer
3006 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
3007 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3010 /* Each global register variable increases register preassure,
3011 so the more global reg vars there are, the smaller regparm
3012 optimization use, unless requested by the user explicitly. */
3013 for (regno
= 0; regno
< 6; regno
++)
3014 if (global_regs
[regno
])
/* Reduce the local regparm count by the number of global register
   variables, never going below zero.  */
3017 = globals
< local_regparm
? local_regparm
- globals
: 0;
3019 if (local_regparm
> regparm
)
3020 regparm
= local_regparm
;
/* Decides whether SFmode/DFmode scalar float arguments go in SSE
   registers for this function (0 = no, 1 = SFmode only, 2 = both).
   Triggered by -msseregparm, the sseregparm attribute, or (for local
   functions) -mfpmath=sse.  32-bit only -- asserted below.
   NOTE(review): lossy extraction -- the TARGET_SSE guard around the
   error calls and several returns were dropped; see upstream.  */
3027 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3028 DFmode (2) arguments in SSE registers for a function with the
3029 indicated TYPE and DECL. DECL may be NULL when calling function
3030 indirectly or considering a libcall. Otherwise return 0. */
3033 ix86_function_sseregparm (const_tree type
, const_tree decl
)
3035 gcc_assert (!TARGET_64BIT
);
3037 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3038 by the sseregparm attribute. */
3039 if (TARGET_SSEREGPARM
3040 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
/* %qD is used when we have a decl to name, %qT when only a type.  */
3045 error ("Calling %qD with attribute sseregparm without "
3046 "SSE/SSE2 enabled", decl
);
3048 error ("Calling %qT with attribute sseregparm without "
3049 "SSE/SSE2 enabled", type
);
3056 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3057 (and DFmode for SSE2) arguments in SSE registers. */
3058 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3060 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3061 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
/* Presumably guarded by i && i->local -- the condition line was lost
   in extraction; TODO confirm against upstream.  */
3063 return TARGET_SSE2
? 2 : 1;
/* Predicate used by the prologue expander: is %eax (hard reg 0) live
   on entry to the current function?  Answered from dataflow live-out
   info of the entry block rather than from the argument list.  */
3069 /* Return true if EAX is live at the start of the function. Used by
3070 ix86_expand_prologue to determine if we need special help before
3071 calling allocate_stack_worker. */
3074 ix86_eax_live_at_start_p (void)
3076 /* Cheat. Don't bother working forward from ix86_function_regparm
3077 to the function type to whether an actual argument is located in
3078 eax. Instead just look at cfg info, which is still close enough
3079 to correct at this point. This gives false positives for broken
3080 functions that might use uninitialized data that happens to be
3081 allocated in eax, but who cares? */
/* Hard register 0 is %eax on i386.  */
3082 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
/* Walks TYPE_ARG_TYPES to its last element; a prototype ends with
   void_type_node when it is NOT variadic, so a different final value
   means the function takes "..." arguments.
   NOTE(review): lossy extraction -- the unprototyped-type early return
   and the loop body (t = n) were dropped; see upstream.  */
3085 /* Return true if TYPE has a variable argument list. */
3088 type_has_variadic_args_p (tree type
)
3090 tree n
, t
= TYPE_ARG_TYPES (type
);
/* Advance t to the last TREE_LIST node of the argument chain.  */
3095 while ((n
= TREE_CHAIN (t
)) != NULL
)
3098 return TREE_VALUE (t
) != void_type_node
;
/* RETURN_POPS_ARGS implementation: how many bytes the CALLEE pops on
   return.  stdcall/fastcall (and -mrtd for fixed-arg functions) pop
   SIZE bytes; otherwise only the fake aggregate-return pointer word
   may be popped.
   NOTE(review): lossy extraction -- several `return size;`/`return 0;`
   lines were dropped between the visible conditions; see upstream.  */
3101 /* Value is the number of bytes of arguments automatically
3102 popped when returning from a subroutine call.
3103 FUNDECL is the declaration node of the function (as a tree),
3104 FUNTYPE is the data type of the function (as a tree),
3105 or for a library call it is an identifier node for the subroutine name.
3106 SIZE is the number of bytes of arguments passed on the stack.
3108 On the 80386, the RTD insn may be used to pop them if the number
3109 of args is fixed, but if the number is variable then the caller
3110 must pop them all. RTD can't be used for library calls now
3111 because the library is compiled with the Unix compiler.
3112 Use of RTD is a selectable option, since it is incompatible with
3113 standard Unix calling sequences. If the option is not selected,
3114 the caller must always pop the args.
3116 The attribute stdcall is equivalent to RTD on a per module basis. */
3119 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3123 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real functions, not libcall identifiers.  */
3127 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3129 /* Cdecl functions override -mrtd, and never pop the stack. */
3130 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
3132 /* Stdcall and fastcall functions will pop the stack if not
3134 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3135 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3138 if (rtd
&& ! type_has_variadic_args_p (funtype
))
3142 /* Lose any fake structure return argument if it is passed on the stack. */
3143 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3144 && !KEEP_AGGREGATE_RETURN_POINTER
)
/* If no integer registers are used for arguments, the hidden return
   pointer went on the stack and the callee pops that one word.  */
3146 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3148 return GET_MODE_SIZE (Pmode
);
/* FUNCTION_ARG_REGNO_P implementation: may hard register REGNO ever
   carry a function argument?  Covers the 32-bit integer/MMX/SSE cases
   and both 64-bit ABIs (SysV and MS).
   NOTE(review): lossy extraction -- the TARGET_64BIT/SSE_REGPARM
   branch structure and several return statements were dropped
   (embedded numbers skip 3161->3166, 3173->3178, ...); the visible
   expressions should be checked against upstream before editing.  */
3154 /* Argument support functions. */
3156 /* Return true when register may be used to pass function parameters. */
3158 ix86_function_arg_regno_p (int regno
)
3161 const int *parm_regs
;
/* 64-bit case (presumably -- guard lost in extraction): low regs up
   to REGPARM_MAX plus non-fixed SSE regs.  */
3166 return (regno
< REGPARM_MAX
3167 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
/* 32-bit case: integer regparm regs, plus the MMX/SSE argument
   register windows when those ISAs are enabled.  */
3169 return (regno
< REGPARM_MAX
3170 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3171 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3172 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3173 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3178 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3183 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3184 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3188 /* RAX is used as hidden argument to va_arg functions. */
3189 if (!TARGET_64BIT_MS_ABI
&& regno
== 0)
/* Scan the ABI-specific integer parameter register table.  */
3192 if (TARGET_64BIT_MS_ABI
)
3193 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3195 parm_regs
= x86_64_int_parameter_registers
;
3196 for (i
= 0; i
< REGPARM_MAX
; i
++)
3197 if (regno
== parm_regs
[i
])
/* TARGET_MUST_PASS_IN_STACK hook: true when TYPE cannot be passed
   purely in registers.  Defers to the generic variable-size check,
   then special-cases 32-bit TImode non-vector aggregates.  */
3202 /* Return if we do not know how to pass TYPE solely in registers. */
3205 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
3207 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3210 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3211 The layout_type routine is crafty and tries to trick us into passing
3212 currently unsupported vector types on the stack by using TImode. */
3213 return (!TARGET_64BIT
&& mode
== TImode
3214 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
/* INIT_CUMULATIVE_ARGS worker: zero CUM and seed the register budgets
   (integer, SSE, MMX) and warning/vararg flags from the command line
   and from FNTYPE's attributes.
   NOTE(review): lossy extraction -- the TARGET_64BIT branch, the
   fastcall register assignment, and some brace lines were dropped
   (embedded numbers skip 3239->3244, 3260->3266); see upstream.  */
3217 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3218 for a call to a function whose data type is FNTYPE.
3219 For a library call, FNTYPE is 0. */
3222 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3223 tree fntype
, /* tree ptr for function decl */
3224 rtx libname
, /* SYMBOL_REF of library name or 0 */
3227 memset (cum
, 0, sizeof (*cum
));
3229 /* Set up the number of registers to use for passing arguments. */
3230 cum
->nregs
= ix86_regparm
;
3232 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3234 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3235 cum
->warn_sse
= true;
3236 cum
->warn_mmx
= true;
/* A function is maybe-variadic if it is unprototyped (no arg list) or
   explicitly variadic; libcalls fall into the (lost) else branch.  */
3237 cum
->maybe_vaarg
= (fntype
3238 ? (!TYPE_ARG_TYPES (fntype
)
3239 || type_has_variadic_args_p (fntype
))
3244 /* If there are variable arguments, then we won't pass anything
3245 in registers in 32-bit mode. */
3246 if (cum
->maybe_vaarg
)
3256 /* Use ecx and edx registers if function has fastcall attribute,
3257 else look for regparm information. */
3260 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3266 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3269 /* Set up the number of SSE registers used for passing SFmode
3270 and DFmode arguments. Warn for mismatching ABI. */
3271 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
/* Maps a vector TYPE whose TYPE_MODE was demoted by the middle-end
   (because the vector ISA is disabled) back to the vector mode the ABI
   expects, so argument passing is ISA-independent.  Non-vector types
   simply return TYPE_MODE.
   NOTE(review): lossy extraction -- the `return mode;` statements and
   a gcc_unreachable were dropped; see upstream.  */
3275 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3276 But in the case of vector types, it is some vector mode.
3278 When we have only some of our vector isa extensions enabled, then there
3279 are some modes for which vector_mode_supported_p is false. For these
3280 modes, the generic vector support in gcc will choose some non-vector mode
3281 in order to implement the type. By computing the natural mode, we'll
3282 select the proper ABI location for the operand and not depend on whatever
3283 the middle-end decides to do with these vector types. */
3285 static enum machine_mode
3286 type_natural_mode (const_tree type
)
3288 enum machine_mode mode
= TYPE_MODE (type
);
3290 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
/* Only 8- and 16-byte vectors (MMX/SSE sized) are remapped.  */
3292 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3293 if ((size
== 8 || size
== 16)
3294 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3295 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3297 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
/* Start the mode search from the first vector mode of the right
   element kind (float vs integer).  */
3299 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3300 mode
= MIN_MODE_VECTOR_FLOAT
;
3302 mode
= MIN_MODE_VECTOR_INT
;
3304 /* Get the mode which has this inner mode and number of units. */
3305 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3306 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3307 && GET_MODE_INNER (mode
) == innermode
)
/* Builds the RTX describing an argument in register REGNO: a plain REG
   of ORIG_MODE when that mode is concrete, otherwise a one-entry
   PARALLEL wrapping a REG of the natural MODE at offset 0 (the BLKmode
   case).  NOTE(review): the `return tmp;` line was lost in extraction.  */
3317 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3318 this may not agree with the mode that the type system has chosen for the
3319 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3320 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3323 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3328 if (orig_mode
!= BLKmode
)
3329 tmp
= gen_rtx_REG (orig_mode
, regno
);
/* BLKmode: wrap (REG mode regno) at byte offset 0 in a PARALLEL.  */
3332 tmp
= gen_rtx_REG (mode
, regno
);
3333 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3334 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
/* Implements the class-merging rules of the x86-64 psABI, section on
   parameter classification: combines the classes of two overlapping
   8-byte chunks.  The numbered rules below follow the ABI document.  */
3340 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3341 of this code is to classify each 8bytes of incoming argument by the register
3342 class and assign registers accordingly. */
3344 /* Return the union class of CLASS1 and CLASS2.
3345 See the x86-64 PS ABI for details. */
3347 static enum x86_64_reg_class
3348 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3350 /* Rule #1: If both classes are equal, this is the resulting class. */
3351 if (class1
== class2
)
3354 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3356 if (class1
== X86_64_NO_CLASS
)
3358 if (class2
== X86_64_NO_CLASS
)
3361 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3362 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3363 return X86_64_MEMORY_CLASS
;
3365 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits).  */
3366 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3367 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3368 return X86_64_INTEGERSI_CLASS
;
3369 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3370 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3371 return X86_64_INTEGER_CLASS
;
3373 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3375 if (class1
== X86_64_X87_CLASS
3376 || class1
== X86_64_X87UP_CLASS
3377 || class1
== X86_64_COMPLEX_X87_CLASS
3378 || class2
== X86_64_X87_CLASS
3379 || class2
== X86_64_X87UP_CLASS
3380 || class2
== X86_64_COMPLEX_X87_CLASS
)
3381 return X86_64_MEMORY_CLASS
;
3383 /* Rule #6: Otherwise class SSE is used. */
3384 return X86_64_SSE_CLASS
;
/* Core of x86-64 psABI argument classification: fills CLASSES with the
   register class of each 8-byte chunk of an argument of MODE/TYPE.
   Handles aggregates recursively (records, arrays, unions), then
   enforces the ABI's post-merge cleanup rules, then classifies scalar
   modes directly.
   NOTE(review): this extraction is badly lossy -- case labels, braces,
   returns, and guard conditions are missing throughout (the embedded
   upstream numbers skip constantly).  Treat the text below as an
   outline only and consult upstream gcc/config/i386/i386.c.  */
3387 /* Classify the argument of type TYPE and mode MODE.
3388 CLASSES will be filled by the register class used to pass each word
3389 of the operand. The number of words is returned. In case the parameter
3390 should be passed in memory, 0 is returned. As a special case for zero
3391 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3393 BIT_OFFSET is used internally for handling records and specifies offset
3394 of the offset in bits modulo 256 to avoid overflow cases.
3396 See the x86-64 PS ABI for details.
3400 classify_argument (enum machine_mode mode
, const_tree type
,
3401 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3403 HOST_WIDE_INT bytes
=
3404 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
/* Number of 8-byte chunks the argument occupies, accounting for the
   sub-64-bit start offset within the enclosing record.  */
3405 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3407 /* Variable sized entities are always passed/returned in memory. */
3411 if (mode
!= VOIDmode
3412 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3415 if (type
&& AGGREGATE_TYPE_P (type
))
3419 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3421 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Initialize every chunk to NO_CLASS before merging fields in.  */
3425 for (i
= 0; i
< words
; i
++)
3426 classes
[i
] = X86_64_NO_CLASS
;
3428 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3429 signalize memory class, so handle it as special case. */
3432 classes
[0] = X86_64_NO_CLASS
;
3436 /* Classify each field of record and merge classes. */
3437 switch (TREE_CODE (type
))
3440 /* And now merge the fields of structure. */
3441 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3443 if (TREE_CODE (field
) == FIELD_DECL
)
3447 if (TREE_TYPE (field
) == error_mark_node
)
3450 /* Bitfields are always classified as integer. Handle them
3451 early, since later code would consider them to be
3452 misaligned integers. */
3453 if (DECL_BIT_FIELD (field
))
/* Walk the 8-byte chunks the bitfield spans and force them
   to (merge with) INTEGER class.  */
3455 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3456 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3457 + tree_low_cst (DECL_SIZE (field
), 0)
3460 merge_classes (X86_64_INTEGER_CLASS
,
/* Non-bitfield: classify the field's own type recursively, then
   merge its chunk classes into ours at the right position.  */
3465 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3466 TREE_TYPE (field
), subclasses
,
3467 (int_bit_position (field
)
3468 + bit_offset
) % 256);
3471 for (i
= 0; i
< num
; i
++)
3474 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3476 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3484 /* Arrays are handled as small records. */
3487 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3488 TREE_TYPE (type
), subclasses
, bit_offset
);
3492 /* The partial classes are now full classes. */
3493 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3494 subclasses
[0] = X86_64_SSE_CLASS
;
3495 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3496 subclasses
[0] = X86_64_INTEGER_CLASS
;
/* Replicate the element classification across the array.  */
3498 for (i
= 0; i
< words
; i
++)
3499 classes
[i
] = subclasses
[i
% num
];
3504 case QUAL_UNION_TYPE
:
3505 /* Unions are similar to RECORD_TYPE but offset is always 0.
3507 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3509 if (TREE_CODE (field
) == FIELD_DECL
)
3513 if (TREE_TYPE (field
) == error_mark_node
)
3516 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3517 TREE_TYPE (field
), subclasses
,
3521 for (i
= 0; i
< num
; i
++)
3522 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3531 /* Final merger cleanup. */
3532 for (i
= 0; i
< words
; i
++)
3534 /* If one class is MEMORY, everything should be passed in
3536 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3539 /* The X86_64_SSEUP_CLASS should be always preceded by
3540 X86_64_SSE_CLASS. */
3541 if (classes
[i
] == X86_64_SSEUP_CLASS
3542 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3543 classes
[i
] = X86_64_SSE_CLASS
;
3545 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3546 if (classes
[i
] == X86_64_X87UP_CLASS
3547 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3548 classes
[i
] = X86_64_SSE_CLASS
;
/* ---- Scalar (non-aggregate) classification below. ---- */
3553 /* Compute alignment needed. We align all types to natural boundaries with
3554 exception of XFmode that is aligned to 64bits. */
3555 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3557 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3560 mode_alignment
= 128;
3561 else if (mode
== XCmode
)
3562 mode_alignment
= 256;
3563 if (COMPLEX_MODE_P (mode
))
3564 mode_alignment
/= 2;
3565 /* Misaligned fields are always returned in memory. */
3566 if (bit_offset
% mode_alignment
)
3570 /* for V1xx modes, just use the base mode */
3571 if (VECTOR_MODE_P (mode
)
3572 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3573 mode
= GET_MODE_INNER (mode
);
3575 /* Classification of atomic types. */
/* NOTE(review): the switch and its case labels (DImode, TImode,
   SFmode, DFmode, XFmode, complex and vector modes...) were lost in
   extraction; only the class assignments below survive.  */
3580 classes
[0] = X86_64_SSE_CLASS
;
3583 classes
[0] = X86_64_SSE_CLASS
;
3584 classes
[1] = X86_64_SSEUP_CLASS
;
3593 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3594 classes
[0] = X86_64_INTEGERSI_CLASS
;
3596 classes
[0] = X86_64_INTEGER_CLASS
;
3600 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3605 if (!(bit_offset
% 64))
3606 classes
[0] = X86_64_SSESF_CLASS
;
3608 classes
[0] = X86_64_SSE_CLASS
;
3611 classes
[0] = X86_64_SSEDF_CLASS
;
3614 classes
[0] = X86_64_X87_CLASS
;
3615 classes
[1] = X86_64_X87UP_CLASS
;
3618 classes
[0] = X86_64_SSE_CLASS
;
3619 classes
[1] = X86_64_SSEUP_CLASS
;
3622 classes
[0] = X86_64_SSE_CLASS
;
3625 classes
[0] = X86_64_SSEDF_CLASS
;
3626 classes
[1] = X86_64_SSEDF_CLASS
;
3629 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3632 /* This modes is larger than 16 bytes. */
3640 classes
[0] = X86_64_SSE_CLASS
;
3641 classes
[1] = X86_64_SSEUP_CLASS
;
3647 classes
[0] = X86_64_SSE_CLASS
;
3653 gcc_assert (VECTOR_MODE_P (mode
));
3658 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3660 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3661 classes
[0] = X86_64_INTEGERSI_CLASS
;
3663 classes
[0] = X86_64_INTEGER_CLASS
;
3664 classes
[1] = X86_64_INTEGER_CLASS
;
/* 1 chunk if <= 8 bytes, 2 chunks otherwise.  */
3665 return 1 + (bytes
> 8);
/* Counts how many integer and SSE registers an argument needs, based on
   its psABI classification.  Returns 0 when it must go in memory.
   NOTE(review): lossy extraction -- the *int_nregs/*sse_nregs updates
   and some returns inside the switch were dropped; see upstream.  */
3669 /* Examine the argument and return set number of register required in each
3670 class. Return 0 iff parameter should be passed in memory. */
3672 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
3673 int *int_nregs
, int *sse_nregs
)
3675 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3676 int n
= classify_argument (mode
, type
, regclass
, 0);
/* Walk the chunk classes and tally register requirements.  */
3682 for (n
--; n
>= 0; n
--)
3683 switch (regclass
[n
])
3685 case X86_64_INTEGER_CLASS
:
3686 case X86_64_INTEGERSI_CLASS
:
3689 case X86_64_SSE_CLASS
:
3690 case X86_64_SSESF_CLASS
:
3691 case X86_64_SSEDF_CLASS
:
3694 case X86_64_NO_CLASS
:
3695 case X86_64_SSEUP_CLASS
:
3697 case X86_64_X87_CLASS
:
3698 case X86_64_X87UP_CLASS
:
3702 case X86_64_COMPLEX_X87_CLASS
:
/* x87 classes are register-returnable but never register-passable.  */
3703 return in_return
? 2 : 0;
3704 case X86_64_MEMORY_CLASS
:
/* Builds the RTL (REG or PARALLEL of EXPR_LISTs) describing where an
   x86-64 argument or return value lives, from its psABI classification.
   Emits one-shot errors (via the static issued_* latches) when the ABI
   demands SSE/x87 registers that the user disabled.
   NOTE(review): heavily lossy extraction -- many returns, offsets in
   the EXPR_LISTs, case labels and brace lines are missing; use this
   text as an outline and consult upstream i386.c.  */
3710 /* Construct container for the argument used by GCC interface. See
3711 FUNCTION_ARG for the detailed description. */
3714 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3715 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
3716 const int *intreg
, int sse_regno
)
3718 /* The following variables hold the static issued_error state. */
/* Static latches so each ABI-mismatch diagnostic is issued once per
   compilation, not once per argument.  */
3719 static bool issued_sse_arg_error
;
3720 static bool issued_sse_ret_error
;
3721 static bool issued_x87_ret_error
;
3723 enum machine_mode tmpmode
;
3725 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3726 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3730 int needed_sseregs
, needed_intregs
;
3731 rtx exp
[MAX_CLASSES
];
3734 n
= classify_argument (mode
, type
, regclass
, 0);
3737 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
/* Out of registers -> caller passes/returns in memory (NULL).  */
3740 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3743 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3744 some less clueful developer tries to use floating-point anyway. */
3745 if (needed_sseregs
&& !TARGET_SSE
)
3749 if (!issued_sse_ret_error
)
3751 error ("SSE register return with SSE disabled");
3752 issued_sse_ret_error
= true;
3755 else if (!issued_sse_arg_error
)
3757 error ("SSE register argument with SSE disabled");
3758 issued_sse_arg_error
= true;
3763 /* Likewise, error if the ABI requires us to return values in the
3764 x87 registers and the user specified -mno-80387. */
3765 if (!TARGET_80387
&& in_return
)
3766 for (i
= 0; i
< n
; i
++)
3767 if (regclass
[i
] == X86_64_X87_CLASS
3768 || regclass
[i
] == X86_64_X87UP_CLASS
3769 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
3771 if (!issued_x87_ret_error
)
3773 error ("x87 register return with x87 disabled");
3774 issued_x87_ret_error
= true;
3779 /* First construct simple cases. Avoid SCmode, since we want to use
3780 single register to pass this type. */
3781 if (n
== 1 && mode
!= SCmode
)
3782 switch (regclass
[0])
3784 case X86_64_INTEGER_CLASS
:
3785 case X86_64_INTEGERSI_CLASS
:
3786 return gen_rtx_REG (mode
, intreg
[0]);
3787 case X86_64_SSE_CLASS
:
3788 case X86_64_SSESF_CLASS
:
3789 case X86_64_SSEDF_CLASS
:
3790 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3791 case X86_64_X87_CLASS
:
3792 case X86_64_COMPLEX_X87_CLASS
:
3793 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3794 case X86_64_NO_CLASS
:
3795 /* Zero sized array, struct or class. */
/* Two-chunk fast paths: a full SSE pair, an x87 long double, and an
   aligned integer pair that maps onto consecutive registers.  */
3800 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
3801 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
3802 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3805 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
3806 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3807 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
3808 && regclass
[1] == X86_64_INTEGER_CLASS
3809 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3810 && intreg
[0] + 1 == intreg
[1])
3811 return gen_rtx_REG (mode
, intreg
[0]);
3813 /* Otherwise figure out the entries of the PARALLEL. */
3814 for (i
= 0; i
< n
; i
++)
3816 switch (regclass
[i
])
3818 case X86_64_NO_CLASS
:
3820 case X86_64_INTEGER_CLASS
:
3821 case X86_64_INTEGERSI_CLASS
:
3822 /* Merge TImodes on aligned occasions here too. */
/* Pick the widest integer mode that fits the remaining bytes.  */
3823 if (i
* 8 + 8 > bytes
)
3824 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3825 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
3829 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3830 if (tmpmode
== BLKmode
)
3832 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3833 gen_rtx_REG (tmpmode
, *intreg
),
3837 case X86_64_SSESF_CLASS
:
3838 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3839 gen_rtx_REG (SFmode
,
3840 SSE_REGNO (sse_regno
)),
3844 case X86_64_SSEDF_CLASS
:
3845 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3846 gen_rtx_REG (DFmode
,
3847 SSE_REGNO (sse_regno
)),
3851 case X86_64_SSE_CLASS
:
/* An SSEUP chunk following means this SSE register carries a full
   16-byte (TImode) payload.  */
3852 if (i
< n
- 1 && regclass
[i
+ 1] == X86_64_SSEUP_CLASS
)
3856 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3857 gen_rtx_REG (tmpmode
,
3858 SSE_REGNO (sse_regno
)),
3860 if (tmpmode
== TImode
)
3869 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the PARALLEL.  */
3873 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3874 for (i
= 0; i
< nexps
; i
++)
3875 XVECEXP (ret
, 0, i
) = exp
[i
];
/* 32-bit half of FUNCTION_ARG_ADVANCE: consume integer, SSE, or MMX
   argument registers from CUM after an argument of MODE/TYPE.
   NOTE(review): lossy extraction -- the switch over MODE and the
   register-exhaustion resets (the bodies after the `<= 0` checks) were
   dropped; see upstream.  */
3879 /* Update the data in CUM to advance over an argument of mode MODE
3880 and data type TYPE. (TYPE is null for libcalls where that information
3881 may not be available.) */
3884 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3885 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
/* Integer-register case: one register per word.  */
3901 cum
->words
+= words
;
3902 cum
->nregs
-= words
;
3903 cum
->regno
+= words
;
3905 if (cum
->nregs
<= 0)
/* float_in_sse thresholds (from ix86_function_sseregparm): <2 means
   DFmode stays off SSE, <1 means SFmode does too.  */
3913 if (cum
->float_in_sse
< 2)
3916 if (cum
->float_in_sse
< 1)
/* SSE vector case: scalars/vectors (not aggregates) take one SSE reg.  */
3927 if (!type
|| !AGGREGATE_TYPE_P (type
))
3929 cum
->sse_words
+= words
;
3930 cum
->sse_nregs
-= 1;
3931 cum
->sse_regno
+= 1;
3932 if (cum
->sse_nregs
<= 0)
/* MMX vector case, parallel to the SSE one above.  */
3944 if (!type
|| !AGGREGATE_TYPE_P (type
))
3946 cum
->mmx_words
+= words
;
3947 cum
->mmx_nregs
-= 1;
3948 cum
->mmx_regno
+= 1;
3949 if (cum
->mmx_nregs
<= 0)
/* SysV x86-64 half of FUNCTION_ARG_ADVANCE: if the argument fits in
   the remaining integer+SSE register budget, consume those registers;
   otherwise it goes on the stack and only `words` advances.  */
3960 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3961 tree type
, HOST_WIDE_INT words
)
3963 int int_nregs
, sse_nregs
;
/* examine_argument returns 0 for memory-classed arguments.  */
3965 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3966 cum
->words
+= words
;
3967 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3969 cum
->nregs
-= int_nregs
;
3970 cum
->sse_nregs
-= sse_nregs
;
3971 cum
->regno
+= int_nregs
;
3972 cum
->sse_regno
+= sse_nregs
;
/* else: partial fit still spills to the stack.  */
3975 cum
->words
+= words
;
/* MS x64 ABI half of FUNCTION_ARG_ADVANCE.  By-value arguments must be
   1/2/4/8 bytes (anything else is passed indirectly), and every
   argument consumes one slot regardless of class.
   NOTE(review): the register-consuming branch before the assert was
   lost in extraction; see upstream.  */
3979 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
3980 HOST_WIDE_INT words
)
3982 /* Otherwise, this should be passed indirect. */
3983 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
3985 cum
->words
+= words
;
/* FUNCTION_ARG_ADVANCE dispatcher: compute the argument's byte/word
   size, normalize vector modes via type_natural_mode, then hand off to
   the ABI-specific helper (MS x64, SysV x64, or 32-bit).  */
3994 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3995 tree type
, int named ATTRIBUTE_UNUSED
)
3997 HOST_WIDE_INT bytes
, words
;
3999 if (mode
== BLKmode
)
4000 bytes
= int_size_in_bytes (type
);
4002 bytes
= GET_MODE_SIZE (mode
);
4003 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
/* Presumably guarded by a vector-type check as in function_arg; the
   condition line was lost in extraction.  */
4006 mode
= type_natural_mode (type
);
4008 if (TARGET_64BIT_MS_ABI
)
4009 function_arg_advance_ms_64 (cum
, bytes
, words
);
4010 else if (TARGET_64BIT
)
4011 function_arg_advance_64 (cum
, mode
, type
, words
);
4013 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
/* 32-bit FUNCTION_ARG: returns the register RTX for an argument, or
   (in dropped branches) NULL to push it on the stack.  Handles integer
   regparm/fastcall registers, SFmode/DFmode in SSE under sseregparm,
   and SSE/MMX vector arguments, with one-shot ABI warnings.
   NOTE(review): lossy extraction -- the switch over MODE, several
   returns, and the warnedsse/warnedmmx assignments were dropped; see
   upstream.  */
4016 /* Define where to put the arguments to a function.
4017 Value is zero to push the argument on the stack,
4018 or a hard register in which to store the argument.
4020 MODE is the argument's machine mode.
4021 TYPE is the data type of the argument (as a tree).
4022 This is null for libcalls where that information may
4024 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4025 the preceding args and about the function being called.
4026 NAMED is nonzero if this argument is a named parameter
4027 (otherwise it is an extra parameter matching an ellipsis). */
4030 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4031 enum machine_mode orig_mode
, tree type
,
4032 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
/* One-shot latches for the ABI-mismatch warnings below.  */
4034 static bool warnedsse
, warnedmmx
;
4036 /* Avoid the AL settings for the Unix64 ABI. */
4037 if (mode
== VOIDmode
)
/* Integer registers: only if the whole argument fits in what's left.  */
4053 if (words
<= cum
->nregs
)
4055 int regno
= cum
->regno
;
4057 /* Fastcall allocates the first two DWORD (SImode) or
4058 smaller arguments to ECX and EDX. */
4061 if (mode
== BLKmode
|| mode
== DImode
)
4064 /* ECX not EAX is the first allocated register. */
4068 return gen_rtx_REG (mode
, regno
);
4073 if (cum
->float_in_sse
< 2)
4076 if (cum
->float_in_sse
< 1)
/* SSE vector argument (non-aggregate): warn once if SSE is off.  */
4086 if (!type
|| !AGGREGATE_TYPE_P (type
))
4088 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4091 warning (0, "SSE vector argument without SSE enabled "
4095 return gen_reg_or_parallel (mode
, orig_mode
,
4096 cum
->sse_regno
+ FIRST_SSE_REG
);
/* MMX vector argument, parallel to the SSE case.  */
4104 if (!type
|| !AGGREGATE_TYPE_P (type
))
4106 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4109 warning (0, "MMX vector argument without MMX enabled "
4113 return gen_reg_or_parallel (mode
, orig_mode
,
4114 cum
->mmx_regno
+ FIRST_MMX_REG
);
/* SysV x86-64 FUNCTION_ARG: the VOIDmode sentinel yields the hidden
   %al SSE-register count for varargs callees; real arguments are laid
   out by construct_container.
   NOTE(review): parts of the GEN_INT expression and the trailing
   construct_container arguments were lost in extraction.  */
4123 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4124 enum machine_mode orig_mode
, tree type
)
4126 /* Handle a hidden AL argument containing number of registers
4127 for varargs x86-64 functions. */
4128 if (mode
== VOIDmode
)
4129 return GEN_INT (cum
->maybe_vaarg
4130 ? (cum
->sse_nregs
< 0
/* Delegate layout to the psABI classifier/container builder, starting
   from the next free integer parameter register.  */
4135 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4137 &x86_64_int_parameter_registers
[cum
->regno
],
4142 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4143 enum machine_mode orig_mode
, int named
)
4147 /* Avoid the AL settings for the Unix64 ABI. */
4148 if (mode
== VOIDmode
)
4151 /* If we've run out of registers, it goes on the stack. */
4152 if (cum
->nregs
== 0)
4155 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
4157 /* Only floating point modes are passed in anything but integer regs. */
4158 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
4161 regno
= cum
->regno
+ FIRST_SSE_REG
;
4166 /* Unnamed floating parameters are passed in both the
4167 SSE and integer registers. */
4168 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
4169 t2
= gen_rtx_REG (mode
, regno
);
4170 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
4171 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
4172 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
4176 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
4180 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
4181 tree type
, int named
)
4183 enum machine_mode mode
= omode
;
4184 HOST_WIDE_INT bytes
, words
;
4186 if (mode
== BLKmode
)
4187 bytes
= int_size_in_bytes (type
);
4189 bytes
= GET_MODE_SIZE (mode
);
4190 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4192 /* To simplify the code below, represent vector types with a vector mode
4193 even if MMX/SSE are not active. */
4194 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4195 mode
= type_natural_mode (type
);
4197 if (TARGET_64BIT_MS_ABI
)
4198 return function_arg_ms_64 (cum
, mode
, omode
, named
);
4199 else if (TARGET_64BIT
)
4200 return function_arg_64 (cum
, mode
, omode
, type
);
4202 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
4205 /* A C expression that indicates when an argument must be passed by
4206 reference. If nonzero for an argument, a copy of that argument is
4207 made in memory and a pointer to the argument is passed instead of
4208 the argument itself. The pointer is passed in whatever way is
4209 appropriate for passing a pointer to that type. */
4212 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4213 enum machine_mode mode ATTRIBUTE_UNUSED
,
4214 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4216 if (TARGET_64BIT_MS_ABI
)
4220 /* Arrays are passed by reference. */
4221 if (TREE_CODE (type
) == ARRAY_TYPE
)
4224 if (AGGREGATE_TYPE_P (type
))
4226 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4227 are passed by reference. */
4228 int el2
= exact_log2 (int_size_in_bytes (type
));
4229 return !(el2
>= 0 && el2
<= 3);
4233 /* __m128 is passed by reference. */
4234 /* ??? How to handle complex? For now treat them as structs,
4235 and pass them by reference if they're too large. */
4236 if (GET_MODE_SIZE (mode
) > 8)
4239 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
4245 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4246 ABI. Only called if TARGET_SSE. */
4248 contains_128bit_aligned_vector_p (tree type
)
4250 enum machine_mode mode
= TYPE_MODE (type
);
4251 if (SSE_REG_MODE_P (mode
)
4252 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4254 if (TYPE_ALIGN (type
) < 128)
4257 if (AGGREGATE_TYPE_P (type
))
4259 /* Walk the aggregates recursively. */
4260 switch (TREE_CODE (type
))
4264 case QUAL_UNION_TYPE
:
4268 /* Walk all the structure fields. */
4269 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4271 if (TREE_CODE (field
) == FIELD_DECL
4272 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4279 /* Just for use if some languages passes arrays by value. */
4280 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4291 /* Gives the alignment boundary, in bits, of an argument with the
4292 specified mode and type. */
4295 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4299 align
= TYPE_ALIGN (type
);
4301 align
= GET_MODE_ALIGNMENT (mode
);
4302 if (align
< PARM_BOUNDARY
)
4303 align
= PARM_BOUNDARY
;
4306 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4307 make an exception for SSE modes since these require 128bit
4310 The handling here differs from field_alignment. ICC aligns MMX
4311 arguments to 4 byte boundaries, while structure fields are aligned
4312 to 8 byte boundaries. */
4314 align
= PARM_BOUNDARY
;
4317 if (!SSE_REG_MODE_P (mode
))
4318 align
= PARM_BOUNDARY
;
4322 if (!contains_128bit_aligned_vector_p (type
))
4323 align
= PARM_BOUNDARY
;
4331 /* Return true if N is a possible register number of function value. */
4334 ix86_function_value_regno_p (int regno
)
4341 case FIRST_FLOAT_REG
:
4342 if (TARGET_64BIT_MS_ABI
)
4344 return TARGET_FLOAT_RETURNS_IN_80387
;
4350 if (TARGET_MACHO
|| TARGET_64BIT
)
4358 /* Define how to find the value returned by a function.
4359 VALTYPE is the data type of the value (as a tree).
4360 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4361 otherwise, FUNC is 0. */
4364 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4365 const_tree fntype
, const_tree fn
)
4369 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4370 we normally prevent this case when mmx is not available. However
4371 some ABIs may require the result to be returned like DImode. */
4372 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4373 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4375 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4376 we prevent this case when sse is not available. However some ABIs
4377 may require the result to be returned like integer TImode. */
4378 else if (mode
== TImode
4379 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4380 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4382 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4383 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4384 regno
= FIRST_FLOAT_REG
;
4386 /* Most things go in %eax. */
4389 /* Override FP return register with %xmm0 for local functions when
4390 SSE math is enabled or for functions with sseregparm attribute. */
4391 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4393 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4394 if ((sse_level
>= 1 && mode
== SFmode
)
4395 || (sse_level
== 2 && mode
== DFmode
))
4396 regno
= FIRST_SSE_REG
;
4399 return gen_rtx_REG (orig_mode
, regno
);
4403 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4408 /* Handle libcalls, which don't provide a type node. */
4409 if (valtype
== NULL
)
4421 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4424 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4428 return gen_rtx_REG (mode
, 0);
4432 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4433 REGPARM_MAX
, SSE_REGPARM_MAX
,
4434 x86_64_int_return_registers
, 0);
4436 /* For zero sized structures, construct_container returns NULL, but we
4437 need to keep rest of compiler happy by returning meaningful value. */
4439 ret
= gen_rtx_REG (orig_mode
, 0);
4445 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
4447 unsigned int regno
= 0;
4451 if (mode
== SFmode
|| mode
== DFmode
)
4452 regno
= FIRST_SSE_REG
;
4453 else if (VECTOR_MODE_P (mode
) || GET_MODE_SIZE (mode
) == 16)
4454 regno
= FIRST_SSE_REG
;
4457 return gen_rtx_REG (orig_mode
, regno
);
4461 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
4462 enum machine_mode orig_mode
, enum machine_mode mode
)
4464 const_tree fn
, fntype
;
4467 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4468 fn
= fntype_or_decl
;
4469 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4471 if (TARGET_64BIT_MS_ABI
)
4472 return function_value_ms_64 (orig_mode
, mode
);
4473 else if (TARGET_64BIT
)
4474 return function_value_64 (orig_mode
, mode
, valtype
);
4476 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
4480 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
4481 bool outgoing ATTRIBUTE_UNUSED
)
4483 enum machine_mode mode
, orig_mode
;
4485 orig_mode
= TYPE_MODE (valtype
);
4486 mode
= type_natural_mode (valtype
);
4487 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
4491 ix86_libcall_value (enum machine_mode mode
)
4493 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
4496 /* Return true iff type is returned in memory. */
4499 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
4503 if (mode
== BLKmode
)
4506 size
= int_size_in_bytes (type
);
4508 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4511 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4513 /* User-created vectors small enough to fit in EAX. */
4517 /* MMX/3dNow values are returned in MM0,
4518 except when it doesn't exits. */
4520 return (TARGET_MMX
? 0 : 1);
4522 /* SSE values are returned in XMM0, except when it doesn't exist. */
4524 return (TARGET_SSE
? 0 : 1);
4539 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
4541 int needed_intregs
, needed_sseregs
;
4542 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4546 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
4548 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4550 /* __m128 and friends are returned in xmm0. */
4551 if (size
== 16 && VECTOR_MODE_P (mode
))
4554 /* Otherwise, the size must be exactly in [1248]. */
4555 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8);
4559 ix86_return_in_memory (const_tree type
)
4561 const enum machine_mode mode
= type_natural_mode (type
);
4563 if (TARGET_64BIT_MS_ABI
)
4564 return return_in_memory_ms_64 (type
, mode
);
4565 else if (TARGET_64BIT
)
4566 return return_in_memory_64 (type
, mode
);
4568 return return_in_memory_32 (type
, mode
);
4571 /* Return false iff TYPE is returned in memory. This version is used
4572 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4573 but differs notably in that when MMX is available, 8-byte vectors
4574 are returned in memory, rather than in MMX registers. */
4577 ix86_sol10_return_in_memory (const_tree type
)
4580 enum machine_mode mode
= type_natural_mode (type
);
4583 return return_in_memory_64 (type
, mode
);
4585 if (mode
== BLKmode
)
4588 size
= int_size_in_bytes (type
);
4590 if (VECTOR_MODE_P (mode
))
4592 /* Return in memory only if MMX registers *are* available. This
4593 seems backwards, but it is consistent with the existing
4600 else if (mode
== TImode
)
4602 else if (mode
== XFmode
)
4608 /* When returning SSE vector types, we have a choice of either
4609 (1) being abi incompatible with a -march switch, or
4610 (2) generating an error.
4611 Given no good solution, I think the safest thing is one warning.
4612 The user won't be able to use -Werror, but....
4614 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4615 called in response to actually generating a caller or callee that
4616 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4617 via aggregate_value_p for general type probing from tree-ssa. */
4620 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4622 static bool warnedsse
, warnedmmx
;
4624 if (!TARGET_64BIT
&& type
)
4626 /* Look at the return type of the function, not the function type. */
4627 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4629 if (!TARGET_SSE
&& !warnedsse
)
4632 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4635 warning (0, "SSE vector return without SSE enabled "
4640 if (!TARGET_MMX
&& !warnedmmx
)
4642 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4645 warning (0, "MMX vector return without MMX enabled "
4655 /* Create the va_list data type. */
4658 ix86_build_builtin_va_list (void)
4660 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4662 /* For i386 we use plain pointer to argument area. */
4663 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4664 return build_pointer_type (char_type_node
);
4666 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4667 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4669 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4670 unsigned_type_node
);
4671 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4672 unsigned_type_node
);
4673 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4675 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4678 va_list_gpr_counter_field
= f_gpr
;
4679 va_list_fpr_counter_field
= f_fpr
;
4681 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4682 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4683 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4684 DECL_FIELD_CONTEXT (f_sav
) = record
;
4686 TREE_CHAIN (record
) = type_decl
;
4687 TYPE_NAME (record
) = type_decl
;
4688 TYPE_FIELDS (record
) = f_gpr
;
4689 TREE_CHAIN (f_gpr
) = f_fpr
;
4690 TREE_CHAIN (f_fpr
) = f_ovf
;
4691 TREE_CHAIN (f_ovf
) = f_sav
;
4693 layout_type (record
);
4695 /* The correct type is an array type of one element. */
4696 return build_array_type (record
, build_index_type (size_zero_node
));
4699 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4702 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4712 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4715 /* Indicate to allocate space on the stack for varargs save area. */
4716 ix86_save_varrargs_registers
= 1;
4717 /* We need 16-byte stack alignment to save SSE registers. If user
4718 asked for lower preferred_stack_boundary, lets just hope that he knows
4719 what he is doing and won't varargs SSE values.
4721 We also may end up assuming that only 64bit values are stored in SSE
4722 register let some floating point program work. */
4723 if (ix86_preferred_stack_boundary
>= 128)
4724 cfun
->stack_alignment_needed
= 128;
4726 save_area
= frame_pointer_rtx
;
4727 set
= get_varargs_alias_set ();
4729 for (i
= cum
->regno
;
4731 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4734 mem
= gen_rtx_MEM (Pmode
,
4735 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4736 MEM_NOTRAP_P (mem
) = 1;
4737 set_mem_alias_set (mem
, set
);
4738 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4739 x86_64_int_parameter_registers
[i
]));
4742 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4744 /* Now emit code to save SSE registers. The AX parameter contains number
4745 of SSE parameter registers used to call this function. We use
4746 sse_prologue_save insn template that produces computed jump across
4747 SSE saves. We need some preparation work to get this working. */
4749 label
= gen_label_rtx ();
4750 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4752 /* Compute address to jump to :
4753 label - 5*eax + nnamed_sse_arguments*5 */
4754 tmp_reg
= gen_reg_rtx (Pmode
);
4755 nsse_reg
= gen_reg_rtx (Pmode
);
4756 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4757 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4758 gen_rtx_MULT (Pmode
, nsse_reg
,
4763 gen_rtx_CONST (DImode
,
4764 gen_rtx_PLUS (DImode
,
4766 GEN_INT (cum
->sse_regno
* 4))));
4768 emit_move_insn (nsse_reg
, label_ref
);
4769 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4771 /* Compute address of memory block we save into. We always use pointer
4772 pointing 127 bytes after first byte to store - this is needed to keep
4773 instruction size limited by 4 bytes. */
4774 tmp_reg
= gen_reg_rtx (Pmode
);
4775 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4776 plus_constant (save_area
,
4777 8 * REGPARM_MAX
+ 127)));
4778 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4779 MEM_NOTRAP_P (mem
) = 1;
4780 set_mem_alias_set (mem
, set
);
4781 set_mem_align (mem
, BITS_PER_WORD
);
4783 /* And finally do the dirty job! */
4784 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4785 GEN_INT (cum
->sse_regno
), label
));
4790 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4792 alias_set_type set
= get_varargs_alias_set ();
4795 for (i
= cum
->regno
; i
< REGPARM_MAX
; i
++)
4799 mem
= gen_rtx_MEM (Pmode
,
4800 plus_constant (virtual_incoming_args_rtx
,
4801 i
* UNITS_PER_WORD
));
4802 MEM_NOTRAP_P (mem
) = 1;
4803 set_mem_alias_set (mem
, set
);
4805 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4806 emit_move_insn (mem
, reg
);
4811 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4812 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4815 CUMULATIVE_ARGS next_cum
;
4819 /* This argument doesn't appear to be used anymore. Which is good,
4820 because the old code here didn't suppress rtl generation. */
4821 gcc_assert (!no_rtl
);
4826 fntype
= TREE_TYPE (current_function_decl
);
4827 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4828 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4829 != void_type_node
));
4831 /* For varargs, we do not want to skip the dummy va_dcl argument.
4832 For stdargs, we do want to skip the last named argument. */
4835 function_arg_advance (&next_cum
, mode
, type
, 1);
4837 if (TARGET_64BIT_MS_ABI
)
4838 setup_incoming_varargs_ms_64 (&next_cum
);
4840 setup_incoming_varargs_64 (&next_cum
);
4843 /* Implement va_start. */
4846 ix86_va_start (tree valist
, rtx nextarg
)
4848 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4849 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4850 tree gpr
, fpr
, ovf
, sav
, t
;
4853 /* Only 64bit target needs something special. */
4854 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4856 std_expand_builtin_va_start (valist
, nextarg
);
4860 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4861 f_fpr
= TREE_CHAIN (f_gpr
);
4862 f_ovf
= TREE_CHAIN (f_fpr
);
4863 f_sav
= TREE_CHAIN (f_ovf
);
4865 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4866 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4867 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4868 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4869 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4871 /* Count number of gp and fp argument registers used. */
4872 words
= current_function_args_info
.words
;
4873 n_gpr
= current_function_args_info
.regno
;
4874 n_fpr
= current_function_args_info
.sse_regno
;
4876 if (cfun
->va_list_gpr_size
)
4878 type
= TREE_TYPE (gpr
);
4879 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4880 build_int_cst (type
, n_gpr
* 8));
4881 TREE_SIDE_EFFECTS (t
) = 1;
4882 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4885 if (cfun
->va_list_fpr_size
)
4887 type
= TREE_TYPE (fpr
);
4888 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4889 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4890 TREE_SIDE_EFFECTS (t
) = 1;
4891 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4894 /* Find the overflow area. */
4895 type
= TREE_TYPE (ovf
);
4896 t
= make_tree (type
, virtual_incoming_args_rtx
);
4898 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
4899 size_int (words
* UNITS_PER_WORD
));
4900 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4901 TREE_SIDE_EFFECTS (t
) = 1;
4902 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4904 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4906 /* Find the register save area.
4907 Prologue of the function save it right above stack frame. */
4908 type
= TREE_TYPE (sav
);
4909 t
= make_tree (type
, frame_pointer_rtx
);
4910 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4911 TREE_SIDE_EFFECTS (t
) = 1;
4912 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4916 /* Implement va_arg. */
4919 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4921 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4922 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4923 tree gpr
, fpr
, ovf
, sav
, t
;
4925 tree lab_false
, lab_over
= NULL_TREE
;
4930 enum machine_mode nat_mode
;
4932 /* Only 64bit target needs something special. */
4933 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4934 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4936 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4937 f_fpr
= TREE_CHAIN (f_gpr
);
4938 f_ovf
= TREE_CHAIN (f_fpr
);
4939 f_sav
= TREE_CHAIN (f_ovf
);
4941 valist
= build_va_arg_indirect_ref (valist
);
4942 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4943 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4944 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4945 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4947 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4949 type
= build_pointer_type (type
);
4950 size
= int_size_in_bytes (type
);
4951 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4953 nat_mode
= type_natural_mode (type
);
4954 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4955 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4957 /* Pull the value out of the saved registers. */
4959 addr
= create_tmp_var (ptr_type_node
, "addr");
4960 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4964 int needed_intregs
, needed_sseregs
;
4966 tree int_addr
, sse_addr
;
4968 lab_false
= create_artificial_label ();
4969 lab_over
= create_artificial_label ();
4971 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4973 need_temp
= (!REG_P (container
)
4974 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4975 || TYPE_ALIGN (type
) > 128));
4977 /* In case we are passing structure, verify that it is consecutive block
4978 on the register save area. If not we need to do moves. */
4979 if (!need_temp
&& !REG_P (container
))
4981 /* Verify that all registers are strictly consecutive */
4982 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4986 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4988 rtx slot
= XVECEXP (container
, 0, i
);
4989 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4990 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4998 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
5000 rtx slot
= XVECEXP (container
, 0, i
);
5001 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
5002 || INTVAL (XEXP (slot
, 1)) != i
* 8)
5014 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
5015 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
5016 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
5017 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
5020 /* First ensure that we fit completely in registers. */
5023 t
= build_int_cst (TREE_TYPE (gpr
),
5024 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
5025 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
5026 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5027 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5028 gimplify_and_add (t
, pre_p
);
5032 t
= build_int_cst (TREE_TYPE (fpr
),
5033 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
5035 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
5036 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5037 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5038 gimplify_and_add (t
, pre_p
);
5041 /* Compute index to start of area used for integer regs. */
5044 /* int_addr = gpr + sav; */
5045 t
= fold_convert (sizetype
, gpr
);
5046 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5047 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
5048 gimplify_and_add (t
, pre_p
);
5052 /* sse_addr = fpr + sav; */
5053 t
= fold_convert (sizetype
, fpr
);
5054 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5055 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
5056 gimplify_and_add (t
, pre_p
);
5061 tree temp
= create_tmp_var (type
, "va_arg_tmp");
5064 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
5065 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5066 gimplify_and_add (t
, pre_p
);
5068 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
5070 rtx slot
= XVECEXP (container
, 0, i
);
5071 rtx reg
= XEXP (slot
, 0);
5072 enum machine_mode mode
= GET_MODE (reg
);
5073 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
5074 tree addr_type
= build_pointer_type (piece_type
);
5077 tree dest_addr
, dest
;
5079 if (SSE_REGNO_P (REGNO (reg
)))
5081 src_addr
= sse_addr
;
5082 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
5086 src_addr
= int_addr
;
5087 src_offset
= REGNO (reg
) * 8;
5089 src_addr
= fold_convert (addr_type
, src_addr
);
5090 src_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, src_addr
,
5091 size_int (src_offset
));
5092 src
= build_va_arg_indirect_ref (src_addr
);
5094 dest_addr
= fold_convert (addr_type
, addr
);
5095 dest_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, dest_addr
,
5096 size_int (INTVAL (XEXP (slot
, 1))));
5097 dest
= build_va_arg_indirect_ref (dest_addr
);
5099 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
5100 gimplify_and_add (t
, pre_p
);
5106 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
5107 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
5108 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
5109 gimplify_and_add (t
, pre_p
);
5113 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
5114 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
5115 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
5116 gimplify_and_add (t
, pre_p
);
5119 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
5120 gimplify_and_add (t
, pre_p
);
5122 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
5123 append_to_statement_list (t
, pre_p
);
5126 /* ... otherwise out of the overflow area. */
5128 /* Care for on-stack alignment if needed. */
5129 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
5130 || integer_zerop (TYPE_SIZE (type
)))
5134 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
5135 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
5136 size_int (align
- 1));
5137 t
= fold_convert (sizetype
, t
);
5138 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5140 t
= fold_convert (TREE_TYPE (ovf
), t
);
5142 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
5144 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5145 gimplify_and_add (t2
, pre_p
);
5147 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
,
5148 size_int (rsize
* UNITS_PER_WORD
));
5149 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
5150 gimplify_and_add (t
, pre_p
);
5154 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
5155 append_to_statement_list (t
, pre_p
);
5158 ptrtype
= build_pointer_type (type
);
5159 addr
= fold_convert (ptrtype
, addr
);
5162 addr
= build_va_arg_indirect_ref (addr
);
5163 return build_va_arg_indirect_ref (addr
);
5166 /* Return nonzero if OPNUM's MEM should be matched
5167 in movabs* patterns. */
5170 ix86_check_movabs (rtx insn
, int opnum
)
5174 set
= PATTERN (insn
);
5175 if (GET_CODE (set
) == PARALLEL
)
5176 set
= XVECEXP (set
, 0, 0);
5177 gcc_assert (GET_CODE (set
) == SET
);
5178 mem
= XEXP (set
, opnum
);
5179 while (GET_CODE (mem
) == SUBREG
)
5180 mem
= SUBREG_REG (mem
);
5181 gcc_assert (MEM_P (mem
));
5182 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5185 /* Initialize the table of extra 80387 mathematical constants. */
5188 init_ext_80387_constants (void)
5190 static const char * cst
[5] =
5192 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5193 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5194 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5195 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5196 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5200 for (i
= 0; i
< 5; i
++)
5202 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5203 /* Ensure each constant is rounded to XFmode precision. */
5204 real_convert (&ext_80387_constants_table
[i
],
5205 XFmode
, &ext_80387_constants_table
[i
]);
5208 ext_80387_constants_init
= 1;
5211 /* Return true if the constant is something that can be loaded with
5212 a special instruction. */
5215 standard_80387_constant_p (rtx x
)
5217 enum machine_mode mode
= GET_MODE (x
);
5221 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
5224 if (x
== CONST0_RTX (mode
))
5226 if (x
== CONST1_RTX (mode
))
5229 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5231 /* For XFmode constants, try to find a special 80387 instruction when
5232 optimizing for size or on those CPUs that benefit from them. */
5234 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5238 if (! ext_80387_constants_init
)
5239 init_ext_80387_constants ();
5241 for (i
= 0; i
< 5; i
++)
5242 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5246 /* Load of the constant -0.0 or -1.0 will be split as
5247 fldz;fchs or fld1;fchs sequence. */
5248 if (real_isnegzero (&r
))
5250 if (real_identical (&r
, &dconstm1
))
5256 /* Return the opcode of the special instruction to be used to load
5260 standard_80387_constant_opcode (rtx x
)
5262 switch (standard_80387_constant_p (x
))
5286 /* Return the CONST_DOUBLE representing the 80387 constant that is
5287 loaded by the specified special instruction. The argument IDX
5288 matches the return value from standard_80387_constant_p. */
5291 standard_80387_constant_rtx (int idx
)
5295 if (! ext_80387_constants_init
)
5296 init_ext_80387_constants ();
5312 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5316 /* Return 1 if mode is a valid mode for sse. */
5318 standard_sse_mode_p (enum machine_mode mode
)
5335 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5338 standard_sse_constant_p (rtx x
)
5340 enum machine_mode mode
= GET_MODE (x
);
5342 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5344 if (vector_all_ones_operand (x
, mode
)
5345 && standard_sse_mode_p (mode
))
5346 return TARGET_SSE2
? 2 : -1;
5351 /* Return the opcode of the special instruction to be used to load
5355 standard_sse_constant_opcode (rtx insn
, rtx x
)
5357 switch (standard_sse_constant_p (x
))
5360 if (get_attr_mode (insn
) == MODE_V4SF
)
5361 return "xorps\t%0, %0";
5362 else if (get_attr_mode (insn
) == MODE_V2DF
)
5363 return "xorpd\t%0, %0";
5365 return "pxor\t%0, %0";
5367 return "pcmpeqd\t%0, %0";
5372 /* Returns 1 if OP contains a symbol reference */
5375 symbolic_reference_mentioned_p (rtx op
)
5380 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5383 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5384 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5390 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5391 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5395 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5402 /* Return 1 if it is appropriate to emit `ret' instructions in the
5403 body of a function. Do this only if the epilogue is simple, needing a
5404 couple of insns. Prior to reloading, we can't tell how many registers
5405 must be saved, so return 0 then. Return 0 if there is no frame
5406 marker to de-allocate. */
5409 ix86_can_use_return_insn_p (void)
5411 struct ix86_frame frame
;
5413 if (! reload_completed
|| frame_pointer_needed
)
5416 /* Don't allow more than 32 pop, since that's all we can do
5417 with one instruction. */
5418 if (current_function_pops_args
5419 && current_function_args_size
>= 32768)
5422 ix86_compute_frame_layout (&frame
);
5423 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5426 /* Value should be nonzero if functions must have frame pointers.
5427 Zero means the frame pointer need not be set up (and parms may
5428 be accessed via the stack pointer) in functions that seem suitable. */
5431 ix86_frame_pointer_required (void)
5433 /* If we accessed previous frames, then the generated code expects
5434 to be able to access the saved ebp value in our frame. */
5435 if (cfun
->machine
->accesses_prev_frame
)
5438 /* Several x86 os'es need a frame pointer for other reasons,
5439 usually pertaining to setjmp. */
5440 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5443 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5444 the frame pointer by default. Turn it back on now if we've not
5445 got a leaf function. */
5446 if (TARGET_OMIT_LEAF_FRAME_POINTER
5447 && (!current_function_is_leaf
5448 || ix86_current_function_calls_tls_descriptor
))
5451 if (current_function_profile
)
5457 /* Record that the current function accesses previous call frames. */
5460 ix86_setup_frame_addresses (void)
5462 cfun
->machine
->accesses_prev_frame
= 1;
5465 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5466 # define USE_HIDDEN_LINKONCE 1
5468 # define USE_HIDDEN_LINKONCE 0
5471 static int pic_labels_used
;
5473 /* Fills in the label name that should be used for a pc thunk for
5474 the given register. */
5477 get_pc_thunk_name (char name
[32], unsigned int regno
)
5479 gcc_assert (!TARGET_64BIT
);
5481 if (USE_HIDDEN_LINKONCE
)
5482 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5484 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5488 /* This function generates code for -fpic that loads %ebx with
5489 the return address of the caller and then returns. */
5492 ix86_file_end (void)
5497 for (regno
= 0; regno
< 8; ++regno
)
5501 if (! ((pic_labels_used
>> regno
) & 1))
5504 get_pc_thunk_name (name
, regno
);
5509 switch_to_section (darwin_sections
[text_coal_section
]);
5510 fputs ("\t.weak_definition\t", asm_out_file
);
5511 assemble_name (asm_out_file
, name
);
5512 fputs ("\n\t.private_extern\t", asm_out_file
);
5513 assemble_name (asm_out_file
, name
);
5514 fputs ("\n", asm_out_file
);
5515 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5519 if (USE_HIDDEN_LINKONCE
)
5523 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5525 TREE_PUBLIC (decl
) = 1;
5526 TREE_STATIC (decl
) = 1;
5527 DECL_ONE_ONLY (decl
) = 1;
5529 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5530 switch_to_section (get_named_section (decl
, NULL
, 0));
5532 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5533 fputs ("\t.hidden\t", asm_out_file
);
5534 assemble_name (asm_out_file
, name
);
5535 fputc ('\n', asm_out_file
);
5536 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5540 switch_to_section (text_section
);
5541 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5544 xops
[0] = gen_rtx_REG (SImode
, regno
);
5545 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5546 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5547 output_asm_insn ("ret", xops
);
5550 if (NEED_INDICATE_EXEC_STACK
)
5551 file_end_indicate_exec_stack ();
5554 /* Emit code for the SET_GOT patterns. */
5557 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5563 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5565 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5566 xops
[2] = gen_rtx_MEM (Pmode
,
5567 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5568 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5570 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5571 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5572 an unadorned address. */
5573 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5574 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5575 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5579 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5581 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5583 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5586 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5588 output_asm_insn ("call\t%a2", xops
);
5591 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5592 is what will be referenced by the Mach-O PIC subsystem. */
5594 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5597 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5598 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5601 output_asm_insn ("pop{l}\t%0", xops
);
5606 get_pc_thunk_name (name
, REGNO (dest
));
5607 pic_labels_used
|= 1 << REGNO (dest
);
5609 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5610 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5611 output_asm_insn ("call\t%X2", xops
);
5612 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5613 is what will be referenced by the Mach-O PIC subsystem. */
5616 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5618 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5619 CODE_LABEL_NUMBER (label
));
5626 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5627 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5629 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5634 /* Generate an "push" pattern for input ARG. */
5639 return gen_rtx_SET (VOIDmode
,
5641 gen_rtx_PRE_DEC (Pmode
,
5642 stack_pointer_rtx
)),
5646 /* Return >= 0 if there is an unused call-clobbered register available
5647 for the entire function. */
5650 ix86_select_alt_pic_regnum (void)
5652 if (current_function_is_leaf
&& !current_function_profile
5653 && !ix86_current_function_calls_tls_descriptor
)
5656 for (i
= 2; i
>= 0; --i
)
5657 if (!df_regs_ever_live_p (i
))
5661 return INVALID_REGNUM
;
5664 /* Return 1 if we need to save REGNO. */
5666 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5668 if (pic_offset_table_rtx
5669 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5670 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
5671 || current_function_profile
5672 || current_function_calls_eh_return
5673 || current_function_uses_const_pool
))
5675 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5680 if (current_function_calls_eh_return
&& maybe_eh_return
)
5685 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5686 if (test
== INVALID_REGNUM
)
5693 if (cfun
->machine
->force_align_arg_pointer
5694 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5697 return (df_regs_ever_live_p (regno
)
5698 && !call_used_regs
[regno
]
5699 && !fixed_regs
[regno
]
5700 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5703 /* Return number of registers to be saved on the stack. */
5706 ix86_nsaved_regs (void)
5711 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5712 if (ix86_save_reg (regno
, true))
5717 /* Return the offset between two registers, one to be eliminated, and the other
5718 its replacement, at the start of a routine. */
5721 ix86_initial_elimination_offset (int from
, int to
)
5723 struct ix86_frame frame
;
5724 ix86_compute_frame_layout (&frame
);
5726 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5727 return frame
.hard_frame_pointer_offset
;
5728 else if (from
== FRAME_POINTER_REGNUM
5729 && to
== HARD_FRAME_POINTER_REGNUM
)
5730 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5733 gcc_assert (to
== STACK_POINTER_REGNUM
);
5735 if (from
== ARG_POINTER_REGNUM
)
5736 return frame
.stack_pointer_offset
;
5738 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5739 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5743 /* Fill structure ix86_frame about frame of currently computed function. */
5746 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5748 HOST_WIDE_INT total_size
;
5749 unsigned int stack_alignment_needed
;
5750 HOST_WIDE_INT offset
;
5751 unsigned int preferred_alignment
;
5752 HOST_WIDE_INT size
= get_frame_size ();
5754 frame
->nregs
= ix86_nsaved_regs ();
5757 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5758 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5760 /* During reload iteration the amount of registers saved can change.
5761 Recompute the value as needed. Do not recompute when amount of registers
5762 didn't change as reload does multiple calls to the function and does not
5763 expect the decision to change within single iteration. */
5765 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5767 int count
= frame
->nregs
;
5769 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5770 /* The fast prologue uses move instead of push to save registers. This
5771 is significantly longer, but also executes faster as modern hardware
5772 can execute the moves in parallel, but can't do that for push/pop.
5774 Be careful about choosing what prologue to emit: When function takes
5775 many instructions to execute we may use slow version as well as in
5776 case function is known to be outside hot spot (this is known with
5777 feedback only). Weight the size of function by number of registers
5778 to save as it is cheap to use one or two push instructions but very
5779 slow to use many of them. */
5781 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5782 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5783 || (flag_branch_probabilities
5784 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5785 cfun
->machine
->use_fast_prologue_epilogue
= false;
5787 cfun
->machine
->use_fast_prologue_epilogue
5788 = !expensive_function_p (count
);
5790 if (TARGET_PROLOGUE_USING_MOVE
5791 && cfun
->machine
->use_fast_prologue_epilogue
)
5792 frame
->save_regs_using_mov
= true;
5794 frame
->save_regs_using_mov
= false;
5797 /* Skip return address and saved base pointer. */
5798 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5800 frame
->hard_frame_pointer_offset
= offset
;
5802 /* Do some sanity checking of stack_alignment_needed and
5803 preferred_alignment, since i386 port is the only using those features
5804 that may break easily. */
5806 gcc_assert (!size
|| stack_alignment_needed
);
5807 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5808 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5809 gcc_assert (stack_alignment_needed
5810 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5812 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5813 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5815 /* Register save area */
5816 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5819 if (ix86_save_varrargs_registers
)
5821 offset
+= X86_64_VARARGS_SIZE
;
5822 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5825 frame
->va_arg_size
= 0;
5827 /* Align start of frame for local function. */
5828 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5829 & -stack_alignment_needed
) - offset
;
5831 offset
+= frame
->padding1
;
5833 /* Frame pointer points here. */
5834 frame
->frame_pointer_offset
= offset
;
5838 /* Add outgoing arguments area. Can be skipped if we eliminated
5839 all the function calls as dead code.
5840 Skipping is however impossible when function calls alloca. Alloca
5841 expander assumes that last current_function_outgoing_args_size
5842 of stack frame are unused. */
5843 if (ACCUMULATE_OUTGOING_ARGS
5844 && (!current_function_is_leaf
|| current_function_calls_alloca
5845 || ix86_current_function_calls_tls_descriptor
))
5847 offset
+= current_function_outgoing_args_size
;
5848 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5851 frame
->outgoing_arguments_size
= 0;
5853 /* Align stack boundary. Only needed if we're calling another function
5855 if (!current_function_is_leaf
|| current_function_calls_alloca
5856 || ix86_current_function_calls_tls_descriptor
)
5857 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5858 & -preferred_alignment
) - offset
;
5860 frame
->padding2
= 0;
5862 offset
+= frame
->padding2
;
5864 /* We've reached end of stack frame. */
5865 frame
->stack_pointer_offset
= offset
;
5867 /* Size prologue needs to allocate. */
5868 frame
->to_allocate
=
5869 (size
+ frame
->padding1
+ frame
->padding2
5870 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5872 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5873 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5874 frame
->save_regs_using_mov
= false;
5876 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5877 && current_function_is_leaf
5878 && !ix86_current_function_calls_tls_descriptor
)
5880 frame
->red_zone_size
= frame
->to_allocate
;
5881 if (frame
->save_regs_using_mov
)
5882 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5883 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5884 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5887 frame
->red_zone_size
= 0;
5888 frame
->to_allocate
-= frame
->red_zone_size
;
5889 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5891 fprintf (stderr
, "\n");
5892 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5893 fprintf (stderr
, "size: %ld\n", (long)size
);
5894 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5895 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5896 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5897 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5898 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5899 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5900 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5901 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5902 (long)frame
->hard_frame_pointer_offset
);
5903 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5904 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5905 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5906 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5910 /* Emit code to save registers in the prologue. */
5913 ix86_emit_save_regs (void)
5918 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5919 if (ix86_save_reg (regno
, true))
5921 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5922 RTX_FRAME_RELATED_P (insn
) = 1;
5926 /* Emit code to save registers using MOV insns. First register
5927 is restored from POINTER + OFFSET. */
5929 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5934 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5935 if (ix86_save_reg (regno
, true))
5937 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5939 gen_rtx_REG (Pmode
, regno
));
5940 RTX_FRAME_RELATED_P (insn
) = 1;
5941 offset
+= UNITS_PER_WORD
;
5945 /* Expand prologue or epilogue stack adjustment.
5946 The pattern exist to put a dependency on all ebp-based memory accesses.
5947 STYLE should be negative if instructions should be marked as frame related,
5948 zero if %r11 register is live and cannot be freely used and positive
5952 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5957 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5958 else if (x86_64_immediate_operand (offset
, DImode
))
5959 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5963 /* r11 is used by indirect sibcall return as well, set before the
5964 epilogue and used after the epilogue. ATM indirect sibcall
5965 shouldn't be used together with huge frame sizes in one
5966 function because of the frame_size check in sibcall.c. */
5968 r11
= gen_rtx_REG (DImode
, R11_REG
);
5969 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5971 RTX_FRAME_RELATED_P (insn
) = 1;
5972 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5976 RTX_FRAME_RELATED_P (insn
) = 1;
5979 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5982 ix86_internal_arg_pointer (void)
5984 bool has_force_align_arg_pointer
=
5985 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5986 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5987 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5988 && DECL_NAME (current_function_decl
)
5989 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5990 && DECL_FILE_SCOPE_P (current_function_decl
))
5991 || ix86_force_align_arg_pointer
5992 || has_force_align_arg_pointer
)
5994 /* Nested functions can't realign the stack due to a register
5996 if (DECL_CONTEXT (current_function_decl
)
5997 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5999 if (ix86_force_align_arg_pointer
)
6000 warning (0, "-mstackrealign ignored for nested functions");
6001 if (has_force_align_arg_pointer
)
6002 error ("%s not supported for nested functions",
6003 ix86_force_align_arg_pointer_string
);
6004 return virtual_incoming_args_rtx
;
6006 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
6007 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
6010 return virtual_incoming_args_rtx
;
6013 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6014 This is called from dwarf2out.c to emit call frame instructions
6015 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6017 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
6019 rtx unspec
= SET_SRC (pattern
);
6020 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
6024 case UNSPEC_REG_SAVE
:
6025 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
6026 SET_DEST (pattern
));
6028 case UNSPEC_DEF_CFA
:
6029 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
6030 INTVAL (XVECEXP (unspec
, 0, 0)));
6037 /* Expand the prologue into a bunch of separate insns. */
6040 ix86_expand_prologue (void)
6044 struct ix86_frame frame
;
6045 HOST_WIDE_INT allocate
;
6047 ix86_compute_frame_layout (&frame
);
6049 if (cfun
->machine
->force_align_arg_pointer
)
6053 /* Grab the argument pointer. */
6054 x
= plus_constant (stack_pointer_rtx
, 4);
6055 y
= cfun
->machine
->force_align_arg_pointer
;
6056 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
6057 RTX_FRAME_RELATED_P (insn
) = 1;
6059 /* The unwind info consists of two parts: install the fafp as the cfa,
6060 and record the fafp as the "save register" of the stack pointer.
6061 The later is there in order that the unwinder can see where it
6062 should restore the stack pointer across the and insn. */
6063 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
6064 x
= gen_rtx_SET (VOIDmode
, y
, x
);
6065 RTX_FRAME_RELATED_P (x
) = 1;
6066 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
6068 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
6069 RTX_FRAME_RELATED_P (y
) = 1;
6070 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
6071 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6072 REG_NOTES (insn
) = x
;
6074 /* Align the stack. */
6075 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
6078 /* And here we cheat like madmen with the unwind info. We force the
6079 cfa register back to sp+4, which is exactly what it was at the
6080 start of the function. Re-pushing the return address results in
6081 the return at the same spot relative to the cfa, and thus is
6082 correct wrt the unwind info. */
6083 x
= cfun
->machine
->force_align_arg_pointer
;
6084 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
6085 insn
= emit_insn (gen_push (x
));
6086 RTX_FRAME_RELATED_P (insn
) = 1;
6089 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
6090 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
6091 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6092 REG_NOTES (insn
) = x
;
6095 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6096 slower on all targets. Also sdb doesn't like it. */
6098 if (frame_pointer_needed
)
6100 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
6101 RTX_FRAME_RELATED_P (insn
) = 1;
6103 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
6104 RTX_FRAME_RELATED_P (insn
) = 1;
6107 allocate
= frame
.to_allocate
;
6109 if (!frame
.save_regs_using_mov
)
6110 ix86_emit_save_regs ();
6112 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
6114 /* When using red zone we may start register saving before allocating
6115 the stack frame saving one cycle of the prologue. */
6116 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
6117 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
6118 : stack_pointer_rtx
,
6119 -frame
.nregs
* UNITS_PER_WORD
);
6123 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
6124 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6125 GEN_INT (-allocate
), -1);
6128 /* Only valid for Win32. */
6129 rtx eax
= gen_rtx_REG (Pmode
, 0);
6133 gcc_assert (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
);
6135 if (TARGET_64BIT_MS_ABI
)
6138 eax_live
= ix86_eax_live_at_start_p ();
6142 emit_insn (gen_push (eax
));
6143 allocate
-= UNITS_PER_WORD
;
6146 emit_move_insn (eax
, GEN_INT (allocate
));
6149 insn
= gen_allocate_stack_worker_64 (eax
);
6151 insn
= gen_allocate_stack_worker_32 (eax
);
6152 insn
= emit_insn (insn
);
6153 RTX_FRAME_RELATED_P (insn
) = 1;
6154 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
6155 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
6156 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
6157 t
, REG_NOTES (insn
));
6161 if (frame_pointer_needed
)
6162 t
= plus_constant (hard_frame_pointer_rtx
,
6165 - frame
.nregs
* UNITS_PER_WORD
);
6167 t
= plus_constant (stack_pointer_rtx
, allocate
);
6168 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
6172 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6174 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6175 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6177 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6178 -frame
.nregs
* UNITS_PER_WORD
);
6181 pic_reg_used
= false;
6182 if (pic_offset_table_rtx
6183 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
6184 || current_function_profile
))
6186 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6188 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6189 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
6191 pic_reg_used
= true;
6198 if (ix86_cmodel
== CM_LARGE_PIC
)
6200 rtx tmp_reg
= gen_rtx_REG (DImode
,
6201 FIRST_REX_INT_REG
+ 3 /* R11 */);
6202 rtx label
= gen_label_rtx ();
6204 LABEL_PRESERVE_P (label
) = 1;
6205 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6206 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6207 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6208 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6209 pic_offset_table_rtx
, tmp_reg
));
6212 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6215 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6218 /* Prevent function calls from be scheduled before the call to mcount.
6219 In the pic_reg_used case, make sure that the got load isn't deleted. */
6220 if (current_function_profile
)
6223 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
6224 emit_insn (gen_blockage ());
6228 /* Emit code to restore saved registers using MOV insns. First register
6229 is restored from POINTER + OFFSET. */
6231 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6232 int maybe_eh_return
)
6235 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6237 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6238 if (ix86_save_reg (regno
, maybe_eh_return
))
6240 /* Ensure that adjust_address won't be forced to produce pointer
6241 out of range allowed by x86-64 instruction set. */
6242 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6246 r11
= gen_rtx_REG (DImode
, R11_REG
);
6247 emit_move_insn (r11
, GEN_INT (offset
));
6248 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6249 base_address
= gen_rtx_MEM (Pmode
, r11
);
6252 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6253 adjust_address (base_address
, Pmode
, offset
));
6254 offset
+= UNITS_PER_WORD
;
6258 /* Restore function stack, frame, and registers. */
6261 ix86_expand_epilogue (int style
)
6264 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6265 struct ix86_frame frame
;
6266 HOST_WIDE_INT offset
;
6268 ix86_compute_frame_layout (&frame
);
6270 /* Calculate start of saved registers relative to ebp. Special care
6271 must be taken for the normal return case of a function using
6272 eh_return: the eax and edx registers are marked as saved, but not
6273 restored along this path. */
6274 offset
= frame
.nregs
;
6275 if (current_function_calls_eh_return
&& style
!= 2)
6277 offset
*= -UNITS_PER_WORD
;
6279 /* If we're only restoring one register and sp is not valid then
6280 using a move instruction to restore the register since it's
6281 less work than reloading sp and popping the register.
6283 The default code result in stack adjustment using add/lea instruction,
6284 while this code results in LEAVE instruction (or discrete equivalent),
6285 so it is profitable in some other cases as well. Especially when there
6286 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6287 and there is exactly one register to pop. This heuristic may need some
6288 tuning in future. */
6289 if ((!sp_valid
&& frame
.nregs
<= 1)
6290 || (TARGET_EPILOGUE_USING_MOVE
6291 && cfun
->machine
->use_fast_prologue_epilogue
6292 && (frame
.nregs
> 1 || frame
.to_allocate
))
6293 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6294 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6295 && cfun
->machine
->use_fast_prologue_epilogue
6296 && frame
.nregs
== 1)
6297 || current_function_calls_eh_return
)
6299 /* Restore registers. We can use ebp or esp to address the memory
6300 locations. If both are available, default to ebp, since offsets
6301 are known to be small. Only exception is esp pointing directly to the
6302 end of block of saved registers, where we may simplify addressing
6305 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6306 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6307 frame
.to_allocate
, style
== 2);
6309 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6310 offset
, style
== 2);
6312 /* eh_return epilogues need %ecx added to the stack pointer. */
6315 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6317 if (frame_pointer_needed
)
6319 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6320 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6321 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6323 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6324 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6326 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6331 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6332 tmp
= plus_constant (tmp
, (frame
.to_allocate
6333 + frame
.nregs
* UNITS_PER_WORD
));
6334 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6337 else if (!frame_pointer_needed
)
6338 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6339 GEN_INT (frame
.to_allocate
6340 + frame
.nregs
* UNITS_PER_WORD
),
6342 /* If not an i386, mov & pop is faster than "leave". */
6343 else if (TARGET_USE_LEAVE
|| optimize_size
6344 || !cfun
->machine
->use_fast_prologue_epilogue
)
6345 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6348 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6349 hard_frame_pointer_rtx
,
6352 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6354 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6359 /* First step is to deallocate the stack frame so that we can
6360 pop the registers. */
6363 gcc_assert (frame_pointer_needed
);
6364 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6365 hard_frame_pointer_rtx
,
6366 GEN_INT (offset
), style
);
6368 else if (frame
.to_allocate
)
6369 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6370 GEN_INT (frame
.to_allocate
), style
);
6372 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6373 if (ix86_save_reg (regno
, false))
6376 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6378 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6380 if (frame_pointer_needed
)
6382 /* Leave results in shorter dependency chains on CPUs that are
6383 able to grok it fast. */
6384 if (TARGET_USE_LEAVE
)
6385 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6386 else if (TARGET_64BIT
)
6387 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6389 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6393 if (cfun
->machine
->force_align_arg_pointer
)
6395 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6396 cfun
->machine
->force_align_arg_pointer
,
6400 /* Sibcall epilogues don't want a return instruction. */
6404 if (current_function_pops_args
&& current_function_args_size
)
6406 rtx popc
= GEN_INT (current_function_pops_args
);
6408 /* i386 can only pop 64K bytes. If asked to pop more, pop
6409 return address, do explicit add, and jump indirectly to the
6412 if (current_function_pops_args
>= 65536)
6414 rtx ecx
= gen_rtx_REG (SImode
, 2);
6416 /* There is no "pascal" calling convention in any 64bit ABI. */
6417 gcc_assert (!TARGET_64BIT
);
6419 emit_insn (gen_popsi1 (ecx
));
6420 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6421 emit_jump_insn (gen_return_indirect_internal (ecx
));
6424 emit_jump_insn (gen_return_pop_internal (popc
));
6427 emit_jump_insn (gen_return_internal ());
6430 /* Reset from the function's potential modifications. */
6433 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6434 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6436 if (pic_offset_table_rtx
)
6437 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
6439 /* Mach-O doesn't support labels at the end of objects, so if
6440 it looks like we might want one, insert a NOP. */
6442 rtx insn
= get_last_insn ();
6445 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
6446 insn
= PREV_INSN (insn
);
6450 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
6451 fputs ("\tnop\n", file
);
6457 /* Extract the parts of an RTL expression that is a valid memory address
6458 for an instruction. Return 0 if the structure of the address is
6459 grossly off. Return -1 if the address contains ASHIFT, so it is not
6460 strictly valid, but still used for computing length of lea instruction. */
6463 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6465 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6466 rtx base_reg
, index_reg
;
6467 HOST_WIDE_INT scale
= 1;
6468 rtx scale_rtx
= NULL_RTX
;
6470 enum ix86_address_seg seg
= SEG_DEFAULT
;
6472 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6474 else if (GET_CODE (addr
) == PLUS
)
6484 addends
[n
++] = XEXP (op
, 1);
6487 while (GET_CODE (op
) == PLUS
);
6492 for (i
= n
; i
>= 0; --i
)
6495 switch (GET_CODE (op
))
6500 index
= XEXP (op
, 0);
6501 scale_rtx
= XEXP (op
, 1);
6505 if (XINT (op
, 1) == UNSPEC_TP
6506 && TARGET_TLS_DIRECT_SEG_REFS
6507 && seg
== SEG_DEFAULT
)
6508 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6537 else if (GET_CODE (addr
) == MULT
)
6539 index
= XEXP (addr
, 0); /* index*scale */
6540 scale_rtx
= XEXP (addr
, 1);
6542 else if (GET_CODE (addr
) == ASHIFT
)
6546 /* We're called for lea too, which implements ashift on occasion. */
6547 index
= XEXP (addr
, 0);
6548 tmp
= XEXP (addr
, 1);
6549 if (!CONST_INT_P (tmp
))
6551 scale
= INTVAL (tmp
);
6552 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6558 disp
= addr
; /* displacement */
6560 /* Extract the integral value of scale. */
6563 if (!CONST_INT_P (scale_rtx
))
6565 scale
= INTVAL (scale_rtx
);
6568 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6569 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6571 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6572 if (base_reg
&& index_reg
&& scale
== 1
6573 && (index_reg
== arg_pointer_rtx
6574 || index_reg
== frame_pointer_rtx
6575 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6578 tmp
= base
, base
= index
, index
= tmp
;
6579 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6582 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6583 if ((base_reg
== hard_frame_pointer_rtx
6584 || base_reg
== frame_pointer_rtx
6585 || base_reg
== arg_pointer_rtx
) && !disp
)
6588 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6589 Avoid this by transforming to [%esi+0]. */
6590 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6591 && base_reg
&& !index_reg
&& !disp
6593 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6596 /* Special case: encode reg+reg instead of reg*2. */
6597 if (!base
&& index
&& scale
&& scale
== 2)
6598 base
= index
, base_reg
= index_reg
, scale
= 1;
6600 /* Special case: scaling cannot be encoded without base or displacement. */
6601 if (!base
&& !disp
&& index
&& scale
!= 1)
6613 /* Return cost of the memory address x.
6614 For i386, it is better to use a complex address than let gcc copy
6615 the address into a reg and make a new pseudo. But not if the address
6616 requires to two regs - that would mean more pseudos with longer
6619 ix86_address_cost (rtx x
)
6621 struct ix86_address parts
;
6623 int ok
= ix86_decompose_address (x
, &parts
);
6627 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6628 parts
.base
= SUBREG_REG (parts
.base
);
6629 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6630 parts
.index
= SUBREG_REG (parts
.index
);
6632 /* Attempt to minimize number of registers in the address. */
6634 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6636 && (!REG_P (parts
.index
)
6637 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6641 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6643 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6644 && parts
.base
!= parts
.index
)
6647 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6648 since it's predecode logic can't detect the length of instructions
6649 and it degenerates to vector decoded. Increase cost of such
6650 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6651 to split such addresses or even refuse such addresses at all.
6653 Following addressing modes are affected:
6658 The first and last case may be avoidable by explicitly coding the zero in
6659 memory address, but I don't have AMD-K6 machine handy to check this
6663 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6664 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6665 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6671 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6672 this is used for to form addresses to local data when -fPIC is in
6676 darwin_local_data_pic (rtx disp
)
6678 if (GET_CODE (disp
) == MINUS
)
6680 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6681 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6682 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6684 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6685 if (! strcmp (sym_name
, "<pic base>"))
6693 /* Determine if a given RTX is a valid constant. We already know this
6694 satisfies CONSTANT_P. */
6697 legitimate_constant_p (rtx x
)
6699 switch (GET_CODE (x
))
6704 if (GET_CODE (x
) == PLUS
)
6706 if (!CONST_INT_P (XEXP (x
, 1)))
6711 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6714 /* Only some unspecs are valid as "constants". */
6715 if (GET_CODE (x
) == UNSPEC
)
6716 switch (XINT (x
, 1))
6721 return TARGET_64BIT
;
6724 x
= XVECEXP (x
, 0, 0);
6725 return (GET_CODE (x
) == SYMBOL_REF
6726 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6728 x
= XVECEXP (x
, 0, 0);
6729 return (GET_CODE (x
) == SYMBOL_REF
6730 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6735 /* We must have drilled down to a symbol. */
6736 if (GET_CODE (x
) == LABEL_REF
)
6738 if (GET_CODE (x
) != SYMBOL_REF
)
6743 /* TLS symbols are never valid. */
6744 if (SYMBOL_REF_TLS_MODEL (x
))
6747 /* DLLIMPORT symbols are never valid. */
6748 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6749 && SYMBOL_REF_DLLIMPORT_P (x
))
6754 if (GET_MODE (x
) == TImode
6755 && x
!= CONST0_RTX (TImode
)
6761 if (x
== CONST0_RTX (GET_MODE (x
)))
6769 /* Otherwise we handle everything else in the move patterns. */
6773 /* Determine if it's legal to put X into the constant pool. This
6774 is not possible for the address of thread-local symbols, which
6775 is checked above. */
6778 ix86_cannot_force_const_mem (rtx x
)
6780 /* We can always put integral constants and vectors in memory. */
6781 switch (GET_CODE (x
))
6791 return !legitimate_constant_p (x
);
6794 /* Determine if a given RTX is a valid constant address. */
6797 constant_address_p (rtx x
)
6799 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6802 /* Nonzero if the constant value X is a legitimate general operand
6803 when generating PIC code. It is given that flag_pic is on and
6804 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6807 legitimate_pic_operand_p (rtx x
)
6811 switch (GET_CODE (x
))
6814 inner
= XEXP (x
, 0);
6815 if (GET_CODE (inner
) == PLUS
6816 && CONST_INT_P (XEXP (inner
, 1)))
6817 inner
= XEXP (inner
, 0);
6819 /* Only some unspecs are valid as "constants". */
6820 if (GET_CODE (inner
) == UNSPEC
)
6821 switch (XINT (inner
, 1))
6826 return TARGET_64BIT
;
6828 x
= XVECEXP (inner
, 0, 0);
6829 return (GET_CODE (x
) == SYMBOL_REF
6830 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6838 return legitimate_pic_address_disp_p (x
);
6845 /* Determine if a given CONST RTX is a valid memory displacement
6849 legitimate_pic_address_disp_p (rtx disp
)
6853 /* In 64bit mode we can allow direct addresses of symbols and labels
6854 when they are not dynamic symbols. */
6857 rtx op0
= disp
, op1
;
6859 switch (GET_CODE (disp
))
6865 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6867 op0
= XEXP (XEXP (disp
, 0), 0);
6868 op1
= XEXP (XEXP (disp
, 0), 1);
6869 if (!CONST_INT_P (op1
)
6870 || INTVAL (op1
) >= 16*1024*1024
6871 || INTVAL (op1
) < -16*1024*1024)
6873 if (GET_CODE (op0
) == LABEL_REF
)
6875 if (GET_CODE (op0
) != SYMBOL_REF
)
6880 /* TLS references should always be enclosed in UNSPEC. */
6881 if (SYMBOL_REF_TLS_MODEL (op0
))
6883 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6884 && ix86_cmodel
!= CM_LARGE_PIC
)
6892 if (GET_CODE (disp
) != CONST
)
6894 disp
= XEXP (disp
, 0);
6898 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6899 of GOT tables. We should not need these anyway. */
6900 if (GET_CODE (disp
) != UNSPEC
6901 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6902 && XINT (disp
, 1) != UNSPEC_GOTOFF
6903 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6906 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6907 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6913 if (GET_CODE (disp
) == PLUS
)
6915 if (!CONST_INT_P (XEXP (disp
, 1)))
6917 disp
= XEXP (disp
, 0);
6921 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6924 if (GET_CODE (disp
) != UNSPEC
)
6927 switch (XINT (disp
, 1))
6932 /* We need to check for both symbols and labels because VxWorks loads
6933 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6935 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6936 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6938 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6939 While ABI specify also 32bit relocation but we don't produce it in
6940 small PIC model at all. */
6941 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6942 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6944 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6946 case UNSPEC_GOTTPOFF
:
6947 case UNSPEC_GOTNTPOFF
:
6948 case UNSPEC_INDNTPOFF
:
6951 disp
= XVECEXP (disp
, 0, 0);
6952 return (GET_CODE (disp
) == SYMBOL_REF
6953 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6955 disp
= XVECEXP (disp
, 0, 0);
6956 return (GET_CODE (disp
) == SYMBOL_REF
6957 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6959 disp
= XVECEXP (disp
, 0, 0);
6960 return (GET_CODE (disp
) == SYMBOL_REF
6961 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6967 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6968 memory address for an instruction. The MODE argument is the machine mode
6969 for the MEM expression that wants to use this address.
6971 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6972 convert common non-canonical forms to canonical form so that they will
6976 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
6977 rtx addr
, int strict
)
6979 struct ix86_address parts
;
6980 rtx base
, index
, disp
;
6981 HOST_WIDE_INT scale
;
6982 const char *reason
= NULL
;
6983 rtx reason_rtx
= NULL_RTX
;
6985 if (ix86_decompose_address (addr
, &parts
) <= 0)
6987 reason
= "decomposition failed";
6992 index
= parts
.index
;
6994 scale
= parts
.scale
;
6996 /* Validate base register.
6998 Don't allow SUBREG's that span more than a word here. It can lead to spill
6999 failures when the base is one word out of a two word structure, which is
7000 represented internally as a DImode int. */
7009 else if (GET_CODE (base
) == SUBREG
7010 && REG_P (SUBREG_REG (base
))
7011 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
7013 reg
= SUBREG_REG (base
);
7016 reason
= "base is not a register";
7020 if (GET_MODE (base
) != Pmode
)
7022 reason
= "base is not in Pmode";
7026 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
7027 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
7029 reason
= "base is not valid";
7034 /* Validate index register.
7036 Don't allow SUBREG's that span more than a word here -- same as above. */
7045 else if (GET_CODE (index
) == SUBREG
7046 && REG_P (SUBREG_REG (index
))
7047 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
7049 reg
= SUBREG_REG (index
);
7052 reason
= "index is not a register";
7056 if (GET_MODE (index
) != Pmode
)
7058 reason
= "index is not in Pmode";
7062 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
7063 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
7065 reason
= "index is not valid";
7070 /* Validate scale factor. */
7073 reason_rtx
= GEN_INT (scale
);
7076 reason
= "scale without index";
7080 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
7082 reason
= "scale is not a valid multiplier";
7087 /* Validate displacement. */
7092 if (GET_CODE (disp
) == CONST
7093 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
7094 switch (XINT (XEXP (disp
, 0), 1))
7096 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7097 used. While ABI specify also 32bit relocations, we don't produce
7098 them at all and use IP relative instead. */
7101 gcc_assert (flag_pic
);
7103 goto is_legitimate_pic
;
7104 reason
= "64bit address unspec";
7107 case UNSPEC_GOTPCREL
:
7108 gcc_assert (flag_pic
);
7109 goto is_legitimate_pic
;
7111 case UNSPEC_GOTTPOFF
:
7112 case UNSPEC_GOTNTPOFF
:
7113 case UNSPEC_INDNTPOFF
:
7119 reason
= "invalid address unspec";
7123 else if (SYMBOLIC_CONST (disp
)
7127 && MACHOPIC_INDIRECT
7128 && !machopic_operand_p (disp
)
7134 if (TARGET_64BIT
&& (index
|| base
))
7136 /* foo@dtpoff(%rX) is ok. */
7137 if (GET_CODE (disp
) != CONST
7138 || GET_CODE (XEXP (disp
, 0)) != PLUS
7139 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7140 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7141 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7142 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7144 reason
= "non-constant pic memory reference";
7148 else if (! legitimate_pic_address_disp_p (disp
))
7150 reason
= "displacement is an invalid pic construct";
7154 /* This code used to verify that a symbolic pic displacement
7155 includes the pic_offset_table_rtx register.
7157 While this is good idea, unfortunately these constructs may
7158 be created by "adds using lea" optimization for incorrect
7167 This code is nonsensical, but results in addressing
7168 GOT table with pic_offset_table_rtx base. We can't
7169 just refuse it easily, since it gets matched by
7170 "addsi3" pattern, that later gets split to lea in the
7171 case output register differs from input. While this
7172 can be handled by separate addsi pattern for this case
7173 that never results in lea, this seems to be easier and
7174 correct fix for crash to disable this test. */
7176 else if (GET_CODE (disp
) != LABEL_REF
7177 && !CONST_INT_P (disp
)
7178 && (GET_CODE (disp
) != CONST
7179 || !legitimate_constant_p (disp
))
7180 && (GET_CODE (disp
) != SYMBOL_REF
7181 || !legitimate_constant_p (disp
)))
7183 reason
= "displacement is not constant";
7186 else if (TARGET_64BIT
7187 && !x86_64_immediate_operand (disp
, VOIDmode
))
7189 reason
= "displacement is out of range";
7194 /* Everything looks valid. */
7201 /* Return a unique alias set for the GOT. */
7203 static alias_set_type
7204 ix86_GOT_alias_set (void)
7206 static alias_set_type set
= -1;
7208 set
= new_alias_set ();
7212 /* Return a legitimate reference for ORIG (an address) using the
7213 register REG. If REG is 0, a new pseudo is generated.
7215 There are two types of references that must be handled:
7217 1. Global data references must load the address from the GOT, via
7218 the PIC reg. An insn is emitted to do this load, and the reg is
7221 2. Static data references, constant pool addresses, and code labels
7222 compute the address as an offset from the GOT, whose base is in
7223 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7224 differentiate them from global data objects. The returned
7225 address is the PIC reg + an unspec constant.
7227 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7228 reg also appears in the address. */
7231 legitimize_pic_address (rtx orig
, rtx reg
)
7238 if (TARGET_MACHO
&& !TARGET_64BIT
)
7241 reg
= gen_reg_rtx (Pmode
);
7242 /* Use the generic Mach-O PIC machinery. */
7243 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7247 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7249 else if (TARGET_64BIT
7250 && ix86_cmodel
!= CM_SMALL_PIC
7251 && gotoff_operand (addr
, Pmode
))
7254 /* This symbol may be referenced via a displacement from the PIC
7255 base address (@GOTOFF). */
7257 if (reload_in_progress
)
7258 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7259 if (GET_CODE (addr
) == CONST
)
7260 addr
= XEXP (addr
, 0);
7261 if (GET_CODE (addr
) == PLUS
)
7263 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7265 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7268 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7269 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7271 tmpreg
= gen_reg_rtx (Pmode
);
7274 emit_move_insn (tmpreg
, new_rtx
);
7278 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7279 tmpreg
, 1, OPTAB_DIRECT
);
7282 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7284 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7286 /* This symbol may be referenced via a displacement from the PIC
7287 base address (@GOTOFF). */
7289 if (reload_in_progress
)
7290 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7291 if (GET_CODE (addr
) == CONST
)
7292 addr
= XEXP (addr
, 0);
7293 if (GET_CODE (addr
) == PLUS
)
7295 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7297 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7300 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7301 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7302 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7306 emit_move_insn (reg
, new_rtx
);
7310 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7311 /* We can't use @GOTOFF for text labels on VxWorks;
7312 see gotoff_operand. */
7313 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7315 /* Given that we've already handled dllimport variables separately
7316 in legitimize_address, and all other variables should satisfy
7317 legitimate_pic_address_disp_p, we should never arrive here. */
7318 gcc_assert (!TARGET_64BIT_MS_ABI
);
7320 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7322 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7323 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7324 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7325 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7328 reg
= gen_reg_rtx (Pmode
);
7329 /* Use directly gen_movsi, otherwise the address is loaded
7330 into register for CSE. We don't want to CSE this addresses,
7331 instead we CSE addresses from the GOT table, so skip this. */
7332 emit_insn (gen_movsi (reg
, new_rtx
));
7337 /* This symbol must be referenced via a load from the
7338 Global Offset Table (@GOT). */
7340 if (reload_in_progress
)
7341 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7342 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7343 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7345 new_rtx
= force_reg (Pmode
, new_rtx
);
7346 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7347 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7348 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7351 reg
= gen_reg_rtx (Pmode
);
7352 emit_move_insn (reg
, new_rtx
);
7358 if (CONST_INT_P (addr
)
7359 && !x86_64_immediate_operand (addr
, VOIDmode
))
7363 emit_move_insn (reg
, addr
);
7367 new_rtx
= force_reg (Pmode
, addr
);
7369 else if (GET_CODE (addr
) == CONST
)
7371 addr
= XEXP (addr
, 0);
7373 /* We must match stuff we generate before. Assume the only
7374 unspecs that can get here are ours. Not that we could do
7375 anything with them anyway.... */
7376 if (GET_CODE (addr
) == UNSPEC
7377 || (GET_CODE (addr
) == PLUS
7378 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7380 gcc_assert (GET_CODE (addr
) == PLUS
);
7382 if (GET_CODE (addr
) == PLUS
)
7384 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7386 /* Check first to see if this is a constant offset from a @GOTOFF
7387 symbol reference. */
7388 if (gotoff_operand (op0
, Pmode
)
7389 && CONST_INT_P (op1
))
7393 if (reload_in_progress
)
7394 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7395 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7397 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
7398 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7399 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7403 emit_move_insn (reg
, new_rtx
);
7409 if (INTVAL (op1
) < -16*1024*1024
7410 || INTVAL (op1
) >= 16*1024*1024)
7412 if (!x86_64_immediate_operand (op1
, Pmode
))
7413 op1
= force_reg (Pmode
, op1
);
7414 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7420 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7421 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
7422 base
== reg
? NULL_RTX
: reg
);
7424 if (CONST_INT_P (new_rtx
))
7425 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
7428 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
7430 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
7431 new_rtx
= XEXP (new_rtx
, 1);
7433 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
7441 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7444 get_thread_pointer (int to_reg
)
7448 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7452 reg
= gen_reg_rtx (Pmode
);
7453 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7454 insn
= emit_insn (insn
);
7459 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7460 false if we expect this to be used for a memory address and true if
7461 we expect to load the address into a register. */
7464 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7466 rtx dest
, base
, off
, pic
, tp
;
7471 case TLS_MODEL_GLOBAL_DYNAMIC
:
7472 dest
= gen_reg_rtx (Pmode
);
7473 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7475 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7477 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7480 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7481 insns
= get_insns ();
7484 CONST_OR_PURE_CALL_P (insns
) = 1;
7485 emit_libcall_block (insns
, dest
, rax
, x
);
7487 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7488 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7490 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7492 if (TARGET_GNU2_TLS
)
7494 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7496 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7500 case TLS_MODEL_LOCAL_DYNAMIC
:
7501 base
= gen_reg_rtx (Pmode
);
7502 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7504 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7506 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7509 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7510 insns
= get_insns ();
7513 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7514 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7515 CONST_OR_PURE_CALL_P (insns
) = 1;
7516 emit_libcall_block (insns
, base
, rax
, note
);
7518 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7519 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7521 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7523 if (TARGET_GNU2_TLS
)
7525 rtx x
= ix86_tls_module_base ();
7527 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7528 gen_rtx_MINUS (Pmode
, x
, tp
));
7531 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7532 off
= gen_rtx_CONST (Pmode
, off
);
7534 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7536 if (TARGET_GNU2_TLS
)
7538 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7540 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7545 case TLS_MODEL_INITIAL_EXEC
:
7549 type
= UNSPEC_GOTNTPOFF
;
7553 if (reload_in_progress
)
7554 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7555 pic
= pic_offset_table_rtx
;
7556 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7558 else if (!TARGET_ANY_GNU_TLS
)
7560 pic
= gen_reg_rtx (Pmode
);
7561 emit_insn (gen_set_got (pic
));
7562 type
= UNSPEC_GOTTPOFF
;
7567 type
= UNSPEC_INDNTPOFF
;
7570 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7571 off
= gen_rtx_CONST (Pmode
, off
);
7573 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7574 off
= gen_const_mem (Pmode
, off
);
7575 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7577 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7579 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7580 off
= force_reg (Pmode
, off
);
7581 return gen_rtx_PLUS (Pmode
, base
, off
);
7585 base
= get_thread_pointer (true);
7586 dest
= gen_reg_rtx (Pmode
);
7587 emit_insn (gen_subsi3 (dest
, base
, off
));
7591 case TLS_MODEL_LOCAL_EXEC
:
7592 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7593 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7594 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7595 off
= gen_rtx_CONST (Pmode
, off
);
7597 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7599 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7600 return gen_rtx_PLUS (Pmode
, base
, off
);
7604 base
= get_thread_pointer (true);
7605 dest
= gen_reg_rtx (Pmode
);
7606 emit_insn (gen_subsi3 (dest
, base
, off
));
7617 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7620 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7621 htab_t dllimport_map
;
7624 get_dllimport_decl (tree decl
)
7626 struct tree_map
*h
, in
;
7630 size_t namelen
, prefixlen
;
7636 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7638 in
.hash
= htab_hash_pointer (decl
);
7639 in
.base
.from
= decl
;
7640 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7641 h
= (struct tree_map
*) *loc
;
7645 *loc
= h
= GGC_NEW (struct tree_map
);
7647 h
->base
.from
= decl
;
7648 h
->to
= to
= build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7649 DECL_ARTIFICIAL (to
) = 1;
7650 DECL_IGNORED_P (to
) = 1;
7651 DECL_EXTERNAL (to
) = 1;
7652 TREE_READONLY (to
) = 1;
7654 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7655 name
= targetm
.strip_name_encoding (name
);
7656 if (name
[0] == FASTCALL_PREFIX
)
7662 prefix
= "*__imp__";
7664 namelen
= strlen (name
);
7665 prefixlen
= strlen (prefix
);
7666 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
7667 memcpy (imp_name
, prefix
, prefixlen
);
7668 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
7670 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
7671 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
7672 SET_SYMBOL_REF_DECL (rtl
, to
);
7673 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7675 rtl
= gen_const_mem (Pmode
, rtl
);
7676 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7678 SET_DECL_RTL (to
, rtl
);
7683 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7684 true if we require the result be a register. */
7687 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
7692 gcc_assert (SYMBOL_REF_DECL (symbol
));
7693 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
7695 x
= DECL_RTL (imp_decl
);
7697 x
= force_reg (Pmode
, x
);
7701 /* Try machine-dependent ways of modifying an illegitimate address
7702 to be legitimate. If we find one, return the new, valid address.
7703 This macro is used in only one place: `memory_address' in explow.c.
7705 OLDX is the address as it was before break_out_memory_refs was called.
7706 In some cases it is useful to look at this to decide what needs to be done.
7708 MODE and WIN are passed so that this macro can use
7709 GO_IF_LEGITIMATE_ADDRESS.
7711 It is always safe for this macro to do nothing. It exists to recognize
7712 opportunities to optimize the output.
7714 For the 80386, we handle X+REG by loading X into a register R and
7715 using R+REG. R will go in a general reg and indexing will be used.
7716 However, if REG is a broken-out memory address or multiplication,
7717 nothing needs to be done because REG can certainly go in a general reg.
7719 When -fpic is used, special handling is needed for symbolic references.
7720 See comments by legitimize_pic_address in i386.c for details. */
7723 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7728 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7730 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
7731 if (GET_CODE (x
) == CONST
7732 && GET_CODE (XEXP (x
, 0)) == PLUS
7733 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7734 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7736 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
7737 (enum tls_model
) log
, false);
7738 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7741 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7743 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
7744 return legitimize_dllimport_symbol (x
, true);
7745 if (GET_CODE (x
) == CONST
7746 && GET_CODE (XEXP (x
, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7748 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7750 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7751 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7755 if (flag_pic
&& SYMBOLIC_CONST (x
))
7756 return legitimize_pic_address (x
, 0);
7758 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7759 if (GET_CODE (x
) == ASHIFT
7760 && CONST_INT_P (XEXP (x
, 1))
7761 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7764 log
= INTVAL (XEXP (x
, 1));
7765 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7766 GEN_INT (1 << log
));
7769 if (GET_CODE (x
) == PLUS
)
7771 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7773 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7774 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7775 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7778 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7779 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7780 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7781 GEN_INT (1 << log
));
7784 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7785 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7786 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7789 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7790 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7791 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7792 GEN_INT (1 << log
));
7795 /* Put multiply first if it isn't already. */
7796 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7798 rtx tmp
= XEXP (x
, 0);
7799 XEXP (x
, 0) = XEXP (x
, 1);
7804 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7805 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7806 created by virtual register instantiation, register elimination, and
7807 similar optimizations. */
7808 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7811 x
= gen_rtx_PLUS (Pmode
,
7812 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7813 XEXP (XEXP (x
, 1), 0)),
7814 XEXP (XEXP (x
, 1), 1));
7818 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7819 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7820 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7821 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7822 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7823 && CONSTANT_P (XEXP (x
, 1)))
7826 rtx other
= NULL_RTX
;
7828 if (CONST_INT_P (XEXP (x
, 1)))
7830 constant
= XEXP (x
, 1);
7831 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7833 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7835 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7836 other
= XEXP (x
, 1);
7844 x
= gen_rtx_PLUS (Pmode
,
7845 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7846 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7847 plus_constant (other
, INTVAL (constant
)));
7851 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7854 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7857 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7860 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7863 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7867 && REG_P (XEXP (x
, 1))
7868 && REG_P (XEXP (x
, 0)))
7871 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7874 x
= legitimize_pic_address (x
, 0);
7877 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7880 if (REG_P (XEXP (x
, 0)))
7882 rtx temp
= gen_reg_rtx (Pmode
);
7883 rtx val
= force_operand (XEXP (x
, 1), temp
);
7885 emit_move_insn (temp
, val
);
7891 else if (REG_P (XEXP (x
, 1)))
7893 rtx temp
= gen_reg_rtx (Pmode
);
7894 rtx val
= force_operand (XEXP (x
, 0), temp
);
7896 emit_move_insn (temp
, val
);
7906 /* Print an integer constant expression in assembler syntax. Addition
7907 and subtraction are the only arithmetic that may appear in these
7908 expressions. FILE is the stdio stream to write to, X is the rtx, and
7909 CODE is the operand print code from the output string. */
7912 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7916 switch (GET_CODE (x
))
7919 gcc_assert (flag_pic
);
7924 if (! TARGET_MACHO
|| TARGET_64BIT
)
7925 output_addr_const (file
, x
);
7928 const char *name
= XSTR (x
, 0);
7930 /* Mark the decl as referenced so that cgraph will
7931 output the function. */
7932 if (SYMBOL_REF_DECL (x
))
7933 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7936 if (MACHOPIC_INDIRECT
7937 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7938 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7940 assemble_name (file
, name
);
7942 if (!TARGET_MACHO
&& !TARGET_64BIT_MS_ABI
7943 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7944 fputs ("@PLT", file
);
7951 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7952 assemble_name (asm_out_file
, buf
);
7956 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7960 /* This used to output parentheses around the expression,
7961 but that does not work on the 386 (either ATT or BSD assembler). */
7962 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7966 if (GET_MODE (x
) == VOIDmode
)
7968 /* We can use %d if the number is <32 bits and positive. */
7969 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7970 fprintf (file
, "0x%lx%08lx",
7971 (unsigned long) CONST_DOUBLE_HIGH (x
),
7972 (unsigned long) CONST_DOUBLE_LOW (x
));
7974 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7977 /* We can't handle floating point constants;
7978 PRINT_OPERAND must handle them. */
7979 output_operand_lossage ("floating constant misused");
7983 /* Some assemblers need integer constants to appear first. */
7984 if (CONST_INT_P (XEXP (x
, 0)))
7986 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7988 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7992 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7993 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7995 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8001 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
8002 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8004 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8006 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
8010 gcc_assert (XVECLEN (x
, 0) == 1);
8011 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
8012 switch (XINT (x
, 1))
8015 fputs ("@GOT", file
);
8018 fputs ("@GOTOFF", file
);
8021 fputs ("@PLTOFF", file
);
8023 case UNSPEC_GOTPCREL
:
8024 fputs ("@GOTPCREL(%rip)", file
);
8026 case UNSPEC_GOTTPOFF
:
8027 /* FIXME: This might be @TPOFF in Sun ld too. */
8028 fputs ("@GOTTPOFF", file
);
8031 fputs ("@TPOFF", file
);
8035 fputs ("@TPOFF", file
);
8037 fputs ("@NTPOFF", file
);
8040 fputs ("@DTPOFF", file
);
8042 case UNSPEC_GOTNTPOFF
:
8044 fputs ("@GOTTPOFF(%rip)", file
);
8046 fputs ("@GOTNTPOFF", file
);
8048 case UNSPEC_INDNTPOFF
:
8049 fputs ("@INDNTPOFF", file
);
8052 output_operand_lossage ("invalid UNSPEC as operand");
8058 output_operand_lossage ("invalid expression as operand");
8062 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8063 We need to emit DTP-relative relocations. */
8065 static void ATTRIBUTE_UNUSED
8066 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8068 fputs (ASM_LONG
, file
);
8069 output_addr_const (file
, x
);
8070 fputs ("@DTPOFF", file
);
8076 fputs (", 0", file
);
8083 /* In the name of slightly smaller debug output, and to cater to
8084 general assembler lossage, recognize PIC+GOTOFF and turn it back
8085 into a direct symbol reference.
8087 On Darwin, this is necessary to avoid a crash, because Darwin
8088 has a different PIC label for each routine but the DWARF debugging
8089 information is not associated with any particular routine, so it's
8090 necessary to remove references to the PIC label from RTL stored by
8091 the DWARF output code. */
8094 ix86_delegitimize_address (rtx orig_x
)
8097 /* reg_addend is NULL or a multiple of some register. */
8098 rtx reg_addend
= NULL_RTX
;
8099 /* const_addend is NULL or a const_int. */
8100 rtx const_addend
= NULL_RTX
;
8101 /* This is the result, or NULL. */
8102 rtx result
= NULL_RTX
;
8109 if (GET_CODE (x
) != CONST
8110 || GET_CODE (XEXP (x
, 0)) != UNSPEC
8111 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
8114 return XVECEXP (XEXP (x
, 0), 0, 0);
8117 if (GET_CODE (x
) != PLUS
8118 || GET_CODE (XEXP (x
, 1)) != CONST
)
8121 if (REG_P (XEXP (x
, 0))
8122 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8123 /* %ebx + GOT/GOTOFF */
8125 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
8127 /* %ebx + %reg * scale + GOT/GOTOFF */
8128 reg_addend
= XEXP (x
, 0);
8129 if (REG_P (XEXP (reg_addend
, 0))
8130 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8131 reg_addend
= XEXP (reg_addend
, 1);
8132 else if (REG_P (XEXP (reg_addend
, 1))
8133 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
8134 reg_addend
= XEXP (reg_addend
, 0);
8137 if (!REG_P (reg_addend
)
8138 && GET_CODE (reg_addend
) != MULT
8139 && GET_CODE (reg_addend
) != ASHIFT
)
8145 x
= XEXP (XEXP (x
, 1), 0);
8146 if (GET_CODE (x
) == PLUS
8147 && CONST_INT_P (XEXP (x
, 1)))
8149 const_addend
= XEXP (x
, 1);
8153 if (GET_CODE (x
) == UNSPEC
8154 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
8155 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
8156 result
= XVECEXP (x
, 0, 0);
8158 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
8160 result
= XEXP (x
, 0);
8166 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
8168 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
8172 /* If X is a machine specific address (i.e. a symbol or label being
8173 referenced as a displacement from the GOT implemented using an
8174 UNSPEC), then return the base term. Otherwise return X. */
8177 ix86_find_base_term (rtx x
)
8183 if (GET_CODE (x
) != CONST
)
8186 if (GET_CODE (term
) == PLUS
8187 && (CONST_INT_P (XEXP (term
, 1))
8188 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8189 term
= XEXP (term
, 0);
8190 if (GET_CODE (term
) != UNSPEC
8191 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8194 term
= XVECEXP (term
, 0, 0);
8196 if (GET_CODE (term
) != SYMBOL_REF
8197 && GET_CODE (term
) != LABEL_REF
)
8203 term
= ix86_delegitimize_address (x
);
8205 if (GET_CODE (term
) != SYMBOL_REF
8206 && GET_CODE (term
) != LABEL_REF
)
8213 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8218 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
8220 enum rtx_code second_code
, bypass_code
;
8221 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8222 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
8223 code
= ix86_fp_compare_code_to_integer (code
);
8227 code
= reverse_condition (code
);
8278 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
8282 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8283 Those same assemblers have the same but opposite lossage on cmov. */
8285 suffix
= fp
? "nbe" : "a";
8286 else if (mode
== CCCmode
)
8309 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8331 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8332 suffix
= fp
? "nb" : "ae";
8335 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8342 else if (mode
== CCCmode
)
8343 suffix
= fp
? "nb" : "ae";
8348 suffix
= fp
? "u" : "p";
8351 suffix
= fp
? "nu" : "np";
8356 fputs (suffix
, file
);
8359 /* Print the name of register X to FILE based on its machine mode and number.
8360 If CODE is 'w', pretend the mode is HImode.
8361 If CODE is 'b', pretend the mode is QImode.
8362 If CODE is 'k', pretend the mode is SImode.
8363 If CODE is 'q', pretend the mode is DImode.
8364 If CODE is 'h', pretend the reg is the 'high' byte register.
8365 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8368 print_reg (rtx x
, int code
, FILE *file
)
8370 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8371 && REGNO (x
) != FRAME_POINTER_REGNUM
8372 && REGNO (x
) != FLAGS_REG
8373 && REGNO (x
) != FPSR_REG
8374 && REGNO (x
) != FPCR_REG
);
8376 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8379 if (code
== 'w' || MMX_REG_P (x
))
8381 else if (code
== 'b')
8383 else if (code
== 'k')
8385 else if (code
== 'q')
8387 else if (code
== 'y')
8389 else if (code
== 'h')
8392 code
= GET_MODE_SIZE (GET_MODE (x
));
8394 /* Irritatingly, AMD extended registers use different naming convention
8395 from the normal registers. */
8396 if (REX_INT_REG_P (x
))
8398 gcc_assert (TARGET_64BIT
);
8402 error ("extended registers have no high halves");
8405 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8408 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8411 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8414 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8417 error ("unsupported operand size for extended register");
8425 if (STACK_TOP_P (x
))
8427 fputs ("st(0)", file
);
8434 if (! ANY_FP_REG_P (x
))
8435 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8440 fputs (hi_reg_name
[REGNO (x
)], file
);
8443 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8445 fputs (qi_reg_name
[REGNO (x
)], file
);
8448 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8450 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8457 /* Locate some local-dynamic symbol still in use by this function
8458 so that we can print its name in some tls_local_dynamic_base
8462 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8466 if (GET_CODE (x
) == SYMBOL_REF
8467 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8469 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8477 get_some_local_dynamic_name (void)
8481 if (cfun
->machine
->some_ld_name
)
8482 return cfun
->machine
->some_ld_name
;
8484 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8486 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8487 return cfun
->machine
->some_ld_name
;
8493 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8494 C -- print opcode suffix for set/cmov insn.
8495 c -- like C, but print reversed condition
8496 F,f -- likewise, but for floating-point.
8497 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8499 R -- print the prefix for register names.
8500 z -- print the opcode suffix for the size of the current operand.
8501 * -- print a star (in certain assembler syntax)
8502 A -- print an absolute memory reference.
8503 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8504 s -- print a shift double count, followed by the assemblers argument
8506 b -- print the QImode name of the register for the indicated operand.
8507 %b0 would print %al if operands[0] is reg 0.
8508 w -- likewise, print the HImode name of the register.
8509 k -- likewise, print the SImode name of the register.
8510 q -- likewise, print the DImode name of the register.
8511 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8512 y -- print "st(0)" instead of "st" as a register.
8513 D -- print condition for SSE cmp instruction.
8514 P -- if PIC, print an @PLT suffix.
8515 X -- don't print any sort of PIC '@' suffix for a symbol.
8516 & -- print some in-use local-dynamic symbol name.
8517 H -- print a memory address offset by 8; used for sse high-parts
8518 + -- print a branch hint as 'cs' or 'ds' prefix
8519 ; -- print a semicolon (after prefixes due to bug in older gas).
8523 print_operand (FILE *file
, rtx x
, int code
)
8530 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8535 assemble_name (file
, get_some_local_dynamic_name ());
8539 switch (ASSEMBLER_DIALECT
)
8546 /* Intel syntax. For absolute addresses, registers should not
8547 be surrounded by braces. */
8551 PRINT_OPERAND (file
, x
, 0);
8561 PRINT_OPERAND (file
, x
, 0);
8566 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8571 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8576 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8581 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8586 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8591 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8596 /* 387 opcodes don't get size suffixes if the operands are
8598 if (STACK_REG_P (x
))
8601 /* Likewise if using Intel opcodes. */
8602 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8605 /* This is the size of op from size of operand. */
8606 switch (GET_MODE_SIZE (GET_MODE (x
)))
8615 #ifdef HAVE_GAS_FILDS_FISTS
8625 if (GET_MODE (x
) == SFmode
)
8640 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8642 #ifdef GAS_MNEMONICS
8668 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8670 PRINT_OPERAND (file
, x
, 0);
8676 /* Little bit of braindamage here. The SSE compare instructions
8677 does use completely different names for the comparisons that the
8678 fp conditional moves. */
8679 switch (GET_CODE (x
))
8694 fputs ("unord", file
);
8698 fputs ("neq", file
);
8702 fputs ("nlt", file
);
8706 fputs ("nle", file
);
8709 fputs ("ord", file
);
8716 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8717 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8719 switch (GET_MODE (x
))
8721 case HImode
: putc ('w', file
); break;
8723 case SFmode
: putc ('l', file
); break;
8725 case DFmode
: putc ('q', file
); break;
8726 default: gcc_unreachable ();
8733 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8736 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8737 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8740 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8743 /* Like above, but reverse condition */
8745 /* Check to see if argument to %c is really a constant
8746 and not a condition code which needs to be reversed. */
8747 if (!COMPARISON_P (x
))
8749 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8752 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8755 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8756 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8759 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8763 /* It doesn't actually matter what mode we use here, as we're
8764 only going to use this for printing. */
8765 x
= adjust_address_nv (x
, DImode
, 8);
8772 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8775 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8778 int pred_val
= INTVAL (XEXP (x
, 0));
8780 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8781 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8783 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8784 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8786 /* Emit hints only in the case default branch prediction
8787 heuristics would fail. */
8788 if (taken
!= cputaken
)
8790 /* We use 3e (DS) prefix for taken branches and
8791 2e (CS) prefix for not taken branches. */
8793 fputs ("ds ; ", file
);
8795 fputs ("cs ; ", file
);
8804 fputs (" ; ", file
);
8811 output_operand_lossage ("invalid operand code '%c'", code
);
8816 print_reg (x
, code
, file
);
8820 /* No `byte ptr' prefix for call instructions. */
8821 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8824 switch (GET_MODE_SIZE (GET_MODE (x
)))
8826 case 1: size
= "BYTE"; break;
8827 case 2: size
= "WORD"; break;
8828 case 4: size
= "DWORD"; break;
8829 case 8: size
= "QWORD"; break;
8830 case 12: size
= "XWORD"; break;
8831 case 16: size
= "XMMWORD"; break;
8836 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8839 else if (code
== 'w')
8841 else if (code
== 'k')
8845 fputs (" PTR ", file
);
8849 /* Avoid (%rip) for call operands. */
8850 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8851 && !CONST_INT_P (x
))
8852 output_addr_const (file
, x
);
8853 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8854 output_operand_lossage ("invalid constraints for operand");
8859 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8864 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8865 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8867 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8869 fprintf (file
, "0x%08lx", l
);
8872 /* These float cases don't actually occur as immediate operands. */
8873 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8877 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8878 fprintf (file
, "%s", dstr
);
8881 else if (GET_CODE (x
) == CONST_DOUBLE
8882 && GET_MODE (x
) == XFmode
)
8886 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8887 fprintf (file
, "%s", dstr
);
8892 /* We have patterns that allow zero sets of memory, for instance.
8893 In 64-bit mode, we should probably support all 8-byte vectors,
8894 since we can in fact encode that into an immediate. */
8895 if (GET_CODE (x
) == CONST_VECTOR
)
8897 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8903 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8905 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8908 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8909 || GET_CODE (x
) == LABEL_REF
)
8911 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8914 fputs ("OFFSET FLAT:", file
);
8917 if (CONST_INT_P (x
))
8918 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8920 output_pic_addr_const (file
, x
, code
);
8922 output_addr_const (file
, x
);
8926 /* Print a memory operand whose address is ADDR. */
8929 print_operand_address (FILE *file
, rtx addr
)
8931 struct ix86_address parts
;
8932 rtx base
, index
, disp
;
8934 int ok
= ix86_decompose_address (addr
, &parts
);
8939 index
= parts
.index
;
8941 scale
= parts
.scale
;
8949 if (USER_LABEL_PREFIX
[0] == 0)
8951 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8957 if (!base
&& !index
)
8959 /* Displacement only requires special attention. */
8961 if (CONST_INT_P (disp
))
8963 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8965 if (USER_LABEL_PREFIX
[0] == 0)
8967 fputs ("ds:", file
);
8969 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8972 output_pic_addr_const (file
, disp
, 0);
8974 output_addr_const (file
, disp
);
8976 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8979 if (GET_CODE (disp
) == CONST
8980 && GET_CODE (XEXP (disp
, 0)) == PLUS
8981 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8982 disp
= XEXP (XEXP (disp
, 0), 0);
8983 if (GET_CODE (disp
) == LABEL_REF
8984 || (GET_CODE (disp
) == SYMBOL_REF
8985 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8986 fputs ("(%rip)", file
);
8991 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8996 output_pic_addr_const (file
, disp
, 0);
8997 else if (GET_CODE (disp
) == LABEL_REF
)
8998 output_asm_label (disp
);
9000 output_addr_const (file
, disp
);
9005 print_reg (base
, 0, file
);
9009 print_reg (index
, 0, file
);
9011 fprintf (file
, ",%d", scale
);
9017 rtx offset
= NULL_RTX
;
9021 /* Pull out the offset of a symbol; print any symbol itself. */
9022 if (GET_CODE (disp
) == CONST
9023 && GET_CODE (XEXP (disp
, 0)) == PLUS
9024 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9026 offset
= XEXP (XEXP (disp
, 0), 1);
9027 disp
= gen_rtx_CONST (VOIDmode
,
9028 XEXP (XEXP (disp
, 0), 0));
9032 output_pic_addr_const (file
, disp
, 0);
9033 else if (GET_CODE (disp
) == LABEL_REF
)
9034 output_asm_label (disp
);
9035 else if (CONST_INT_P (disp
))
9038 output_addr_const (file
, disp
);
9044 print_reg (base
, 0, file
);
9047 if (INTVAL (offset
) >= 0)
9049 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9053 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9060 print_reg (index
, 0, file
);
9062 fprintf (file
, "*%d", scale
);
9070 output_addr_const_extra (FILE *file
, rtx x
)
9074 if (GET_CODE (x
) != UNSPEC
)
9077 op
= XVECEXP (x
, 0, 0);
9078 switch (XINT (x
, 1))
9080 case UNSPEC_GOTTPOFF
:
9081 output_addr_const (file
, op
);
9082 /* FIXME: This might be @TPOFF in Sun ld. */
9083 fputs ("@GOTTPOFF", file
);
9086 output_addr_const (file
, op
);
9087 fputs ("@TPOFF", file
);
9090 output_addr_const (file
, op
);
9092 fputs ("@TPOFF", file
);
9094 fputs ("@NTPOFF", file
);
9097 output_addr_const (file
, op
);
9098 fputs ("@DTPOFF", file
);
9100 case UNSPEC_GOTNTPOFF
:
9101 output_addr_const (file
, op
);
9103 fputs ("@GOTTPOFF(%rip)", file
);
9105 fputs ("@GOTNTPOFF", file
);
9107 case UNSPEC_INDNTPOFF
:
9108 output_addr_const (file
, op
);
9109 fputs ("@INDNTPOFF", file
);
9119 /* Split one or more DImode RTL references into pairs of SImode
9120 references. The RTL can be REG, offsettable MEM, integer constant, or
9121 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9122 split and "num" is its length. lo_half and hi_half are output arrays
9123 that parallel "operands". */
9126 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9130 rtx op
= operands
[num
];
9132 /* simplify_subreg refuse to split volatile memory addresses,
9133 but we still have to handle it. */
9136 lo_half
[num
] = adjust_address (op
, SImode
, 0);
9137 hi_half
[num
] = adjust_address (op
, SImode
, 4);
9141 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
9142 GET_MODE (op
) == VOIDmode
9143 ? DImode
: GET_MODE (op
), 0);
9144 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
9145 GET_MODE (op
) == VOIDmode
9146 ? DImode
: GET_MODE (op
), 4);
9150 /* Split one or more TImode RTL references into pairs of DImode
9151 references. The RTL can be REG, offsettable MEM, integer constant, or
9152 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9153 split and "num" is its length. lo_half and hi_half are output arrays
9154 that parallel "operands". */
9157 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9161 rtx op
= operands
[num
];
9163 /* simplify_subreg refuse to split volatile memory addresses, but we
9164 still have to handle it. */
9167 lo_half
[num
] = adjust_address (op
, DImode
, 0);
9168 hi_half
[num
] = adjust_address (op
, DImode
, 8);
9172 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
9173 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
9178 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9179 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9180 is the expression of the binary operation. The output may either be
9181 emitted here, or returned to the caller, like all output_* functions.
9183 There is no guarantee that the operands are the same mode, as they
9184 might be within FLOAT or FLOAT_EXTEND expressions. */
9186 #ifndef SYSV386_COMPAT
9187 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9188 wants to fix the assemblers because that causes incompatibility
9189 with gcc. No-one wants to fix gcc because that causes
9190 incompatibility with assemblers... You can use the option of
9191 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9192 #define SYSV386_COMPAT 1
9196 output_387_binary_op (rtx insn
, rtx
*operands
)
9198 static char buf
[30];
9201 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
9203 #ifdef ENABLE_CHECKING
9204 /* Even if we do not want to check the inputs, this documents input
9205 constraints. Which helps in understanding the following code. */
9206 if (STACK_REG_P (operands
[0])
9207 && ((REG_P (operands
[1])
9208 && REGNO (operands
[0]) == REGNO (operands
[1])
9209 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
9210 || (REG_P (operands
[2])
9211 && REGNO (operands
[0]) == REGNO (operands
[2])
9212 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
9213 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
9216 gcc_assert (is_sse
);
9219 switch (GET_CODE (operands
[3]))
9222 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9223 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9231 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9232 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9240 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9241 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9249 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9250 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9264 if (GET_MODE (operands
[0]) == SFmode
)
9265 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9267 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9272 switch (GET_CODE (operands
[3]))
9276 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9278 rtx temp
= operands
[2];
9279 operands
[2] = operands
[1];
9283 /* know operands[0] == operands[1]. */
9285 if (MEM_P (operands
[2]))
9291 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9293 if (STACK_TOP_P (operands
[0]))
9294 /* How is it that we are storing to a dead operand[2]?
9295 Well, presumably operands[1] is dead too. We can't
9296 store the result to st(0) as st(0) gets popped on this
9297 instruction. Instead store to operands[2] (which I
9298 think has to be st(1)). st(1) will be popped later.
9299 gcc <= 2.8.1 didn't have this check and generated
9300 assembly code that the Unixware assembler rejected. */
9301 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9303 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9307 if (STACK_TOP_P (operands
[0]))
9308 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9310 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9315 if (MEM_P (operands
[1]))
9321 if (MEM_P (operands
[2]))
9327 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9330 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9331 derived assemblers, confusingly reverse the direction of
9332 the operation for fsub{r} and fdiv{r} when the
9333 destination register is not st(0). The Intel assembler
9334 doesn't have this brain damage. Read !SYSV386_COMPAT to
9335 figure out what the hardware really does. */
9336 if (STACK_TOP_P (operands
[0]))
9337 p
= "{p\t%0, %2|rp\t%2, %0}";
9339 p
= "{rp\t%2, %0|p\t%0, %2}";
9341 if (STACK_TOP_P (operands
[0]))
9342 /* As above for fmul/fadd, we can't store to st(0). */
9343 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9345 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9350 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9353 if (STACK_TOP_P (operands
[0]))
9354 p
= "{rp\t%0, %1|p\t%1, %0}";
9356 p
= "{p\t%1, %0|rp\t%0, %1}";
9358 if (STACK_TOP_P (operands
[0]))
9359 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9361 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9366 if (STACK_TOP_P (operands
[0]))
9368 if (STACK_TOP_P (operands
[1]))
9369 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9371 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9374 else if (STACK_TOP_P (operands
[1]))
9377 p
= "{\t%1, %0|r\t%0, %1}";
9379 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9385 p
= "{r\t%2, %0|\t%0, %2}";
9387 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9400 /* Return needed mode for entity in optimize_mode_switching pass. */
9403 ix86_mode_needed (int entity
, rtx insn
)
9405 enum attr_i387_cw mode
;
9407 /* The mode UNINITIALIZED is used to store control word after a
9408 function call or ASM pattern. The mode ANY specify that function
9409 has no requirements on the control word and make no changes in the
9410 bits we are interested in. */
9413 || (NONJUMP_INSN_P (insn
)
9414 && (asm_noperands (PATTERN (insn
)) >= 0
9415 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9416 return I387_CW_UNINITIALIZED
;
9418 if (recog_memoized (insn
) < 0)
9421 mode
= get_attr_i387_cw (insn
);
9426 if (mode
== I387_CW_TRUNC
)
9431 if (mode
== I387_CW_FLOOR
)
9436 if (mode
== I387_CW_CEIL
)
9441 if (mode
== I387_CW_MASK_PM
)
9452 /* Output code to initialize control word copies used by trunc?f?i and
9453 rounding patterns. CURRENT_MODE is set to current control word,
9454 while NEW_MODE is set to new control word. */
9457 emit_i387_cw_initialization (int mode
)
9459 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9462 enum ix86_stack_slot slot
;
9464 rtx reg
= gen_reg_rtx (HImode
);
9466 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9467 emit_move_insn (reg
, copy_rtx (stored_mode
));
9469 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9474 /* round toward zero (truncate) */
9475 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9476 slot
= SLOT_CW_TRUNC
;
9480 /* round down toward -oo */
9481 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9482 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9483 slot
= SLOT_CW_FLOOR
;
9487 /* round up toward +oo */
9488 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9489 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9490 slot
= SLOT_CW_CEIL
;
9493 case I387_CW_MASK_PM
:
9494 /* mask precision exception for nearbyint() */
9495 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9496 slot
= SLOT_CW_MASK_PM
;
9508 /* round toward zero (truncate) */
9509 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9510 slot
= SLOT_CW_TRUNC
;
9514 /* round down toward -oo */
9515 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9516 slot
= SLOT_CW_FLOOR
;
9520 /* round up toward +oo */
9521 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9522 slot
= SLOT_CW_CEIL
;
9525 case I387_CW_MASK_PM
:
9526 /* mask precision exception for nearbyint() */
9527 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9528 slot
= SLOT_CW_MASK_PM
;
9536 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9538 new_mode
= assign_386_stack_local (HImode
, slot
);
9539 emit_move_insn (new_mode
, reg
);
9542 /* Output code for INSN to convert a float to a signed int. OPERANDS
9543 are the insn operands. The output may be [HSD]Imode and the input
9544 operand may be [SDX]Fmode. */
9547 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9549 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9550 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9551 int round_mode
= get_attr_i387_cw (insn
);
9553 /* Jump through a hoop or two for DImode, since the hardware has no
9554 non-popping instruction. We used to do this a different way, but
9555 that was somewhat fragile and broke with post-reload splitters. */
9556 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9557 output_asm_insn ("fld\t%y1", operands
);
9559 gcc_assert (STACK_TOP_P (operands
[1]));
9560 gcc_assert (MEM_P (operands
[0]));
9561 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9564 output_asm_insn ("fisttp%z0\t%0", operands
);
9567 if (round_mode
!= I387_CW_ANY
)
9568 output_asm_insn ("fldcw\t%3", operands
);
9569 if (stack_top_dies
|| dimode_p
)
9570 output_asm_insn ("fistp%z0\t%0", operands
);
9572 output_asm_insn ("fist%z0\t%0", operands
);
9573 if (round_mode
!= I387_CW_ANY
)
9574 output_asm_insn ("fldcw\t%2", operands
);
9580 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9581 have the values zero or one, indicates the ffreep insn's operand
9582 from the OPERANDS array. */
9585 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9587 if (TARGET_USE_FFREEP
)
9588 #if HAVE_AS_IX86_FFREEP
9589 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9592 static char retval
[] = ".word\t0xc_df";
9593 int regno
= REGNO (operands
[opno
]);
9595 gcc_assert (FP_REGNO_P (regno
));
9597 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9602 return opno
? "fstp\t%y1" : "fstp\t%y0";
9606 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9607 should be used. UNORDERED_P is true when fucom should be used. */
9610 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9613 rtx cmp_op0
, cmp_op1
;
9614 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9618 cmp_op0
= operands
[0];
9619 cmp_op1
= operands
[1];
9623 cmp_op0
= operands
[1];
9624 cmp_op1
= operands
[2];
9629 if (GET_MODE (operands
[0]) == SFmode
)
9631 return "ucomiss\t{%1, %0|%0, %1}";
9633 return "comiss\t{%1, %0|%0, %1}";
9636 return "ucomisd\t{%1, %0|%0, %1}";
9638 return "comisd\t{%1, %0|%0, %1}";
9641 gcc_assert (STACK_TOP_P (cmp_op0
));
9643 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9645 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9649 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9650 return output_387_ffreep (operands
, 1);
9653 return "ftst\n\tfnstsw\t%0";
9656 if (STACK_REG_P (cmp_op1
)
9658 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9659 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9661 /* If both the top of the 387 stack dies, and the other operand
9662 is also a stack register that dies, then this must be a
9663 `fcompp' float compare */
9667 /* There is no double popping fcomi variant. Fortunately,
9668 eflags is immune from the fstp's cc clobbering. */
9670 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9672 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9673 return output_387_ffreep (operands
, 0);
9678 return "fucompp\n\tfnstsw\t%0";
9680 return "fcompp\n\tfnstsw\t%0";
9685 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9687 static const char * const alt
[16] =
9689 "fcom%z2\t%y2\n\tfnstsw\t%0",
9690 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9691 "fucom%z2\t%y2\n\tfnstsw\t%0",
9692 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9694 "ficom%z2\t%y2\n\tfnstsw\t%0",
9695 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9699 "fcomi\t{%y1, %0|%0, %y1}",
9700 "fcomip\t{%y1, %0|%0, %y1}",
9701 "fucomi\t{%y1, %0|%0, %y1}",
9702 "fucomip\t{%y1, %0|%0, %y1}",
9713 mask
= eflags_p
<< 3;
9714 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9715 mask
|= unordered_p
<< 1;
9716 mask
|= stack_top_dies
;
9718 gcc_assert (mask
< 16);
9727 ix86_output_addr_vec_elt (FILE *file
, int value
)
9729 const char *directive
= ASM_LONG
;
9733 directive
= ASM_QUAD
;
9735 gcc_assert (!TARGET_64BIT
);
9738 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9742 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9744 const char *directive
= ASM_LONG
;
9747 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9748 directive
= ASM_QUAD
;
9750 gcc_assert (!TARGET_64BIT
);
9752 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9753 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9754 fprintf (file
, "%s%s%d-%s%d\n",
9755 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9756 else if (HAVE_AS_GOTOFF_IN_DATA
)
9757 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9759 else if (TARGET_MACHO
)
9761 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9762 machopic_output_function_base_name (file
);
9763 fprintf(file
, "\n");
9767 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9768 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9771 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9775 ix86_expand_clear (rtx dest
)
9779 /* We play register width games, which are only valid after reload. */
9780 gcc_assert (reload_completed
);
9782 /* Avoid HImode and its attendant prefix byte. */
9783 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9784 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9785 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9787 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9788 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9790 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9791 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9797 /* X is an unchanging MEM. If it is a constant pool reference, return
9798 the constant pool rtx, else NULL. */
9801 maybe_get_pool_constant (rtx x
)
9803 x
= ix86_delegitimize_address (XEXP (x
, 0));
9805 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9806 return get_pool_constant (x
);
9812 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9814 int strict
= (reload_in_progress
|| reload_completed
);
9816 enum tls_model model
;
9821 if (GET_CODE (op1
) == SYMBOL_REF
)
9823 model
= SYMBOL_REF_TLS_MODEL (op1
);
9826 op1
= legitimize_tls_address (op1
, model
, true);
9827 op1
= force_operand (op1
, op0
);
9831 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9832 && SYMBOL_REF_DLLIMPORT_P (op1
))
9833 op1
= legitimize_dllimport_symbol (op1
, false);
9835 else if (GET_CODE (op1
) == CONST
9836 && GET_CODE (XEXP (op1
, 0)) == PLUS
9837 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9839 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9840 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
9843 model
= SYMBOL_REF_TLS_MODEL (symbol
);
9845 tmp
= legitimize_tls_address (symbol
, model
, true);
9846 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9847 && SYMBOL_REF_DLLIMPORT_P (symbol
))
9848 tmp
= legitimize_dllimport_symbol (symbol
, true);
9852 tmp
= force_operand (tmp
, NULL
);
9853 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
9854 op0
, 1, OPTAB_DIRECT
);
9860 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9862 if (TARGET_MACHO
&& !TARGET_64BIT
)
9867 rtx temp
= ((reload_in_progress
9868 || ((op0
&& REG_P (op0
))
9870 ? op0
: gen_reg_rtx (Pmode
));
9871 op1
= machopic_indirect_data_reference (op1
, temp
);
9872 op1
= machopic_legitimize_pic_address (op1
, mode
,
9873 temp
== op1
? 0 : temp
);
9875 else if (MACHOPIC_INDIRECT
)
9876 op1
= machopic_indirect_data_reference (op1
, 0);
9884 op1
= force_reg (Pmode
, op1
);
9885 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9887 rtx reg
= !can_create_pseudo_p () ? op0
: NULL_RTX
;
9888 op1
= legitimize_pic_address (op1
, reg
);
9897 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9898 || !push_operand (op0
, mode
))
9900 op1
= force_reg (mode
, op1
);
9902 if (push_operand (op0
, mode
)
9903 && ! general_no_elim_operand (op1
, mode
))
9904 op1
= copy_to_mode_reg (mode
, op1
);
9906 /* Force large constants in 64bit compilation into register
9907 to get them CSEed. */
9908 if (TARGET_64BIT
&& mode
== DImode
9909 && immediate_operand (op1
, mode
)
9910 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9911 && !register_operand (op0
, mode
)
9912 && optimize
&& !reload_completed
&& !reload_in_progress
)
9913 op1
= copy_to_mode_reg (mode
, op1
);
9915 if (FLOAT_MODE_P (mode
))
9917 /* If we are loading a floating point constant to a register,
9918 force the value to memory now, since we'll get better code
9919 out the back end. */
9923 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9925 op1
= validize_mem (force_const_mem (mode
, op1
));
9926 if (!register_operand (op0
, mode
))
9928 rtx temp
= gen_reg_rtx (mode
);
9929 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9930 emit_move_insn (op0
, temp
);
9937 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9941 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9943 rtx op0
= operands
[0], op1
= operands
[1];
9944 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
9946 /* Force constants other than zero into memory. We do not know how
9947 the instructions used to build constants modify the upper 64 bits
9948 of the register, once we have that information we may be able
9949 to handle some of them more efficiently. */
9950 if ((reload_in_progress
| reload_completed
) == 0
9951 && register_operand (op0
, mode
)
9952 && (CONSTANT_P (op1
)
9953 || (GET_CODE (op1
) == SUBREG
9954 && CONSTANT_P (SUBREG_REG (op1
))))
9955 && standard_sse_constant_p (op1
) <= 0)
9956 op1
= validize_mem (force_const_mem (mode
, op1
));
9958 /* TDmode values are passed as TImode on the stack. Timode values
9959 are moved via xmm registers, and moving them to stack can result in
9960 unaligned memory access. Use ix86_expand_vector_move_misalign()
9961 if memory operand is not aligned correctly. */
9962 if (can_create_pseudo_p ()
9963 && (mode
== TImode
) && !TARGET_64BIT
9964 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
9965 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
9969 /* ix86_expand_vector_move_misalign() does not like constants ... */
9970 if (CONSTANT_P (op1
)
9971 || (GET_CODE (op1
) == SUBREG
9972 && CONSTANT_P (SUBREG_REG (op1
))))
9973 op1
= validize_mem (force_const_mem (mode
, op1
));
9975 /* ... nor both arguments in memory. */
9976 if (!register_operand (op0
, mode
)
9977 && !register_operand (op1
, mode
))
9978 op1
= force_reg (mode
, op1
);
9980 tmp
[0] = op0
; tmp
[1] = op1
;
9981 ix86_expand_vector_move_misalign (mode
, tmp
);
9985 /* Make operand1 a register if it isn't already. */
9986 if (can_create_pseudo_p ()
9987 && !register_operand (op0
, mode
)
9988 && !register_operand (op1
, mode
))
9990 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9994 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9997 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9998 straight to ix86_expand_vector_move. */
9999 /* Code generation for scalar reg-reg moves of single and double precision data:
10000 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10004 if (x86_sse_partial_reg_dependency == true)
10009 Code generation for scalar loads of double precision data:
10010 if (x86_sse_split_regs == true)
10011 movlpd mem, reg (gas syntax)
10015 Code generation for unaligned packed loads of single precision data
10016 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10017 if (x86_sse_unaligned_move_optimal)
10020 if (x86_sse_partial_reg_dependency == true)
10032 Code generation for unaligned packed loads of double precision data
10033 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10034 if (x86_sse_unaligned_move_optimal)
10037 if (x86_sse_split_regs == true)
10050 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
10059 /* If we're optimizing for size, movups is the smallest. */
10062 op0
= gen_lowpart (V4SFmode
, op0
);
10063 op1
= gen_lowpart (V4SFmode
, op1
);
10064 emit_insn (gen_sse_movups (op0
, op1
));
10068 /* ??? If we have typed data, then it would appear that using
10069 movdqu is the only way to get unaligned data loaded with
10071 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10073 op0
= gen_lowpart (V16QImode
, op0
);
10074 op1
= gen_lowpart (V16QImode
, op1
);
10075 emit_insn (gen_sse2_movdqu (op0
, op1
));
10079 if (TARGET_SSE2
&& mode
== V2DFmode
)
10083 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10085 op0
= gen_lowpart (V2DFmode
, op0
);
10086 op1
= gen_lowpart (V2DFmode
, op1
);
10087 emit_insn (gen_sse2_movupd (op0
, op1
));
10091 /* When SSE registers are split into halves, we can avoid
10092 writing to the top half twice. */
10093 if (TARGET_SSE_SPLIT_REGS
)
10095 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10100 /* ??? Not sure about the best option for the Intel chips.
10101 The following would seem to satisfy; the register is
10102 entirely cleared, breaking the dependency chain. We
10103 then store to the upper half, with a dependency depth
10104 of one. A rumor has it that Intel recommends two movsd
10105 followed by an unpacklpd, but this is unconfirmed. And
10106 given that the dependency depth of the unpacklpd would
10107 still be one, I'm not sure why this would be better. */
10108 zero
= CONST0_RTX (V2DFmode
);
10111 m
= adjust_address (op1
, DFmode
, 0);
10112 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
10113 m
= adjust_address (op1
, DFmode
, 8);
10114 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
10118 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10120 op0
= gen_lowpart (V4SFmode
, op0
);
10121 op1
= gen_lowpart (V4SFmode
, op1
);
10122 emit_insn (gen_sse_movups (op0
, op1
));
10126 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
10127 emit_move_insn (op0
, CONST0_RTX (mode
));
10129 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10131 if (mode
!= V4SFmode
)
10132 op0
= gen_lowpart (V4SFmode
, op0
);
10133 m
= adjust_address (op1
, V2SFmode
, 0);
10134 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
10135 m
= adjust_address (op1
, V2SFmode
, 8);
10136 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
10139 else if (MEM_P (op0
))
10141 /* If we're optimizing for size, movups is the smallest. */
10144 op0
= gen_lowpart (V4SFmode
, op0
);
10145 op1
= gen_lowpart (V4SFmode
, op1
);
10146 emit_insn (gen_sse_movups (op0
, op1
));
10150 /* ??? Similar to above, only less clear because of quote
10151 typeless stores unquote. */
10152 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
10153 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10155 op0
= gen_lowpart (V16QImode
, op0
);
10156 op1
= gen_lowpart (V16QImode
, op1
);
10157 emit_insn (gen_sse2_movdqu (op0
, op1
));
10161 if (TARGET_SSE2
&& mode
== V2DFmode
)
10163 m
= adjust_address (op0
, DFmode
, 0);
10164 emit_insn (gen_sse2_storelpd (m
, op1
));
10165 m
= adjust_address (op0
, DFmode
, 8);
10166 emit_insn (gen_sse2_storehpd (m
, op1
));
10170 if (mode
!= V4SFmode
)
10171 op1
= gen_lowpart (V4SFmode
, op1
);
10172 m
= adjust_address (op0
, V2SFmode
, 0);
10173 emit_insn (gen_sse_storelps (m
, op1
));
10174 m
= adjust_address (op0
, V2SFmode
, 8);
10175 emit_insn (gen_sse_storehps (m
, op1
));
10179 gcc_unreachable ();
10182 /* Expand a push in MODE. This is some mode for which we do not support
10183 proper push instructions, at least from the registers that we expect
10184 the value to live in. */
10187 ix86_expand_push (enum machine_mode mode
, rtx x
)
10191 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
10192 GEN_INT (-GET_MODE_SIZE (mode
)),
10193 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
10194 if (tmp
!= stack_pointer_rtx
)
10195 emit_move_insn (stack_pointer_rtx
, tmp
);
10197 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
10198 emit_move_insn (tmp
, x
);
10201 /* Helper function of ix86_fixup_binary_operands to canonicalize
10202 operand order. Returns true if the operands should be swapped. */
10205 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
10208 rtx dst
= operands
[0];
10209 rtx src1
= operands
[1];
10210 rtx src2
= operands
[2];
10212 /* If the operation is not commutative, we can't do anything. */
10213 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
10216 /* Highest priority is that src1 should match dst. */
10217 if (rtx_equal_p (dst
, src1
))
10219 if (rtx_equal_p (dst
, src2
))
10222 /* Next highest priority is that immediate constants come second. */
10223 if (immediate_operand (src2
, mode
))
10225 if (immediate_operand (src1
, mode
))
10228 /* Lowest priority is that memory references should come second. */
10238 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10239 destination to use for the operation. If different from the true
10240 destination in operands[0], a copy operation will be required. */
10243 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
10246 rtx dst
= operands
[0];
10247 rtx src1
= operands
[1];
10248 rtx src2
= operands
[2];
10250 /* Canonicalize operand order. */
10251 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10258 /* Both source operands cannot be in memory. */
10259 if (MEM_P (src1
) && MEM_P (src2
))
10261 /* Optimization: Only read from memory once. */
10262 if (rtx_equal_p (src1
, src2
))
10264 src2
= force_reg (mode
, src2
);
10268 src2
= force_reg (mode
, src2
);
10271 /* If the destination is memory, and we do not have matching source
10272 operands, do things in registers. */
10273 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10274 dst
= gen_reg_rtx (mode
);
10276 /* Source 1 cannot be a constant. */
10277 if (CONSTANT_P (src1
))
10278 src1
= force_reg (mode
, src1
);
10280 /* Source 1 cannot be a non-matching memory. */
10281 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10282 src1
= force_reg (mode
, src1
);
10284 operands
[1] = src1
;
10285 operands
[2] = src2
;
10289 /* Similarly, but assume that the destination has already been
10290 set up properly. */
10293 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10294 enum machine_mode mode
, rtx operands
[])
10296 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10297 gcc_assert (dst
== operands
[0]);
10300 /* Attempt to expand a binary operator. Make the expansion closer to the
10301 actual machine, then just general_operand, which will allow 3 separate
10302 memory references (one output, two input) in a single insn. */
10305 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10308 rtx src1
, src2
, dst
, op
, clob
;
10310 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10311 src1
= operands
[1];
10312 src2
= operands
[2];
10314 /* Emit the instruction. */
10316 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10317 if (reload_in_progress
)
10319 /* Reload doesn't know about the flags register, and doesn't know that
10320 it doesn't want to clobber it. We can only do this with PLUS. */
10321 gcc_assert (code
== PLUS
);
10326 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10327 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10330 /* Fix up the destination if needed. */
10331 if (dst
!= operands
[0])
10332 emit_move_insn (operands
[0], dst
);
10335 /* Return TRUE or FALSE depending on whether the binary operator meets the
10336 appropriate constraints. */
10339 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10342 rtx dst
= operands
[0];
10343 rtx src1
= operands
[1];
10344 rtx src2
= operands
[2];
10346 /* Both source operands cannot be in memory. */
10347 if (MEM_P (src1
) && MEM_P (src2
))
10350 /* Canonicalize operand order for commutative operators. */
10351 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10358 /* If the destination is memory, we must have a matching source operand. */
10359 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10362 /* Source 1 cannot be a constant. */
10363 if (CONSTANT_P (src1
))
10366 /* Source 1 cannot be a non-matching memory. */
10367 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10373 /* Attempt to expand a unary operator. Make the expansion closer to the
10374 actual machine, then just general_operand, which will allow 2 separate
10375 memory references (one output, one input) in a single insn. */
10378 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10381 int matching_memory
;
10382 rtx src
, dst
, op
, clob
;
10387 /* If the destination is memory, and we do not have matching source
10388 operands, do things in registers. */
10389 matching_memory
= 0;
10392 if (rtx_equal_p (dst
, src
))
10393 matching_memory
= 1;
10395 dst
= gen_reg_rtx (mode
);
10398 /* When source operand is memory, destination must match. */
10399 if (MEM_P (src
) && !matching_memory
)
10400 src
= force_reg (mode
, src
);
10402 /* Emit the instruction. */
10404 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10405 if (reload_in_progress
|| code
== NOT
)
10407 /* Reload doesn't know about the flags register, and doesn't know that
10408 it doesn't want to clobber it. */
10409 gcc_assert (code
== NOT
);
10414 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10415 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10418 /* Fix up the destination if needed. */
10419 if (dst
!= operands
[0])
10420 emit_move_insn (operands
[0], dst
);
10423 /* Return TRUE or FALSE depending on whether the unary operator meets the
10424 appropriate constraints. */
10427 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10428 enum machine_mode mode ATTRIBUTE_UNUSED
,
10429 rtx operands
[2] ATTRIBUTE_UNUSED
)
10431 /* If one of operands is memory, source and destination must match. */
10432 if ((MEM_P (operands
[0])
10433 || MEM_P (operands
[1]))
10434 && ! rtx_equal_p (operands
[0], operands
[1]))
10439 /* Post-reload splitter for converting an SF or DFmode value in an
10440 SSE register into an unsigned SImode. */
10443 ix86_split_convert_uns_si_sse (rtx operands
[])
10445 enum machine_mode vecmode
;
10446 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10448 large
= operands
[1];
10449 zero_or_two31
= operands
[2];
10450 input
= operands
[3];
10451 two31
= operands
[4];
10452 vecmode
= GET_MODE (large
);
10453 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10455 /* Load up the value into the low element. We must ensure that the other
10456 elements are valid floats -- zero is the easiest such value. */
10459 if (vecmode
== V4SFmode
)
10460 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10462 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10466 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10467 emit_move_insn (value
, CONST0_RTX (vecmode
));
10468 if (vecmode
== V4SFmode
)
10469 emit_insn (gen_sse_movss (value
, value
, input
));
10471 emit_insn (gen_sse2_movsd (value
, value
, input
));
10474 emit_move_insn (large
, two31
);
10475 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10477 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10478 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10480 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10481 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10483 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10484 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10486 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10487 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10489 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10490 if (vecmode
== V4SFmode
)
10491 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10493 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10496 emit_insn (gen_xorv4si3 (value
, value
, large
));
10499 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10500 Expects the 64-bit DImode to be supplied in a pair of integral
10501 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10502 -mfpmath=sse, !optimize_size only. */
10505 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10507 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10508 rtx int_xmm
, fp_xmm
;
10509 rtx biases
, exponents
;
10512 int_xmm
= gen_reg_rtx (V4SImode
);
10513 if (TARGET_INTER_UNIT_MOVES
)
10514 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10515 else if (TARGET_SSE_SPLIT_REGS
)
10517 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10518 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10522 x
= gen_reg_rtx (V2DImode
);
10523 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10524 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10527 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10528 gen_rtvec (4, GEN_INT (0x43300000UL
),
10529 GEN_INT (0x45300000UL
),
10530 const0_rtx
, const0_rtx
));
10531 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10533 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10534 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10536 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10537 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10538 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10539 (0x1.0p84 + double(fp_value_hi_xmm)).
10540 Note these exponents differ by 32. */
10542 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10544 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10545 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10546 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10547 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10548 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10549 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10550 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10551 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10552 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10554 /* Add the upper and lower DFmode values together. */
10556 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10559 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10560 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10561 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10564 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10567 /* Convert an unsigned SImode value into a DFmode. Only currently used
10568 for SSE, but applicable anywhere. */
10571 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10573 REAL_VALUE_TYPE TWO31r
;
10576 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10577 NULL
, 1, OPTAB_DIRECT
);
10579 fp
= gen_reg_rtx (DFmode
);
10580 emit_insn (gen_floatsidf2 (fp
, x
));
10582 real_ldexp (&TWO31r
, &dconst1
, 31);
10583 x
= const_double_from_real_value (TWO31r
, DFmode
);
10585 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10587 emit_move_insn (target
, x
);
10590 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10591 32-bit mode; otherwise we have a direct convert instruction. */
10594 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10596 REAL_VALUE_TYPE TWO32r
;
10597 rtx fp_lo
, fp_hi
, x
;
10599 fp_lo
= gen_reg_rtx (DFmode
);
10600 fp_hi
= gen_reg_rtx (DFmode
);
10602 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10604 real_ldexp (&TWO32r
, &dconst1
, 32);
10605 x
= const_double_from_real_value (TWO32r
, DFmode
);
10606 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10608 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10610 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10613 emit_move_insn (target
, x
);
10616 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10617 For x86_32, -mfpmath=sse, !optimize_size only. */
10619 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10621 REAL_VALUE_TYPE ONE16r
;
10622 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10624 real_ldexp (&ONE16r
, &dconst1
, 16);
10625 x
= const_double_from_real_value (ONE16r
, SFmode
);
10626 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10627 NULL
, 0, OPTAB_DIRECT
);
10628 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10629 NULL
, 0, OPTAB_DIRECT
);
10630 fp_hi
= gen_reg_rtx (SFmode
);
10631 fp_lo
= gen_reg_rtx (SFmode
);
10632 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10633 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10634 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10636 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10638 if (!rtx_equal_p (target
, fp_hi
))
10639 emit_move_insn (target
, fp_hi
);
10642 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10643 then replicate the value for all elements of the vector
10647 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10654 v
= gen_rtvec (4, value
, value
, value
, value
);
10655 return gen_rtx_CONST_VECTOR (V4SImode
, v
);
10659 v
= gen_rtvec (2, value
, value
);
10660 return gen_rtx_CONST_VECTOR (V2DImode
, v
);
10664 v
= gen_rtvec (4, value
, value
, value
, value
);
10666 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10667 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10668 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10672 v
= gen_rtvec (2, value
, value
);
10674 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10675 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10678 gcc_unreachable ();
10682 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10683 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10684 for an SSE register. If VECT is true, then replicate the mask for
10685 all elements of the vector register. If INVERT is true, then create
10686 a mask excluding the sign bit. */
10689 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10691 enum machine_mode vec_mode
, imode
;
10692 HOST_WIDE_INT hi
, lo
;
10697 /* Find the sign bit, sign extended to 2*HWI. */
10703 vec_mode
= (mode
== SImode
) ? V4SImode
: V4SFmode
;
10704 lo
= 0x80000000, hi
= lo
< 0;
10710 vec_mode
= (mode
== DImode
) ? V2DImode
: V2DFmode
;
10711 if (HOST_BITS_PER_WIDE_INT
>= 64)
10712 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10714 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10720 vec_mode
= VOIDmode
;
10721 gcc_assert (HOST_BITS_PER_WIDE_INT
>= 64);
10722 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
10726 gcc_unreachable ();
10730 lo
= ~lo
, hi
= ~hi
;
10732 /* Force this value into the low part of a fp vector constant. */
10733 mask
= immed_double_const (lo
, hi
, imode
);
10734 mask
= gen_lowpart (mode
, mask
);
10736 if (vec_mode
== VOIDmode
)
10737 return force_reg (mode
, mask
);
10739 v
= ix86_build_const_vector (mode
, vect
, mask
);
10740 return force_reg (vec_mode
, v
);
10743 /* Generate code for floating point ABS or NEG. */
10746 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10749 rtx mask
, set
, use
, clob
, dst
, src
;
10750 bool matching_memory
;
10751 bool use_sse
= false;
10752 bool vector_mode
= VECTOR_MODE_P (mode
);
10753 enum machine_mode elt_mode
= mode
;
10757 elt_mode
= GET_MODE_INNER (mode
);
10760 else if (mode
== TFmode
)
10762 else if (TARGET_SSE_MATH
)
10763 use_sse
= SSE_FLOAT_MODE_P (mode
);
10765 /* NEG and ABS performed with SSE use bitwise mask operations.
10766 Create the appropriate mask now. */
10768 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10775 /* If the destination is memory, and we don't have matching source
10776 operands or we're using the x87, do things in registers. */
10777 matching_memory
= false;
10780 if (use_sse
&& rtx_equal_p (dst
, src
))
10781 matching_memory
= true;
10783 dst
= gen_reg_rtx (mode
);
10785 if (MEM_P (src
) && !matching_memory
)
10786 src
= force_reg (mode
, src
);
10790 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10791 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10796 set
= gen_rtx_fmt_e (code
, mode
, src
);
10797 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10800 use
= gen_rtx_USE (VOIDmode
, mask
);
10801 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10802 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10803 gen_rtvec (3, set
, use
, clob
)));
10809 if (dst
!= operands
[0])
10810 emit_move_insn (operands
[0], dst
);
10813 /* Expand a copysign operation. Special case operand 0 being a constant. */
10816 ix86_expand_copysign (rtx operands
[])
10818 enum machine_mode mode
, vmode
;
10819 rtx dest
, op0
, op1
, mask
, nmask
;
10821 dest
= operands
[0];
10825 mode
= GET_MODE (dest
);
10826 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10828 if (GET_CODE (op0
) == CONST_DOUBLE
)
10830 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
10832 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10833 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10835 if (mode
== SFmode
|| mode
== DFmode
)
10837 if (op0
== CONST0_RTX (mode
))
10838 op0
= CONST0_RTX (vmode
);
10843 if (mode
== SFmode
)
10844 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10845 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10847 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10848 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10852 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10854 if (mode
== SFmode
)
10855 copysign_insn
= gen_copysignsf3_const
;
10856 else if (mode
== DFmode
)
10857 copysign_insn
= gen_copysigndf3_const
;
10859 copysign_insn
= gen_copysigntf3_const
;
10861 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
10865 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
10867 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10868 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10870 if (mode
== SFmode
)
10871 copysign_insn
= gen_copysignsf3_var
;
10872 else if (mode
== DFmode
)
10873 copysign_insn
= gen_copysigndf3_var
;
10875 copysign_insn
= gen_copysigntf3_var
;
10877 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
10881 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10882 be a constant, and so has already been expanded into a vector constant. */
10885 ix86_split_copysign_const (rtx operands
[])
10887 enum machine_mode mode
, vmode
;
10888 rtx dest
, op0
, op1
, mask
, x
;
10890 dest
= operands
[0];
10893 mask
= operands
[3];
10895 mode
= GET_MODE (dest
);
10896 vmode
= GET_MODE (mask
);
10898 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10899 x
= gen_rtx_AND (vmode
, dest
, mask
);
10900 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10902 if (op0
!= CONST0_RTX (vmode
))
10904 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10905 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10909 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10910 so we have to do two masks. */
10913 ix86_split_copysign_var (rtx operands
[])
10915 enum machine_mode mode
, vmode
;
10916 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10918 dest
= operands
[0];
10919 scratch
= operands
[1];
10922 nmask
= operands
[4];
10923 mask
= operands
[5];
10925 mode
= GET_MODE (dest
);
10926 vmode
= GET_MODE (mask
);
10928 if (rtx_equal_p (op0
, op1
))
10930 /* Shouldn't happen often (it's useless, obviously), but when it does
10931 we'd generate incorrect code if we continue below. */
10932 emit_move_insn (dest
, op0
);
10936 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10938 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10940 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10941 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10944 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10945 x
= gen_rtx_NOT (vmode
, dest
);
10946 x
= gen_rtx_AND (vmode
, x
, op0
);
10947 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10951 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10953 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10955 else /* alternative 2,4 */
10957 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10958 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10959 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10961 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10963 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10965 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10966 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10968 else /* alternative 3,4 */
10970 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10972 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10973 x
= gen_rtx_AND (vmode
, dest
, op0
);
10975 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10978 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10979 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10982 /* Return TRUE or FALSE depending on whether the first SET in INSN
10983 has source and destination with matching CC modes, and that the
10984 CC mode is at least as constrained as REQ_MODE. */
10987 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10990 enum machine_mode set_mode
;
10992 set
= PATTERN (insn
);
10993 if (GET_CODE (set
) == PARALLEL
)
10994 set
= XVECEXP (set
, 0, 0);
10995 gcc_assert (GET_CODE (set
) == SET
);
10996 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10998 set_mode
= GET_MODE (SET_DEST (set
));
11002 if (req_mode
!= CCNOmode
11003 && (req_mode
!= CCmode
11004 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
11008 if (req_mode
== CCGCmode
)
11012 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
11016 if (req_mode
== CCZmode
)
11023 gcc_unreachable ();
11026 return (GET_MODE (SET_SRC (set
)) == set_mode
);
11029 /* Generate insn patterns to do an integer compare of OPERANDS. */
11032 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
11034 enum machine_mode cmpmode
;
11037 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
11038 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
11040 /* This is very simple, but making the interface the same as in the
11041 FP case makes the rest of the code easier. */
11042 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
11043 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
11045 /* Return the test that should be put into the flags user, i.e.
11046 the bcc, scc, or cmov instruction. */
11047 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
11050 /* Figure out whether to use ordered or unordered fp comparisons.
11051 Return the appropriate mode to use. */
11054 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
11056 /* ??? In order to make all comparisons reversible, we do all comparisons
11057 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11058 all forms trapping and nontrapping comparisons, we can make inequality
11059 comparisons trapping again, since it results in better code when using
11060 FCOM based compares. */
11061 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
11065 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
11067 enum machine_mode mode
= GET_MODE (op0
);
11069 if (SCALAR_FLOAT_MODE_P (mode
))
11071 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11072 return ix86_fp_compare_mode (code
);
11077 /* Only zero flag is needed. */
11078 case EQ
: /* ZF=0 */
11079 case NE
: /* ZF!=0 */
11081 /* Codes needing carry flag. */
11082 case GEU
: /* CF=0 */
11083 case LTU
: /* CF=1 */
11084 /* Detect overflow checks. They need just the carry flag. */
11085 if (GET_CODE (op0
) == PLUS
11086 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11090 case GTU
: /* CF=0 & ZF=0 */
11091 case LEU
: /* CF=1 | ZF=1 */
11092 /* Detect overflow checks. They need just the carry flag. */
11093 if (GET_CODE (op0
) == MINUS
11094 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11098 /* Codes possibly doable only with sign flag when
11099 comparing against zero. */
11100 case GE
: /* SF=OF or SF=0 */
11101 case LT
: /* SF<>OF or SF=1 */
11102 if (op1
== const0_rtx
)
11105 /* For other cases Carry flag is not required. */
11107 /* Codes doable only with sign flag when comparing
11108 against zero, but we miss jump instruction for it
11109 so we need to use relational tests against overflow
11110 that thus needs to be zero. */
11111 case GT
: /* ZF=0 & SF=OF */
11112 case LE
: /* ZF=1 | SF<>OF */
11113 if (op1
== const0_rtx
)
11117 /* strcmp pattern do (use flags) and combine may ask us for proper
11122 gcc_unreachable ();
11126 /* Return the fixed registers used for condition codes. */
11129 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
11136 /* If two condition code modes are compatible, return a condition code
11137 mode which is compatible with both. Otherwise, return
11140 static enum machine_mode
11141 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
11146 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
11149 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
11150 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
11156 gcc_unreachable ();
11186 /* These are only compatible with themselves, which we already
11192 /* Split comparison code CODE into comparisons we can do using branch
11193 instructions. BYPASS_CODE is comparison code for branch that will
11194 branch around FIRST_CODE and SECOND_CODE. If some of branches
11195 is not required, set value to UNKNOWN.
11196 We never require more than two branches. */
11199 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
11200 enum rtx_code
*first_code
,
11201 enum rtx_code
*second_code
)
11203 *first_code
= code
;
11204 *bypass_code
= UNKNOWN
;
11205 *second_code
= UNKNOWN
;
11207 /* The fcomi comparison sets flags as follows:
11217 case GT
: /* GTU - CF=0 & ZF=0 */
11218 case GE
: /* GEU - CF=0 */
11219 case ORDERED
: /* PF=0 */
11220 case UNORDERED
: /* PF=1 */
11221 case UNEQ
: /* EQ - ZF=1 */
11222 case UNLT
: /* LTU - CF=1 */
11223 case UNLE
: /* LEU - CF=1 | ZF=1 */
11224 case LTGT
: /* EQ - ZF=0 */
11226 case LT
: /* LTU - CF=1 - fails on unordered */
11227 *first_code
= UNLT
;
11228 *bypass_code
= UNORDERED
;
11230 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
11231 *first_code
= UNLE
;
11232 *bypass_code
= UNORDERED
;
11234 case EQ
: /* EQ - ZF=1 - fails on unordered */
11235 *first_code
= UNEQ
;
11236 *bypass_code
= UNORDERED
;
11238 case NE
: /* NE - ZF=0 - fails on unordered */
11239 *first_code
= LTGT
;
11240 *second_code
= UNORDERED
;
11242 case UNGE
: /* GEU - CF=0 - fails on unordered */
11244 *second_code
= UNORDERED
;
11246 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
11248 *second_code
= UNORDERED
;
11251 gcc_unreachable ();
11253 if (!TARGET_IEEE_FP
)
11255 *second_code
= UNKNOWN
;
11256 *bypass_code
= UNKNOWN
;
11260 /* Return cost of comparison done fcom + arithmetics operations on AX.
11261 All following functions do use number of instructions as a cost metrics.
11262 In future this should be tweaked to compute bytes for optimize_size and
11263 take into account performance of various instructions on various CPUs. */
11265 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
11267 if (!TARGET_IEEE_FP
)
11269 /* The cost of code output by ix86_expand_fp_compare. */
11293 gcc_unreachable ();
11297 /* Return cost of comparison done using fcomi operation.
11298 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11300 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
11302 enum rtx_code bypass_code
, first_code
, second_code
;
11303 /* Return arbitrarily high cost when instruction is not supported - this
11304 prevents gcc from using it. */
11307 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11308 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
11311 /* Return cost of comparison done using sahf operation.
11312 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11314 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
11316 enum rtx_code bypass_code
, first_code
, second_code
;
11317 /* Return arbitrarily high cost when instruction is not preferred - this
11318 avoids gcc from using it. */
11319 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11321 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11322 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11325 /* Compute cost of the comparison done using any method.
11326 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11328 ix86_fp_comparison_cost (enum rtx_code code
)
11330 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11333 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11334 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11336 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11337 if (min
> sahf_cost
)
11339 if (min
> fcomi_cost
)
11344 /* Return true if we should use an FCOMI instruction for this
11348 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11350 enum rtx_code swapped_code
= swap_condition (code
);
11352 return ((ix86_fp_comparison_cost (code
)
11353 == ix86_fp_comparison_fcomi_cost (code
))
11354 || (ix86_fp_comparison_cost (swapped_code
)
11355 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11358 /* Swap, force into registers, or otherwise massage the two operands
11359 to a fp comparison. The operands are updated in place; the new
11360 comparison code is returned. */
11362 static enum rtx_code
11363 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11365 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11366 rtx op0
= *pop0
, op1
= *pop1
;
11367 enum machine_mode op_mode
= GET_MODE (op0
);
11368 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11370 /* All of the unordered compare instructions only work on registers.
11371 The same is true of the fcomi compare instructions. The XFmode
11372 compare instructions require registers except when comparing
11373 against zero or when converting operand 1 from fixed point to
11377 && (fpcmp_mode
== CCFPUmode
11378 || (op_mode
== XFmode
11379 && ! (standard_80387_constant_p (op0
) == 1
11380 || standard_80387_constant_p (op1
) == 1)
11381 && GET_CODE (op1
) != FLOAT
)
11382 || ix86_use_fcomi_compare (code
)))
11384 op0
= force_reg (op_mode
, op0
);
11385 op1
= force_reg (op_mode
, op1
);
11389 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11390 things around if they appear profitable, otherwise force op0
11391 into a register. */
11393 if (standard_80387_constant_p (op0
) == 0
11395 && ! (standard_80387_constant_p (op1
) == 0
11399 tmp
= op0
, op0
= op1
, op1
= tmp
;
11400 code
= swap_condition (code
);
11404 op0
= force_reg (op_mode
, op0
);
11406 if (CONSTANT_P (op1
))
11408 int tmp
= standard_80387_constant_p (op1
);
11410 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11414 op1
= force_reg (op_mode
, op1
);
11417 op1
= force_reg (op_mode
, op1
);
11421 /* Try to rearrange the comparison to make it cheaper. */
11422 if (ix86_fp_comparison_cost (code
)
11423 > ix86_fp_comparison_cost (swap_condition (code
))
11424 && (REG_P (op1
) || can_create_pseudo_p ()))
11427 tmp
= op0
, op0
= op1
, op1
= tmp
;
11428 code
= swap_condition (code
);
11430 op0
= force_reg (op_mode
, op0
);
11438 /* Convert comparison codes we use to represent FP comparison to integer
11439 code that will result in proper branch. Return UNKNOWN if no such code
11443 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11472 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11475 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11476 rtx
*second_test
, rtx
*bypass_test
)
11478 enum machine_mode fpcmp_mode
, intcmp_mode
;
11480 int cost
= ix86_fp_comparison_cost (code
);
11481 enum rtx_code bypass_code
, first_code
, second_code
;
11483 fpcmp_mode
= ix86_fp_compare_mode (code
);
11484 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11487 *second_test
= NULL_RTX
;
11489 *bypass_test
= NULL_RTX
;
11491 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11493 /* Do fcomi/sahf based test when profitable. */
11494 if (ix86_fp_comparison_arithmetics_cost (code
) > cost
11495 && (bypass_code
== UNKNOWN
|| bypass_test
)
11496 && (second_code
== UNKNOWN
|| second_test
))
11498 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11499 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11505 gcc_assert (TARGET_SAHF
);
11508 scratch
= gen_reg_rtx (HImode
);
11509 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11511 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
11514 /* The FP codes work out to act like unsigned. */
11515 intcmp_mode
= fpcmp_mode
;
11517 if (bypass_code
!= UNKNOWN
)
11518 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11519 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11521 if (second_code
!= UNKNOWN
)
11522 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11523 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11528 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11529 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11530 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11532 scratch
= gen_reg_rtx (HImode
);
11533 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11535 /* In the unordered case, we have to check C2 for NaN's, which
11536 doesn't happen to work out to anything nice combination-wise.
11537 So do some bit twiddling on the value we've got in AH to come
11538 up with an appropriate set of condition codes. */
11540 intcmp_mode
= CCNOmode
;
11545 if (code
== GT
|| !TARGET_IEEE_FP
)
11547 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11552 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11553 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11554 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11555 intcmp_mode
= CCmode
;
11561 if (code
== LT
&& TARGET_IEEE_FP
)
11563 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11564 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11565 intcmp_mode
= CCmode
;
11570 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11576 if (code
== GE
|| !TARGET_IEEE_FP
)
11578 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11583 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11584 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11591 if (code
== LE
&& TARGET_IEEE_FP
)
11593 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11594 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11595 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11596 intcmp_mode
= CCmode
;
11601 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11607 if (code
== EQ
&& TARGET_IEEE_FP
)
11609 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11610 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11611 intcmp_mode
= CCmode
;
11616 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11623 if (code
== NE
&& TARGET_IEEE_FP
)
11625 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11626 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11632 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11638 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11642 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11647 gcc_unreachable ();
11651 /* Return the test that should be put into the flags user, i.e.
11652 the bcc, scc, or cmov instruction. */
11653 return gen_rtx_fmt_ee (code
, VOIDmode
,
11654 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11659 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11662 op0
= ix86_compare_op0
;
11663 op1
= ix86_compare_op1
;
11666 *second_test
= NULL_RTX
;
11668 *bypass_test
= NULL_RTX
;
11670 if (ix86_compare_emitted
)
11672 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11673 ix86_compare_emitted
= NULL_RTX
;
11675 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11677 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11678 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11679 second_test
, bypass_test
);
11682 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11687 /* Return true if the CODE will result in nontrivial jump sequence. */
11689 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11691 enum rtx_code bypass_code
, first_code
, second_code
;
11694 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11695 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11699 ix86_expand_branch (enum rtx_code code
, rtx label
)
11703 /* If we have emitted a compare insn, go straight to simple.
11704 ix86_expand_compare won't emit anything if ix86_compare_emitted
11706 if (ix86_compare_emitted
)
11709 switch (GET_MODE (ix86_compare_op0
))
11715 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11716 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11717 gen_rtx_LABEL_REF (VOIDmode
, label
),
11719 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11728 enum rtx_code bypass_code
, first_code
, second_code
;
11730 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11731 &ix86_compare_op1
);
11733 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11735 /* Check whether we will use the natural sequence with one jump. If
11736 so, we can expand jump early. Otherwise delay expansion by
11737 creating compound insn to not confuse optimizers. */
11738 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
)
11740 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11741 gen_rtx_LABEL_REF (VOIDmode
, label
),
11742 pc_rtx
, NULL_RTX
, NULL_RTX
);
11746 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11747 ix86_compare_op0
, ix86_compare_op1
);
11748 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11749 gen_rtx_LABEL_REF (VOIDmode
, label
),
11751 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11753 use_fcomi
= ix86_use_fcomi_compare (code
);
11754 vec
= rtvec_alloc (3 + !use_fcomi
);
11755 RTVEC_ELT (vec
, 0) = tmp
;
11757 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FPSR_REG
));
11759 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FLAGS_REG
));
11762 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11764 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11773 /* Expand DImode branch into multiple compare+branch. */
11775 rtx lo
[2], hi
[2], label2
;
11776 enum rtx_code code1
, code2
, code3
;
11777 enum machine_mode submode
;
11779 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11781 tmp
= ix86_compare_op0
;
11782 ix86_compare_op0
= ix86_compare_op1
;
11783 ix86_compare_op1
= tmp
;
11784 code
= swap_condition (code
);
11786 if (GET_MODE (ix86_compare_op0
) == DImode
)
11788 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11789 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11794 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11795 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11799 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11800 avoid two branches. This costs one extra insn, so disable when
11801 optimizing for size. */
11803 if ((code
== EQ
|| code
== NE
)
11805 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11810 if (hi
[1] != const0_rtx
)
11811 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11812 NULL_RTX
, 0, OPTAB_WIDEN
);
11815 if (lo
[1] != const0_rtx
)
11816 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11817 NULL_RTX
, 0, OPTAB_WIDEN
);
11819 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11820 NULL_RTX
, 0, OPTAB_WIDEN
);
11822 ix86_compare_op0
= tmp
;
11823 ix86_compare_op1
= const0_rtx
;
11824 ix86_expand_branch (code
, label
);
11828 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11829 op1 is a constant and the low word is zero, then we can just
11830 examine the high word. */
11832 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11835 case LT
: case LTU
: case GE
: case GEU
:
11836 ix86_compare_op0
= hi
[0];
11837 ix86_compare_op1
= hi
[1];
11838 ix86_expand_branch (code
, label
);
11844 /* Otherwise, we need two or three jumps. */
11846 label2
= gen_label_rtx ();
11849 code2
= swap_condition (code
);
11850 code3
= unsigned_condition (code
);
11854 case LT
: case GT
: case LTU
: case GTU
:
11857 case LE
: code1
= LT
; code2
= GT
; break;
11858 case GE
: code1
= GT
; code2
= LT
; break;
11859 case LEU
: code1
= LTU
; code2
= GTU
; break;
11860 case GEU
: code1
= GTU
; code2
= LTU
; break;
11862 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11863 case NE
: code2
= UNKNOWN
; break;
11866 gcc_unreachable ();
11871 * if (hi(a) < hi(b)) goto true;
11872 * if (hi(a) > hi(b)) goto false;
11873 * if (lo(a) < lo(b)) goto true;
11877 ix86_compare_op0
= hi
[0];
11878 ix86_compare_op1
= hi
[1];
11880 if (code1
!= UNKNOWN
)
11881 ix86_expand_branch (code1
, label
);
11882 if (code2
!= UNKNOWN
)
11883 ix86_expand_branch (code2
, label2
);
11885 ix86_compare_op0
= lo
[0];
11886 ix86_compare_op1
= lo
[1];
11887 ix86_expand_branch (code3
, label
);
11889 if (code2
!= UNKNOWN
)
11890 emit_label (label2
);
11895 gcc_unreachable ();
11899 /* Split branch based on floating point condition. */
11901 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11902 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11904 rtx second
, bypass
;
11905 rtx label
= NULL_RTX
;
11907 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11910 if (target2
!= pc_rtx
)
11913 code
= reverse_condition_maybe_unordered (code
);
11918 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11919 tmp
, &second
, &bypass
);
11921 /* Remove pushed operand from stack. */
11923 ix86_free_from_memory (GET_MODE (pushed
));
11925 if (split_branch_probability
>= 0)
11927 /* Distribute the probabilities across the jumps.
11928 Assume the BYPASS and SECOND to be always test
11930 probability
= split_branch_probability
;
11932 /* Value of 1 is low enough to make no need for probability
11933 to be updated. Later we may run some experiments and see
11934 if unordered values are more frequent in practice. */
11936 bypass_probability
= 1;
11938 second_probability
= 1;
11940 if (bypass
!= NULL_RTX
)
11942 label
= gen_label_rtx ();
11943 i
= emit_jump_insn (gen_rtx_SET
11945 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11947 gen_rtx_LABEL_REF (VOIDmode
,
11950 if (bypass_probability
>= 0)
11952 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11953 GEN_INT (bypass_probability
),
11956 i
= emit_jump_insn (gen_rtx_SET
11958 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11959 condition
, target1
, target2
)));
11960 if (probability
>= 0)
11962 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11963 GEN_INT (probability
),
11965 if (second
!= NULL_RTX
)
11967 i
= emit_jump_insn (gen_rtx_SET
11969 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11971 if (second_probability
>= 0)
11973 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11974 GEN_INT (second_probability
),
11977 if (label
!= NULL_RTX
)
11978 emit_label (label
);
11982 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11984 rtx ret
, tmp
, tmpreg
, equiv
;
11985 rtx second_test
, bypass_test
;
11987 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11988 return 0; /* FAIL */
11990 gcc_assert (GET_MODE (dest
) == QImode
);
11992 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11993 PUT_MODE (ret
, QImode
);
11998 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11999 if (bypass_test
|| second_test
)
12001 rtx test
= second_test
;
12003 rtx tmp2
= gen_reg_rtx (QImode
);
12006 gcc_assert (!second_test
);
12007 test
= bypass_test
;
12009 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
12011 PUT_MODE (test
, QImode
);
12012 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
12015 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
12017 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
12020 /* Attach a REG_EQUAL note describing the comparison result. */
12021 if (ix86_compare_op0
&& ix86_compare_op1
)
12023 equiv
= simplify_gen_relational (code
, QImode
,
12024 GET_MODE (ix86_compare_op0
),
12025 ix86_compare_op0
, ix86_compare_op1
);
12026 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
12029 return 1; /* DONE */
12032 /* Expand comparison setting or clearing carry flag. Return true when
12033 successful and set pop for the operation. */
12035 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
12037 enum machine_mode mode
=
12038 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
12040 /* Do not handle DImode compares that go through special path. */
12041 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
12044 if (SCALAR_FLOAT_MODE_P (mode
))
12046 rtx second_test
= NULL
, bypass_test
= NULL
;
12047 rtx compare_op
, compare_seq
;
12049 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
12051 /* Shortcut: following common codes never translate
12052 into carry flag compares. */
12053 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
12054 || code
== ORDERED
|| code
== UNORDERED
)
12057 /* These comparisons require zero flag; swap operands so they won't. */
12058 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
12059 && !TARGET_IEEE_FP
)
12064 code
= swap_condition (code
);
12067 /* Try to expand the comparison and verify that we end up with
12068 carry flag based comparison. This fails to be true only when
12069 we decide to expand comparison using arithmetic that is not
12070 too common scenario. */
12072 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
12073 &second_test
, &bypass_test
);
12074 compare_seq
= get_insns ();
12077 if (second_test
|| bypass_test
)
12080 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12081 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12082 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
12084 code
= GET_CODE (compare_op
);
12086 if (code
!= LTU
&& code
!= GEU
)
12089 emit_insn (compare_seq
);
12094 if (!INTEGRAL_MODE_P (mode
))
12103 /* Convert a==0 into (unsigned)a<1. */
12106 if (op1
!= const0_rtx
)
12109 code
= (code
== EQ
? LTU
: GEU
);
12112 /* Convert a>b into b<a or a>=b-1. */
12115 if (CONST_INT_P (op1
))
12117 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
12118 /* Bail out on overflow. We still can swap operands but that
12119 would force loading of the constant into register. */
12120 if (op1
== const0_rtx
12121 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
12123 code
= (code
== GTU
? GEU
: LTU
);
12130 code
= (code
== GTU
? LTU
: GEU
);
12134 /* Convert a>=0 into (unsigned)a<0x80000000. */
12137 if (mode
== DImode
|| op1
!= const0_rtx
)
12139 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12140 code
= (code
== LT
? GEU
: LTU
);
12144 if (mode
== DImode
|| op1
!= constm1_rtx
)
12146 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12147 code
= (code
== LE
? GEU
: LTU
);
12153 /* Swapping operands may cause constant to appear as first operand. */
12154 if (!nonimmediate_operand (op0
, VOIDmode
))
12156 if (!can_create_pseudo_p ())
12158 op0
= force_reg (mode
, op0
);
12160 ix86_compare_op0
= op0
;
12161 ix86_compare_op1
= op1
;
12162 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
12163 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
12168 ix86_expand_int_movcc (rtx operands
[])
12170 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
12171 rtx compare_seq
, compare_op
;
12172 rtx second_test
, bypass_test
;
12173 enum machine_mode mode
= GET_MODE (operands
[0]);
12174 bool sign_bit_compare_p
= false;;
12177 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12178 compare_seq
= get_insns ();
12181 compare_code
= GET_CODE (compare_op
);
12183 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
12184 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
12185 sign_bit_compare_p
= true;
12187 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12188 HImode insns, we'd be swallowed in word prefix ops. */
12190 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
12191 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
12192 && CONST_INT_P (operands
[2])
12193 && CONST_INT_P (operands
[3]))
12195 rtx out
= operands
[0];
12196 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
12197 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
12198 HOST_WIDE_INT diff
;
12201 /* Sign bit compares are better done using shifts than we do by using
12203 if (sign_bit_compare_p
12204 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12205 ix86_compare_op1
, &compare_op
))
12207 /* Detect overlap between destination and compare sources. */
12210 if (!sign_bit_compare_p
)
12212 bool fpcmp
= false;
12214 compare_code
= GET_CODE (compare_op
);
12216 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12217 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12220 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
12223 /* To simplify rest of code, restrict to the GEU case. */
12224 if (compare_code
== LTU
)
12226 HOST_WIDE_INT tmp
= ct
;
12229 compare_code
= reverse_condition (compare_code
);
12230 code
= reverse_condition (code
);
12235 PUT_CODE (compare_op
,
12236 reverse_condition_maybe_unordered
12237 (GET_CODE (compare_op
)));
12239 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12243 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
12244 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
12245 tmp
= gen_reg_rtx (mode
);
12247 if (mode
== DImode
)
12248 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
12250 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
12254 if (code
== GT
|| code
== GE
)
12255 code
= reverse_condition (code
);
12258 HOST_WIDE_INT tmp
= ct
;
12263 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
12264 ix86_compare_op1
, VOIDmode
, 0, -1);
12277 tmp
= expand_simple_binop (mode
, PLUS
,
12279 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12290 tmp
= expand_simple_binop (mode
, IOR
,
12292 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12294 else if (diff
== -1 && ct
)
12304 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12306 tmp
= expand_simple_binop (mode
, PLUS
,
12307 copy_rtx (tmp
), GEN_INT (cf
),
12308 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12316 * andl cf - ct, dest
12326 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12329 tmp
= expand_simple_binop (mode
, AND
,
12331 gen_int_mode (cf
- ct
, mode
),
12332 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12334 tmp
= expand_simple_binop (mode
, PLUS
,
12335 copy_rtx (tmp
), GEN_INT (ct
),
12336 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12339 if (!rtx_equal_p (tmp
, out
))
12340 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
12342 return 1; /* DONE */
12347 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12350 tmp
= ct
, ct
= cf
, cf
= tmp
;
12353 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12355 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12357 /* We may be reversing unordered compare to normal compare, that
12358 is not valid in general (we may convert non-trapping condition
12359 to trapping one), however on i386 we currently emit all
12360 comparisons unordered. */
12361 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12362 code
= reverse_condition_maybe_unordered (code
);
12366 compare_code
= reverse_condition (compare_code
);
12367 code
= reverse_condition (code
);
12371 compare_code
= UNKNOWN
;
12372 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12373 && CONST_INT_P (ix86_compare_op1
))
12375 if (ix86_compare_op1
== const0_rtx
12376 && (code
== LT
|| code
== GE
))
12377 compare_code
= code
;
12378 else if (ix86_compare_op1
== constm1_rtx
)
12382 else if (code
== GT
)
12387 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12388 if (compare_code
!= UNKNOWN
12389 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12390 && (cf
== -1 || ct
== -1))
12392 /* If lea code below could be used, only optimize
12393 if it results in a 2 insn sequence. */
12395 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12396 || diff
== 3 || diff
== 5 || diff
== 9)
12397 || (compare_code
== LT
&& ct
== -1)
12398 || (compare_code
== GE
&& cf
== -1))
12401 * notl op1 (if necessary)
12409 code
= reverse_condition (code
);
12412 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12413 ix86_compare_op1
, VOIDmode
, 0, -1);
12415 out
= expand_simple_binop (mode
, IOR
,
12417 out
, 1, OPTAB_DIRECT
);
12418 if (out
!= operands
[0])
12419 emit_move_insn (operands
[0], out
);
12421 return 1; /* DONE */
12426 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12427 || diff
== 3 || diff
== 5 || diff
== 9)
12428 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12430 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12436 * lea cf(dest*(ct-cf)),dest
12440 * This also catches the degenerate setcc-only case.
12446 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12447 ix86_compare_op1
, VOIDmode
, 0, 1);
12450 /* On x86_64 the lea instruction operates on Pmode, so we need
12451 to get arithmetics done in proper mode to match. */
12453 tmp
= copy_rtx (out
);
12457 out1
= copy_rtx (out
);
12458 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12462 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12468 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12471 if (!rtx_equal_p (tmp
, out
))
12474 out
= force_operand (tmp
, copy_rtx (out
));
12476 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12478 if (!rtx_equal_p (out
, operands
[0]))
12479 emit_move_insn (operands
[0], copy_rtx (out
));
12481 return 1; /* DONE */
12485 * General case: Jumpful:
12486 * xorl dest,dest cmpl op1, op2
12487 * cmpl op1, op2 movl ct, dest
12488 * setcc dest jcc 1f
12489 * decl dest movl cf, dest
12490 * andl (cf-ct),dest 1:
12493 * Size 20. Size 14.
12495 * This is reasonably steep, but branch mispredict costs are
12496 * high on modern cpus, so consider failing only if optimizing
12500 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12501 && BRANCH_COST
>= 2)
12505 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12510 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12512 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12514 /* We may be reversing unordered compare to normal compare,
12515 that is not valid in general (we may convert non-trapping
12516 condition to trapping one), however on i386 we currently
12517 emit all comparisons unordered. */
12518 code
= reverse_condition_maybe_unordered (code
);
12522 code
= reverse_condition (code
);
12523 if (compare_code
!= UNKNOWN
)
12524 compare_code
= reverse_condition (compare_code
);
12528 if (compare_code
!= UNKNOWN
)
12530 /* notl op1 (if needed)
12535 For x < 0 (resp. x <= -1) there will be no notl,
12536 so if possible swap the constants to get rid of the
12538 True/false will be -1/0 while code below (store flag
12539 followed by decrement) is 0/-1, so the constants need
12540 to be exchanged once more. */
12542 if (compare_code
== GE
|| !cf
)
12544 code
= reverse_condition (code
);
12549 HOST_WIDE_INT tmp
= cf
;
12554 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12555 ix86_compare_op1
, VOIDmode
, 0, -1);
12559 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12560 ix86_compare_op1
, VOIDmode
, 0, 1);
12562 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12563 copy_rtx (out
), 1, OPTAB_DIRECT
);
12566 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12567 gen_int_mode (cf
- ct
, mode
),
12568 copy_rtx (out
), 1, OPTAB_DIRECT
);
12570 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12571 copy_rtx (out
), 1, OPTAB_DIRECT
);
12572 if (!rtx_equal_p (out
, operands
[0]))
12573 emit_move_insn (operands
[0], copy_rtx (out
));
12575 return 1; /* DONE */
12579 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12581 /* Try a few things more with specific constants and a variable. */
12584 rtx var
, orig_out
, out
, tmp
;
12586 if (BRANCH_COST
<= 2)
12587 return 0; /* FAIL */
12589 /* If one of the two operands is an interesting constant, load a
12590 constant with the above and mask it in with a logical operation. */
12592 if (CONST_INT_P (operands
[2]))
12595 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12596 operands
[3] = constm1_rtx
, op
= and_optab
;
12597 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12598 operands
[3] = const0_rtx
, op
= ior_optab
;
12600 return 0; /* FAIL */
12602 else if (CONST_INT_P (operands
[3]))
12605 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12606 operands
[2] = constm1_rtx
, op
= and_optab
;
12607 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12608 operands
[2] = const0_rtx
, op
= ior_optab
;
12610 return 0; /* FAIL */
12613 return 0; /* FAIL */
12615 orig_out
= operands
[0];
12616 tmp
= gen_reg_rtx (mode
);
12619 /* Recurse to get the constant loaded. */
12620 if (ix86_expand_int_movcc (operands
) == 0)
12621 return 0; /* FAIL */
12623 /* Mask in the interesting variable. */
12624 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12626 if (!rtx_equal_p (out
, orig_out
))
12627 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12629 return 1; /* DONE */
12633 * For comparison with above,
12643 if (! nonimmediate_operand (operands
[2], mode
))
12644 operands
[2] = force_reg (mode
, operands
[2]);
12645 if (! nonimmediate_operand (operands
[3], mode
))
12646 operands
[3] = force_reg (mode
, operands
[3]);
12648 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12650 rtx tmp
= gen_reg_rtx (mode
);
12651 emit_move_insn (tmp
, operands
[3]);
12654 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12656 rtx tmp
= gen_reg_rtx (mode
);
12657 emit_move_insn (tmp
, operands
[2]);
12661 if (! register_operand (operands
[2], VOIDmode
)
12663 || ! register_operand (operands
[3], VOIDmode
)))
12664 operands
[2] = force_reg (mode
, operands
[2]);
12667 && ! register_operand (operands
[3], VOIDmode
))
12668 operands
[3] = force_reg (mode
, operands
[3]);
12670 emit_insn (compare_seq
);
12671 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12672 gen_rtx_IF_THEN_ELSE (mode
,
12673 compare_op
, operands
[2],
12676 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12677 gen_rtx_IF_THEN_ELSE (mode
,
12679 copy_rtx (operands
[3]),
12680 copy_rtx (operands
[0]))));
12682 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12683 gen_rtx_IF_THEN_ELSE (mode
,
12685 copy_rtx (operands
[2]),
12686 copy_rtx (operands
[0]))));
12688 return 1; /* DONE */
12691 /* Swap, force into registers, or otherwise massage the two operands
12692 to an sse comparison with a mask result. Thus we differ a bit from
12693 ix86_prepare_fp_compare_args which expects to produce a flags result.
12695 The DEST operand exists to help determine whether to commute commutative
12696 operators. The POP0/POP1 operands are updated in place. The new
12697 comparison code is returned, or UNKNOWN if not implementable. */
12699 static enum rtx_code
12700 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12701 rtx
*pop0
, rtx
*pop1
)
12709 /* We have no LTGT as an operator. We could implement it with
12710 NE & ORDERED, but this requires an extra temporary. It's
12711 not clear that it's worth it. */
12718 /* These are supported directly. */
12725 /* For commutative operators, try to canonicalize the destination
12726 operand to be first in the comparison - this helps reload to
12727 avoid extra moves. */
12728 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12736 /* These are not supported directly. Swap the comparison operands
12737 to transform into something that is supported. */
12741 code
= swap_condition (code
);
12745 gcc_unreachable ();
12751 /* Detect conditional moves that exactly match min/max operational
12752 semantics. Note that this is IEEE safe, as long as we don't
12753 interchange the operands.
12755 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12756 and TRUE if the operation is successful and instructions are emitted. */
12759 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12760 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12762 enum machine_mode mode
;
12768 else if (code
== UNGE
)
12771 if_true
= if_false
;
12777 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12779 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12784 mode
= GET_MODE (dest
);
12786 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12787 but MODE may be a vector mode and thus not appropriate. */
12788 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12790 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12793 if_true
= force_reg (mode
, if_true
);
12794 v
= gen_rtvec (2, if_true
, if_false
);
12795 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12799 code
= is_min
? SMIN
: SMAX
;
12800 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12803 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12807 /* Expand an sse vector comparison. Return the register with the result. */
12810 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12811 rtx op_true
, rtx op_false
)
12813 enum machine_mode mode
= GET_MODE (dest
);
12816 cmp_op0
= force_reg (mode
, cmp_op0
);
12817 if (!nonimmediate_operand (cmp_op1
, mode
))
12818 cmp_op1
= force_reg (mode
, cmp_op1
);
12821 || reg_overlap_mentioned_p (dest
, op_true
)
12822 || reg_overlap_mentioned_p (dest
, op_false
))
12823 dest
= gen_reg_rtx (mode
);
12825 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12826 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12831 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12832 operations. This is used for both scalar and vector conditional moves. */
12835 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12837 enum machine_mode mode
= GET_MODE (dest
);
12840 if (op_false
== CONST0_RTX (mode
))
12842 op_true
= force_reg (mode
, op_true
);
12843 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12844 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12846 else if (op_true
== CONST0_RTX (mode
))
12848 op_false
= force_reg (mode
, op_false
);
12849 x
= gen_rtx_NOT (mode
, cmp
);
12850 x
= gen_rtx_AND (mode
, x
, op_false
);
12851 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12855 op_true
= force_reg (mode
, op_true
);
12856 op_false
= force_reg (mode
, op_false
);
12858 t2
= gen_reg_rtx (mode
);
12860 t3
= gen_reg_rtx (mode
);
12864 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12865 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12867 x
= gen_rtx_NOT (mode
, cmp
);
12868 x
= gen_rtx_AND (mode
, x
, op_false
);
12869 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12871 x
= gen_rtx_IOR (mode
, t3
, t2
);
12872 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12876 /* Expand a floating-point conditional move. Return true if successful. */
12879 ix86_expand_fp_movcc (rtx operands
[])
12881 enum machine_mode mode
= GET_MODE (operands
[0]);
12882 enum rtx_code code
= GET_CODE (operands
[1]);
12883 rtx tmp
, compare_op
, second_test
, bypass_test
;
12885 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12887 enum machine_mode cmode
;
12889 /* Since we've no cmove for sse registers, don't force bad register
12890 allocation just to gain access to it. Deny movcc when the
12891 comparison mode doesn't match the move mode. */
12892 cmode
= GET_MODE (ix86_compare_op0
);
12893 if (cmode
== VOIDmode
)
12894 cmode
= GET_MODE (ix86_compare_op1
);
12898 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12900 &ix86_compare_op1
);
12901 if (code
== UNKNOWN
)
12904 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12905 ix86_compare_op1
, operands
[2],
12909 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12910 ix86_compare_op1
, operands
[2], operands
[3]);
12911 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12915 /* The floating point conditional move instructions don't directly
12916 support conditions resulting from a signed integer comparison. */
12918 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12920 /* The floating point conditional move instructions don't directly
12921 support signed integer comparisons. */
12923 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12925 gcc_assert (!second_test
&& !bypass_test
);
12926 tmp
= gen_reg_rtx (QImode
);
12927 ix86_expand_setcc (code
, tmp
);
12929 ix86_compare_op0
= tmp
;
12930 ix86_compare_op1
= const0_rtx
;
12931 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12933 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12935 tmp
= gen_reg_rtx (mode
);
12936 emit_move_insn (tmp
, operands
[3]);
12939 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12941 tmp
= gen_reg_rtx (mode
);
12942 emit_move_insn (tmp
, operands
[2]);
12946 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12947 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12948 operands
[2], operands
[3])));
12950 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12951 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12952 operands
[3], operands
[0])));
12954 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12955 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12956 operands
[2], operands
[0])));
12961 /* Expand a floating-point vector conditional move; a vcond operation
12962 rather than a movcc operation. */
12965 ix86_expand_fp_vcond (rtx operands
[])
12967 enum rtx_code code
= GET_CODE (operands
[3]);
12970 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12971 &operands
[4], &operands
[5]);
12972 if (code
== UNKNOWN
)
12975 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12976 operands
[5], operands
[1], operands
[2]))
12979 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12980 operands
[1], operands
[2]);
12981 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12985 /* Expand a signed/unsigned integral vector conditional move. */
12988 ix86_expand_int_vcond (rtx operands
[])
12990 enum machine_mode mode
= GET_MODE (operands
[0]);
12991 enum rtx_code code
= GET_CODE (operands
[3]);
12992 bool negate
= false;
12995 cop0
= operands
[4];
12996 cop1
= operands
[5];
12998 /* Canonicalize the comparison to EQ, GT, GTU. */
13009 code
= reverse_condition (code
);
13015 code
= reverse_condition (code
);
13021 code
= swap_condition (code
);
13022 x
= cop0
, cop0
= cop1
, cop1
= x
;
13026 gcc_unreachable ();
13029 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13030 if (mode
== V2DImode
)
13035 /* SSE4.1 supports EQ. */
13036 if (!TARGET_SSE4_1
)
13042 /* SSE4.2 supports GT/GTU. */
13043 if (!TARGET_SSE4_2
)
13048 gcc_unreachable ();
13052 /* Unsigned parallel compare is not supported by the hardware. Play some
13053 tricks to turn this into a signed comparison against 0. */
13056 cop0
= force_reg (mode
, cop0
);
13065 /* Perform a parallel modulo subtraction. */
13066 t1
= gen_reg_rtx (mode
);
13067 emit_insn ((mode
== V4SImode
13069 : gen_subv2di3
) (t1
, cop0
, cop1
));
13071 /* Extract the original sign bit of op0. */
13072 mask
= ix86_build_signbit_mask (GET_MODE_INNER (mode
),
13074 t2
= gen_reg_rtx (mode
);
13075 emit_insn ((mode
== V4SImode
13077 : gen_andv2di3
) (t2
, cop0
, mask
));
13079 /* XOR it back into the result of the subtraction. This results
13080 in the sign bit set iff we saw unsigned underflow. */
13081 x
= gen_reg_rtx (mode
);
13082 emit_insn ((mode
== V4SImode
13084 : gen_xorv2di3
) (x
, t1
, t2
));
13092 /* Perform a parallel unsigned saturating subtraction. */
13093 x
= gen_reg_rtx (mode
);
13094 emit_insn (gen_rtx_SET (VOIDmode
, x
,
13095 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
13102 gcc_unreachable ();
13106 cop1
= CONST0_RTX (mode
);
13109 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
13110 operands
[1+negate
], operands
[2-negate
]);
13112 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
13113 operands
[2-negate
]);
13117 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13118 true if we should do zero extension, else sign extension. HIGH_P is
13119 true if we want the N/2 high elements, else the low elements. */
13122 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13124 enum machine_mode imode
= GET_MODE (operands
[1]);
13125 rtx (*unpack
)(rtx
, rtx
, rtx
);
13132 unpack
= gen_vec_interleave_highv16qi
;
13134 unpack
= gen_vec_interleave_lowv16qi
;
13138 unpack
= gen_vec_interleave_highv8hi
;
13140 unpack
= gen_vec_interleave_lowv8hi
;
13144 unpack
= gen_vec_interleave_highv4si
;
13146 unpack
= gen_vec_interleave_lowv4si
;
13149 gcc_unreachable ();
13152 dest
= gen_lowpart (imode
, operands
[0]);
13155 se
= force_reg (imode
, CONST0_RTX (imode
));
13157 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
13158 operands
[1], pc_rtx
, pc_rtx
);
13160 emit_insn (unpack (dest
, operands
[1], se
));
13163 /* This function performs the same task as ix86_expand_sse_unpack,
13164 but with SSE4.1 instructions. */
13167 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13169 enum machine_mode imode
= GET_MODE (operands
[1]);
13170 rtx (*unpack
)(rtx
, rtx
);
13177 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
13179 unpack
= gen_sse4_1_extendv8qiv8hi2
;
13183 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
13185 unpack
= gen_sse4_1_extendv4hiv4si2
;
13189 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
13191 unpack
= gen_sse4_1_extendv2siv2di2
;
13194 gcc_unreachable ();
13197 dest
= operands
[0];
13200 /* Shift higher 8 bytes to lower 8 bytes. */
13201 src
= gen_reg_rtx (imode
);
13202 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode
, src
),
13203 gen_lowpart (TImode
, operands
[1]),
13209 emit_insn (unpack (dest
, src
));
13212 /* Expand conditional increment or decrement using adb/sbb instructions.
13213 The default case using setcc followed by the conditional move can be
13214 done by generic code. */
13216 ix86_expand_int_addcc (rtx operands
[])
13218 enum rtx_code code
= GET_CODE (operands
[1]);
13220 rtx val
= const0_rtx
;
13221 bool fpcmp
= false;
13222 enum machine_mode mode
= GET_MODE (operands
[0]);
13224 if (operands
[3] != const1_rtx
13225 && operands
[3] != constm1_rtx
)
13227 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
13228 ix86_compare_op1
, &compare_op
))
13230 code
= GET_CODE (compare_op
);
13232 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
13233 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
13236 code
= ix86_fp_compare_code_to_integer (code
);
13243 PUT_CODE (compare_op
,
13244 reverse_condition_maybe_unordered
13245 (GET_CODE (compare_op
)));
13247 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
13249 PUT_MODE (compare_op
, mode
);
13251 /* Construct either adc or sbb insn. */
13252 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
13254 switch (GET_MODE (operands
[0]))
13257 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13260 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13263 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13266 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13269 gcc_unreachable ();
13274 switch (GET_MODE (operands
[0]))
13277 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13280 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13283 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13286 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13289 gcc_unreachable ();
13292 return 1; /* DONE */
13296 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13297 works for floating pointer parameters and nonoffsetable memories.
13298 For pushes, it returns just stack offsets; the values will be saved
13299 in the right order. Maximally three parts are generated. */
13302 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
13307 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
13309 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
13311 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
13312 gcc_assert (size
>= 2 && size
<= 3);
13314 /* Optimize constant pool reference to immediates. This is used by fp
13315 moves, that force all constants to memory to allow combining. */
13316 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
13318 rtx tmp
= maybe_get_pool_constant (operand
);
13323 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
13325 /* The only non-offsetable memories we handle are pushes. */
13326 int ok
= push_operand (operand
, VOIDmode
);
13330 operand
= copy_rtx (operand
);
13331 PUT_MODE (operand
, Pmode
);
13332 parts
[0] = parts
[1] = parts
[2] = operand
;
13336 if (GET_CODE (operand
) == CONST_VECTOR
)
13338 enum machine_mode imode
= int_mode_for_mode (mode
);
13339 /* Caution: if we looked through a constant pool memory above,
13340 the operand may actually have a different mode now. That's
13341 ok, since we want to pun this all the way back to an integer. */
13342 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
13343 gcc_assert (operand
!= NULL
);
13349 if (mode
== DImode
)
13350 split_di (&operand
, 1, &parts
[0], &parts
[1]);
13353 if (REG_P (operand
))
13355 gcc_assert (reload_completed
);
13356 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
13357 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
13359 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
13361 else if (offsettable_memref_p (operand
))
13363 operand
= adjust_address (operand
, SImode
, 0);
13364 parts
[0] = operand
;
13365 parts
[1] = adjust_address (operand
, SImode
, 4);
13367 parts
[2] = adjust_address (operand
, SImode
, 8);
13369 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13374 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13378 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
13379 parts
[2] = gen_int_mode (l
[2], SImode
);
13382 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
13385 gcc_unreachable ();
13387 parts
[1] = gen_int_mode (l
[1], SImode
);
13388 parts
[0] = gen_int_mode (l
[0], SImode
);
13391 gcc_unreachable ();
13396 if (mode
== TImode
)
13397 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
13398 if (mode
== XFmode
|| mode
== TFmode
)
13400 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
13401 if (REG_P (operand
))
13403 gcc_assert (reload_completed
);
13404 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
13405 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
13407 else if (offsettable_memref_p (operand
))
13409 operand
= adjust_address (operand
, DImode
, 0);
13410 parts
[0] = operand
;
13411 parts
[1] = adjust_address (operand
, upper_mode
, 8);
13413 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13418 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13419 real_to_target (l
, &r
, mode
);
13421 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13422 if (HOST_BITS_PER_WIDE_INT
>= 64)
13425 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13426 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13429 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13431 if (upper_mode
== SImode
)
13432 parts
[1] = gen_int_mode (l
[2], SImode
);
13433 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13436 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13437 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13440 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13443 gcc_unreachable ();
13450 /* Emit insns to perform a move or push of DI, DF, and XF values.
13451 Return false when normal moves are needed; true when all required
13452 insns have been emitted. Operands 2-4 contain the input values
13453 int the correct order; operands 5-7 contain the output values. */
13456 ix86_split_long_move (rtx operands
[])
13461 int collisions
= 0;
13462 enum machine_mode mode
= GET_MODE (operands
[0]);
13464 /* The DFmode expanders may ask us to move double.
13465 For 64bit target this is single move. By hiding the fact
13466 here we simplify i386.md splitters. */
13467 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13469 /* Optimize constant pool reference to immediates. This is used by
13470 fp moves, that force all constants to memory to allow combining. */
13472 if (MEM_P (operands
[1])
13473 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13474 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13475 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13476 if (push_operand (operands
[0], VOIDmode
))
13478 operands
[0] = copy_rtx (operands
[0]);
13479 PUT_MODE (operands
[0], Pmode
);
13482 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13483 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13484 emit_move_insn (operands
[0], operands
[1]);
13488 /* The only non-offsettable memory we handle is push. */
13489 if (push_operand (operands
[0], VOIDmode
))
13492 gcc_assert (!MEM_P (operands
[0])
13493 || offsettable_memref_p (operands
[0]));
13495 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13496 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13498 /* When emitting push, take care for source operands on the stack. */
13499 if (push
&& MEM_P (operands
[1])
13500 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13503 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13504 XEXP (part
[1][2], 0));
13505 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13506 XEXP (part
[1][1], 0));
13509 /* We need to do copy in the right order in case an address register
13510 of the source overlaps the destination. */
13511 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13513 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13515 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13518 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13521 /* Collision in the middle part can be handled by reordering. */
13522 if (collisions
== 1 && nparts
== 3
13523 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13526 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13527 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13530 /* If there are more collisions, we can't handle it by reordering.
13531 Do an lea to the last part and use only one colliding move. */
13532 else if (collisions
> 1)
13538 base
= part
[0][nparts
- 1];
13540 /* Handle the case when the last part isn't valid for lea.
13541 Happens in 64-bit mode storing the 12-byte XFmode. */
13542 if (GET_MODE (base
) != Pmode
)
13543 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13545 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13546 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13547 part
[1][1] = replace_equiv_address (part
[1][1],
13548 plus_constant (base
, UNITS_PER_WORD
));
13550 part
[1][2] = replace_equiv_address (part
[1][2],
13551 plus_constant (base
, 8));
13561 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13562 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13563 emit_move_insn (part
[0][2], part
[1][2]);
13568 /* In 64bit mode we don't have 32bit push available. In case this is
13569 register, it is OK - we will just use larger counterpart. We also
13570 retype memory - these comes from attempt to avoid REX prefix on
13571 moving of second half of TFmode value. */
13572 if (GET_MODE (part
[1][1]) == SImode
)
13574 switch (GET_CODE (part
[1][1]))
13577 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13581 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13585 gcc_unreachable ();
13588 if (GET_MODE (part
[1][0]) == SImode
)
13589 part
[1][0] = part
[1][1];
13592 emit_move_insn (part
[0][1], part
[1][1]);
13593 emit_move_insn (part
[0][0], part
[1][0]);
13597 /* Choose correct order to not overwrite the source before it is copied. */
13598 if ((REG_P (part
[0][0])
13599 && REG_P (part
[1][1])
13600 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13602 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13604 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13608 operands
[2] = part
[0][2];
13609 operands
[3] = part
[0][1];
13610 operands
[4] = part
[0][0];
13611 operands
[5] = part
[1][2];
13612 operands
[6] = part
[1][1];
13613 operands
[7] = part
[1][0];
13617 operands
[2] = part
[0][1];
13618 operands
[3] = part
[0][0];
13619 operands
[5] = part
[1][1];
13620 operands
[6] = part
[1][0];
13627 operands
[2] = part
[0][0];
13628 operands
[3] = part
[0][1];
13629 operands
[4] = part
[0][2];
13630 operands
[5] = part
[1][0];
13631 operands
[6] = part
[1][1];
13632 operands
[7] = part
[1][2];
13636 operands
[2] = part
[0][0];
13637 operands
[3] = part
[0][1];
13638 operands
[5] = part
[1][0];
13639 operands
[6] = part
[1][1];
13643 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13646 if (CONST_INT_P (operands
[5])
13647 && operands
[5] != const0_rtx
13648 && REG_P (operands
[2]))
13650 if (CONST_INT_P (operands
[6])
13651 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13652 operands
[6] = operands
[2];
13655 && CONST_INT_P (operands
[7])
13656 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13657 operands
[7] = operands
[2];
13661 && CONST_INT_P (operands
[6])
13662 && operands
[6] != const0_rtx
13663 && REG_P (operands
[3])
13664 && CONST_INT_P (operands
[7])
13665 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13666 operands
[7] = operands
[3];
13669 emit_move_insn (operands
[2], operands
[5]);
13670 emit_move_insn (operands
[3], operands
[6]);
13672 emit_move_insn (operands
[4], operands
[7]);
13677 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13678 left shift by a constant, either using a single shift or
13679 a sequence of add instructions. */
13682 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13686 emit_insn ((mode
== DImode
13688 : gen_adddi3
) (operand
, operand
, operand
));
13690 else if (!optimize_size
13691 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13694 for (i
=0; i
<count
; i
++)
13696 emit_insn ((mode
== DImode
13698 : gen_adddi3
) (operand
, operand
, operand
));
13702 emit_insn ((mode
== DImode
13704 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13708 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13710 rtx low
[2], high
[2];
13712 const int single_width
= mode
== DImode
? 32 : 64;
13714 if (CONST_INT_P (operands
[2]))
13716 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13717 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13719 if (count
>= single_width
)
13721 emit_move_insn (high
[0], low
[1]);
13722 emit_move_insn (low
[0], const0_rtx
);
13724 if (count
> single_width
)
13725 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13729 if (!rtx_equal_p (operands
[0], operands
[1]))
13730 emit_move_insn (operands
[0], operands
[1]);
13731 emit_insn ((mode
== DImode
13733 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13734 ix86_expand_ashl_const (low
[0], count
, mode
);
13739 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13741 if (operands
[1] == const1_rtx
)
13743 /* Assuming we've chosen a QImode capable registers, then 1 << N
13744 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13745 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13747 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13749 ix86_expand_clear (low
[0]);
13750 ix86_expand_clear (high
[0]);
13751 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13753 d
= gen_lowpart (QImode
, low
[0]);
13754 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13755 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13756 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13758 d
= gen_lowpart (QImode
, high
[0]);
13759 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13760 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13761 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13764 /* Otherwise, we can get the same results by manually performing
13765 a bit extract operation on bit 5/6, and then performing the two
13766 shifts. The two methods of getting 0/1 into low/high are exactly
13767 the same size. Avoiding the shift in the bit extract case helps
13768 pentium4 a bit; no one else seems to care much either way. */
13773 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13774 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13776 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13777 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13779 emit_insn ((mode
== DImode
13781 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13782 emit_insn ((mode
== DImode
13784 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13785 emit_move_insn (low
[0], high
[0]);
13786 emit_insn ((mode
== DImode
13788 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13791 emit_insn ((mode
== DImode
13793 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13794 emit_insn ((mode
== DImode
13796 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13800 if (operands
[1] == constm1_rtx
)
13802 /* For -1 << N, we can avoid the shld instruction, because we
13803 know that we're shifting 0...31/63 ones into a -1. */
13804 emit_move_insn (low
[0], constm1_rtx
);
13806 emit_move_insn (high
[0], low
[0]);
13808 emit_move_insn (high
[0], constm1_rtx
);
13812 if (!rtx_equal_p (operands
[0], operands
[1]))
13813 emit_move_insn (operands
[0], operands
[1]);
13815 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13816 emit_insn ((mode
== DImode
13818 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13821 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13823 if (TARGET_CMOVE
&& scratch
)
13825 ix86_expand_clear (scratch
);
13826 emit_insn ((mode
== DImode
13827 ? gen_x86_shift_adj_1
13828 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13831 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13835 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13837 rtx low
[2], high
[2];
13839 const int single_width
= mode
== DImode
? 32 : 64;
13841 if (CONST_INT_P (operands
[2]))
13843 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13844 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13846 if (count
== single_width
* 2 - 1)
13848 emit_move_insn (high
[0], high
[1]);
13849 emit_insn ((mode
== DImode
13851 : gen_ashrdi3
) (high
[0], high
[0],
13852 GEN_INT (single_width
- 1)));
13853 emit_move_insn (low
[0], high
[0]);
13856 else if (count
>= single_width
)
13858 emit_move_insn (low
[0], high
[1]);
13859 emit_move_insn (high
[0], low
[0]);
13860 emit_insn ((mode
== DImode
13862 : gen_ashrdi3
) (high
[0], high
[0],
13863 GEN_INT (single_width
- 1)));
13864 if (count
> single_width
)
13865 emit_insn ((mode
== DImode
13867 : gen_ashrdi3
) (low
[0], low
[0],
13868 GEN_INT (count
- single_width
)));
13872 if (!rtx_equal_p (operands
[0], operands
[1]))
13873 emit_move_insn (operands
[0], operands
[1]);
13874 emit_insn ((mode
== DImode
13876 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13877 emit_insn ((mode
== DImode
13879 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13884 if (!rtx_equal_p (operands
[0], operands
[1]))
13885 emit_move_insn (operands
[0], operands
[1]);
13887 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13889 emit_insn ((mode
== DImode
13891 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13892 emit_insn ((mode
== DImode
13894 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13896 if (TARGET_CMOVE
&& scratch
)
13898 emit_move_insn (scratch
, high
[0]);
13899 emit_insn ((mode
== DImode
13901 : gen_ashrdi3
) (scratch
, scratch
,
13902 GEN_INT (single_width
- 1)));
13903 emit_insn ((mode
== DImode
13904 ? gen_x86_shift_adj_1
13905 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13909 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13914 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13916 rtx low
[2], high
[2];
13918 const int single_width
= mode
== DImode
? 32 : 64;
13920 if (CONST_INT_P (operands
[2]))
13922 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13923 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13925 if (count
>= single_width
)
13927 emit_move_insn (low
[0], high
[1]);
13928 ix86_expand_clear (high
[0]);
13930 if (count
> single_width
)
13931 emit_insn ((mode
== DImode
13933 : gen_lshrdi3
) (low
[0], low
[0],
13934 GEN_INT (count
- single_width
)));
13938 if (!rtx_equal_p (operands
[0], operands
[1]))
13939 emit_move_insn (operands
[0], operands
[1]);
13940 emit_insn ((mode
== DImode
13942 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13943 emit_insn ((mode
== DImode
13945 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13950 if (!rtx_equal_p (operands
[0], operands
[1]))
13951 emit_move_insn (operands
[0], operands
[1]);
13953 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13955 emit_insn ((mode
== DImode
13957 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13958 emit_insn ((mode
== DImode
13960 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13962 /* Heh. By reversing the arguments, we can reuse this pattern. */
13963 if (TARGET_CMOVE
&& scratch
)
13965 ix86_expand_clear (scratch
);
13966 emit_insn ((mode
== DImode
13967 ? gen_x86_shift_adj_1
13968 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13972 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13976 /* Predict just emitted jump instruction to be taken with probability PROB. */
13978 predict_jump (int prob
)
13980 rtx insn
= get_last_insn ();
13981 gcc_assert (JUMP_P (insn
));
13983 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13988 /* Helper function for the string operations below. Dest VARIABLE whether
13989 it is aligned to VALUE bytes. If true, jump to the label. */
13991 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13993 rtx label
= gen_label_rtx ();
13994 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13995 if (GET_MODE (variable
) == DImode
)
13996 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13998 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13999 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
14002 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
14004 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14008 /* Adjust COUNTER by the VALUE. */
14010 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
14012 if (GET_MODE (countreg
) == DImode
)
14013 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
14015 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
14018 /* Zero extend possibly SImode EXP to Pmode register. */
14020 ix86_zero_extend_to_Pmode (rtx exp
)
14023 if (GET_MODE (exp
) == VOIDmode
)
14024 return force_reg (Pmode
, exp
);
14025 if (GET_MODE (exp
) == Pmode
)
14026 return copy_to_mode_reg (Pmode
, exp
);
14027 r
= gen_reg_rtx (Pmode
);
14028 emit_insn (gen_zero_extendsidi2 (r
, exp
));
14032 /* Divide COUNTREG by SCALE. */
14034 scale_counter (rtx countreg
, int scale
)
14037 rtx piece_size_mask
;
14041 if (CONST_INT_P (countreg
))
14042 return GEN_INT (INTVAL (countreg
) / scale
);
14043 gcc_assert (REG_P (countreg
));
14045 piece_size_mask
= GEN_INT (scale
- 1);
14046 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
14047 GEN_INT (exact_log2 (scale
)),
14048 NULL
, 1, OPTAB_DIRECT
);
14052 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14053 DImode for constant loop counts. */
14055 static enum machine_mode
14056 counter_mode (rtx count_exp
)
14058 if (GET_MODE (count_exp
) != VOIDmode
)
14059 return GET_MODE (count_exp
);
14060 if (GET_CODE (count_exp
) != CONST_INT
)
14062 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
14067 /* When SRCPTR is non-NULL, output simple loop to move memory
14068 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14069 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14070 equivalent loop to set memory by VALUE (supposed to be in MODE).
14072 The size is rounded down to whole number of chunk size moved at once.
14073 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14077 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
14078 rtx destptr
, rtx srcptr
, rtx value
,
14079 rtx count
, enum machine_mode mode
, int unroll
,
14082 rtx out_label
, top_label
, iter
, tmp
;
14083 enum machine_mode iter_mode
= counter_mode (count
);
14084 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
14085 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
14091 top_label
= gen_label_rtx ();
14092 out_label
= gen_label_rtx ();
14093 iter
= gen_reg_rtx (iter_mode
);
14095 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
14096 NULL
, 1, OPTAB_DIRECT
);
14097 /* Those two should combine. */
14098 if (piece_size
== const1_rtx
)
14100 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
14102 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
14104 emit_move_insn (iter
, const0_rtx
);
14106 emit_label (top_label
);
14108 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
14109 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
14110 destmem
= change_address (destmem
, mode
, x_addr
);
14114 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
14115 srcmem
= change_address (srcmem
, mode
, y_addr
);
14117 /* When unrolling for chips that reorder memory reads and writes,
14118 we can save registers by using single temporary.
14119 Also using 4 temporaries is overkill in 32bit mode. */
14120 if (!TARGET_64BIT
&& 0)
14122 for (i
= 0; i
< unroll
; i
++)
14127 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14129 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14131 emit_move_insn (destmem
, srcmem
);
14137 gcc_assert (unroll
<= 4);
14138 for (i
= 0; i
< unroll
; i
++)
14140 tmpreg
[i
] = gen_reg_rtx (mode
);
14144 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14146 emit_move_insn (tmpreg
[i
], srcmem
);
14148 for (i
= 0; i
< unroll
; i
++)
14153 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14155 emit_move_insn (destmem
, tmpreg
[i
]);
14160 for (i
= 0; i
< unroll
; i
++)
14164 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14165 emit_move_insn (destmem
, value
);
14168 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
14169 true, OPTAB_LIB_WIDEN
);
14171 emit_move_insn (iter
, tmp
);
14173 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
14175 if (expected_size
!= -1)
14177 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
14178 if (expected_size
== 0)
14180 else if (expected_size
> REG_BR_PROB_BASE
)
14181 predict_jump (REG_BR_PROB_BASE
- 1);
14183 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
14186 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
14187 iter
= ix86_zero_extend_to_Pmode (iter
);
14188 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
14189 true, OPTAB_LIB_WIDEN
);
14190 if (tmp
!= destptr
)
14191 emit_move_insn (destptr
, tmp
);
14194 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
14195 true, OPTAB_LIB_WIDEN
);
14197 emit_move_insn (srcptr
, tmp
);
14199 emit_label (out_label
);
14202 /* Output "rep; mov" instruction.
14203 Arguments have same meaning as for previous function */
14205 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
14206 rtx destptr
, rtx srcptr
,
14208 enum machine_mode mode
)
14214 /* If the size is known, it is shorter to use rep movs. */
14215 if (mode
== QImode
&& CONST_INT_P (count
)
14216 && !(INTVAL (count
) & 3))
14219 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14220 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14221 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
14222 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
14223 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14224 if (mode
!= QImode
)
14226 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14227 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14228 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14229 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14230 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14231 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
14235 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14236 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
14238 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
14242 /* Output "rep; stos" instruction.
14243 Arguments have same meaning as for previous function */
14245 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
14247 enum machine_mode mode
)
14252 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14253 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14254 value
= force_reg (mode
, gen_lowpart (mode
, value
));
14255 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14256 if (mode
!= QImode
)
14258 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14259 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14260 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14263 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14264 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
14268 emit_strmov (rtx destmem
, rtx srcmem
,
14269 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
14271 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
14272 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
14273 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14276 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14278 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
14279 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
14282 if (CONST_INT_P (count
))
14284 HOST_WIDE_INT countval
= INTVAL (count
);
14287 if ((countval
& 0x10) && max_size
> 16)
14291 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14292 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
14295 gcc_unreachable ();
14298 if ((countval
& 0x08) && max_size
> 8)
14301 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14304 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14305 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
14309 if ((countval
& 0x04) && max_size
> 4)
14311 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14314 if ((countval
& 0x02) && max_size
> 2)
14316 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
14319 if ((countval
& 0x01) && max_size
> 1)
14321 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
14328 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
14329 count
, 1, OPTAB_DIRECT
);
14330 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
14331 count
, QImode
, 1, 4);
14335 /* When there are stringops, we can cheaply increase dest and src pointers.
14336 Otherwise we save code size by maintaining offset (zero is readily
14337 available from preceding rep operation) and using x86 addressing modes.
14339 if (TARGET_SINGLE_STRINGOP
)
14343 rtx label
= ix86_expand_aligntest (count
, 4, true);
14344 src
= change_address (srcmem
, SImode
, srcptr
);
14345 dest
= change_address (destmem
, SImode
, destptr
);
14346 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14347 emit_label (label
);
14348 LABEL_NUSES (label
) = 1;
14352 rtx label
= ix86_expand_aligntest (count
, 2, true);
14353 src
= change_address (srcmem
, HImode
, srcptr
);
14354 dest
= change_address (destmem
, HImode
, destptr
);
14355 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14356 emit_label (label
);
14357 LABEL_NUSES (label
) = 1;
14361 rtx label
= ix86_expand_aligntest (count
, 1, true);
14362 src
= change_address (srcmem
, QImode
, srcptr
);
14363 dest
= change_address (destmem
, QImode
, destptr
);
14364 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14365 emit_label (label
);
14366 LABEL_NUSES (label
) = 1;
14371 rtx offset
= force_reg (Pmode
, const0_rtx
);
14376 rtx label
= ix86_expand_aligntest (count
, 4, true);
14377 src
= change_address (srcmem
, SImode
, srcptr
);
14378 dest
= change_address (destmem
, SImode
, destptr
);
14379 emit_move_insn (dest
, src
);
14380 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
14381 true, OPTAB_LIB_WIDEN
);
14383 emit_move_insn (offset
, tmp
);
14384 emit_label (label
);
14385 LABEL_NUSES (label
) = 1;
14389 rtx label
= ix86_expand_aligntest (count
, 2, true);
14390 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14391 src
= change_address (srcmem
, HImode
, tmp
);
14392 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14393 dest
= change_address (destmem
, HImode
, tmp
);
14394 emit_move_insn (dest
, src
);
14395 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
14396 true, OPTAB_LIB_WIDEN
);
14398 emit_move_insn (offset
, tmp
);
14399 emit_label (label
);
14400 LABEL_NUSES (label
) = 1;
14404 rtx label
= ix86_expand_aligntest (count
, 1, true);
14405 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14406 src
= change_address (srcmem
, QImode
, tmp
);
14407 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14408 dest
= change_address (destmem
, QImode
, tmp
);
14409 emit_move_insn (dest
, src
);
14410 emit_label (label
);
14411 LABEL_NUSES (label
) = 1;
14416 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14418 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
14419 rtx count
, int max_size
)
14422 expand_simple_binop (counter_mode (count
), AND
, count
,
14423 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14424 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14425 gen_lowpart (QImode
, value
), count
, QImode
,
14429 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14431 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
14435 if (CONST_INT_P (count
))
14437 HOST_WIDE_INT countval
= INTVAL (count
);
14440 if ((countval
& 0x10) && max_size
> 16)
14444 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14445 emit_insn (gen_strset (destptr
, dest
, value
));
14446 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14447 emit_insn (gen_strset (destptr
, dest
, value
));
14450 gcc_unreachable ();
14453 if ((countval
& 0x08) && max_size
> 8)
14457 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14458 emit_insn (gen_strset (destptr
, dest
, value
));
14462 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14463 emit_insn (gen_strset (destptr
, dest
, value
));
14464 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14465 emit_insn (gen_strset (destptr
, dest
, value
));
14469 if ((countval
& 0x04) && max_size
> 4)
14471 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14472 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14475 if ((countval
& 0x02) && max_size
> 2)
14477 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14478 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14481 if ((countval
& 0x01) && max_size
> 1)
14483 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14484 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14491 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
14496 rtx label
= ix86_expand_aligntest (count
, 16, true);
14499 dest
= change_address (destmem
, DImode
, destptr
);
14500 emit_insn (gen_strset (destptr
, dest
, value
));
14501 emit_insn (gen_strset (destptr
, dest
, value
));
14505 dest
= change_address (destmem
, SImode
, destptr
);
14506 emit_insn (gen_strset (destptr
, dest
, value
));
14507 emit_insn (gen_strset (destptr
, dest
, value
));
14508 emit_insn (gen_strset (destptr
, dest
, value
));
14509 emit_insn (gen_strset (destptr
, dest
, value
));
14511 emit_label (label
);
14512 LABEL_NUSES (label
) = 1;
14516 rtx label
= ix86_expand_aligntest (count
, 8, true);
14519 dest
= change_address (destmem
, DImode
, destptr
);
14520 emit_insn (gen_strset (destptr
, dest
, value
));
14524 dest
= change_address (destmem
, SImode
, destptr
);
14525 emit_insn (gen_strset (destptr
, dest
, value
));
14526 emit_insn (gen_strset (destptr
, dest
, value
));
14528 emit_label (label
);
14529 LABEL_NUSES (label
) = 1;
14533 rtx label
= ix86_expand_aligntest (count
, 4, true);
14534 dest
= change_address (destmem
, SImode
, destptr
);
14535 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14536 emit_label (label
);
14537 LABEL_NUSES (label
) = 1;
14541 rtx label
= ix86_expand_aligntest (count
, 2, true);
14542 dest
= change_address (destmem
, HImode
, destptr
);
14543 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14544 emit_label (label
);
14545 LABEL_NUSES (label
) = 1;
14549 rtx label
= ix86_expand_aligntest (count
, 1, true);
14550 dest
= change_address (destmem
, QImode
, destptr
);
14551 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14552 emit_label (label
);
14553 LABEL_NUSES (label
) = 1;
14557 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14558 DESIRED_ALIGNMENT. */
14560 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14561 rtx destptr
, rtx srcptr
, rtx count
,
14562 int align
, int desired_alignment
)
14564 if (align
<= 1 && desired_alignment
> 1)
14566 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14567 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14568 destmem
= change_address (destmem
, QImode
, destptr
);
14569 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14570 ix86_adjust_counter (count
, 1);
14571 emit_label (label
);
14572 LABEL_NUSES (label
) = 1;
14574 if (align
<= 2 && desired_alignment
> 2)
14576 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14577 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14578 destmem
= change_address (destmem
, HImode
, destptr
);
14579 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14580 ix86_adjust_counter (count
, 2);
14581 emit_label (label
);
14582 LABEL_NUSES (label
) = 1;
14584 if (align
<= 4 && desired_alignment
> 4)
14586 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14587 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14588 destmem
= change_address (destmem
, SImode
, destptr
);
14589 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14590 ix86_adjust_counter (count
, 4);
14591 emit_label (label
);
14592 LABEL_NUSES (label
) = 1;
14594 gcc_assert (desired_alignment
<= 8);
14597 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14598 DESIRED_ALIGNMENT. */
14600 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14601 int align
, int desired_alignment
)
14603 if (align
<= 1 && desired_alignment
> 1)
14605 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14606 destmem
= change_address (destmem
, QImode
, destptr
);
14607 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14608 ix86_adjust_counter (count
, 1);
14609 emit_label (label
);
14610 LABEL_NUSES (label
) = 1;
14612 if (align
<= 2 && desired_alignment
> 2)
14614 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14615 destmem
= change_address (destmem
, HImode
, destptr
);
14616 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14617 ix86_adjust_counter (count
, 2);
14618 emit_label (label
);
14619 LABEL_NUSES (label
) = 1;
14621 if (align
<= 4 && desired_alignment
> 4)
14623 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14624 destmem
= change_address (destmem
, SImode
, destptr
);
14625 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14626 ix86_adjust_counter (count
, 4);
14627 emit_label (label
);
14628 LABEL_NUSES (label
) = 1;
14630 gcc_assert (desired_alignment
<= 8);
14633 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14634 static enum stringop_alg
14635 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14636 int *dynamic_check
)
14638 const struct stringop_algs
* algs
;
14640 *dynamic_check
= -1;
14642 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14644 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14645 if (stringop_alg
!= no_stringop
)
14646 return stringop_alg
;
14647 /* rep; movq or rep; movl is the smallest variant. */
14648 else if (optimize_size
)
14650 if (!count
|| (count
& 3))
14651 return rep_prefix_1_byte
;
14653 return rep_prefix_4_byte
;
14655 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14657 else if (expected_size
!= -1 && expected_size
< 4)
14658 return loop_1_byte
;
14659 else if (expected_size
!= -1)
14662 enum stringop_alg alg
= libcall
;
14663 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14665 gcc_assert (algs
->size
[i
].max
);
14666 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14668 if (algs
->size
[i
].alg
!= libcall
)
14669 alg
= algs
->size
[i
].alg
;
14670 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14671 last non-libcall inline algorithm. */
14672 if (TARGET_INLINE_ALL_STRINGOPS
)
14674 /* When the current size is best to be copied by a libcall,
14675 but we are still forced to inline, run the heuristic bellow
14676 that will pick code for medium sized blocks. */
14677 if (alg
!= libcall
)
14682 return algs
->size
[i
].alg
;
14685 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14687 /* When asked to inline the call anyway, try to pick meaningful choice.
14688 We look for maximal size of block that is faster to copy by hand and
14689 take blocks of at most of that size guessing that average size will
14690 be roughly half of the block.
14692 If this turns out to be bad, we might simply specify the preferred
14693 choice in ix86_costs. */
14694 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14695 && algs
->unknown_size
== libcall
)
14698 enum stringop_alg alg
;
14701 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14702 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14703 max
= algs
->size
[i
].max
;
14706 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14707 gcc_assert (*dynamic_check
== -1);
14708 gcc_assert (alg
!= libcall
);
14709 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14710 *dynamic_check
= max
;
14713 return algs
->unknown_size
;
14716 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14717 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14719 decide_alignment (int align
,
14720 enum stringop_alg alg
,
14723 int desired_align
= 0;
14727 gcc_unreachable ();
14729 case unrolled_loop
:
14730 desired_align
= GET_MODE_SIZE (Pmode
);
14732 case rep_prefix_8_byte
:
14735 case rep_prefix_4_byte
:
14736 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14737 copying whole cacheline at once. */
14738 if (TARGET_PENTIUMPRO
)
14743 case rep_prefix_1_byte
:
14744 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14745 copying whole cacheline at once. */
14746 if (TARGET_PENTIUMPRO
)
14760 if (desired_align
< align
)
14761 desired_align
= align
;
14762 if (expected_size
!= -1 && expected_size
< 4)
14763 desired_align
= align
;
14764 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14777 /* Expand string move (memcpy) operation. Use i386 string operations when
14778 profitable. expand_clrmem contains similar code. The code depends upon
14779 architecture, block size and alignment, but always has the same
14782 1) Prologue guard: Conditional that jumps up to epilogues for small
14783 blocks that can be handled by epilogue alone. This is faster but
14784 also needed for correctness, since prologue assume the block is larger
14785 than the desired alignment.
14787 Optional dynamic check for size and libcall for large
14788 blocks is emitted here too, with -minline-stringops-dynamically.
14790 2) Prologue: copy first few bytes in order to get destination aligned
14791 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14792 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14793 We emit either a jump tree on power of two sized blocks, or a byte loop.
14795 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14796 with specified algorithm.
14798 4) Epilogue: code copying tail of the block that is too small to be
14799 handled by main body (or up to size guarded by prologue guard). */
14802 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14803 rtx expected_align_exp
, rtx expected_size_exp
)
14809 rtx jump_around_label
= NULL
;
14810 HOST_WIDE_INT align
= 1;
14811 unsigned HOST_WIDE_INT count
= 0;
14812 HOST_WIDE_INT expected_size
= -1;
14813 int size_needed
= 0, epilogue_size_needed
;
14814 int desired_align
= 0;
14815 enum stringop_alg alg
;
14818 if (CONST_INT_P (align_exp
))
14819 align
= INTVAL (align_exp
);
14820 /* i386 can do misaligned access on reasonably increased cost. */
14821 if (CONST_INT_P (expected_align_exp
)
14822 && INTVAL (expected_align_exp
) > align
)
14823 align
= INTVAL (expected_align_exp
);
14824 if (CONST_INT_P (count_exp
))
14825 count
= expected_size
= INTVAL (count_exp
);
14826 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14827 expected_size
= INTVAL (expected_size_exp
);
14829 /* Step 0: Decide on preferred algorithm, desired alignment and
14830 size of chunks to be copied by main loop. */
14832 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14833 desired_align
= decide_alignment (align
, alg
, expected_size
);
14835 if (!TARGET_ALIGN_STRINGOPS
)
14836 align
= desired_align
;
14838 if (alg
== libcall
)
14840 gcc_assert (alg
!= no_stringop
);
14842 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14843 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14844 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14849 gcc_unreachable ();
14851 size_needed
= GET_MODE_SIZE (Pmode
);
14853 case unrolled_loop
:
14854 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14856 case rep_prefix_8_byte
:
14859 case rep_prefix_4_byte
:
14862 case rep_prefix_1_byte
:
14868 epilogue_size_needed
= size_needed
;
14870 /* Step 1: Prologue guard. */
14872 /* Alignment code needs count to be in register. */
14873 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14875 enum machine_mode mode
= SImode
;
14876 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14878 count_exp
= force_reg (mode
, count_exp
);
14880 gcc_assert (desired_align
>= 1 && align
>= 1);
14882 /* Ensure that alignment prologue won't copy past end of block. */
14883 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14885 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14886 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14887 Make sure it is power of 2. */
14888 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14890 label
= gen_label_rtx ();
14891 emit_cmp_and_jump_insns (count_exp
,
14892 GEN_INT (epilogue_size_needed
),
14893 LTU
, 0, counter_mode (count_exp
), 1, label
);
14894 if (GET_CODE (count_exp
) == CONST_INT
)
14896 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14897 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14899 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14901 /* Emit code to decide on runtime whether library call or inline should be
14903 if (dynamic_check
!= -1)
14905 rtx hot_label
= gen_label_rtx ();
14906 jump_around_label
= gen_label_rtx ();
14907 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14908 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14909 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14910 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14911 emit_jump (jump_around_label
);
14912 emit_label (hot_label
);
14915 /* Step 2: Alignment prologue. */
14917 if (desired_align
> align
)
14919 /* Except for the first move in epilogue, we no longer know
14920 constant offset in aliasing info. It don't seems to worth
14921 the pain to maintain it for the first move, so throw away
14923 src
= change_address (src
, BLKmode
, srcreg
);
14924 dst
= change_address (dst
, BLKmode
, destreg
);
14925 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14928 if (label
&& size_needed
== 1)
14930 emit_label (label
);
14931 LABEL_NUSES (label
) = 1;
14935 /* Step 3: Main loop. */
14941 gcc_unreachable ();
14943 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14944 count_exp
, QImode
, 1, expected_size
);
14947 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14948 count_exp
, Pmode
, 1, expected_size
);
14950 case unrolled_loop
:
14951 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14952 registers for 4 temporaries anyway. */
14953 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14954 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14957 case rep_prefix_8_byte
:
14958 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14961 case rep_prefix_4_byte
:
14962 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14965 case rep_prefix_1_byte
:
14966 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14970 /* Adjust properly the offset of src and dest memory for aliasing. */
14971 if (CONST_INT_P (count_exp
))
14973 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14974 (count
/ size_needed
) * size_needed
);
14975 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14976 (count
/ size_needed
) * size_needed
);
14980 src
= change_address (src
, BLKmode
, srcreg
);
14981 dst
= change_address (dst
, BLKmode
, destreg
);
14984 /* Step 4: Epilogue to copy the remaining bytes. */
14988 /* When the main loop is done, COUNT_EXP might hold original count,
14989 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14990 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14991 bytes. Compensate if needed. */
14993 if (size_needed
< epilogue_size_needed
)
14996 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14997 GEN_INT (size_needed
- 1), count_exp
, 1,
14999 if (tmp
!= count_exp
)
15000 emit_move_insn (count_exp
, tmp
);
15002 emit_label (label
);
15003 LABEL_NUSES (label
) = 1;
15006 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15007 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
15008 epilogue_size_needed
);
15009 if (jump_around_label
)
15010 emit_label (jump_around_label
);
15014 /* Helper function for memcpy. For QImode value 0xXY produce
15015 0xXYXYXYXY of wide specified by MODE. This is essentially
15016 a * 0x10101010, but we can do slightly better than
15017 synth_mult by unwinding the sequence by hand on CPUs with
15020 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
15022 enum machine_mode valmode
= GET_MODE (val
);
15024 int nops
= mode
== DImode
? 3 : 2;
15026 gcc_assert (mode
== SImode
|| mode
== DImode
);
15027 if (val
== const0_rtx
)
15028 return copy_to_mode_reg (mode
, const0_rtx
);
15029 if (CONST_INT_P (val
))
15031 HOST_WIDE_INT v
= INTVAL (val
) & 255;
15035 if (mode
== DImode
)
15036 v
|= (v
<< 16) << 16;
15037 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
15040 if (valmode
== VOIDmode
)
15042 if (valmode
!= QImode
)
15043 val
= gen_lowpart (QImode
, val
);
15044 if (mode
== QImode
)
15046 if (!TARGET_PARTIAL_REG_STALL
)
15048 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
15049 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
15050 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
15051 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
15053 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15054 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
15055 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
15060 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15062 if (!TARGET_PARTIAL_REG_STALL
)
15063 if (mode
== SImode
)
15064 emit_insn (gen_movsi_insv_1 (reg
, reg
));
15066 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
15069 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
15070 NULL
, 1, OPTAB_DIRECT
);
15072 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15074 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
15075 NULL
, 1, OPTAB_DIRECT
);
15076 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15077 if (mode
== SImode
)
15079 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
15080 NULL
, 1, OPTAB_DIRECT
);
15081 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15086 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15087 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15088 alignment from ALIGN to DESIRED_ALIGN. */
15090 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
15095 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
15096 promoted_val
= promote_duplicated_reg (DImode
, val
);
15097 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
15098 promoted_val
= promote_duplicated_reg (SImode
, val
);
15099 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
15100 promoted_val
= promote_duplicated_reg (HImode
, val
);
15102 promoted_val
= val
;
15104 return promoted_val
;
15107 /* Expand string clear operation (bzero). Use i386 string operations when
15108 profitable. See expand_movmem comment for explanation of individual
15109 steps performed. */
15111 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
15112 rtx expected_align_exp
, rtx expected_size_exp
)
15117 rtx jump_around_label
= NULL
;
15118 HOST_WIDE_INT align
= 1;
15119 unsigned HOST_WIDE_INT count
= 0;
15120 HOST_WIDE_INT expected_size
= -1;
15121 int size_needed
= 0, epilogue_size_needed
;
15122 int desired_align
= 0;
15123 enum stringop_alg alg
;
15124 rtx promoted_val
= NULL
;
15125 bool force_loopy_epilogue
= false;
15128 if (CONST_INT_P (align_exp
))
15129 align
= INTVAL (align_exp
);
15130 /* i386 can do misaligned access on reasonably increased cost. */
15131 if (CONST_INT_P (expected_align_exp
)
15132 && INTVAL (expected_align_exp
) > align
)
15133 align
= INTVAL (expected_align_exp
);
15134 if (CONST_INT_P (count_exp
))
15135 count
= expected_size
= INTVAL (count_exp
);
15136 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15137 expected_size
= INTVAL (expected_size_exp
);
15139 /* Step 0: Decide on preferred algorithm, desired alignment and
15140 size of chunks to be copied by main loop. */
15142 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
15143 desired_align
= decide_alignment (align
, alg
, expected_size
);
15145 if (!TARGET_ALIGN_STRINGOPS
)
15146 align
= desired_align
;
15148 if (alg
== libcall
)
15150 gcc_assert (alg
!= no_stringop
);
15152 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
15153 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15158 gcc_unreachable ();
15160 size_needed
= GET_MODE_SIZE (Pmode
);
15162 case unrolled_loop
:
15163 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
15165 case rep_prefix_8_byte
:
15168 case rep_prefix_4_byte
:
15171 case rep_prefix_1_byte
:
15176 epilogue_size_needed
= size_needed
;
15178 /* Step 1: Prologue guard. */
15180 /* Alignment code needs count to be in register. */
15181 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15183 enum machine_mode mode
= SImode
;
15184 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15186 count_exp
= force_reg (mode
, count_exp
);
15188 /* Do the cheap promotion to allow better CSE across the
15189 main loop and epilogue (ie one load of the big constant in the
15190 front of all code. */
15191 if (CONST_INT_P (val_exp
))
15192 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15193 desired_align
, align
);
15194 /* Ensure that alignment prologue won't copy past end of block. */
15195 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15197 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15198 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15199 Make sure it is power of 2. */
15200 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15202 /* To improve performance of small blocks, we jump around the VAL
15203 promoting mode. This mean that if the promoted VAL is not constant,
15204 we might not use it in the epilogue and have to use byte
15206 if (epilogue_size_needed
> 2 && !promoted_val
)
15207 force_loopy_epilogue
= true;
15208 label
= gen_label_rtx ();
15209 emit_cmp_and_jump_insns (count_exp
,
15210 GEN_INT (epilogue_size_needed
),
15211 LTU
, 0, counter_mode (count_exp
), 1, label
);
15212 if (GET_CODE (count_exp
) == CONST_INT
)
15214 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
15215 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15217 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15219 if (dynamic_check
!= -1)
15221 rtx hot_label
= gen_label_rtx ();
15222 jump_around_label
= gen_label_rtx ();
15223 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15224 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
15225 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15226 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
15227 emit_jump (jump_around_label
);
15228 emit_label (hot_label
);
15231 /* Step 2: Alignment prologue. */
15233 /* Do the expensive promotion once we branched off the small blocks. */
15235 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15236 desired_align
, align
);
15237 gcc_assert (desired_align
>= 1 && align
>= 1);
15239 if (desired_align
> align
)
15241 /* Except for the first move in epilogue, we no longer know
15242 constant offset in aliasing info. It don't seems to worth
15243 the pain to maintain it for the first move, so throw away
15245 dst
= change_address (dst
, BLKmode
, destreg
);
15246 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
15249 if (label
&& size_needed
== 1)
15251 emit_label (label
);
15252 LABEL_NUSES (label
) = 1;
15256 /* Step 3: Main loop. */
15262 gcc_unreachable ();
15264 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15265 count_exp
, QImode
, 1, expected_size
);
15268 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15269 count_exp
, Pmode
, 1, expected_size
);
15271 case unrolled_loop
:
15272 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15273 count_exp
, Pmode
, 4, expected_size
);
15275 case rep_prefix_8_byte
:
15276 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15279 case rep_prefix_4_byte
:
15280 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15283 case rep_prefix_1_byte
:
15284 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15288 /* Adjust properly the offset of src and dest memory for aliasing. */
15289 if (CONST_INT_P (count_exp
))
15290 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15291 (count
/ size_needed
) * size_needed
);
15293 dst
= change_address (dst
, BLKmode
, destreg
);
15295 /* Step 4: Epilogue to copy the remaining bytes. */
15299 /* When the main loop is done, COUNT_EXP might hold original count,
15300 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15301 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15302 bytes. Compensate if needed. */
15304 if (size_needed
< desired_align
- align
)
15307 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15308 GEN_INT (size_needed
- 1), count_exp
, 1,
15310 size_needed
= desired_align
- align
+ 1;
15311 if (tmp
!= count_exp
)
15312 emit_move_insn (count_exp
, tmp
);
15314 emit_label (label
);
15315 LABEL_NUSES (label
) = 1;
15317 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15319 if (force_loopy_epilogue
)
15320 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
15323 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
15326 if (jump_around_label
)
15327 emit_label (jump_around_label
);
15331 /* Expand the appropriate insns for doing strlen if not just doing
15334 out = result, initialized with the start address
15335 align_rtx = alignment of the address.
15336 scratch = scratch register, initialized with the startaddress when
15337 not aligned, otherwise undefined
15339 This is just the body. It needs the initializations mentioned above and
15340 some address computing at the end. These things are done in i386.md. */
15343 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
15347 rtx align_2_label
= NULL_RTX
;
15348 rtx align_3_label
= NULL_RTX
;
15349 rtx align_4_label
= gen_label_rtx ();
15350 rtx end_0_label
= gen_label_rtx ();
15352 rtx tmpreg
= gen_reg_rtx (SImode
);
15353 rtx scratch
= gen_reg_rtx (SImode
);
15357 if (CONST_INT_P (align_rtx
))
15358 align
= INTVAL (align_rtx
);
15360 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15362 /* Is there a known alignment and is it less than 4? */
15365 rtx scratch1
= gen_reg_rtx (Pmode
);
15366 emit_move_insn (scratch1
, out
);
15367 /* Is there a known alignment and is it not 2? */
15370 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
15371 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
15373 /* Leave just the 3 lower bits. */
15374 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
15375 NULL_RTX
, 0, OPTAB_WIDEN
);
15377 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15378 Pmode
, 1, align_4_label
);
15379 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
15380 Pmode
, 1, align_2_label
);
15381 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
15382 Pmode
, 1, align_3_label
);
15386 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15387 check if is aligned to 4 - byte. */
15389 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
15390 NULL_RTX
, 0, OPTAB_WIDEN
);
15392 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15393 Pmode
, 1, align_4_label
);
15396 mem
= change_address (src
, QImode
, out
);
15398 /* Now compare the bytes. */
15400 /* Compare the first n unaligned byte on a byte per byte basis. */
15401 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
15402 QImode
, 1, end_0_label
);
15404 /* Increment the address. */
15406 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15408 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15410 /* Not needed with an alignment of 2 */
15413 emit_label (align_2_label
);
15415 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15419 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15421 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15423 emit_label (align_3_label
);
15426 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15430 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15432 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15435 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15436 align this loop. It gives only huge programs, but does not help to
15438 emit_label (align_4_label
);
15440 mem
= change_address (src
, SImode
, out
);
15441 emit_move_insn (scratch
, mem
);
15443 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15445 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
15447 /* This formula yields a nonzero result iff one of the bytes is zero.
15448 This saves three branches inside loop and many cycles. */
15450 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15451 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15452 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15453 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15454 gen_int_mode (0x80808080, SImode
)));
15455 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15460 rtx reg
= gen_reg_rtx (SImode
);
15461 rtx reg2
= gen_reg_rtx (Pmode
);
15462 emit_move_insn (reg
, tmpreg
);
15463 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15465 /* If zero is not in the first two bytes, move two bytes forward. */
15466 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15467 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15468 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15469 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15470 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15473 /* Emit lea manually to avoid clobbering of flags. */
15474 emit_insn (gen_rtx_SET (SImode
, reg2
,
15475 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15477 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15478 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15479 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15480 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15487 rtx end_2_label
= gen_label_rtx ();
15488 /* Is zero in the first two bytes? */
15490 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15491 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15492 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15493 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15494 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15496 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15497 JUMP_LABEL (tmp
) = end_2_label
;
15499 /* Not in the first two. Move two bytes forward. */
15500 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15502 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15504 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15506 emit_label (end_2_label
);
15510 /* Avoid branch in fixing the byte. */
15511 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15512 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15513 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, FLAGS_REG
), const0_rtx
);
15515 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15517 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15519 emit_label (end_0_label
);
15522 /* Expand strlen. */
15525 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
15527 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
15529 /* The generic case of strlen expander is long. Avoid it's
15530 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
15532 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15533 && !TARGET_INLINE_ALL_STRINGOPS
15535 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
15538 addr
= force_reg (Pmode
, XEXP (src
, 0));
15539 scratch1
= gen_reg_rtx (Pmode
);
15541 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15544 /* Well it seems that some optimizer does not combine a call like
15545 foo(strlen(bar), strlen(bar));
15546 when the move and the subtraction is done here. It does calculate
15547 the length just once when these instructions are done inside of
15548 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15549 often used and I use one fewer register for the lifetime of
15550 output_strlen_unroll() this is better. */
15552 emit_move_insn (out
, addr
);
15554 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
15556 /* strlensi_unroll_1 returns the address of the zero at the end of
15557 the string, like memchr(), so compute the length by subtracting
15558 the start address. */
15560 emit_insn (gen_subdi3 (out
, out
, addr
));
15562 emit_insn (gen_subsi3 (out
, out
, addr
));
15567 scratch2
= gen_reg_rtx (Pmode
);
15568 scratch3
= gen_reg_rtx (Pmode
);
15569 scratch4
= force_reg (Pmode
, constm1_rtx
);
15571 emit_move_insn (scratch3
, addr
);
15572 eoschar
= force_reg (QImode
, eoschar
);
15574 src
= replace_equiv_address_nv (src
, scratch3
);
15576 /* If .md starts supporting :P, this can be done in .md. */
15577 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
15578 scratch4
), UNSPEC_SCAS
);
15579 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
15582 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
15583 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
15587 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
15588 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
15594 /* For given symbol (function) construct code to compute address of it's PLT
15595 entry in large x86-64 PIC model. */
15597 construct_plt_address (rtx symbol
)
15599 rtx tmp
= gen_reg_rtx (Pmode
);
15600 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15602 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15603 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15605 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15606 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15611 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15612 rtx callarg2 ATTRIBUTE_UNUSED
,
15613 rtx pop
, int sibcall
)
15615 rtx use
= NULL
, call
;
15617 if (pop
== const0_rtx
)
15619 gcc_assert (!TARGET_64BIT
|| !pop
);
15621 if (TARGET_MACHO
&& !TARGET_64BIT
)
15624 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15625 fnaddr
= machopic_indirect_call_target (fnaddr
);
15630 /* Static functions and indirect calls don't need the pic register. */
15631 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15632 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15633 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15634 use_reg (&use
, pic_offset_table_rtx
);
15637 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15639 rtx al
= gen_rtx_REG (QImode
, 0);
15640 emit_move_insn (al
, callarg2
);
15641 use_reg (&use
, al
);
15644 if (ix86_cmodel
== CM_LARGE_PIC
15645 && GET_CODE (fnaddr
) == MEM
15646 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15647 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15648 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15649 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15651 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15652 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15654 if (sibcall
&& TARGET_64BIT
15655 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15658 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15659 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15660 emit_move_insn (fnaddr
, addr
);
15661 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15664 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15666 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15669 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15670 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15671 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15674 call
= emit_call_insn (call
);
15676 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15680 /* Clear stack slot assignments remembered from previous functions.
15681 This is called from INIT_EXPANDERS once before RTL is emitted for each
15684 static struct machine_function
*
15685 ix86_init_machine_status (void)
15687 struct machine_function
*f
;
15689 f
= GGC_CNEW (struct machine_function
);
15690 f
->use_fast_prologue_epilogue_nregs
= -1;
15691 f
->tls_descriptor_call_expanded_p
= 0;
15696 /* Return a MEM corresponding to a stack slot with mode MODE.
15697 Allocate a new slot if necessary.
15699 The RTL for a function can have several slots available: N is
15700 which slot to use. */
15703 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15705 struct stack_local_entry
*s
;
15707 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15709 /* Virtual slot is valid only before vregs are instantiated. */
15710 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
15712 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15713 if (s
->mode
== mode
&& s
->n
== n
)
15714 return copy_rtx (s
->rtl
);
15716 s
= (struct stack_local_entry
*)
15717 ggc_alloc (sizeof (struct stack_local_entry
));
15720 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15722 s
->next
= ix86_stack_locals
;
15723 ix86_stack_locals
= s
;
15727 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15729 static GTY(()) rtx ix86_tls_symbol
;
15731 ix86_tls_get_addr (void)
15734 if (!ix86_tls_symbol
)
15736 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15737 (TARGET_ANY_GNU_TLS
15739 ? "___tls_get_addr"
15740 : "__tls_get_addr");
15743 return ix86_tls_symbol
;
15746 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15748 static GTY(()) rtx ix86_tls_module_base_symbol
;
15750 ix86_tls_module_base (void)
15753 if (!ix86_tls_module_base_symbol
)
15755 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15756 "_TLS_MODULE_BASE_");
15757 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15758 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15761 return ix86_tls_module_base_symbol
;
15764 /* Calculate the length of the memory address in the instruction
15765 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15768 memory_address_length (rtx addr
)
15770 struct ix86_address parts
;
15771 rtx base
, index
, disp
;
15775 if (GET_CODE (addr
) == PRE_DEC
15776 || GET_CODE (addr
) == POST_INC
15777 || GET_CODE (addr
) == PRE_MODIFY
15778 || GET_CODE (addr
) == POST_MODIFY
)
15781 ok
= ix86_decompose_address (addr
, &parts
);
15784 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15785 parts
.base
= SUBREG_REG (parts
.base
);
15786 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15787 parts
.index
= SUBREG_REG (parts
.index
);
15790 index
= parts
.index
;
15795 - esp as the base always wants an index,
15796 - ebp as the base always wants a displacement. */
15798 /* Register Indirect. */
15799 if (base
&& !index
&& !disp
)
15801 /* esp (for its index) and ebp (for its displacement) need
15802 the two-byte modrm form. */
15803 if (addr
== stack_pointer_rtx
15804 || addr
== arg_pointer_rtx
15805 || addr
== frame_pointer_rtx
15806 || addr
== hard_frame_pointer_rtx
)
15810 /* Direct Addressing. */
15811 else if (disp
&& !base
&& !index
)
15816 /* Find the length of the displacement constant. */
15819 if (base
&& satisfies_constraint_K (disp
))
15824 /* ebp always wants a displacement. */
15825 else if (base
== hard_frame_pointer_rtx
)
15828 /* An index requires the two-byte modrm form.... */
15830 /* ...like esp, which always wants an index. */
15831 || base
== stack_pointer_rtx
15832 || base
== arg_pointer_rtx
15833 || base
== frame_pointer_rtx
)
15840 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15841 is set, expect that insn have 8bit immediate alternative. */
15843 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15847 extract_insn_cached (insn
);
15848 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15849 if (CONSTANT_P (recog_data
.operand
[i
]))
15852 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15856 switch (get_attr_mode (insn
))
15867 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15872 fatal_insn ("unknown insn mode", insn
);
15878 /* Compute default value for "length_address" attribute. */
15880 ix86_attr_length_address_default (rtx insn
)
15884 if (get_attr_type (insn
) == TYPE_LEA
)
15886 rtx set
= PATTERN (insn
);
15888 if (GET_CODE (set
) == PARALLEL
)
15889 set
= XVECEXP (set
, 0, 0);
15891 gcc_assert (GET_CODE (set
) == SET
);
15893 return memory_address_length (SET_SRC (set
));
15896 extract_insn_cached (insn
);
15897 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15898 if (MEM_P (recog_data
.operand
[i
]))
15900 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15906 /* Return the maximum number of instructions a cpu can issue. */
15909 ix86_issue_rate (void)
15913 case PROCESSOR_PENTIUM
:
15917 case PROCESSOR_PENTIUMPRO
:
15918 case PROCESSOR_PENTIUM4
:
15919 case PROCESSOR_ATHLON
:
15921 case PROCESSOR_AMDFAM10
:
15922 case PROCESSOR_NOCONA
:
15923 case PROCESSOR_GENERIC32
:
15924 case PROCESSOR_GENERIC64
:
15927 case PROCESSOR_CORE2
:
15935 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15936 by DEP_INSN and nothing set by DEP_INSN. */
15939 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15943 /* Simplify the test for uninteresting insns. */
15944 if (insn_type
!= TYPE_SETCC
15945 && insn_type
!= TYPE_ICMOV
15946 && insn_type
!= TYPE_FCMOV
15947 && insn_type
!= TYPE_IBR
)
15950 if ((set
= single_set (dep_insn
)) != 0)
15952 set
= SET_DEST (set
);
15955 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15956 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15957 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15958 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15960 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15961 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15966 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15969 /* This test is true if the dependent insn reads the flags but
15970 not any other potentially set register. */
15971 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15974 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15980 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15981 address with operands set by DEP_INSN. */
/* NOTE(review): fragmentary extraction -- the embedded numbering skips
   lines (e.g. 15989-15990, 16000-16002, 16008-16013), dropping the
   enclosing branch structure (presumably the TYPE_LEA fast path's extra
   condition, the `else' around the operand scan, and the early-return /
   label used when no MEM operand is found).  Confirm against the
   original i386.c.  */
15984 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
/* LEA computes its address directly in its pattern; extract the SET_SRC.  */
15988 if (insn_type
== TYPE_LEA
15991 addr
= PATTERN (insn
);
15993 if (GET_CODE (addr
) == PARALLEL
)
15994 addr
= XVECEXP (addr
, 0, 0);
15996 gcc_assert (GET_CODE (addr
) == SET
);
15998 addr
= SET_SRC (addr
);
/* Otherwise scan the cached recog operands for a MEM and take its address.  */
16003 extract_insn_cached (insn
);
16004 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16005 if (MEM_P (recog_data
.operand
[i
]))
16007 addr
= XEXP (recog_data
.operand
[i
], 0);
/* An AGI exists iff DEP_INSN modifies something the address mentions.  */
16014 return modified_in_p (addr
, dep_insn
);
/* Implements TARGET_SCHED_ADJUST_COST: adjust the scheduler's latency COST
   of the dependence LINK between DEP_INSN (producer) and INSN (consumer)
   for the tuned processor.
   NOTE(review): fragmentary extraction -- the `switch (ix86_tune)' header,
   the actual cost adjustments inside each branch, the local `rtx set, set2'
   and `loadcost' declarations, `break' statements and the final return were
   lost (the embedded numbering skips those lines).  Only the conditions and
   their explanatory comments survive.  Confirm against the original
   i386.c.  */
16018 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
16020 enum attr_type insn_type
, dep_insn_type
;
16021 enum attr_memory memory
;
16023 int dep_insn_code_number
;
16025 /* Anti and output dependencies have zero cost on all CPUs. */
16026 if (REG_NOTE_KIND (link
) != 0)
16029 dep_insn_code_number
= recog_memoized (dep_insn
);
16031 /* If we can't recognize the insns, we can't really do anything. */
16032 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
16035 insn_type
= get_attr_type (insn
);
16036 dep_insn_type
= get_attr_type (dep_insn
);
16040 case PROCESSOR_PENTIUM
:
16041 /* Address Generation Interlock adds a cycle of latency. */
16042 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16045 /* ??? Compares pair with jump/setcc. */
16046 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
16049 /* Floating point stores require value to be ready one cycle earlier. */
16050 if (insn_type
== TYPE_FMOV
16051 && get_attr_memory (insn
) == MEMORY_STORE
16052 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16056 case PROCESSOR_PENTIUMPRO
:
16057 memory
= get_attr_memory (insn
);
16059 /* INT->FP conversion is expensive. */
16060 if (get_attr_fp_int_src (dep_insn
))
16063 /* There is one cycle extra latency between an FP op and a store. */
16064 if (insn_type
== TYPE_FMOV
16065 && (set
= single_set (dep_insn
)) != NULL_RTX
16066 && (set2
= single_set (insn
)) != NULL_RTX
16067 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
16068 && MEM_P (SET_DEST (set2
)))
16071 /* Show ability of reorder buffer to hide latency of load by executing
16072 in parallel with previous instruction in case
16073 previous instruction is not needed to compute the address. */
16074 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16075 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16077 /* Claim moves to take one cycle, as core can issue one load
16078 at time and the next load can start cycle later. */
16079 if (dep_insn_type
== TYPE_IMOV
16080 || dep_insn_type
== TYPE_FMOV
)
16088 memory
= get_attr_memory (insn
);
16090 /* The esp dependency is resolved before the instruction is really
16092 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
16093 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
16096 /* INT->FP conversion is expensive. */
16097 if (get_attr_fp_int_src (dep_insn
))
16100 /* Show ability of reorder buffer to hide latency of load by executing
16101 in parallel with previous instruction in case
16102 previous instruction is not needed to compute the address. */
16103 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16104 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16106 /* Claim moves to take one cycle, as core can issue one load
16107 at time and the next load can start cycle later. */
16108 if (dep_insn_type
== TYPE_IMOV
16109 || dep_insn_type
== TYPE_FMOV
)
16118 case PROCESSOR_ATHLON
:
16120 case PROCESSOR_AMDFAM10
:
16121 case PROCESSOR_GENERIC32
:
16122 case PROCESSOR_GENERIC64
:
16123 memory
= get_attr_memory (insn
);
16125 /* Show ability of reorder buffer to hide latency of load by executing
16126 in parallel with previous instruction in case
16127 previous instruction is not needed to compute the address. */
16128 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16129 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16131 enum attr_unit unit
= get_attr_unit (insn
);
16134 /* Because of the difference between the length of integer and
16135 floating unit pipeline preparation stages, the memory operands
16136 for floating point are cheaper.
16138 ??? For Athlon it the difference is most probably 2. */
16139 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
16142 loadcost
= TARGET_ATHLON
? 2 : 0;
16144 if (cost
>= loadcost
)
16157 /* How many alternative schedules to try. This should be as wide as the
16158 scheduling freedom in the DFA, but no wider. Making this value too
16159 large results extra work for the scheduler. */
16162 ia32_multipass_dfa_lookahead (void)
16164 if (ix86_tune
== PROCESSOR_PENTIUM
)
16167 if (ix86_tune
== PROCESSOR_PENTIUMPRO
16168 || ix86_tune
== PROCESSOR_K6
)
16176 /* Compute the alignment given to a constant that is being placed in memory.
16177 EXP is the constant and ALIGN is the alignment that the object would
16179 The value of this function is used instead of that alignment to align
16183 ix86_constant_alignment (tree exp
, int align
)
16185 if (TREE_CODE (exp
) == REAL_CST
)
16187 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
16189 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
16192 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16193 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16194 return BITS_PER_WORD
;
16199 /* Compute the alignment for a static variable.
16200 TYPE is the data type, and ALIGN is the alignment that
16201 the object would ordinarily have. The value of this function is used
16202 instead of that alignment to align the object. */
16205 ix86_data_alignment (tree type
, int align
)
16207 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
16209 if (AGGREGATE_TYPE_P (type
)
16210 && TYPE_SIZE (type
)
16211 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16212 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
16213 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
16214 && align
< max_align
)
16217 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16218 to 16byte boundary. */
16221 if (AGGREGATE_TYPE_P (type
)
16222 && TYPE_SIZE (type
)
16223 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16224 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
16225 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16229 if (TREE_CODE (type
) == ARRAY_TYPE
)
16231 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16233 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16236 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16239 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16241 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16244 else if ((TREE_CODE (type
) == RECORD_TYPE
16245 || TREE_CODE (type
) == UNION_TYPE
16246 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16247 && TYPE_FIELDS (type
))
16249 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16251 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16254 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16255 || TREE_CODE (type
) == INTEGER_TYPE
)
16257 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16259 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16266 /* Compute the alignment for a local variable.
16267 TYPE is the data type, and ALIGN is the alignment that
16268 the object would ordinarily have. The value of this macro is used
16269 instead of that alignment to align the object. */
16272 ix86_local_alignment (tree type
, int align
)
16274 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16275 to 16byte boundary. */
16278 if (AGGREGATE_TYPE_P (type
)
16279 && TYPE_SIZE (type
)
16280 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16281 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
16282 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16285 if (TREE_CODE (type
) == ARRAY_TYPE
)
16287 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16289 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16292 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16294 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16296 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16299 else if ((TREE_CODE (type
) == RECORD_TYPE
16300 || TREE_CODE (type
) == UNION_TYPE
16301 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16302 && TYPE_FIELDS (type
))
16304 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16306 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16309 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16310 || TREE_CODE (type
) == INTEGER_TYPE
)
16313 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16315 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16321 /* Emit RTL insns to initialize the variable parts of a trampoline.
16322 FNADDR is an RTX for the address of the function's pure code.
16323 CXT is an RTX for the static chain value for the function. */
16325 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
16329 /* Compute offset from the end of the jmp to the target function. */
16330 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16331 plus_constant (tramp
, 10),
16332 NULL_RTX
, 1, OPTAB_DIRECT
);
16333 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
16334 gen_int_mode (0xb9, QImode
));
16335 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
16336 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
16337 gen_int_mode (0xe9, QImode
));
16338 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
16343 /* Try to load address using shorter movl instead of movabs.
16344 We may want to support movq for kernel mode, but kernel does not use
16345 trampolines at the moment. */
16346 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16348 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
16349 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16350 gen_int_mode (0xbb41, HImode
));
16351 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
16352 gen_lowpart (SImode
, fnaddr
));
16357 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16358 gen_int_mode (0xbb49, HImode
));
16359 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16363 /* Load static chain using movabs to r10. */
16364 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16365 gen_int_mode (0xba49, HImode
));
16366 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16369 /* Jump to the r11 */
16370 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16371 gen_int_mode (0xff49, HImode
));
16372 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
16373 gen_int_mode (0xe3, QImode
));
16375 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
16378 #ifdef ENABLE_EXECUTE_STACK
16379 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16380 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
16384 /* Codes for all the SSE/MMX builtins. */
16387 IX86_BUILTIN_ADDPS
,
16388 IX86_BUILTIN_ADDSS
,
16389 IX86_BUILTIN_DIVPS
,
16390 IX86_BUILTIN_DIVSS
,
16391 IX86_BUILTIN_MULPS
,
16392 IX86_BUILTIN_MULSS
,
16393 IX86_BUILTIN_SUBPS
,
16394 IX86_BUILTIN_SUBSS
,
16396 IX86_BUILTIN_CMPEQPS
,
16397 IX86_BUILTIN_CMPLTPS
,
16398 IX86_BUILTIN_CMPLEPS
,
16399 IX86_BUILTIN_CMPGTPS
,
16400 IX86_BUILTIN_CMPGEPS
,
16401 IX86_BUILTIN_CMPNEQPS
,
16402 IX86_BUILTIN_CMPNLTPS
,
16403 IX86_BUILTIN_CMPNLEPS
,
16404 IX86_BUILTIN_CMPNGTPS
,
16405 IX86_BUILTIN_CMPNGEPS
,
16406 IX86_BUILTIN_CMPORDPS
,
16407 IX86_BUILTIN_CMPUNORDPS
,
16408 IX86_BUILTIN_CMPEQSS
,
16409 IX86_BUILTIN_CMPLTSS
,
16410 IX86_BUILTIN_CMPLESS
,
16411 IX86_BUILTIN_CMPNEQSS
,
16412 IX86_BUILTIN_CMPNLTSS
,
16413 IX86_BUILTIN_CMPNLESS
,
16414 IX86_BUILTIN_CMPNGTSS
,
16415 IX86_BUILTIN_CMPNGESS
,
16416 IX86_BUILTIN_CMPORDSS
,
16417 IX86_BUILTIN_CMPUNORDSS
,
16419 IX86_BUILTIN_COMIEQSS
,
16420 IX86_BUILTIN_COMILTSS
,
16421 IX86_BUILTIN_COMILESS
,
16422 IX86_BUILTIN_COMIGTSS
,
16423 IX86_BUILTIN_COMIGESS
,
16424 IX86_BUILTIN_COMINEQSS
,
16425 IX86_BUILTIN_UCOMIEQSS
,
16426 IX86_BUILTIN_UCOMILTSS
,
16427 IX86_BUILTIN_UCOMILESS
,
16428 IX86_BUILTIN_UCOMIGTSS
,
16429 IX86_BUILTIN_UCOMIGESS
,
16430 IX86_BUILTIN_UCOMINEQSS
,
16432 IX86_BUILTIN_CVTPI2PS
,
16433 IX86_BUILTIN_CVTPS2PI
,
16434 IX86_BUILTIN_CVTSI2SS
,
16435 IX86_BUILTIN_CVTSI642SS
,
16436 IX86_BUILTIN_CVTSS2SI
,
16437 IX86_BUILTIN_CVTSS2SI64
,
16438 IX86_BUILTIN_CVTTPS2PI
,
16439 IX86_BUILTIN_CVTTSS2SI
,
16440 IX86_BUILTIN_CVTTSS2SI64
,
16442 IX86_BUILTIN_MAXPS
,
16443 IX86_BUILTIN_MAXSS
,
16444 IX86_BUILTIN_MINPS
,
16445 IX86_BUILTIN_MINSS
,
16447 IX86_BUILTIN_LOADUPS
,
16448 IX86_BUILTIN_STOREUPS
,
16449 IX86_BUILTIN_MOVSS
,
16451 IX86_BUILTIN_MOVHLPS
,
16452 IX86_BUILTIN_MOVLHPS
,
16453 IX86_BUILTIN_LOADHPS
,
16454 IX86_BUILTIN_LOADLPS
,
16455 IX86_BUILTIN_STOREHPS
,
16456 IX86_BUILTIN_STORELPS
,
16458 IX86_BUILTIN_MASKMOVQ
,
16459 IX86_BUILTIN_MOVMSKPS
,
16460 IX86_BUILTIN_PMOVMSKB
,
16462 IX86_BUILTIN_MOVNTPS
,
16463 IX86_BUILTIN_MOVNTQ
,
16465 IX86_BUILTIN_LOADDQU
,
16466 IX86_BUILTIN_STOREDQU
,
16468 IX86_BUILTIN_PACKSSWB
,
16469 IX86_BUILTIN_PACKSSDW
,
16470 IX86_BUILTIN_PACKUSWB
,
16472 IX86_BUILTIN_PADDB
,
16473 IX86_BUILTIN_PADDW
,
16474 IX86_BUILTIN_PADDD
,
16475 IX86_BUILTIN_PADDQ
,
16476 IX86_BUILTIN_PADDSB
,
16477 IX86_BUILTIN_PADDSW
,
16478 IX86_BUILTIN_PADDUSB
,
16479 IX86_BUILTIN_PADDUSW
,
16480 IX86_BUILTIN_PSUBB
,
16481 IX86_BUILTIN_PSUBW
,
16482 IX86_BUILTIN_PSUBD
,
16483 IX86_BUILTIN_PSUBQ
,
16484 IX86_BUILTIN_PSUBSB
,
16485 IX86_BUILTIN_PSUBSW
,
16486 IX86_BUILTIN_PSUBUSB
,
16487 IX86_BUILTIN_PSUBUSW
,
16490 IX86_BUILTIN_PANDN
,
16494 IX86_BUILTIN_PAVGB
,
16495 IX86_BUILTIN_PAVGW
,
16497 IX86_BUILTIN_PCMPEQB
,
16498 IX86_BUILTIN_PCMPEQW
,
16499 IX86_BUILTIN_PCMPEQD
,
16500 IX86_BUILTIN_PCMPGTB
,
16501 IX86_BUILTIN_PCMPGTW
,
16502 IX86_BUILTIN_PCMPGTD
,
16504 IX86_BUILTIN_PMADDWD
,
16506 IX86_BUILTIN_PMAXSW
,
16507 IX86_BUILTIN_PMAXUB
,
16508 IX86_BUILTIN_PMINSW
,
16509 IX86_BUILTIN_PMINUB
,
16511 IX86_BUILTIN_PMULHUW
,
16512 IX86_BUILTIN_PMULHW
,
16513 IX86_BUILTIN_PMULLW
,
16515 IX86_BUILTIN_PSADBW
,
16516 IX86_BUILTIN_PSHUFW
,
16518 IX86_BUILTIN_PSLLW
,
16519 IX86_BUILTIN_PSLLD
,
16520 IX86_BUILTIN_PSLLQ
,
16521 IX86_BUILTIN_PSRAW
,
16522 IX86_BUILTIN_PSRAD
,
16523 IX86_BUILTIN_PSRLW
,
16524 IX86_BUILTIN_PSRLD
,
16525 IX86_BUILTIN_PSRLQ
,
16526 IX86_BUILTIN_PSLLWI
,
16527 IX86_BUILTIN_PSLLDI
,
16528 IX86_BUILTIN_PSLLQI
,
16529 IX86_BUILTIN_PSRAWI
,
16530 IX86_BUILTIN_PSRADI
,
16531 IX86_BUILTIN_PSRLWI
,
16532 IX86_BUILTIN_PSRLDI
,
16533 IX86_BUILTIN_PSRLQI
,
16535 IX86_BUILTIN_PUNPCKHBW
,
16536 IX86_BUILTIN_PUNPCKHWD
,
16537 IX86_BUILTIN_PUNPCKHDQ
,
16538 IX86_BUILTIN_PUNPCKLBW
,
16539 IX86_BUILTIN_PUNPCKLWD
,
16540 IX86_BUILTIN_PUNPCKLDQ
,
16542 IX86_BUILTIN_SHUFPS
,
16544 IX86_BUILTIN_RCPPS
,
16545 IX86_BUILTIN_RCPSS
,
16546 IX86_BUILTIN_RSQRTPS
,
16547 IX86_BUILTIN_RSQRTSS
,
16548 IX86_BUILTIN_RSQRTF
,
16549 IX86_BUILTIN_SQRTPS
,
16550 IX86_BUILTIN_SQRTSS
,
16552 IX86_BUILTIN_UNPCKHPS
,
16553 IX86_BUILTIN_UNPCKLPS
,
16555 IX86_BUILTIN_ANDPS
,
16556 IX86_BUILTIN_ANDNPS
,
16558 IX86_BUILTIN_XORPS
,
16561 IX86_BUILTIN_LDMXCSR
,
16562 IX86_BUILTIN_STMXCSR
,
16563 IX86_BUILTIN_SFENCE
,
16565 /* 3DNow! Original */
16566 IX86_BUILTIN_FEMMS
,
16567 IX86_BUILTIN_PAVGUSB
,
16568 IX86_BUILTIN_PF2ID
,
16569 IX86_BUILTIN_PFACC
,
16570 IX86_BUILTIN_PFADD
,
16571 IX86_BUILTIN_PFCMPEQ
,
16572 IX86_BUILTIN_PFCMPGE
,
16573 IX86_BUILTIN_PFCMPGT
,
16574 IX86_BUILTIN_PFMAX
,
16575 IX86_BUILTIN_PFMIN
,
16576 IX86_BUILTIN_PFMUL
,
16577 IX86_BUILTIN_PFRCP
,
16578 IX86_BUILTIN_PFRCPIT1
,
16579 IX86_BUILTIN_PFRCPIT2
,
16580 IX86_BUILTIN_PFRSQIT1
,
16581 IX86_BUILTIN_PFRSQRT
,
16582 IX86_BUILTIN_PFSUB
,
16583 IX86_BUILTIN_PFSUBR
,
16584 IX86_BUILTIN_PI2FD
,
16585 IX86_BUILTIN_PMULHRW
,
16587 /* 3DNow! Athlon Extensions */
16588 IX86_BUILTIN_PF2IW
,
16589 IX86_BUILTIN_PFNACC
,
16590 IX86_BUILTIN_PFPNACC
,
16591 IX86_BUILTIN_PI2FW
,
16592 IX86_BUILTIN_PSWAPDSI
,
16593 IX86_BUILTIN_PSWAPDSF
,
16596 IX86_BUILTIN_ADDPD
,
16597 IX86_BUILTIN_ADDSD
,
16598 IX86_BUILTIN_DIVPD
,
16599 IX86_BUILTIN_DIVSD
,
16600 IX86_BUILTIN_MULPD
,
16601 IX86_BUILTIN_MULSD
,
16602 IX86_BUILTIN_SUBPD
,
16603 IX86_BUILTIN_SUBSD
,
16605 IX86_BUILTIN_CMPEQPD
,
16606 IX86_BUILTIN_CMPLTPD
,
16607 IX86_BUILTIN_CMPLEPD
,
16608 IX86_BUILTIN_CMPGTPD
,
16609 IX86_BUILTIN_CMPGEPD
,
16610 IX86_BUILTIN_CMPNEQPD
,
16611 IX86_BUILTIN_CMPNLTPD
,
16612 IX86_BUILTIN_CMPNLEPD
,
16613 IX86_BUILTIN_CMPNGTPD
,
16614 IX86_BUILTIN_CMPNGEPD
,
16615 IX86_BUILTIN_CMPORDPD
,
16616 IX86_BUILTIN_CMPUNORDPD
,
16617 IX86_BUILTIN_CMPEQSD
,
16618 IX86_BUILTIN_CMPLTSD
,
16619 IX86_BUILTIN_CMPLESD
,
16620 IX86_BUILTIN_CMPNEQSD
,
16621 IX86_BUILTIN_CMPNLTSD
,
16622 IX86_BUILTIN_CMPNLESD
,
16623 IX86_BUILTIN_CMPORDSD
,
16624 IX86_BUILTIN_CMPUNORDSD
,
16626 IX86_BUILTIN_COMIEQSD
,
16627 IX86_BUILTIN_COMILTSD
,
16628 IX86_BUILTIN_COMILESD
,
16629 IX86_BUILTIN_COMIGTSD
,
16630 IX86_BUILTIN_COMIGESD
,
16631 IX86_BUILTIN_COMINEQSD
,
16632 IX86_BUILTIN_UCOMIEQSD
,
16633 IX86_BUILTIN_UCOMILTSD
,
16634 IX86_BUILTIN_UCOMILESD
,
16635 IX86_BUILTIN_UCOMIGTSD
,
16636 IX86_BUILTIN_UCOMIGESD
,
16637 IX86_BUILTIN_UCOMINEQSD
,
16639 IX86_BUILTIN_MAXPD
,
16640 IX86_BUILTIN_MAXSD
,
16641 IX86_BUILTIN_MINPD
,
16642 IX86_BUILTIN_MINSD
,
16644 IX86_BUILTIN_ANDPD
,
16645 IX86_BUILTIN_ANDNPD
,
16647 IX86_BUILTIN_XORPD
,
16649 IX86_BUILTIN_SQRTPD
,
16650 IX86_BUILTIN_SQRTSD
,
16652 IX86_BUILTIN_UNPCKHPD
,
16653 IX86_BUILTIN_UNPCKLPD
,
16655 IX86_BUILTIN_SHUFPD
,
16657 IX86_BUILTIN_LOADUPD
,
16658 IX86_BUILTIN_STOREUPD
,
16659 IX86_BUILTIN_MOVSD
,
16661 IX86_BUILTIN_LOADHPD
,
16662 IX86_BUILTIN_LOADLPD
,
16664 IX86_BUILTIN_CVTDQ2PD
,
16665 IX86_BUILTIN_CVTDQ2PS
,
16667 IX86_BUILTIN_CVTPD2DQ
,
16668 IX86_BUILTIN_CVTPD2PI
,
16669 IX86_BUILTIN_CVTPD2PS
,
16670 IX86_BUILTIN_CVTTPD2DQ
,
16671 IX86_BUILTIN_CVTTPD2PI
,
16673 IX86_BUILTIN_CVTPI2PD
,
16674 IX86_BUILTIN_CVTSI2SD
,
16675 IX86_BUILTIN_CVTSI642SD
,
16677 IX86_BUILTIN_CVTSD2SI
,
16678 IX86_BUILTIN_CVTSD2SI64
,
16679 IX86_BUILTIN_CVTSD2SS
,
16680 IX86_BUILTIN_CVTSS2SD
,
16681 IX86_BUILTIN_CVTTSD2SI
,
16682 IX86_BUILTIN_CVTTSD2SI64
,
16684 IX86_BUILTIN_CVTPS2DQ
,
16685 IX86_BUILTIN_CVTPS2PD
,
16686 IX86_BUILTIN_CVTTPS2DQ
,
16688 IX86_BUILTIN_MOVNTI
,
16689 IX86_BUILTIN_MOVNTPD
,
16690 IX86_BUILTIN_MOVNTDQ
,
16693 IX86_BUILTIN_MASKMOVDQU
,
16694 IX86_BUILTIN_MOVMSKPD
,
16695 IX86_BUILTIN_PMOVMSKB128
,
16697 IX86_BUILTIN_PACKSSWB128
,
16698 IX86_BUILTIN_PACKSSDW128
,
16699 IX86_BUILTIN_PACKUSWB128
,
16701 IX86_BUILTIN_PADDB128
,
16702 IX86_BUILTIN_PADDW128
,
16703 IX86_BUILTIN_PADDD128
,
16704 IX86_BUILTIN_PADDQ128
,
16705 IX86_BUILTIN_PADDSB128
,
16706 IX86_BUILTIN_PADDSW128
,
16707 IX86_BUILTIN_PADDUSB128
,
16708 IX86_BUILTIN_PADDUSW128
,
16709 IX86_BUILTIN_PSUBB128
,
16710 IX86_BUILTIN_PSUBW128
,
16711 IX86_BUILTIN_PSUBD128
,
16712 IX86_BUILTIN_PSUBQ128
,
16713 IX86_BUILTIN_PSUBSB128
,
16714 IX86_BUILTIN_PSUBSW128
,
16715 IX86_BUILTIN_PSUBUSB128
,
16716 IX86_BUILTIN_PSUBUSW128
,
16718 IX86_BUILTIN_PAND128
,
16719 IX86_BUILTIN_PANDN128
,
16720 IX86_BUILTIN_POR128
,
16721 IX86_BUILTIN_PXOR128
,
16723 IX86_BUILTIN_PAVGB128
,
16724 IX86_BUILTIN_PAVGW128
,
16726 IX86_BUILTIN_PCMPEQB128
,
16727 IX86_BUILTIN_PCMPEQW128
,
16728 IX86_BUILTIN_PCMPEQD128
,
16729 IX86_BUILTIN_PCMPGTB128
,
16730 IX86_BUILTIN_PCMPGTW128
,
16731 IX86_BUILTIN_PCMPGTD128
,
16733 IX86_BUILTIN_PMADDWD128
,
16735 IX86_BUILTIN_PMAXSW128
,
16736 IX86_BUILTIN_PMAXUB128
,
16737 IX86_BUILTIN_PMINSW128
,
16738 IX86_BUILTIN_PMINUB128
,
16740 IX86_BUILTIN_PMULUDQ
,
16741 IX86_BUILTIN_PMULUDQ128
,
16742 IX86_BUILTIN_PMULHUW128
,
16743 IX86_BUILTIN_PMULHW128
,
16744 IX86_BUILTIN_PMULLW128
,
16746 IX86_BUILTIN_PSADBW128
,
16747 IX86_BUILTIN_PSHUFHW
,
16748 IX86_BUILTIN_PSHUFLW
,
16749 IX86_BUILTIN_PSHUFD
,
16751 IX86_BUILTIN_PSLLDQI128
,
16752 IX86_BUILTIN_PSLLWI128
,
16753 IX86_BUILTIN_PSLLDI128
,
16754 IX86_BUILTIN_PSLLQI128
,
16755 IX86_BUILTIN_PSRAWI128
,
16756 IX86_BUILTIN_PSRADI128
,
16757 IX86_BUILTIN_PSRLDQI128
,
16758 IX86_BUILTIN_PSRLWI128
,
16759 IX86_BUILTIN_PSRLDI128
,
16760 IX86_BUILTIN_PSRLQI128
,
16762 IX86_BUILTIN_PSLLDQ128
,
16763 IX86_BUILTIN_PSLLW128
,
16764 IX86_BUILTIN_PSLLD128
,
16765 IX86_BUILTIN_PSLLQ128
,
16766 IX86_BUILTIN_PSRAW128
,
16767 IX86_BUILTIN_PSRAD128
,
16768 IX86_BUILTIN_PSRLW128
,
16769 IX86_BUILTIN_PSRLD128
,
16770 IX86_BUILTIN_PSRLQ128
,
16772 IX86_BUILTIN_PUNPCKHBW128
,
16773 IX86_BUILTIN_PUNPCKHWD128
,
16774 IX86_BUILTIN_PUNPCKHDQ128
,
16775 IX86_BUILTIN_PUNPCKHQDQ128
,
16776 IX86_BUILTIN_PUNPCKLBW128
,
16777 IX86_BUILTIN_PUNPCKLWD128
,
16778 IX86_BUILTIN_PUNPCKLDQ128
,
16779 IX86_BUILTIN_PUNPCKLQDQ128
,
16781 IX86_BUILTIN_CLFLUSH
,
16782 IX86_BUILTIN_MFENCE
,
16783 IX86_BUILTIN_LFENCE
,
16785 /* Prescott New Instructions. */
16786 IX86_BUILTIN_ADDSUBPS
,
16787 IX86_BUILTIN_HADDPS
,
16788 IX86_BUILTIN_HSUBPS
,
16789 IX86_BUILTIN_MOVSHDUP
,
16790 IX86_BUILTIN_MOVSLDUP
,
16791 IX86_BUILTIN_ADDSUBPD
,
16792 IX86_BUILTIN_HADDPD
,
16793 IX86_BUILTIN_HSUBPD
,
16794 IX86_BUILTIN_LDDQU
,
16796 IX86_BUILTIN_MONITOR
,
16797 IX86_BUILTIN_MWAIT
,
16800 IX86_BUILTIN_PHADDW
,
16801 IX86_BUILTIN_PHADDD
,
16802 IX86_BUILTIN_PHADDSW
,
16803 IX86_BUILTIN_PHSUBW
,
16804 IX86_BUILTIN_PHSUBD
,
16805 IX86_BUILTIN_PHSUBSW
,
16806 IX86_BUILTIN_PMADDUBSW
,
16807 IX86_BUILTIN_PMULHRSW
,
16808 IX86_BUILTIN_PSHUFB
,
16809 IX86_BUILTIN_PSIGNB
,
16810 IX86_BUILTIN_PSIGNW
,
16811 IX86_BUILTIN_PSIGND
,
16812 IX86_BUILTIN_PALIGNR
,
16813 IX86_BUILTIN_PABSB
,
16814 IX86_BUILTIN_PABSW
,
16815 IX86_BUILTIN_PABSD
,
16817 IX86_BUILTIN_PHADDW128
,
16818 IX86_BUILTIN_PHADDD128
,
16819 IX86_BUILTIN_PHADDSW128
,
16820 IX86_BUILTIN_PHSUBW128
,
16821 IX86_BUILTIN_PHSUBD128
,
16822 IX86_BUILTIN_PHSUBSW128
,
16823 IX86_BUILTIN_PMADDUBSW128
,
16824 IX86_BUILTIN_PMULHRSW128
,
16825 IX86_BUILTIN_PSHUFB128
,
16826 IX86_BUILTIN_PSIGNB128
,
16827 IX86_BUILTIN_PSIGNW128
,
16828 IX86_BUILTIN_PSIGND128
,
16829 IX86_BUILTIN_PALIGNR128
,
16830 IX86_BUILTIN_PABSB128
,
16831 IX86_BUILTIN_PABSW128
,
16832 IX86_BUILTIN_PABSD128
,
16834 /* AMDFAM10 - SSE4A New Instructions. */
16835 IX86_BUILTIN_MOVNTSD
,
16836 IX86_BUILTIN_MOVNTSS
,
16837 IX86_BUILTIN_EXTRQI
,
16838 IX86_BUILTIN_EXTRQ
,
16839 IX86_BUILTIN_INSERTQI
,
16840 IX86_BUILTIN_INSERTQ
,
16843 IX86_BUILTIN_BLENDPD
,
16844 IX86_BUILTIN_BLENDPS
,
16845 IX86_BUILTIN_BLENDVPD
,
16846 IX86_BUILTIN_BLENDVPS
,
16847 IX86_BUILTIN_PBLENDVB128
,
16848 IX86_BUILTIN_PBLENDW128
,
16853 IX86_BUILTIN_INSERTPS128
,
16855 IX86_BUILTIN_MOVNTDQA
,
16856 IX86_BUILTIN_MPSADBW128
,
16857 IX86_BUILTIN_PACKUSDW128
,
16858 IX86_BUILTIN_PCMPEQQ
,
16859 IX86_BUILTIN_PHMINPOSUW128
,
16861 IX86_BUILTIN_PMAXSB128
,
16862 IX86_BUILTIN_PMAXSD128
,
16863 IX86_BUILTIN_PMAXUD128
,
16864 IX86_BUILTIN_PMAXUW128
,
16866 IX86_BUILTIN_PMINSB128
,
16867 IX86_BUILTIN_PMINSD128
,
16868 IX86_BUILTIN_PMINUD128
,
16869 IX86_BUILTIN_PMINUW128
,
16871 IX86_BUILTIN_PMOVSXBW128
,
16872 IX86_BUILTIN_PMOVSXBD128
,
16873 IX86_BUILTIN_PMOVSXBQ128
,
16874 IX86_BUILTIN_PMOVSXWD128
,
16875 IX86_BUILTIN_PMOVSXWQ128
,
16876 IX86_BUILTIN_PMOVSXDQ128
,
16878 IX86_BUILTIN_PMOVZXBW128
,
16879 IX86_BUILTIN_PMOVZXBD128
,
16880 IX86_BUILTIN_PMOVZXBQ128
,
16881 IX86_BUILTIN_PMOVZXWD128
,
16882 IX86_BUILTIN_PMOVZXWQ128
,
16883 IX86_BUILTIN_PMOVZXDQ128
,
16885 IX86_BUILTIN_PMULDQ128
,
16886 IX86_BUILTIN_PMULLD128
,
16888 IX86_BUILTIN_ROUNDPD
,
16889 IX86_BUILTIN_ROUNDPS
,
16890 IX86_BUILTIN_ROUNDSD
,
16891 IX86_BUILTIN_ROUNDSS
,
16893 IX86_BUILTIN_PTESTZ
,
16894 IX86_BUILTIN_PTESTC
,
16895 IX86_BUILTIN_PTESTNZC
,
16897 IX86_BUILTIN_VEC_INIT_V2SI
,
16898 IX86_BUILTIN_VEC_INIT_V4HI
,
16899 IX86_BUILTIN_VEC_INIT_V8QI
,
16900 IX86_BUILTIN_VEC_EXT_V2DF
,
16901 IX86_BUILTIN_VEC_EXT_V2DI
,
16902 IX86_BUILTIN_VEC_EXT_V4SF
,
16903 IX86_BUILTIN_VEC_EXT_V4SI
,
16904 IX86_BUILTIN_VEC_EXT_V8HI
,
16905 IX86_BUILTIN_VEC_EXT_V2SI
,
16906 IX86_BUILTIN_VEC_EXT_V4HI
,
16907 IX86_BUILTIN_VEC_EXT_V16QI
,
16908 IX86_BUILTIN_VEC_SET_V2DI
,
16909 IX86_BUILTIN_VEC_SET_V4SF
,
16910 IX86_BUILTIN_VEC_SET_V4SI
,
16911 IX86_BUILTIN_VEC_SET_V8HI
,
16912 IX86_BUILTIN_VEC_SET_V4HI
,
16913 IX86_BUILTIN_VEC_SET_V16QI
,
16915 IX86_BUILTIN_VEC_PACK_SFIX
,
16918 IX86_BUILTIN_CRC32QI
,
16919 IX86_BUILTIN_CRC32HI
,
16920 IX86_BUILTIN_CRC32SI
,
16921 IX86_BUILTIN_CRC32DI
,
16923 IX86_BUILTIN_PCMPESTRI128
,
16924 IX86_BUILTIN_PCMPESTRM128
,
16925 IX86_BUILTIN_PCMPESTRA128
,
16926 IX86_BUILTIN_PCMPESTRC128
,
16927 IX86_BUILTIN_PCMPESTRO128
,
16928 IX86_BUILTIN_PCMPESTRS128
,
16929 IX86_BUILTIN_PCMPESTRZ128
,
16930 IX86_BUILTIN_PCMPISTRI128
,
16931 IX86_BUILTIN_PCMPISTRM128
,
16932 IX86_BUILTIN_PCMPISTRA128
,
16933 IX86_BUILTIN_PCMPISTRC128
,
16934 IX86_BUILTIN_PCMPISTRO128
,
16935 IX86_BUILTIN_PCMPISTRS128
,
16936 IX86_BUILTIN_PCMPISTRZ128
,
16938 IX86_BUILTIN_PCMPGTQ
,
16940 /* TFmode support builtins. */
16942 IX86_BUILTIN_FABSQ
,
16943 IX86_BUILTIN_COPYSIGNQ
,
16948 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins.  Entries are filled in by def_builtin;
   slots for builtins whose ISA mask is not enabled are presumably left
   NULL_TREE (static zero-initialization) -- verify against callers.
   GTY(()) roots the array so the tree nodes survive garbage collection.  */
16949 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16951 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
16952 * if the target_flags include one of MASK. Stores the function decl
16953 * in the ix86_builtins array.
16954 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16957 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16959 tree decl
= NULL_TREE
;
16961 if (mask
& ix86_isa_flags
16962 && (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
))
16964 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16966 ix86_builtins
[(int) code
] = decl
;
16972 /* Like def_builtin, but also marks the function decl "const". */
16975 def_builtin_const (int mask
, const char *name
, tree type
,
16976 enum ix86_builtins code
)
16978 tree decl
= def_builtin (mask
, name
, type
, code
);
16980 TREE_READONLY (decl
) = 1;
16984 /* Bits for builtin_description.flag. */
16986 /* Set when we don't support the comparison natively, and should
16987 swap_comparison in order to support it. */
16988 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* Descriptor for one builtin in the bdesc_* tables below.
   NOTE(review): fragmentary extraction -- the struct's closing brace and
   at least one trailing member (the embedded numbering skips line 16997,
   used by table entries as a final flag/mode field) were lost.  */
16990 struct builtin_description
/* ISA mask (OPTION_MASK_ISA_*) gating availability of the builtin.  */
16992 const unsigned int mask
;
/* Insn pattern used to expand the builtin.  */
16993 const enum insn_code icode
;
/* User-visible "__builtin_ia32_*" name, or 0 for internal-only entries.  */
16994 const char *const name
;
/* Enumerator used to index ix86_builtins.  */
16995 const enum ix86_builtins code
;
/* Comparison code for comparison builtins, else UNKNOWN.  */
16996 const enum rtx_code comparison
;
17000 static const struct builtin_description bdesc_comi
[] =
17002 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
17003 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
17004 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
17005 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
17006 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
17007 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
17008 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
17009 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
17010 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
17011 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
17012 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
17013 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
17014 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
17015 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
17016 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
17017 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
17018 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
17019 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
17020 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
17021 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
17022 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
17023 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
17024 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
17025 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
17028 static const struct builtin_description bdesc_ptest
[] =
17031 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, 0 },
17032 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, 0 },
17033 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, 0 },
17036 static const struct builtin_description bdesc_pcmpestr
[] =
17039 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
17040 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
17041 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
17042 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
17043 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
17044 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
17045 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
17048 static const struct builtin_description bdesc_pcmpistr
[] =
17051 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
17052 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
17053 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
17054 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
17055 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
17056 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
17057 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
17060 static const struct builtin_description bdesc_crc32
[] =
17063 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32qi
, 0, IX86_BUILTIN_CRC32QI
, UNKNOWN
, 0 },
17064 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32hi
, 0, IX86_BUILTIN_CRC32HI
, UNKNOWN
, 0 },
17065 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32si
, 0, IX86_BUILTIN_CRC32SI
, UNKNOWN
, 0 },
17066 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32di
, 0, IX86_BUILTIN_CRC32DI
, UNKNOWN
, 0 },
17069 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17070 static const struct builtin_description bdesc_sse_3arg
[] =
17073 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, 0 },
17074 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, 0 },
17075 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, 0 },
17076 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, 0 },
17077 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, 0 },
17078 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, 0 },
17079 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, 0 },
17080 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, 0 },
17081 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, 0 },
17082 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, 0 },
17083 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundsd
, 0, IX86_BUILTIN_ROUNDSD
, UNKNOWN
, 0 },
17084 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundss
, 0, IX86_BUILTIN_ROUNDSS
, UNKNOWN
, 0 },
17087 static const struct builtin_description bdesc_2arg
[] =
17090 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, 0 },
17091 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, 0 },
17092 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, 0 },
17093 { OPTION_MASK_ISA_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, 0 },
17094 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, 0 },
17095 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, 0 },
17096 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, 0 },
17097 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, 0 },
17099 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
17100 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
17101 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
17102 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17103 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17104 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
17105 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
17106 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
17107 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
17108 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17109 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17110 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
17111 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
17112 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
17113 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
17114 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
17115 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
17116 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
17117 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
17118 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17119 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17120 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, 0 },
17122 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, 0 },
17123 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, 0 },
17124 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, 0 },
17125 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, 0 },
17127 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, 0 },
17128 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, 0 },
17129 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, 0 },
17130 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, 0 },
17132 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, 0 },
17133 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, 0 },
17134 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, 0 },
17135 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, 0 },
17136 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, 0 },
17139 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, 0 },
17140 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, 0 },
17141 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, 0 },
17142 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, 0 },
17143 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, 0 },
17144 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, 0 },
17145 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, 0 },
17146 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, 0 },
17148 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, 0 },
17149 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, 0 },
17150 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, 0 },
17151 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, 0 },
17152 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, 0 },
17153 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, 0 },
17154 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, 0 },
17155 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, 0 },
17157 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, 0 },
17158 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, 0 },
17159 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, 0 },
17161 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, 0 },
17162 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, 0 },
17163 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, 0 },
17164 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, 0 },
17166 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, 0 },
17167 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, 0 },
17169 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, 0 },
17170 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, 0 },
17171 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, 0 },
17172 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, 0 },
17173 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, 0 },
17174 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, 0 },
17176 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, 0 },
17177 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, 0 },
17178 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, 0 },
17179 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, 0 },
17181 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, 0 },
17182 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, 0 },
17183 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, 0 },
17184 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, 0 },
17185 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, 0 },
17186 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, 0 },
17189 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, UNKNOWN
, 0 },
17190 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, UNKNOWN
, 0 },
17191 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, UNKNOWN
, 0 },
17193 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, 0 },
17194 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, 0 },
17195 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, 0 },
17197 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, UNKNOWN
, 0 },
17198 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, UNKNOWN
, 0 },
17199 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, UNKNOWN
, 0 },
17200 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, UNKNOWN
, 0 },
17201 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, UNKNOWN
, 0 },
17202 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, UNKNOWN
, 0 },
17204 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, UNKNOWN
, 0 },
17205 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, UNKNOWN
, 0 },
17206 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, UNKNOWN
, 0 },
17207 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, UNKNOWN
, 0 },
17208 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, UNKNOWN
, 0 },
17209 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, UNKNOWN
, 0 },
17211 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, UNKNOWN
, 0 },
17212 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, UNKNOWN
, 0 },
17213 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, UNKNOWN
, 0 },
17214 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, UNKNOWN
, 0 },
17216 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, UNKNOWN
, 0 },
17217 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, UNKNOWN
, 0 },
17220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, 0 },
17221 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, 0 },
17222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, 0 },
17223 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, 0 },
17224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, 0 },
17225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, 0 },
17226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, 0 },
17227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, 0 },
17229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
17230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
17231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
17232 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17233 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
17235 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
17236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
17237 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
17238 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17239 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17240 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
17241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
17242 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
17243 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
17244 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
17245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
17246 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
17247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
17248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
17250 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, 0 },
17251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, 0 },
17252 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, 0 },
17253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, 0 },
17255 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, 0 },
17256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, 0 },
17257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, 0 },
17258 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, 0 },
17260 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, 0 },
17261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, 0 },
17262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, 0 },
17264 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, 0 },
17267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, 0 },
17268 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, 0 },
17269 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, 0 },
17270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, 0 },
17271 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, 0 },
17272 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, 0 },
17273 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, 0 },
17274 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, 0 },
17276 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, 0 },
17277 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, 0 },
17278 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, 0 },
17279 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, 0 },
17280 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, 0 },
17281 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, 0 },
17282 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, 0 },
17283 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, 0 },
17285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, 0 },
17286 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, 0 },
17288 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, 0 },
17289 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, 0 },
17290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, 0 },
17291 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, 0 },
17293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, 0 },
17294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, 0 },
17296 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, 0 },
17297 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, 0 },
17298 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, 0 },
17299 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, 0 },
17300 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, 0 },
17301 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, 0 },
17303 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, 0 },
17304 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, 0 },
17305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, 0 },
17306 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, 0 },
17308 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, 0 },
17309 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, 0 },
17310 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, 0 },
17311 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, 0 },
17312 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, 0 },
17313 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, 0 },
17314 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, 0 },
17315 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, 0 },
17317 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, 0 },
17318 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, 0 },
17319 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, 0 },
17321 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, 0 },
17322 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, UNKNOWN
, 0 },
17324 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, UNKNOWN
, 0 },
17325 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, 0 },
17327 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, UNKNOWN
, 0 },
17328 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, UNKNOWN
, 0 },
17329 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, UNKNOWN
, 0 },
17331 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, UNKNOWN
, 0 },
17332 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, UNKNOWN
, 0 },
17333 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, UNKNOWN
, 0 },
17335 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, UNKNOWN
, 0 },
17336 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, UNKNOWN
, 0 },
17338 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, UNKNOWN
, 0 },
17340 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, 0 },
17341 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, 0 },
17342 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, 0 },
17343 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, 0 },
17346 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, 0 },
17347 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, 0 },
17348 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, 0 },
17349 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, 0 },
17350 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, 0 },
17351 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, 0 },
17354 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, 0 },
17355 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, 0 },
17356 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, 0 },
17357 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, 0 },
17358 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, 0 },
17359 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, 0 },
17360 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, 0 },
17361 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, 0 },
17362 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, 0 },
17363 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, 0 },
17364 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, 0 },
17365 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, 0 },
17366 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, 0 },
17367 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, 0 },
17368 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, 0 },
17369 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, 0 },
17370 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, 0 },
17371 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, 0 },
17372 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, 0 },
17373 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, 0 },
17374 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, 0 },
17375 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, 0 },
17376 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, 0 },
17377 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, 0 },
17380 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, 0 },
17381 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, 0 },
17382 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, 0 },
17383 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, 0 },
17384 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, 0 },
17385 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, 0 },
17386 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, 0 },
17387 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, 0 },
17388 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, 0 },
17389 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, 0 },
17390 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, 0, IX86_BUILTIN_PMULDQ128
, UNKNOWN
, 0 },
17391 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, 0 },
17394 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, 0 },
17397 static const struct builtin_description bdesc_1arg
[] =
17399 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, 0 },
17400 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, 0 },
17402 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, UNKNOWN
, 0 },
17403 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, UNKNOWN
, 0 },
17404 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, UNKNOWN
, 0 },
17406 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, 0 },
17407 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, 0 },
17408 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, 0 },
17409 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, 0 },
17410 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, 0 },
17411 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, 0 },
17413 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, 0 },
17414 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, 0 },
17416 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, UNKNOWN
, 0 },
17418 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, 0 },
17419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, 0 },
17421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, 0 },
17422 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, 0 },
17423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, 0 },
17424 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, 0 },
17425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, 0 },
17427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, 0 },
17429 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, 0 },
17430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, 0 },
17431 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, 0 },
17432 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, 0 },
17434 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, 0 },
17435 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, 0 },
17436 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, 0 },
17439 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, 0 },
17440 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, 0 },
17443 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, 0 },
17444 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, 0 },
17445 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, 0 },
17446 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, 0 },
17447 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, 0 },
17448 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, 0 },
17451 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, 0 },
17452 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, 0 },
17453 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, 0 },
17454 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, 0 },
17455 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, 0 },
17456 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, 0 },
17457 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, 0 },
17458 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, 0 },
17459 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, 0 },
17460 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, 0 },
17461 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, 0 },
17462 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, 0 },
17463 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, 0 },
17465 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
17466 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundpd
, 0, IX86_BUILTIN_ROUNDPD
, UNKNOWN
, 0 },
17467 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundps
, 0, IX86_BUILTIN_ROUNDPS
, UNKNOWN
, 0 },
17470 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
17471 is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
portion of the builtins.  */
17474 ix86_init_mmx_sse_builtins (void)
17476 const struct builtin_description
* d
;
17479 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
17480 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
17481 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
17482 tree V2DI_type_node
17483 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
17484 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
17485 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
17486 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
17487 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
17488 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
17489 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
17491 tree pchar_type_node
= build_pointer_type (char_type_node
);
17492 tree pcchar_type_node
= build_pointer_type (
17493 build_type_variant (char_type_node
, 1, 0));
17494 tree pfloat_type_node
= build_pointer_type (float_type_node
);
17495 tree pcfloat_type_node
= build_pointer_type (
17496 build_type_variant (float_type_node
, 1, 0));
17497 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
17498 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
17499 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
17502 tree int_ftype_v4sf_v4sf
17503 = build_function_type_list (integer_type_node
,
17504 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17505 tree v4si_ftype_v4sf_v4sf
17506 = build_function_type_list (V4SI_type_node
,
17507 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17508 /* MMX/SSE/integer conversions. */
17509 tree int_ftype_v4sf
17510 = build_function_type_list (integer_type_node
,
17511 V4SF_type_node
, NULL_TREE
);
17512 tree int64_ftype_v4sf
17513 = build_function_type_list (long_long_integer_type_node
,
17514 V4SF_type_node
, NULL_TREE
);
17515 tree int_ftype_v8qi
17516 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
17517 tree v4sf_ftype_v4sf_int
17518 = build_function_type_list (V4SF_type_node
,
17519 V4SF_type_node
, integer_type_node
, NULL_TREE
);
17520 tree v4sf_ftype_v4sf_int64
17521 = build_function_type_list (V4SF_type_node
,
17522 V4SF_type_node
, long_long_integer_type_node
,
17524 tree v4sf_ftype_v4sf_v2si
17525 = build_function_type_list (V4SF_type_node
,
17526 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
17528 /* Miscellaneous. */
17529 tree v8qi_ftype_v4hi_v4hi
17530 = build_function_type_list (V8QI_type_node
,
17531 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17532 tree v4hi_ftype_v2si_v2si
17533 = build_function_type_list (V4HI_type_node
,
17534 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17535 tree v4sf_ftype_v4sf_v4sf_int
17536 = build_function_type_list (V4SF_type_node
,
17537 V4SF_type_node
, V4SF_type_node
,
17538 integer_type_node
, NULL_TREE
);
17539 tree v2si_ftype_v4hi_v4hi
17540 = build_function_type_list (V2SI_type_node
,
17541 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17542 tree v4hi_ftype_v4hi_int
17543 = build_function_type_list (V4HI_type_node
,
17544 V4HI_type_node
, integer_type_node
, NULL_TREE
);
17545 tree v4hi_ftype_v4hi_di
17546 = build_function_type_list (V4HI_type_node
,
17547 V4HI_type_node
, long_long_unsigned_type_node
,
17549 tree v2si_ftype_v2si_di
17550 = build_function_type_list (V2SI_type_node
,
17551 V2SI_type_node
, long_long_unsigned_type_node
,
17553 tree void_ftype_void
17554 = build_function_type (void_type_node
, void_list_node
);
17555 tree void_ftype_unsigned
17556 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
17557 tree void_ftype_unsigned_unsigned
17558 = build_function_type_list (void_type_node
, unsigned_type_node
,
17559 unsigned_type_node
, NULL_TREE
);
17560 tree void_ftype_pcvoid_unsigned_unsigned
17561 = build_function_type_list (void_type_node
, const_ptr_type_node
,
17562 unsigned_type_node
, unsigned_type_node
,
17564 tree unsigned_ftype_void
17565 = build_function_type (unsigned_type_node
, void_list_node
);
17566 tree v2si_ftype_v4sf
17567 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
17568 /* Loads/stores. */
17569 tree void_ftype_v8qi_v8qi_pchar
17570 = build_function_type_list (void_type_node
,
17571 V8QI_type_node
, V8QI_type_node
,
17572 pchar_type_node
, NULL_TREE
);
17573 tree v4sf_ftype_pcfloat
17574 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
17575 /* @@@ the type is bogus */
17576 tree v4sf_ftype_v4sf_pv2si
17577 = build_function_type_list (V4SF_type_node
,
17578 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
17579 tree void_ftype_pv2si_v4sf
17580 = build_function_type_list (void_type_node
,
17581 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
17582 tree void_ftype_pfloat_v4sf
17583 = build_function_type_list (void_type_node
,
17584 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
17585 tree void_ftype_pdi_di
17586 = build_function_type_list (void_type_node
,
17587 pdi_type_node
, long_long_unsigned_type_node
,
17589 tree void_ftype_pv2di_v2di
17590 = build_function_type_list (void_type_node
,
17591 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
17592 /* Normal vector unops. */
17593 tree v4sf_ftype_v4sf
17594 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17595 tree v16qi_ftype_v16qi
17596 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17597 tree v8hi_ftype_v8hi
17598 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17599 tree v4si_ftype_v4si
17600 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17601 tree v8qi_ftype_v8qi
17602 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17603 tree v4hi_ftype_v4hi
17604 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17606 /* Normal vector binops. */
17607 tree v4sf_ftype_v4sf_v4sf
17608 = build_function_type_list (V4SF_type_node
,
17609 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17610 tree v8qi_ftype_v8qi_v8qi
17611 = build_function_type_list (V8QI_type_node
,
17612 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17613 tree v4hi_ftype_v4hi_v4hi
17614 = build_function_type_list (V4HI_type_node
,
17615 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17616 tree v2si_ftype_v2si_v2si
17617 = build_function_type_list (V2SI_type_node
,
17618 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17619 tree di_ftype_di_di
17620 = build_function_type_list (long_long_unsigned_type_node
,
17621 long_long_unsigned_type_node
,
17622 long_long_unsigned_type_node
, NULL_TREE
);
17624 tree di_ftype_di_di_int
17625 = build_function_type_list (long_long_unsigned_type_node
,
17626 long_long_unsigned_type_node
,
17627 long_long_unsigned_type_node
,
17628 integer_type_node
, NULL_TREE
);
17630 tree v2si_ftype_v2sf
17631 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
17632 tree v2sf_ftype_v2si
17633 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
17634 tree v2si_ftype_v2si
17635 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17636 tree v2sf_ftype_v2sf
17637 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17638 tree v2sf_ftype_v2sf_v2sf
17639 = build_function_type_list (V2SF_type_node
,
17640 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17641 tree v2si_ftype_v2sf_v2sf
17642 = build_function_type_list (V2SI_type_node
,
17643 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17644 tree pint_type_node
= build_pointer_type (integer_type_node
);
17645 tree pdouble_type_node
= build_pointer_type (double_type_node
);
17646 tree pcdouble_type_node
= build_pointer_type (
17647 build_type_variant (double_type_node
, 1, 0));
17648 tree int_ftype_v2df_v2df
17649 = build_function_type_list (integer_type_node
,
17650 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17652 tree void_ftype_pcvoid
17653 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
17654 tree v4sf_ftype_v4si
17655 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
17656 tree v4si_ftype_v4sf
17657 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
17658 tree v2df_ftype_v4si
17659 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
17660 tree v4si_ftype_v2df
17661 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
17662 tree v4si_ftype_v2df_v2df
17663 = build_function_type_list (V4SI_type_node
,
17664 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17665 tree v2si_ftype_v2df
17666 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
17667 tree v4sf_ftype_v2df
17668 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17669 tree v2df_ftype_v2si
17670 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
17671 tree v2df_ftype_v4sf
17672 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17673 tree int_ftype_v2df
17674 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
17675 tree int64_ftype_v2df
17676 = build_function_type_list (long_long_integer_type_node
,
17677 V2DF_type_node
, NULL_TREE
);
17678 tree v2df_ftype_v2df_int
17679 = build_function_type_list (V2DF_type_node
,
17680 V2DF_type_node
, integer_type_node
, NULL_TREE
);
17681 tree v2df_ftype_v2df_int64
17682 = build_function_type_list (V2DF_type_node
,
17683 V2DF_type_node
, long_long_integer_type_node
,
17685 tree v4sf_ftype_v4sf_v2df
17686 = build_function_type_list (V4SF_type_node
,
17687 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17688 tree v2df_ftype_v2df_v4sf
17689 = build_function_type_list (V2DF_type_node
,
17690 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17691 tree v2df_ftype_v2df_v2df_int
17692 = build_function_type_list (V2DF_type_node
,
17693 V2DF_type_node
, V2DF_type_node
,
17696 tree v2df_ftype_v2df_pcdouble
17697 = build_function_type_list (V2DF_type_node
,
17698 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17699 tree void_ftype_pdouble_v2df
17700 = build_function_type_list (void_type_node
,
17701 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
17702 tree void_ftype_pint_int
17703 = build_function_type_list (void_type_node
,
17704 pint_type_node
, integer_type_node
, NULL_TREE
);
17705 tree void_ftype_v16qi_v16qi_pchar
17706 = build_function_type_list (void_type_node
,
17707 V16QI_type_node
, V16QI_type_node
,
17708 pchar_type_node
, NULL_TREE
);
17709 tree v2df_ftype_pcdouble
17710 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17711 tree v2df_ftype_v2df_v2df
17712 = build_function_type_list (V2DF_type_node
,
17713 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17714 tree v16qi_ftype_v16qi_v16qi
17715 = build_function_type_list (V16QI_type_node
,
17716 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17717 tree v8hi_ftype_v8hi_v8hi
17718 = build_function_type_list (V8HI_type_node
,
17719 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17720 tree v4si_ftype_v4si_v4si
17721 = build_function_type_list (V4SI_type_node
,
17722 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17723 tree v2di_ftype_v2di_v2di
17724 = build_function_type_list (V2DI_type_node
,
17725 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17726 tree v2di_ftype_v2df_v2df
17727 = build_function_type_list (V2DI_type_node
,
17728 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17729 tree v2df_ftype_v2df
17730 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17731 tree v2di_ftype_v2di_int
17732 = build_function_type_list (V2DI_type_node
,
17733 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17734 tree v2di_ftype_v2di_v2di_int
17735 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17736 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17737 tree v4si_ftype_v4si_int
17738 = build_function_type_list (V4SI_type_node
,
17739 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17740 tree v8hi_ftype_v8hi_int
17741 = build_function_type_list (V8HI_type_node
,
17742 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17743 tree v4si_ftype_v8hi_v8hi
17744 = build_function_type_list (V4SI_type_node
,
17745 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17746 tree di_ftype_v8qi_v8qi
17747 = build_function_type_list (long_long_unsigned_type_node
,
17748 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17749 tree di_ftype_v2si_v2si
17750 = build_function_type_list (long_long_unsigned_type_node
,
17751 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17752 tree v2di_ftype_v16qi_v16qi
17753 = build_function_type_list (V2DI_type_node
,
17754 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17755 tree v2di_ftype_v4si_v4si
17756 = build_function_type_list (V2DI_type_node
,
17757 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17758 tree int_ftype_v16qi
17759 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17760 tree v16qi_ftype_pcchar
17761 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17762 tree void_ftype_pchar_v16qi
17763 = build_function_type_list (void_type_node
,
17764 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17766 tree v2di_ftype_v2di_unsigned_unsigned
17767 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17768 unsigned_type_node
, unsigned_type_node
,
17770 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17771 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17772 unsigned_type_node
, unsigned_type_node
,
17774 tree v2di_ftype_v2di_v16qi
17775 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17777 tree v2df_ftype_v2df_v2df_v2df
17778 = build_function_type_list (V2DF_type_node
,
17779 V2DF_type_node
, V2DF_type_node
,
17780 V2DF_type_node
, NULL_TREE
);
17781 tree v4sf_ftype_v4sf_v4sf_v4sf
17782 = build_function_type_list (V4SF_type_node
,
17783 V4SF_type_node
, V4SF_type_node
,
17784 V4SF_type_node
, NULL_TREE
);
17785 tree v8hi_ftype_v16qi
17786 = build_function_type_list (V8HI_type_node
, V16QI_type_node
,
17788 tree v4si_ftype_v16qi
17789 = build_function_type_list (V4SI_type_node
, V16QI_type_node
,
17791 tree v2di_ftype_v16qi
17792 = build_function_type_list (V2DI_type_node
, V16QI_type_node
,
17794 tree v4si_ftype_v8hi
17795 = build_function_type_list (V4SI_type_node
, V8HI_type_node
,
17797 tree v2di_ftype_v8hi
17798 = build_function_type_list (V2DI_type_node
, V8HI_type_node
,
17800 tree v2di_ftype_v4si
17801 = build_function_type_list (V2DI_type_node
, V4SI_type_node
,
17803 tree v2di_ftype_pv2di
17804 = build_function_type_list (V2DI_type_node
, pv2di_type_node
,
17806 tree v16qi_ftype_v16qi_v16qi_int
17807 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17808 V16QI_type_node
, integer_type_node
,
17810 tree v16qi_ftype_v16qi_v16qi_v16qi
17811 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17812 V16QI_type_node
, V16QI_type_node
,
17814 tree v8hi_ftype_v8hi_v8hi_int
17815 = build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17816 V8HI_type_node
, integer_type_node
,
17818 tree v4si_ftype_v4si_v4si_int
17819 = build_function_type_list (V4SI_type_node
, V4SI_type_node
,
17820 V4SI_type_node
, integer_type_node
,
17822 tree int_ftype_v2di_v2di
17823 = build_function_type_list (integer_type_node
,
17824 V2DI_type_node
, V2DI_type_node
,
17826 tree int_ftype_v16qi_int_v16qi_int_int
17827 = build_function_type_list (integer_type_node
,
17834 tree v16qi_ftype_v16qi_int_v16qi_int_int
17835 = build_function_type_list (V16QI_type_node
,
17842 tree int_ftype_v16qi_v16qi_int
17843 = build_function_type_list (integer_type_node
,
17850 /* The __float80 type. */
17851 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17852 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17856 /* The __float80 type. */
17857 tree float80_type_node
= make_node (REAL_TYPE
);
17859 TYPE_PRECISION (float80_type_node
) = 80;
17860 layout_type (float80_type_node
);
17861 (*lang_hooks
.types
.register_builtin_type
) (float80_type_node
,
17867 tree float128_type_node
= make_node (REAL_TYPE
);
17869 TYPE_PRECISION (float128_type_node
) = 128;
17870 layout_type (float128_type_node
);
17871 (*lang_hooks
.types
.register_builtin_type
) (float128_type_node
,
17874 /* TFmode support builtins. */
17875 ftype
= build_function_type (float128_type_node
,
17877 def_builtin (OPTION_MASK_ISA_64BIT
, "__builtin_infq", ftype
, IX86_BUILTIN_INFQ
);
17879 ftype
= build_function_type_list (float128_type_node
,
17880 float128_type_node
,
17882 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_fabsq", ftype
, IX86_BUILTIN_FABSQ
);
17884 ftype
= build_function_type_list (float128_type_node
,
17885 float128_type_node
,
17886 float128_type_node
,
17888 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_copysignq", ftype
, IX86_BUILTIN_COPYSIGNQ
);
17891 /* Add all SSE builtins that are more or less simple operations on
17893 for (i
= 0, d
= bdesc_sse_3arg
;
17894 i
< ARRAY_SIZE (bdesc_sse_3arg
);
17897 /* Use one of the operands; the target can have a different mode for
17898 mask-generating compares. */
17899 enum machine_mode mode
;
17904 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17909 type
= v16qi_ftype_v16qi_v16qi_int
;
17912 type
= v8hi_ftype_v8hi_v8hi_int
;
17915 type
= v4si_ftype_v4si_v4si_int
;
17918 type
= v2di_ftype_v2di_v2di_int
;
17921 type
= v2df_ftype_v2df_v2df_int
;
17924 type
= v4sf_ftype_v4sf_v4sf_int
;
17927 gcc_unreachable ();
17930 /* Override for variable blends. */
17933 case CODE_FOR_sse4_1_blendvpd
:
17934 type
= v2df_ftype_v2df_v2df_v2df
;
17936 case CODE_FOR_sse4_1_blendvps
:
17937 type
= v4sf_ftype_v4sf_v4sf_v4sf
;
17939 case CODE_FOR_sse4_1_pblendvb
:
17940 type
= v16qi_ftype_v16qi_v16qi_v16qi
;
17946 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
17949 /* Add all builtins that are more or less simple operations on two
17951 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17953 /* Use one of the operands; the target can have a different mode for
17954 mask-generating compares. */
17955 enum machine_mode mode
;
17960 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17965 type
= v16qi_ftype_v16qi_v16qi
;
17968 type
= v8hi_ftype_v8hi_v8hi
;
17971 type
= v4si_ftype_v4si_v4si
;
17974 type
= v2di_ftype_v2di_v2di
;
17977 type
= v2df_ftype_v2df_v2df
;
17980 type
= v4sf_ftype_v4sf_v4sf
;
17983 type
= v8qi_ftype_v8qi_v8qi
;
17986 type
= v4hi_ftype_v4hi_v4hi
;
17989 type
= v2si_ftype_v2si_v2si
;
17992 type
= di_ftype_di_di
;
17996 gcc_unreachable ();
17999 /* Override for comparisons. */
18000 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18001 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
18002 type
= v4si_ftype_v4sf_v4sf
;
18004 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18005 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18006 type
= v2di_ftype_v2df_v2df
;
18008 if (d
->icode
== CODE_FOR_vec_pack_sfix_v2df
)
18009 type
= v4si_ftype_v2df_v2df
;
18011 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
18014 /* Add all builtins that are more or less simple operations on 1 operand. */
18015 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18017 enum machine_mode mode
;
18022 mode
= insn_data
[d
->icode
].operand
[1].mode
;
18027 type
= v16qi_ftype_v16qi
;
18030 type
= v8hi_ftype_v8hi
;
18033 type
= v4si_ftype_v4si
;
18036 type
= v2df_ftype_v2df
;
18039 type
= v4sf_ftype_v4sf
;
18042 type
= v8qi_ftype_v8qi
;
18045 type
= v4hi_ftype_v4hi
;
18048 type
= v2si_ftype_v2si
;
18055 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
18058 /* pcmpestr[im] insns. */
18059 for (i
= 0, d
= bdesc_pcmpestr
;
18060 i
< ARRAY_SIZE (bdesc_pcmpestr
);
18063 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
18064 ftype
= v16qi_ftype_v16qi_int_v16qi_int_int
;
18066 ftype
= int_ftype_v16qi_int_v16qi_int_int
;
18067 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
18070 /* pcmpistr[im] insns. */
18071 for (i
= 0, d
= bdesc_pcmpistr
;
18072 i
< ARRAY_SIZE (bdesc_pcmpistr
);
18075 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
18076 ftype
= v16qi_ftype_v16qi_v16qi_int
;
18078 ftype
= int_ftype_v16qi_v16qi_int
;
18079 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
18082 /* Add the remaining MMX insns with somewhat more complicated types. */
18083 def_builtin (OPTION_MASK_ISA_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
18084 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
18085 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
18086 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
18088 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
18089 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
18090 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
18092 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
18093 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
18095 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
18096 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
18098 /* comi/ucomi insns. */
18099 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18100 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
18101 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
18103 def_builtin_const (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
18106 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
18107 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2di_v2di
, d
->code
);
18109 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
18110 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
18111 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
18113 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
18114 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
18115 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
18116 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
18117 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
18118 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
18119 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
18120 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
18121 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
18122 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
18123 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
18125 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
18127 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
18128 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
18130 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
18131 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
18132 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
18133 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
18135 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
18136 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
18137 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
18138 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
18140 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
18142 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
18144 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
18145 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
18146 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
18147 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
18148 ftype
= build_function_type_list (float_type_node
,
18151 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtf", ftype
, IX86_BUILTIN_RSQRTF
);
18152 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
18153 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
18155 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
18157 /* Original 3DNow! */
18158 def_builtin (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
18159 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
18160 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
18161 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
18162 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
18163 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
18164 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
18165 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
18166 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
18167 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
18168 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
18169 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
18170 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
18171 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
18172 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
18173 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
18174 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
18175 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
18176 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
18177 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
18179 /* 3DNow! extension as used in the Athlon CPU. */
18180 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
18181 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
18182 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
18183 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
18184 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
18185 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
18188 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
18190 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
18191 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
18193 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
18194 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
18196 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
18197 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
18198 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
18199 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
18200 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
18202 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
18203 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
18204 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
18205 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
18207 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
18208 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
18210 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
18212 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
18213 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
18215 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
18216 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
18217 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
18218 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
18219 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
18221 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
18223 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
18224 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
18225 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
18226 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
18228 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
18229 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
18230 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
18232 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
18233 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
18234 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
18235 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
18237 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
18238 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
18239 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
18241 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
18242 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
18244 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
18245 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
18247 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
18248 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
18249 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
18250 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
18251 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
18252 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
18253 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
18255 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
18256 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
18257 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
18258 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
18259 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
18260 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
18261 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
18263 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
18264 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
18265 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
18266 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
18268 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
18270 /* Prescott New Instructions. */
18271 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned
, IX86_BUILTIN_MONITOR
);
18272 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned
, IX86_BUILTIN_MWAIT
);
18273 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_lddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
18276 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
18277 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
, IX86_BUILTIN_PALIGNR
);
18280 def_builtin (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_movntdqa", v2di_ftype_pv2di
, IX86_BUILTIN_MOVNTDQA
);
18281 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVSXBW128
);
18282 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVSXBD128
);
18283 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVSXBQ128
);
18284 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVSXWD128
);
18285 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVSXWQ128
);
18286 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVSXDQ128
);
18287 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVZXBW128
);
18288 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVZXBD128
);
18289 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVZXBQ128
);
18290 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVZXWD128
);
18291 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVZXWQ128
);
18292 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVZXDQ128
);
18293 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULDQ128
);
18294 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundpd", v2df_ftype_v2df_int
, IX86_BUILTIN_ROUNDPD
);
18295 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int
, IX86_BUILTIN_ROUNDPS
);
18296 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_ROUNDSD
);
18297 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_ROUNDSS
);
18300 ftype
= build_function_type_list (unsigned_type_node
,
18301 unsigned_type_node
,
18302 unsigned_char_type_node
,
18304 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32qi", ftype
, IX86_BUILTIN_CRC32QI
);
18305 ftype
= build_function_type_list (unsigned_type_node
,
18306 unsigned_type_node
,
18307 short_unsigned_type_node
,
18309 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32hi", ftype
, IX86_BUILTIN_CRC32HI
);
18310 ftype
= build_function_type_list (unsigned_type_node
,
18311 unsigned_type_node
,
18312 unsigned_type_node
,
18314 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32si", ftype
, IX86_BUILTIN_CRC32SI
);
18315 ftype
= build_function_type_list (long_long_unsigned_type_node
,
18316 long_long_unsigned_type_node
,
18317 long_long_unsigned_type_node
,
18319 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32di", ftype
, IX86_BUILTIN_CRC32DI
);
18321 /* AMDFAM10 SSE4A New built-ins */
18322 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
18323 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
18324 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
18325 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
18326 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
18327 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
18329 /* Access to the vec_init patterns. */
18330 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
18331 integer_type_node
, NULL_TREE
);
18332 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si", ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
18334 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
18335 short_integer_type_node
,
18336 short_integer_type_node
,
18337 short_integer_type_node
, NULL_TREE
);
18338 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi", ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
18340 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
18341 char_type_node
, char_type_node
,
18342 char_type_node
, char_type_node
,
18343 char_type_node
, char_type_node
,
18344 char_type_node
, NULL_TREE
);
18345 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi", ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
18347 /* Access to the vec_extract patterns. */
18348 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
18349 integer_type_node
, NULL_TREE
);
18350 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df", ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
18352 ftype
= build_function_type_list (long_long_integer_type_node
,
18353 V2DI_type_node
, integer_type_node
,
18355 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di", ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
18357 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
18358 integer_type_node
, NULL_TREE
);
18359 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf", ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
18361 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
18362 integer_type_node
, NULL_TREE
);
18363 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si", ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
18365 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
18366 integer_type_node
, NULL_TREE
);
18367 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi", ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
18369 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
18370 integer_type_node
, NULL_TREE
);
18371 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi", ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
18373 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
18374 integer_type_node
, NULL_TREE
);
18375 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si", ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
18377 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
18378 integer_type_node
, NULL_TREE
);
18379 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi", ftype
, IX86_BUILTIN_VEC_EXT_V16QI
);
18381 /* Access to the vec_set patterns. */
18382 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18384 integer_type_node
, NULL_TREE
);
18385 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_vec_set_v2di", ftype
, IX86_BUILTIN_VEC_SET_V2DI
);
18387 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
18389 integer_type_node
, NULL_TREE
);
18390 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf", ftype
, IX86_BUILTIN_VEC_SET_V4SF
);
18392 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
18394 integer_type_node
, NULL_TREE
);
18395 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si", ftype
, IX86_BUILTIN_VEC_SET_V4SI
);
18397 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
18399 integer_type_node
, NULL_TREE
);
18400 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi", ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
18402 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
18404 integer_type_node
, NULL_TREE
);
18405 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_set_v4hi", ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
18407 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18409 integer_type_node
, NULL_TREE
);
18410 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi", ftype
, IX86_BUILTIN_VEC_SET_V16QI
);
18414 ix86_init_builtins (void)
18417 ix86_init_mmx_sse_builtins ();
18420 /* Errors in the source file can cause expand_expr to return const0_rtx
18421 where we expect a vector. To avoid crashing, use one of the vector
18422 clear instructions. */
18424 safe_vector_operand (rtx x
, enum machine_mode mode
)
18426 if (x
== const0_rtx
)
18427 x
= CONST0_RTX (mode
);
18431 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18432 4 operands. The third argument must be a constant smaller than 8
18436 ix86_expand_sse_4_operands_builtin (enum insn_code icode
, tree exp
,
18440 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18441 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18442 tree arg2
= CALL_EXPR_ARG (exp
, 2);
18443 rtx op0
= expand_normal (arg0
);
18444 rtx op1
= expand_normal (arg1
);
18445 rtx op2
= expand_normal (arg2
);
18446 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18447 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
18448 enum machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
18449 enum machine_mode mode3
= insn_data
[icode
].operand
[3].mode
;
18451 if (VECTOR_MODE_P (mode1
))
18452 op0
= safe_vector_operand (op0
, mode1
);
18453 if (VECTOR_MODE_P (mode2
))
18454 op1
= safe_vector_operand (op1
, mode2
);
18455 if (VECTOR_MODE_P (mode3
))
18456 op2
= safe_vector_operand (op2
, mode3
);
18460 || GET_MODE (target
) != tmode
18461 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18462 target
= gen_reg_rtx (tmode
);
18464 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18465 op0
= copy_to_mode_reg (mode1
, op0
);
18466 if ((optimize
&& !register_operand (op1
, mode2
))
18467 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18468 op1
= copy_to_mode_reg (mode2
, op1
);
18470 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18473 case CODE_FOR_sse4_1_blendvpd
:
18474 case CODE_FOR_sse4_1_blendvps
:
18475 case CODE_FOR_sse4_1_pblendvb
:
18476 op2
= copy_to_mode_reg (mode3
, op2
);
18479 case CODE_FOR_sse4_1_roundsd
:
18480 case CODE_FOR_sse4_1_roundss
:
18481 error ("the third argument must be a 4-bit immediate");
18485 error ("the third argument must be an 8-bit immediate");
18489 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18496 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18499 ix86_expand_crc32 (enum insn_code icode
, tree exp
, rtx target
)
18502 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18503 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18504 rtx op0
= expand_normal (arg0
);
18505 rtx op1
= expand_normal (arg1
);
18506 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18507 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18508 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18512 || GET_MODE (target
) != tmode
18513 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18514 target
= gen_reg_rtx (tmode
);
18516 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18517 op0
= copy_to_mode_reg (mode0
, op0
);
18518 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18520 op1
= copy_to_reg (op1
);
18521 op1
= simplify_gen_subreg (mode1
, op1
, GET_MODE (op1
), 0);
18524 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18531 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18534 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
18537 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18538 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18539 rtx op0
= expand_normal (arg0
);
18540 rtx op1
= expand_normal (arg1
);
18541 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18542 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18543 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18545 if (VECTOR_MODE_P (mode0
))
18546 op0
= safe_vector_operand (op0
, mode0
);
18547 if (VECTOR_MODE_P (mode1
))
18548 op1
= safe_vector_operand (op1
, mode1
);
18550 if (optimize
|| !target
18551 || GET_MODE (target
) != tmode
18552 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18553 target
= gen_reg_rtx (tmode
);
18555 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
18557 rtx x
= gen_reg_rtx (V4SImode
);
18558 emit_insn (gen_sse2_loadd (x
, op1
));
18559 op1
= gen_lowpart (TImode
, x
);
18562 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18563 op0
= copy_to_mode_reg (mode0
, op0
);
18564 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18565 op1
= copy_to_mode_reg (mode1
, op1
);
18567 /* ??? Using ix86_fixup_binary_operands is problematic when
18568 we've got mismatched modes. Fake it. */
18574 if (tmode
== mode0
&& tmode
== mode1
)
18576 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
18580 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
18582 op0
= force_reg (mode0
, op0
);
18583 op1
= force_reg (mode1
, op1
);
18584 target
= gen_reg_rtx (tmode
);
18587 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18594 /* Subroutine of ix86_expand_builtin to take care of stores. */
18597 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
18600 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18601 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18602 rtx op0
= expand_normal (arg0
);
18603 rtx op1
= expand_normal (arg1
);
18604 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
18605 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
18607 if (VECTOR_MODE_P (mode1
))
18608 op1
= safe_vector_operand (op1
, mode1
);
18610 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18611 op1
= copy_to_mode_reg (mode1
, op1
);
18613 pat
= GEN_FCN (icode
) (op0
, op1
);
18619 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18622 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
18623 rtx target
, int do_load
)
18626 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18627 rtx op0
= expand_normal (arg0
);
18628 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18629 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18631 if (optimize
|| !target
18632 || GET_MODE (target
) != tmode
18633 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18634 target
= gen_reg_rtx (tmode
);
18636 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18639 if (VECTOR_MODE_P (mode0
))
18640 op0
= safe_vector_operand (op0
, mode0
);
18642 if ((optimize
&& !register_operand (op0
, mode0
))
18643 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18644 op0
= copy_to_mode_reg (mode0
, op0
);
18649 case CODE_FOR_sse4_1_roundpd
:
18650 case CODE_FOR_sse4_1_roundps
:
18652 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18653 rtx op1
= expand_normal (arg1
);
18654 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18656 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18658 error ("the second argument must be a 4-bit immediate");
18661 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18665 pat
= GEN_FCN (icode
) (target
, op0
);
18675 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18676 sqrtss, rsqrtss, rcpss. */
18679 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
18682 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18683 rtx op1
, op0
= expand_normal (arg0
);
18684 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18685 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18687 if (optimize
|| !target
18688 || GET_MODE (target
) != tmode
18689 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18690 target
= gen_reg_rtx (tmode
);
18692 if (VECTOR_MODE_P (mode0
))
18693 op0
= safe_vector_operand (op0
, mode0
);
18695 if ((optimize
&& !register_operand (op0
, mode0
))
18696 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18697 op0
= copy_to_mode_reg (mode0
, op0
);
18700 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
18701 op1
= copy_to_mode_reg (mode0
, op1
);
18703 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18710 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18713 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
18717 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18718 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18719 rtx op0
= expand_normal (arg0
);
18720 rtx op1
= expand_normal (arg1
);
18722 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
18723 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
18724 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
18725 enum rtx_code comparison
= d
->comparison
;
18727 if (VECTOR_MODE_P (mode0
))
18728 op0
= safe_vector_operand (op0
, mode0
);
18729 if (VECTOR_MODE_P (mode1
))
18730 op1
= safe_vector_operand (op1
, mode1
);
18732 /* Swap operands if we have a comparison that isn't available in
18734 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18736 rtx tmp
= gen_reg_rtx (mode1
);
18737 emit_move_insn (tmp
, op1
);
18742 if (optimize
|| !target
18743 || GET_MODE (target
) != tmode
18744 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
18745 target
= gen_reg_rtx (tmode
);
18747 if ((optimize
&& !register_operand (op0
, mode0
))
18748 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
18749 op0
= copy_to_mode_reg (mode0
, op0
);
18750 if ((optimize
&& !register_operand (op1
, mode1
))
18751 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
18752 op1
= copy_to_mode_reg (mode1
, op1
);
18754 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
18755 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
18762 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18765 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
18769 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18770 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18771 rtx op0
= expand_normal (arg0
);
18772 rtx op1
= expand_normal (arg1
);
18773 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18774 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18775 enum rtx_code comparison
= d
->comparison
;
18777 if (VECTOR_MODE_P (mode0
))
18778 op0
= safe_vector_operand (op0
, mode0
);
18779 if (VECTOR_MODE_P (mode1
))
18780 op1
= safe_vector_operand (op1
, mode1
);
18782 /* Swap operands if we have a comparison that isn't available in
18784 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18791 target
= gen_reg_rtx (SImode
);
18792 emit_move_insn (target
, const0_rtx
);
18793 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18795 if ((optimize
&& !register_operand (op0
, mode0
))
18796 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
18797 op0
= copy_to_mode_reg (mode0
, op0
);
18798 if ((optimize
&& !register_operand (op1
, mode1
))
18799 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
18800 op1
= copy_to_mode_reg (mode1
, op1
);
18802 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
18806 emit_insn (gen_rtx_SET (VOIDmode
,
18807 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18808 gen_rtx_fmt_ee (comparison
, QImode
,
18812 return SUBREG_REG (target
);
18815 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* NOTE(review): this chunk is a line-mangled extract; the numeric prefixes
   (18815, 18818, ...) are extraction artifacts of the original file's line
   numbers, and several original lines (return type, braces, the
   "if (! pat) return 0; emit_insn (pat);" sequence) are absent from view.
   Code is left byte-identical below; only comments are added.  */
/* Expands a two-operand SSE4.1 PTEST-style builtin described by D for call
   EXP.  Result: the flag bit named by d->comparison materialized as 0/1 in
   an SImode pseudo, returned via SUBREG_REG of a QImode low-part subreg.  */
18818 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
/* Fetch and expand the builtin's two arguments.  */
18822 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18823 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18824 rtx op0
= expand_normal (arg0
);
18825 rtx op1
= expand_normal (arg1
);
/* Operand modes and predicates come from the insn pattern selected by
   d->icode.  */
18826 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18827 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18828 enum rtx_code comparison
= d
->comparison
;
/* Make bare vector memory references acceptable to the pattern.  */
18830 if (VECTOR_MODE_P (mode0
))
18831 op0
= safe_vector_operand (op0
, mode0
);
18832 if (VECTOR_MODE_P (mode1
))
18833 op1
= safe_vector_operand (op1
, mode1
);
/* Build the result container: a zeroed SImode pseudo whose low QImode
   byte will receive the comparison result via STRICT_LOW_PART.  */
18835 target
= gen_reg_rtx (SImode
);
18836 emit_move_insn (target
, const0_rtx
);
18837 target
= gen_rtx_SUBREG (QImode
, target
, 0);
/* Copy each operand into a register when optimizing (to help CSE) or when
   the pattern's predicate rejects the operand as-is.  */
18839 if ((optimize
&& !register_operand (op0
, mode0
))
18840 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
18841 op0
= copy_to_mode_reg (mode0
, op0
);
18842 if ((optimize
&& !register_operand (op1
, mode1
))
18843 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
18844 op1
= copy_to_mode_reg (mode1
, op1
);
/* Generate the ptest insn itself (sets the flags register).  */
18846 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
/* Store the flags comparison into the low byte of TARGET.  The operands
   of gen_rtx_fmt_ee (the flags register reference) are on lines missing
   from this extract.  */
18850 emit_insn (gen_rtx_SET (VOIDmode
,
18851 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18852 gen_rtx_fmt_ee (comparison
, QImode
,
/* Hand back the full SImode pseudo underlying the QImode subreg.  */
18856 return SUBREG_REG (target
);
18859 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* NOTE(review): line-mangled extract — numeric prefixes are original line
   numbers, and interior lines (braces, "if (! pat) return 0;",
   "emit_insn (pat);", the FLAGS_REG argument of gen_rtx_REG, returns) are
   missing from view.  Code is byte-identical; only comments added.  */
/* Expands an SSE4.2 explicit-length string builtin (PCMPESTRI/PCMPESTRM or
   the flag-extracting variants) described by D for call EXP.  Arguments:
   two vectors with explicit lengths (v, len, v, len) plus an 8-bit
   immediate control byte.  */
18862 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
18863 tree exp
, rtx target
)
/* Gather the five call arguments.  */
18866 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18867 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18868 tree arg2
= CALL_EXPR_ARG (exp
, 2);
18869 tree arg3
= CALL_EXPR_ARG (exp
, 3);
18870 tree arg4
= CALL_EXPR_ARG (exp
, 4);
18871 rtx scratch0
, scratch1
;
18872 rtx op0
= expand_normal (arg0
);
18873 rtx op1
= expand_normal (arg1
);
18874 rtx op2
= expand_normal (arg2
);
18875 rtx op3
= expand_normal (arg3
);
18876 rtx op4
= expand_normal (arg4
);
/* Pattern operand modes: 0/1 are the two outputs (index and mask),
   2..5 the vector/length inputs, 6 the immediate control byte.  */
18877 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
18879 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
18880 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
18881 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
18882 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
18883 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
18884 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
18885 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
/* Make bare vector memory references safe for the pattern.  */
18887 if (VECTOR_MODE_P (modev2
))
18888 op0
= safe_vector_operand (op0
, modev2
);
18889 if (VECTOR_MODE_P (modev4
))
18890 op2
= safe_vector_operand (op2
, modev4
);
/* Force operands into registers where the operand predicates require.  */
18892 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
18893 op0
= copy_to_mode_reg (modev2
, op0
);
18894 if (! (*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modei3
))
18895 op1
= copy_to_mode_reg (modei3
, op1
);
18896 if ((optimize
&& !register_operand (op2
, modev4
))
18897 || !(*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modev4
))
18898 op2
= copy_to_mode_reg (modev4
, op2
);
18899 if (! (*insn_data
[d
->icode
].operand
[5].predicate
) (op3
, modei5
))
18900 op3
= copy_to_mode_reg (modei5
, op3
);
/* The control byte must be a compile-time constant; diagnose otherwise.
   NOTE(review): the message should read "an 8-bit immediate" — string
   left untouched in this documentation-only pass.  */
18902 if (! (*insn_data
[d
->icode
].operand
[6].predicate
) (op4
, modeimm
))
18904 error ("the fifth argument must be a 8-bit immediate");
/* PCMPESTRI returns the index (operand 0); the mask output becomes a
   scratch.  Reuse TARGET only if it is a usable register of the right
   mode.  */
18908 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
18910 if (optimize
|| !target
18911 || GET_MODE (target
) != tmode0
18912 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
18913 target
= gen_reg_rtx (tmode0
);
18915 scratch1
= gen_reg_rtx (tmode1
);
18917 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
/* PCMPESTRM returns the mask (operand 1); the index output is scratch.  */
18919 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
18921 if (optimize
|| !target
18922 || GET_MODE (target
) != tmode1
18923 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
18924 target
= gen_reg_rtx (tmode1
);
18926 scratch0
= gen_reg_rtx (tmode0
);
18928 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
/* Otherwise the builtin extracts a single flag; d->flag encodes which
   (it is asserted nonzero).  Both pattern outputs become scratches.  */
18932 gcc_assert (d
->flag
);
18934 scratch0
= gen_reg_rtx (tmode0
);
18935 scratch1
= gen_reg_rtx (tmode1
);
18937 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
/* Flag-extracting path: materialize the requested flag as 0/1 in the low
   byte of a zeroed SImode pseudo, via STRICT_LOW_PART, comparing the
   flags register (mode taken from d->flag) against zero.  The remaining
   gen_rtx_REG/const0_rtx arguments are on lines missing from this
   extract.  */
18947 target
= gen_reg_rtx (SImode
);
18948 emit_move_insn (target
, const0_rtx
);
18949 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18952 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18953 gen_rtx_fmt_ee (EQ
, QImode
,
18954 gen_rtx_REG ((enum machine_mode
) d
->flag
,
18957 return SUBREG_REG (target
);
18964 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* NOTE(review): line-mangled extract — numeric prefixes are original line
   numbers; braces, "if (! pat) return 0;", "emit_insn (pat);", the flags
   register argument and some returns are on lines missing from view.
   Code is byte-identical; only comments added.  */
/* Expands an SSE4.2 implicit-length string builtin (PCMPISTRI/PCMPISTRM or
   the flag-extracting variants) described by D for call EXP.  Arguments:
   two NUL-terminated vectors plus an 8-bit immediate control byte.
   Structure parallels ix86_expand_sse_pcmpestr, minus the length
   operands.  */
18967 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
18968 tree exp
, rtx target
)
/* Gather and expand the three call arguments.  */
18971 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18972 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18973 tree arg2
= CALL_EXPR_ARG (exp
, 2);
18974 rtx scratch0
, scratch1
;
18975 rtx op0
= expand_normal (arg0
);
18976 rtx op1
= expand_normal (arg1
);
18977 rtx op2
= expand_normal (arg2
);
/* Pattern operand modes: 0/1 the two outputs, 2/3 the vector inputs,
   4 the immediate control byte.  */
18978 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
18980 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
18981 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
18982 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
18983 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
18984 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
/* Make bare vector memory references safe for the pattern.  */
18986 if (VECTOR_MODE_P (modev2
))
18987 op0
= safe_vector_operand (op0
, modev2
);
18988 if (VECTOR_MODE_P (modev3
))
18989 op1
= safe_vector_operand (op1
, modev3
);
/* Force operands into registers where the operand predicates require.  */
18991 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
18992 op0
= copy_to_mode_reg (modev2
, op0
);
18993 if ((optimize
&& !register_operand (op1
, modev3
))
18994 || !(*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modev3
))
18995 op1
= copy_to_mode_reg (modev3
, op1
);
/* The control byte must be a compile-time constant.  NOTE(review): the
   message should read "an 8-bit immediate" — string left untouched in
   this documentation-only pass.  */
18997 if (! (*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modeimm
))
18999 error ("the third argument must be a 8-bit immediate");
/* PCMPISTRI returns the index (operand 0); the mask output is scratch.
   Reuse TARGET only when it is a usable register of the right mode.  */
19003 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
19005 if (optimize
|| !target
19006 || GET_MODE (target
) != tmode0
19007 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
19008 target
= gen_reg_rtx (tmode0
);
19010 scratch1
= gen_reg_rtx (tmode1
);
19012 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
/* PCMPISTRM returns the mask (operand 1); the index output is scratch.  */
19014 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
19016 if (optimize
|| !target
19017 || GET_MODE (target
) != tmode1
19018 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
19019 target
= gen_reg_rtx (tmode1
);
19021 scratch0
= gen_reg_rtx (tmode0
);
19023 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
/* Otherwise a single flag is extracted; d->flag names it (asserted
   nonzero) and both outputs are scratches.  */
19027 gcc_assert (d
->flag
);
19029 scratch0
= gen_reg_rtx (tmode0
);
19030 scratch1
= gen_reg_rtx (tmode1
);
19032 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
/* Flag path: set the low byte of a zeroed SImode pseudo from a QImode
   comparison of the flags register (mode from d->flag) against zero;
   the closing gen_rtx_REG/const0_rtx arguments are on missing lines.  */
19042 target
= gen_reg_rtx (SImode
);
19043 emit_move_insn (target
, const0_rtx
);
19044 target
= gen_rtx_SUBREG (QImode
, target
, 0);
19047 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
19048 gen_rtx_fmt_ee (EQ
, QImode
,
19049 gen_rtx_REG ((enum machine_mode
) d
->flag
,
19052 return SUBREG_REG (target
);
19058 /* Return the integer constant in ARG. Constrain it to be in the range
19059 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): mangled extract; the post-error "return 0;" and the final
   "return elt;" of this function are on lines missing from view.  Only
   comments are added here.  */
/* VEC_TYPE: a vector type supplying the valid element count.
   ARG: a tree expected to be a non-negative integer constant.
   Returns the selector value (an element index 0..nunits-1).  */
19062 get_element_number (tree vec_type
, tree arg
)
/* MAX is the highest valid element index for the vector type.  */
19064 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
/* Reject anything that is not an unsigned host-representable integer
   constant, or that exceeds MAX; note elt is assigned inside the
   comma expression only when host_integerp succeeded.  */
19066 if (!host_integerp (arg
, 1)
19067 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
19069 error ("selector must be an integer constant in the range 0..%wi", max
);
19076 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19077 ix86_expand_vector_init. We DO have language-level syntax for this, in
19078 the form of (type){ init-list }. Except that since we can't place emms
19079 instructions from inside the compiler, we can't allow the use of MMX
19080 registers unless the user explicitly asks for it. So we do *not* define
19081 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
19082 we have builtins invoked by mmintrin.h that gives us license to emit
19083 these sorts of instructions. */
/* NOTE(review): mangled extract; the final "return target;" line is not
   visible in this view.  Only comments are added.  */
/* TYPE: the vector result type.  EXP: the builtin call, one argument per
   vector element.  TARGET: optional destination register hint.  */
19086 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
19088 enum machine_mode tmode
= TYPE_MODE (type
);
19089 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
19090 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
/* One rtvec slot per vector element.  */
19091 rtvec v
= rtvec_alloc (n_elt
);
/* The call must supply exactly one argument per element.  */
19093 gcc_assert (VECTOR_MODE_P (tmode
));
19094 gcc_assert (call_expr_nargs (exp
) == n_elt
);
/* Expand each argument and narrow it to the element mode.  */
19096 for (i
= 0; i
< n_elt
; ++i
)
19098 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
19099 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
/* Only a plain register is acceptable as the destination.  */
19102 if (!target
|| !register_operand (target
, tmode
))
19103 target
= gen_reg_rtx (tmode
);
/* Delegate the actual element-gathering code generation; first argument
   true requests MMX-capable expansion (these builtins license MMX use,
   per the comment above).  */
19105 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
19109 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19110 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19111 had a language-level syntax for referencing vector elements. */
/* NOTE(review): mangled extract; local declarations (arg0/arg1/op0/elt)
   and the final "return target;" are on lines missing from view.  Only
   comments are added.  */
/* EXP: builtin call (vector, constant element selector).  TARGET: optional
   destination hint.  Returns the extracted element.  */
19114 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
19116 enum machine_mode tmode
, mode0
;
19121 arg0
= CALL_EXPR_ARG (exp
, 0);
19122 arg1
= CALL_EXPR_ARG (exp
, 1);
19124 op0
= expand_normal (arg0
);
/* Validate the selector: must be a constant within the vector bounds
   (get_element_number diagnoses otherwise).  */
19125 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
/* TMODE is the element mode, MODE0 the whole-vector mode.  */
19127 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
19128 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
19129 gcc_assert (VECTOR_MODE_P (mode0
));
19131 op0
= force_reg (mode0
, op0
);
/* When optimizing, always use a fresh pseudo for the result.  */
19133 if (optimize
|| !target
|| !register_operand (target
, tmode
))
19134 target
= gen_reg_rtx (tmode
);
/* Delegate code generation; true licenses MMX-capable expansion.  */
19136 ix86_expand_vector_extract (true, target
, op0
, elt
);
19141 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19142 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19143 a language-level syntax for referencing vector elements. */
/* NOTE(review): mangled extract; the "elt" declaration and the final
   "return target;" are on lines missing from view.  Only comments are
   added.  */
/* EXP: builtin call (vector, new element value, constant selector).
   Returns a fresh copy of the vector with the selected element replaced;
   the incoming vector operand itself is never modified.  */
19146 ix86_expand_vec_set_builtin (tree exp
)
19148 enum machine_mode tmode
, mode1
;
19149 tree arg0
, arg1
, arg2
;
19151 rtx op0
, op1
, target
;
19153 arg0
= CALL_EXPR_ARG (exp
, 0);
19154 arg1
= CALL_EXPR_ARG (exp
, 1);
19155 arg2
= CALL_EXPR_ARG (exp
, 2);
/* TMODE is the whole-vector mode, MODE1 the element mode.  */
19157 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
19158 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
19159 gcc_assert (VECTOR_MODE_P (tmode
));
19161 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
19162 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
/* Validate the selector against the vector bounds.  */
19163 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
/* Coerce the new element to the element mode if expansion produced a
   different (non-VOID) mode.  */
19165 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
19166 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
19168 op0
= force_reg (tmode
, op0
);
19169 op1
= force_reg (mode1
, op1
);
19171 /* OP0 is the source of these builtin functions and shouldn't be
19172 modified. Create a copy, use it and return it as target. */
19173 target
= gen_reg_rtx (tmode
);
19174 emit_move_insn (target
, op0
);
/* Delegate code generation; true licenses MMX-capable expansion.  */
19175 ix86_expand_vector_set (true, target
, op1
, elt
);
19180 /* Expand an expression EXP that calls a built-in function,
19181 with result going to TARGET if that's convenient
19182 (and in mode MODE if that's convenient).
19183 SUBTARGET may be used as the target for computing one of EXP's operands.
19184 IGNORE is nonzero if the value is to be ignored. */
19187 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
19188 enum machine_mode mode ATTRIBUTE_UNUSED
,
19189 int ignore ATTRIBUTE_UNUSED
)
19191 const struct builtin_description
*d
;
19193 enum insn_code icode
;
19194 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
19195 tree arg0
, arg1
, arg2
, arg3
;
19196 rtx op0
, op1
, op2
, op3
, pat
;
19197 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
19198 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
19202 case IX86_BUILTIN_EMMS
:
19203 emit_insn (gen_mmx_emms ());
19206 case IX86_BUILTIN_SFENCE
:
19207 emit_insn (gen_sse_sfence ());
19210 case IX86_BUILTIN_MASKMOVQ
:
19211 case IX86_BUILTIN_MASKMOVDQU
:
19212 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
19213 ? CODE_FOR_mmx_maskmovq
19214 : CODE_FOR_sse2_maskmovdqu
);
19215 /* Note the arg order is different from the operand order. */
19216 arg1
= CALL_EXPR_ARG (exp
, 0);
19217 arg2
= CALL_EXPR_ARG (exp
, 1);
19218 arg0
= CALL_EXPR_ARG (exp
, 2);
19219 op0
= expand_normal (arg0
);
19220 op1
= expand_normal (arg1
);
19221 op2
= expand_normal (arg2
);
19222 mode0
= insn_data
[icode
].operand
[0].mode
;
19223 mode1
= insn_data
[icode
].operand
[1].mode
;
19224 mode2
= insn_data
[icode
].operand
[2].mode
;
19226 op0
= force_reg (Pmode
, op0
);
19227 op0
= gen_rtx_MEM (mode1
, op0
);
19229 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
19230 op0
= copy_to_mode_reg (mode0
, op0
);
19231 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
19232 op1
= copy_to_mode_reg (mode1
, op1
);
19233 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
19234 op2
= copy_to_mode_reg (mode2
, op2
);
19235 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
19241 case IX86_BUILTIN_RSQRTF
:
19242 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
19244 case IX86_BUILTIN_SQRTSS
:
19245 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
19246 case IX86_BUILTIN_RSQRTSS
:
19247 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
19248 case IX86_BUILTIN_RCPSS
:
19249 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
19251 case IX86_BUILTIN_LOADUPS
:
19252 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
19254 case IX86_BUILTIN_STOREUPS
:
19255 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
19257 case IX86_BUILTIN_LOADHPS
:
19258 case IX86_BUILTIN_LOADLPS
:
19259 case IX86_BUILTIN_LOADHPD
:
19260 case IX86_BUILTIN_LOADLPD
:
19261 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
19262 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
19263 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
19264 : CODE_FOR_sse2_loadlpd
);
19265 arg0
= CALL_EXPR_ARG (exp
, 0);
19266 arg1
= CALL_EXPR_ARG (exp
, 1);
19267 op0
= expand_normal (arg0
);
19268 op1
= expand_normal (arg1
);
19269 tmode
= insn_data
[icode
].operand
[0].mode
;
19270 mode0
= insn_data
[icode
].operand
[1].mode
;
19271 mode1
= insn_data
[icode
].operand
[2].mode
;
19273 op0
= force_reg (mode0
, op0
);
19274 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
19275 if (optimize
|| target
== 0
19276 || GET_MODE (target
) != tmode
19277 || !register_operand (target
, tmode
))
19278 target
= gen_reg_rtx (tmode
);
19279 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19285 case IX86_BUILTIN_STOREHPS
:
19286 case IX86_BUILTIN_STORELPS
:
19287 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
19288 : CODE_FOR_sse_storelps
);
19289 arg0
= CALL_EXPR_ARG (exp
, 0);
19290 arg1
= CALL_EXPR_ARG (exp
, 1);
19291 op0
= expand_normal (arg0
);
19292 op1
= expand_normal (arg1
);
19293 mode0
= insn_data
[icode
].operand
[0].mode
;
19294 mode1
= insn_data
[icode
].operand
[1].mode
;
19296 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19297 op1
= force_reg (mode1
, op1
);
19299 pat
= GEN_FCN (icode
) (op0
, op1
);
19305 case IX86_BUILTIN_MOVNTPS
:
19306 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
19307 case IX86_BUILTIN_MOVNTQ
:
19308 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
19310 case IX86_BUILTIN_LDMXCSR
:
19311 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
19312 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
19313 emit_move_insn (target
, op0
);
19314 emit_insn (gen_sse_ldmxcsr (target
));
19317 case IX86_BUILTIN_STMXCSR
:
19318 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
19319 emit_insn (gen_sse_stmxcsr (target
));
19320 return copy_to_mode_reg (SImode
, target
);
19322 case IX86_BUILTIN_SHUFPS
:
19323 case IX86_BUILTIN_SHUFPD
:
19324 icode
= (fcode
== IX86_BUILTIN_SHUFPS
19325 ? CODE_FOR_sse_shufps
19326 : CODE_FOR_sse2_shufpd
);
19327 arg0
= CALL_EXPR_ARG (exp
, 0);
19328 arg1
= CALL_EXPR_ARG (exp
, 1);
19329 arg2
= CALL_EXPR_ARG (exp
, 2);
19330 op0
= expand_normal (arg0
);
19331 op1
= expand_normal (arg1
);
19332 op2
= expand_normal (arg2
);
19333 tmode
= insn_data
[icode
].operand
[0].mode
;
19334 mode0
= insn_data
[icode
].operand
[1].mode
;
19335 mode1
= insn_data
[icode
].operand
[2].mode
;
19336 mode2
= insn_data
[icode
].operand
[3].mode
;
19338 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19339 op0
= copy_to_mode_reg (mode0
, op0
);
19340 if ((optimize
&& !register_operand (op1
, mode1
))
19341 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19342 op1
= copy_to_mode_reg (mode1
, op1
);
19343 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19345 /* @@@ better error message */
19346 error ("mask must be an immediate");
19347 return gen_reg_rtx (tmode
);
19349 if (optimize
|| target
== 0
19350 || GET_MODE (target
) != tmode
19351 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19352 target
= gen_reg_rtx (tmode
);
19353 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19359 case IX86_BUILTIN_PSHUFW
:
19360 case IX86_BUILTIN_PSHUFD
:
19361 case IX86_BUILTIN_PSHUFHW
:
19362 case IX86_BUILTIN_PSHUFLW
:
19363 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
19364 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
19365 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
19366 : CODE_FOR_mmx_pshufw
);
19367 arg0
= CALL_EXPR_ARG (exp
, 0);
19368 arg1
= CALL_EXPR_ARG (exp
, 1);
19369 op0
= expand_normal (arg0
);
19370 op1
= expand_normal (arg1
);
19371 tmode
= insn_data
[icode
].operand
[0].mode
;
19372 mode1
= insn_data
[icode
].operand
[1].mode
;
19373 mode2
= insn_data
[icode
].operand
[2].mode
;
19375 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19376 op0
= copy_to_mode_reg (mode1
, op0
);
19377 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19379 /* @@@ better error message */
19380 error ("mask must be an immediate");
19384 || GET_MODE (target
) != tmode
19385 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19386 target
= gen_reg_rtx (tmode
);
19387 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19393 case IX86_BUILTIN_PSLLWI128
:
19394 icode
= CODE_FOR_ashlv8hi3
;
19396 case IX86_BUILTIN_PSLLDI128
:
19397 icode
= CODE_FOR_ashlv4si3
;
19399 case IX86_BUILTIN_PSLLQI128
:
19400 icode
= CODE_FOR_ashlv2di3
;
19402 case IX86_BUILTIN_PSRAWI128
:
19403 icode
= CODE_FOR_ashrv8hi3
;
19405 case IX86_BUILTIN_PSRADI128
:
19406 icode
= CODE_FOR_ashrv4si3
;
19408 case IX86_BUILTIN_PSRLWI128
:
19409 icode
= CODE_FOR_lshrv8hi3
;
19411 case IX86_BUILTIN_PSRLDI128
:
19412 icode
= CODE_FOR_lshrv4si3
;
19414 case IX86_BUILTIN_PSRLQI128
:
19415 icode
= CODE_FOR_lshrv2di3
;
19418 arg0
= CALL_EXPR_ARG (exp
, 0);
19419 arg1
= CALL_EXPR_ARG (exp
, 1);
19420 op0
= expand_normal (arg0
);
19421 op1
= expand_normal (arg1
);
19423 if (!CONST_INT_P (op1
))
19425 error ("shift must be an immediate");
19428 if (INTVAL (op1
) < 0 || INTVAL (op1
) > 255)
19429 op1
= GEN_INT (255);
19431 tmode
= insn_data
[icode
].operand
[0].mode
;
19432 mode1
= insn_data
[icode
].operand
[1].mode
;
19433 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19434 op0
= copy_to_reg (op0
);
19436 target
= gen_reg_rtx (tmode
);
19437 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19443 case IX86_BUILTIN_PSLLW128
:
19444 icode
= CODE_FOR_ashlv8hi3
;
19446 case IX86_BUILTIN_PSLLD128
:
19447 icode
= CODE_FOR_ashlv4si3
;
19449 case IX86_BUILTIN_PSLLQ128
:
19450 icode
= CODE_FOR_ashlv2di3
;
19452 case IX86_BUILTIN_PSRAW128
:
19453 icode
= CODE_FOR_ashrv8hi3
;
19455 case IX86_BUILTIN_PSRAD128
:
19456 icode
= CODE_FOR_ashrv4si3
;
19458 case IX86_BUILTIN_PSRLW128
:
19459 icode
= CODE_FOR_lshrv8hi3
;
19461 case IX86_BUILTIN_PSRLD128
:
19462 icode
= CODE_FOR_lshrv4si3
;
19464 case IX86_BUILTIN_PSRLQ128
:
19465 icode
= CODE_FOR_lshrv2di3
;
19468 arg0
= CALL_EXPR_ARG (exp
, 0);
19469 arg1
= CALL_EXPR_ARG (exp
, 1);
19470 op0
= expand_normal (arg0
);
19471 op1
= expand_normal (arg1
);
19473 tmode
= insn_data
[icode
].operand
[0].mode
;
19474 mode1
= insn_data
[icode
].operand
[1].mode
;
19476 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19477 op0
= copy_to_reg (op0
);
19479 op1
= simplify_gen_subreg (SImode
, op1
, GET_MODE (op1
), 0);
19480 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, SImode
))
19481 op1
= copy_to_reg (op1
);
19483 target
= gen_reg_rtx (tmode
);
19484 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19490 case IX86_BUILTIN_PSLLDQI128
:
19491 case IX86_BUILTIN_PSRLDQI128
:
19492 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
19493 : CODE_FOR_sse2_lshrti3
);
19494 arg0
= CALL_EXPR_ARG (exp
, 0);
19495 arg1
= CALL_EXPR_ARG (exp
, 1);
19496 op0
= expand_normal (arg0
);
19497 op1
= expand_normal (arg1
);
19498 tmode
= insn_data
[icode
].operand
[0].mode
;
19499 mode1
= insn_data
[icode
].operand
[1].mode
;
19500 mode2
= insn_data
[icode
].operand
[2].mode
;
19502 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19504 op0
= copy_to_reg (op0
);
19505 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
19507 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19509 error ("shift must be an immediate");
19512 target
= gen_reg_rtx (V2DImode
);
19513 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
19520 case IX86_BUILTIN_FEMMS
:
19521 emit_insn (gen_mmx_femms ());
19524 case IX86_BUILTIN_PAVGUSB
:
19525 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
19527 case IX86_BUILTIN_PF2ID
:
19528 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
19530 case IX86_BUILTIN_PFACC
:
19531 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
19533 case IX86_BUILTIN_PFADD
:
19534 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
19536 case IX86_BUILTIN_PFCMPEQ
:
19537 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
19539 case IX86_BUILTIN_PFCMPGE
:
19540 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
19542 case IX86_BUILTIN_PFCMPGT
:
19543 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
19545 case IX86_BUILTIN_PFMAX
:
19546 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
19548 case IX86_BUILTIN_PFMIN
:
19549 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
19551 case IX86_BUILTIN_PFMUL
:
19552 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
19554 case IX86_BUILTIN_PFRCP
:
19555 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
19557 case IX86_BUILTIN_PFRCPIT1
:
19558 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
19560 case IX86_BUILTIN_PFRCPIT2
:
19561 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
19563 case IX86_BUILTIN_PFRSQIT1
:
19564 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
19566 case IX86_BUILTIN_PFRSQRT
:
19567 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
19569 case IX86_BUILTIN_PFSUB
:
19570 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
19572 case IX86_BUILTIN_PFSUBR
:
19573 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
19575 case IX86_BUILTIN_PI2FD
:
19576 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
19578 case IX86_BUILTIN_PMULHRW
:
19579 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
19581 case IX86_BUILTIN_PF2IW
:
19582 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
19584 case IX86_BUILTIN_PFNACC
:
19585 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
19587 case IX86_BUILTIN_PFPNACC
:
19588 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
19590 case IX86_BUILTIN_PI2FW
:
19591 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
19593 case IX86_BUILTIN_PSWAPDSI
:
19594 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
19596 case IX86_BUILTIN_PSWAPDSF
:
19597 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
19599 case IX86_BUILTIN_SQRTSD
:
19600 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
19601 case IX86_BUILTIN_LOADUPD
:
19602 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
19603 case IX86_BUILTIN_STOREUPD
:
19604 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
19606 case IX86_BUILTIN_MFENCE
:
19607 emit_insn (gen_sse2_mfence ());
19609 case IX86_BUILTIN_LFENCE
:
19610 emit_insn (gen_sse2_lfence ());
19613 case IX86_BUILTIN_CLFLUSH
:
19614 arg0
= CALL_EXPR_ARG (exp
, 0);
19615 op0
= expand_normal (arg0
);
19616 icode
= CODE_FOR_sse2_clflush
;
19617 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
19618 op0
= copy_to_mode_reg (Pmode
, op0
);
19620 emit_insn (gen_sse2_clflush (op0
));
19623 case IX86_BUILTIN_MOVNTPD
:
19624 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
19625 case IX86_BUILTIN_MOVNTDQ
:
19626 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
19627 case IX86_BUILTIN_MOVNTI
:
19628 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
19630 case IX86_BUILTIN_LOADDQU
:
19631 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
19632 case IX86_BUILTIN_STOREDQU
:
19633 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
19635 case IX86_BUILTIN_MONITOR
:
19636 arg0
= CALL_EXPR_ARG (exp
, 0);
19637 arg1
= CALL_EXPR_ARG (exp
, 1);
19638 arg2
= CALL_EXPR_ARG (exp
, 2);
19639 op0
= expand_normal (arg0
);
19640 op1
= expand_normal (arg1
);
19641 op2
= expand_normal (arg2
);
19643 op0
= copy_to_mode_reg (Pmode
, op0
);
19645 op1
= copy_to_mode_reg (SImode
, op1
);
19647 op2
= copy_to_mode_reg (SImode
, op2
);
19649 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
19651 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
19654 case IX86_BUILTIN_MWAIT
:
19655 arg0
= CALL_EXPR_ARG (exp
, 0);
19656 arg1
= CALL_EXPR_ARG (exp
, 1);
19657 op0
= expand_normal (arg0
);
19658 op1
= expand_normal (arg1
);
19660 op0
= copy_to_mode_reg (SImode
, op0
);
19662 op1
= copy_to_mode_reg (SImode
, op1
);
19663 emit_insn (gen_sse3_mwait (op0
, op1
));
19666 case IX86_BUILTIN_LDDQU
:
19667 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
19670 case IX86_BUILTIN_PALIGNR
:
19671 case IX86_BUILTIN_PALIGNR128
:
19672 if (fcode
== IX86_BUILTIN_PALIGNR
)
19674 icode
= CODE_FOR_ssse3_palignrdi
;
19679 icode
= CODE_FOR_ssse3_palignrti
;
19682 arg0
= CALL_EXPR_ARG (exp
, 0);
19683 arg1
= CALL_EXPR_ARG (exp
, 1);
19684 arg2
= CALL_EXPR_ARG (exp
, 2);
19685 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19686 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19687 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19688 tmode
= insn_data
[icode
].operand
[0].mode
;
19689 mode1
= insn_data
[icode
].operand
[1].mode
;
19690 mode2
= insn_data
[icode
].operand
[2].mode
;
19691 mode3
= insn_data
[icode
].operand
[3].mode
;
19693 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19695 op0
= copy_to_reg (op0
);
19696 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
19698 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19700 op1
= copy_to_reg (op1
);
19701 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
19703 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19705 error ("shift must be an immediate");
19708 target
= gen_reg_rtx (mode
);
19709 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
19716 case IX86_BUILTIN_MOVNTDQA
:
19717 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa
, exp
,
19720 case IX86_BUILTIN_MOVNTSD
:
19721 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
19723 case IX86_BUILTIN_MOVNTSS
:
19724 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
19726 case IX86_BUILTIN_INSERTQ
:
19727 case IX86_BUILTIN_EXTRQ
:
19728 icode
= (fcode
== IX86_BUILTIN_EXTRQ
19729 ? CODE_FOR_sse4a_extrq
19730 : CODE_FOR_sse4a_insertq
);
19731 arg0
= CALL_EXPR_ARG (exp
, 0);
19732 arg1
= CALL_EXPR_ARG (exp
, 1);
19733 op0
= expand_normal (arg0
);
19734 op1
= expand_normal (arg1
);
19735 tmode
= insn_data
[icode
].operand
[0].mode
;
19736 mode1
= insn_data
[icode
].operand
[1].mode
;
19737 mode2
= insn_data
[icode
].operand
[2].mode
;
19738 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19739 op0
= copy_to_mode_reg (mode1
, op0
);
19740 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19741 op1
= copy_to_mode_reg (mode2
, op1
);
19742 if (optimize
|| target
== 0
19743 || GET_MODE (target
) != tmode
19744 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19745 target
= gen_reg_rtx (tmode
);
19746 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19752 case IX86_BUILTIN_EXTRQI
:
19753 icode
= CODE_FOR_sse4a_extrqi
;
19754 arg0
= CALL_EXPR_ARG (exp
, 0);
19755 arg1
= CALL_EXPR_ARG (exp
, 1);
19756 arg2
= CALL_EXPR_ARG (exp
, 2);
19757 op0
= expand_normal (arg0
);
19758 op1
= expand_normal (arg1
);
19759 op2
= expand_normal (arg2
);
19760 tmode
= insn_data
[icode
].operand
[0].mode
;
19761 mode1
= insn_data
[icode
].operand
[1].mode
;
19762 mode2
= insn_data
[icode
].operand
[2].mode
;
19763 mode3
= insn_data
[icode
].operand
[3].mode
;
19764 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19765 op0
= copy_to_mode_reg (mode1
, op0
);
19766 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19768 error ("index mask must be an immediate");
19769 return gen_reg_rtx (tmode
);
19771 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19773 error ("length mask must be an immediate");
19774 return gen_reg_rtx (tmode
);
19776 if (optimize
|| target
== 0
19777 || GET_MODE (target
) != tmode
19778 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19779 target
= gen_reg_rtx (tmode
);
19780 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19786 case IX86_BUILTIN_INSERTQI
:
19787 icode
= CODE_FOR_sse4a_insertqi
;
19788 arg0
= CALL_EXPR_ARG (exp
, 0);
19789 arg1
= CALL_EXPR_ARG (exp
, 1);
19790 arg2
= CALL_EXPR_ARG (exp
, 2);
19791 arg3
= CALL_EXPR_ARG (exp
, 3);
19792 op0
= expand_normal (arg0
);
19793 op1
= expand_normal (arg1
);
19794 op2
= expand_normal (arg2
);
19795 op3
= expand_normal (arg3
);
19796 tmode
= insn_data
[icode
].operand
[0].mode
;
19797 mode1
= insn_data
[icode
].operand
[1].mode
;
19798 mode2
= insn_data
[icode
].operand
[2].mode
;
19799 mode3
= insn_data
[icode
].operand
[3].mode
;
19800 mode4
= insn_data
[icode
].operand
[4].mode
;
19802 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19803 op0
= copy_to_mode_reg (mode1
, op0
);
19805 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19806 op1
= copy_to_mode_reg (mode2
, op1
);
19808 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19810 error ("index mask must be an immediate");
19811 return gen_reg_rtx (tmode
);
19813 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
19815 error ("length mask must be an immediate");
19816 return gen_reg_rtx (tmode
);
19818 if (optimize
|| target
== 0
19819 || GET_MODE (target
) != tmode
19820 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19821 target
= gen_reg_rtx (tmode
);
19822 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
19828 case IX86_BUILTIN_VEC_INIT_V2SI
:
19829 case IX86_BUILTIN_VEC_INIT_V4HI
:
19830 case IX86_BUILTIN_VEC_INIT_V8QI
:
19831 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
19833 case IX86_BUILTIN_VEC_EXT_V2DF
:
19834 case IX86_BUILTIN_VEC_EXT_V2DI
:
19835 case IX86_BUILTIN_VEC_EXT_V4SF
:
19836 case IX86_BUILTIN_VEC_EXT_V4SI
:
19837 case IX86_BUILTIN_VEC_EXT_V8HI
:
19838 case IX86_BUILTIN_VEC_EXT_V2SI
:
19839 case IX86_BUILTIN_VEC_EXT_V4HI
:
19840 case IX86_BUILTIN_VEC_EXT_V16QI
:
19841 return ix86_expand_vec_ext_builtin (exp
, target
);
19843 case IX86_BUILTIN_VEC_SET_V2DI
:
19844 case IX86_BUILTIN_VEC_SET_V4SF
:
19845 case IX86_BUILTIN_VEC_SET_V4SI
:
19846 case IX86_BUILTIN_VEC_SET_V8HI
:
19847 case IX86_BUILTIN_VEC_SET_V4HI
:
19848 case IX86_BUILTIN_VEC_SET_V16QI
:
19849 return ix86_expand_vec_set_builtin (exp
);
19851 case IX86_BUILTIN_INFQ
:
19853 REAL_VALUE_TYPE inf
;
19857 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
19859 tmp
= validize_mem (force_const_mem (mode
, tmp
));
19862 target
= gen_reg_rtx (mode
);
19864 emit_move_insn (target
, tmp
);
19868 case IX86_BUILTIN_FABSQ
:
19869 return ix86_expand_unop_builtin (CODE_FOR_abstf2
, exp
, target
, 0);
19871 case IX86_BUILTIN_COPYSIGNQ
:
19872 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3
, exp
, target
);
19878 for (i
= 0, d
= bdesc_sse_3arg
;
19879 i
< ARRAY_SIZE (bdesc_sse_3arg
);
19881 if (d
->code
== fcode
)
19882 return ix86_expand_sse_4_operands_builtin (d
->icode
, exp
,
19885 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19886 if (d
->code
== fcode
)
19888 /* Compares are treated specially. */
19889 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
19890 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
19891 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
19892 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
19893 return ix86_expand_sse_compare (d
, exp
, target
);
19895 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
19898 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19899 if (d
->code
== fcode
)
19900 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
19902 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
19903 if (d
->code
== fcode
)
19904 return ix86_expand_sse_comi (d
, exp
, target
);
19906 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
19907 if (d
->code
== fcode
)
19908 return ix86_expand_sse_ptest (d
, exp
, target
);
19910 for (i
= 0, d
= bdesc_crc32
; i
< ARRAY_SIZE (bdesc_crc32
); i
++, d
++)
19911 if (d
->code
== fcode
)
19912 return ix86_expand_crc32 (d
->icode
, exp
, target
);
19914 for (i
= 0, d
= bdesc_pcmpestr
;
19915 i
< ARRAY_SIZE (bdesc_pcmpestr
);
19917 if (d
->code
== fcode
)
19918 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
19920 for (i
= 0, d
= bdesc_pcmpistr
;
19921 i
< ARRAY_SIZE (bdesc_pcmpistr
);
19923 if (d
->code
== fcode
)
19924 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
19926 gcc_unreachable ();
19929 /* Returns a function decl for a vectorized version of the builtin function
19930 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19931 if it is not available. */
19934 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
19937 enum machine_mode in_mode
, out_mode
;
19940 if (TREE_CODE (type_out
) != VECTOR_TYPE
19941 || TREE_CODE (type_in
) != VECTOR_TYPE
)
19944 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
19945 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
19946 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
19947 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
19951 case BUILT_IN_SQRT
:
19952 if (out_mode
== DFmode
&& out_n
== 2
19953 && in_mode
== DFmode
&& in_n
== 2)
19954 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
19957 case BUILT_IN_SQRTF
:
19958 if (out_mode
== SFmode
&& out_n
== 4
19959 && in_mode
== SFmode
&& in_n
== 4)
19960 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
19963 case BUILT_IN_LRINT
:
19964 if (out_mode
== SImode
&& out_n
== 4
19965 && in_mode
== DFmode
&& in_n
== 2)
19966 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
19969 case BUILT_IN_LRINTF
:
19970 if (out_mode
== SImode
&& out_n
== 4
19971 && in_mode
== SFmode
&& in_n
== 4)
19972 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
19979 /* Dispatch to a handler for a vectorization library. */
19980 if (ix86_veclib_handler
)
19981 return (*ix86_veclib_handler
)(fn
, type_out
, type_in
);
19986 /* Handler for an ACML-style interface to a library with vectorized
19990 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
19992 char name
[20] = "__vr.._";
19993 tree fntype
, new_fndecl
, args
;
19996 enum machine_mode el_mode
, in_mode
;
19999 /* The ACML is 64bits only and suitable for unsafe math only as
20000 it does not correctly support parts of IEEE with the required
20001 precision such as denormals. */
20003 || !flag_unsafe_math_optimizations
)
20006 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
20007 n
= TYPE_VECTOR_SUBPARTS (type_out
);
20008 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
20009 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
20010 if (el_mode
!= in_mode
20020 case BUILT_IN_LOG2
:
20021 case BUILT_IN_LOG10
:
20024 if (el_mode
!= DFmode
20029 case BUILT_IN_SINF
:
20030 case BUILT_IN_COSF
:
20031 case BUILT_IN_EXPF
:
20032 case BUILT_IN_POWF
:
20033 case BUILT_IN_LOGF
:
20034 case BUILT_IN_LOG2F
:
20035 case BUILT_IN_LOG10F
:
20038 if (el_mode
!= SFmode
20047 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
20048 sprintf (name
+ 7, "%s", bname
+10);
20051 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
20052 args
= TREE_CHAIN (args
))
20056 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
20058 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
20060 /* Build a function declaration for the vectorized function. */
20061 new_fndecl
= build_decl (FUNCTION_DECL
, get_identifier (name
), fntype
);
20062 TREE_PUBLIC (new_fndecl
) = 1;
20063 DECL_EXTERNAL (new_fndecl
) = 1;
20064 DECL_IS_NOVOPS (new_fndecl
) = 1;
20065 TREE_READONLY (new_fndecl
) = 1;
20071 /* Returns a decl of a function that implements conversion of the
20072 input vector of type TYPE, or NULL_TREE if it is not available. */
/* Implements the target hook for vectorized conversion builtins.
   NOTE(review): text extraction split each original source line into
   fragments prefixed with its original line number, and dropped the
   switch over CODE, several case labels and the NULL_TREE returns;
   code bytes below are untouched.  */
20075 ix86_vectorize_builtin_conversion (unsigned int code
, tree type
)
/* Only vector types can have a builtin conversion.  */
20077 if (TREE_CODE (type
) != VECTOR_TYPE
)
/* Presumably the FLOAT_EXPR arm: int vector -> float vector via
   cvtdq2ps -- TODO confirm the missing case label against the
   unmangled source.  */
20083 switch (TYPE_MODE (type
))
20086 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
/* Float vector -> int vector truncation via cvttps2dq.  */
20091 case FIX_TRUNC_EXPR
:
20092 switch (TYPE_MODE (type
))
20095 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
20105 /* Returns a code for a target-specific builtin that implements
20106 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): extraction-mangled text -- fragments carry original
   line numbers; brace/switch lines were dropped.  Code bytes below are
   untouched.  Presumably MD_FN selects between the machine-dependent
   and the normal-builtin switch -- confirm against the unmangled
   source.  */
20109 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
20110 bool sqrt ATTRIBUTE_UNUSED
)
/* Reciprocal approximations are only legal under -mrecip with SSE
   math, finite-only non-trapping unsafe math, and not when optimizing
   for size.  */
20112 if (! (TARGET_SSE_MATH
&& TARGET_RECIP
&& !optimize_size
20113 && flag_finite_math_only
&& !flag_trapping_math
20114 && flag_unsafe_math_optimizations
))
20118 /* Machine dependent builtins. */
20121 /* Vectorized version of sqrt to rsqrt conversion. */
20122 case IX86_BUILTIN_SQRTPS
:
20123 return ix86_builtins
[IX86_BUILTIN_RSQRTPS
];
20129 /* Normal builtins. */
20132 /* Sqrt to rsqrt conversion. */
20133 case BUILT_IN_SQRTF
:
20134 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
20141 /* Store OPERAND to the memory after reload is completed. This means
20142 that we can't easily use assign_stack_local. */
20144 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
20148 gcc_assert (reload_completed
);
20149 if (TARGET_RED_ZONE
)
20151 result
= gen_rtx_MEM (mode
,
20152 gen_rtx_PLUS (Pmode
,
20154 GEN_INT (-RED_ZONE_SIZE
)));
20155 emit_move_insn (result
, operand
);
20157 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
20163 operand
= gen_lowpart (DImode
, operand
);
20167 gen_rtx_SET (VOIDmode
,
20168 gen_rtx_MEM (DImode
,
20169 gen_rtx_PRE_DEC (DImode
,
20170 stack_pointer_rtx
)),
20174 gcc_unreachable ();
20176 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
20185 split_di (&operand
, 1, operands
, operands
+ 1);
20187 gen_rtx_SET (VOIDmode
,
20188 gen_rtx_MEM (SImode
,
20189 gen_rtx_PRE_DEC (Pmode
,
20190 stack_pointer_rtx
)),
20193 gen_rtx_SET (VOIDmode
,
20194 gen_rtx_MEM (SImode
,
20195 gen_rtx_PRE_DEC (Pmode
,
20196 stack_pointer_rtx
)),
20201 /* Store HImodes as SImodes. */
20202 operand
= gen_lowpart (SImode
, operand
);
20206 gen_rtx_SET (VOIDmode
,
20207 gen_rtx_MEM (GET_MODE (operand
),
20208 gen_rtx_PRE_DEC (SImode
,
20209 stack_pointer_rtx
)),
20213 gcc_unreachable ();
20215 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
20220 /* Free operand from the memory. */
/* Deallocates the stack slot pushed by ix86_force_to_memory (above,
   which stores via PRE_DEC pushes); with a red zone nothing was
   pushed, hence the guard.  NOTE(review): the tail of this function
   (the size operand of the LEA and the closing braces) was lost in
   text extraction; code bytes below are untouched.  */
20222 ix86_free_from_memory (enum machine_mode mode
)
20224 if (!TARGET_RED_ZONE
)
/* DImode -- or any mode on 64-bit -- presumably uses a larger slot;
   TODO confirm the missing size computation against the unmangled
   source.  */
20228 if (mode
== DImode
|| TARGET_64BIT
)
20232 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20233 to pop or add instruction if registers are available. */
20234 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20235 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
20240 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20241 QImode must go into class Q_REGS.
20242 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20243 movdf to do mem-to-mem moves through integer regs. */
20245 ix86_preferred_reload_class (rtx x
, enum reg_class regclass
)
20247 enum machine_mode mode
= GET_MODE (x
);
20249 /* We're only allowed to return a subclass of CLASS. Many of the
20250 following checks fail for NO_REGS, so eliminate that early. */
20251 if (regclass
== NO_REGS
)
20254 /* All classes can load zeros. */
20255 if (x
== CONST0_RTX (mode
))
20258 /* Force constants into memory if we are loading a (nonzero) constant into
20259 an MMX or SSE register. This is because there are no MMX/SSE instructions
20260 to load from a constant. */
20262 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
20265 /* Prefer SSE regs only, if we can use them for math. */
20266 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
20267 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
20269 /* Floating-point constants need more complex checks. */
20270 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
20272 /* General regs can load everything. */
20273 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
20276 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20277 zero above. We only want to wind up preferring 80387 registers if
20278 we plan on doing computation with them. */
20280 && standard_80387_constant_p (x
))
20282 /* Limit class to non-sse. */
20283 if (regclass
== FLOAT_SSE_REGS
)
20285 if (regclass
== FP_TOP_SSE_REGS
)
20287 if (regclass
== FP_SECOND_SSE_REGS
)
20288 return FP_SECOND_REG
;
20289 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
20296 /* Generally when we see PLUS here, it's the function invariant
20297 (plus soft-fp const_int). Which can only be computed into general
20299 if (GET_CODE (x
) == PLUS
)
20300 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
20302 /* QImode constants are easy to load, but non-constant QImode data
20303 must go into Q_REGS. */
20304 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
20306 if (reg_class_subset_p (regclass
, Q_REGS
))
20308 if (reg_class_subset_p (Q_REGS
, regclass
))
20316 /* Discourage putting floating-point values in SSE registers unless
20317 SSE math is being used, and likewise for the 387 registers. */
/* Restricts REGCLASS, for an output reload of X, to the register bank
   FP math is done in; NO_REGS rejects the alternative.  NOTE(review):
   extraction-mangled text -- fragments carry original line numbers and
   brace lines are missing (e.g. the consequent of the FP_TOP_SSE_REGS
   test, original line 20333, was lost).  Code bytes below are
   untouched.  */
20319 ix86_preferred_output_reload_class (rtx x
, enum reg_class regclass
)
20321 enum machine_mode mode
= GET_MODE (x
);
20323 /* Restrict the output reload class to the register bank that we are doing
20324 math on. If we would like not to return a subset of CLASS, reject this
20325 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): redundant -- MODE was just initialized to the same
   value at its declaration above.  Harmless.  */
20326 mode
= GET_MODE (x
);
20327 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20328 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
/* x87 math: strip the SSE half from mixed classes; keep pure FP
   classes as-is.  */
20330 if (X87_FLOAT_MODE_P (mode
))
20332 if (regclass
== FP_TOP_SSE_REGS
)
20334 else if (regclass
== FP_SECOND_SSE_REGS
)
20335 return FP_SECOND_REG
;
20337 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
20343 /* If we are copying between general and FP registers, we need a memory
20344 location. The same is true for SSE and MMX registers.
20346 To optimize register_move_cost performance, allow inline variant.
20348 The macro can't work reliably when one of the CLASSES is class containing
20349 registers from multiple units (SSE, MMX, integer). We avoid this by never
20350 combining those units in single alternative in the machine description.
20351 Ensure that this constraint holds to avoid unexpected surprises.
20353 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20354 enforce these sanity checks. */
20357 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
20358 enum machine_mode mode
, int strict
)
20360 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
20361 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
20362 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
20363 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
20364 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
20365 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
20367 gcc_assert (!strict
);
20371 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
20374 /* ??? This is a lie. We do have moves between mmx/general, and for
20375 mmx/sse2. But by saying we need secondary memory we discourage the
20376 register allocator from using the mmx registers unless needed. */
20377 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
20380 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
20382 /* SSE1 doesn't have any direct moves from other classes. */
20386 /* If the target says that inter-unit moves are more expensive
20387 than moving through memory, then don't generate them. */
20388 if (!TARGET_INTER_UNIT_MOVES
)
20391 /* Between SSE and general, we have moves no larger than word size. */
20392 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
20400 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
20401 enum machine_mode mode
, int strict
)
20403 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
20406 /* Return true if the registers in CLASS cannot represent the change from
20407 modes FROM to TO. */
20410 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
20411 enum reg_class regclass
)
20416 /* x87 registers can't do subreg at all, as all values are reformatted
20417 to extended precision. */
20418 if (MAYBE_FLOAT_CLASS_P (regclass
))
20421 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
20423 /* Vector registers do not support QI or HImode loads. If we don't
20424 disallow a change to these modes, reload will assume it's ok to
20425 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20426 the vec_dupv4hi pattern. */
20427 if (GET_MODE_SIZE (from
) < 4)
20430 /* Vector registers do not support subreg with nonzero offsets, which
20431 are otherwise valid for integer registers. Since we can't see
20432 whether we have a nonzero offset from here, prohibit all
20433 nonparadoxical subregs changing size. */
20434 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
20441 /* Return the cost of moving data of mode M between a
20442 register and memory. A value of 2 is the default; this cost is
20443 relative to those in `REGISTER_MOVE_COST'.
20445 This function is used extensively by register_move_cost that is used to
20446 build tables at startup. Make it inline in this case.
20447 When IN is 2, return maximum of in and out move cost.
20449 If moving between registers and memory is more expensive than
20450 between two registers, you should define this macro to express the
20453 Model also increased moving costs of QImode registers in non
20457 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
20461 if (FLOAT_CLASS_P (regclass
))
20479 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
20480 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
20482 if (SSE_CLASS_P (regclass
))
20485 switch (GET_MODE_SIZE (mode
))
20500 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
20501 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
20503 if (MMX_CLASS_P (regclass
))
20506 switch (GET_MODE_SIZE (mode
))
20518 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
20519 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
20521 switch (GET_MODE_SIZE (mode
))
20524 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
20527 return ix86_cost
->int_store
[0];
20528 if (TARGET_PARTIAL_REG_DEPENDENCY
&& !optimize_size
)
20529 cost
= ix86_cost
->movzbl_load
;
20531 cost
= ix86_cost
->int_load
[0];
20533 return MAX (cost
, ix86_cost
->int_store
[0]);
20539 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
20541 return ix86_cost
->movzbl_load
;
20543 return ix86_cost
->int_store
[0] + 4;
20548 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
20549 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
20551 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20552 if (mode
== TFmode
)
20555 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
20557 cost
= ix86_cost
->int_load
[2];
20559 cost
= ix86_cost
->int_store
[2];
20560 return (cost
* (((int) GET_MODE_SIZE (mode
)
20561 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
20566 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
, int in
)
20568 return inline_memory_move_cost (mode
, regclass
, in
);
20572 /* Return the cost of moving data from a register in class CLASS1 to
20573 one in class CLASS2.
20575 It is not required that the cost always equal 2 when FROM is the same as TO;
20576 on some machines it is expensive to move between registers if they are not
20577 general registers. */
20580 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
20581 enum reg_class class2
)
20583 /* In case we require secondary memory, compute cost of the store followed
20584 by load. In order to avoid bad register allocation choices, we need
20585 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20587 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
20591 cost
+= inline_memory_move_cost (mode
, class1
, 2);
20592 cost
+= inline_memory_move_cost (mode
, class2
, 2);
20594 /* In case of copying from general_purpose_register we may emit multiple
20595 stores followed by single load causing memory size mismatch stall.
20596 Count this as arbitrarily high cost of 20. */
20597 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
20600 /* In the case of FP/MMX moves, the registers actually overlap, and we
20601 have to switch modes in order to treat them differently. */
20602 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
20603 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
20609 /* Moves between SSE/MMX and integer unit are expensive. */
20610 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
20611 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
20613 /* ??? By keeping returned value relatively high, we limit the number
20614 of moves between integer and MMX/SSE registers for all targets.
20615 Additionally, high value prevents problem with x86_modes_tieable_p(),
20616 where integer modes in MMX/SSE registers are not tieable
20617 because of missing QImode and HImode moves to, from or between
20618 MMX/SSE registers. */
20619 return MAX (ix86_cost
->mmxsse_to_integer
, 8);
20621 if (MAYBE_FLOAT_CLASS_P (class1
))
20622 return ix86_cost
->fp_move
;
20623 if (MAYBE_SSE_CLASS_P (class1
))
20624 return ix86_cost
->sse_move
;
20625 if (MAYBE_MMX_CLASS_P (class1
))
20626 return ix86_cost
->mmx_move
;
20630 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20633 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
20635 /* Flags and only flags can only hold CCmode values. */
20636 if (CC_REGNO_P (regno
))
20637 return GET_MODE_CLASS (mode
) == MODE_CC
;
20638 if (GET_MODE_CLASS (mode
) == MODE_CC
20639 || GET_MODE_CLASS (mode
) == MODE_RANDOM
20640 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
20642 if (FP_REGNO_P (regno
))
20643 return VALID_FP_MODE_P (mode
);
20644 if (SSE_REGNO_P (regno
))
20646 /* We implement the move patterns for all vector modes into and
20647 out of SSE registers, even when no operation instructions
20649 return (VALID_SSE_REG_MODE (mode
)
20650 || VALID_SSE2_REG_MODE (mode
)
20651 || VALID_MMX_REG_MODE (mode
)
20652 || VALID_MMX_REG_MODE_3DNOW (mode
));
20654 if (MMX_REGNO_P (regno
))
20656 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20657 so if the register is available at all, then we can move data of
20658 the given mode into or out of it. */
20659 return (VALID_MMX_REG_MODE (mode
)
20660 || VALID_MMX_REG_MODE_3DNOW (mode
));
20663 if (mode
== QImode
)
20665 /* Take care for QImode values - they can be in non-QI regs,
20666 but then they do cause partial register stalls. */
20667 if (regno
< 4 || TARGET_64BIT
)
20669 if (!TARGET_PARTIAL_REG_STALL
)
20671 return reload_in_progress
|| reload_completed
;
20673 /* We handle both integer and floats in the general purpose registers. */
20674 else if (VALID_INT_MODE_P (mode
))
20676 else if (VALID_FP_MODE_P (mode
))
20678 else if (VALID_DFP_MODE_P (mode
))
20680 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20681 on to use that value in smaller contexts, this can easily force a
20682 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20683 supporting DImode, allow it. */
20684 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
20690 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20691 tieable integer mode. */
/* NOTE(review): the switch over MODE and its case labels (original
   lines 20695-20702) were lost in text extraction.  The two surviving
   returns presumably belong to different mode cases -- the first gated
   on partial-register stalls, the second 64-bit only -- confirm
   against the unmangled source.  Code bytes below are untouched.  */
20694 ix86_tieable_integer_mode_p (enum machine_mode mode
)
20703 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
20706 return TARGET_64BIT
;
20713 /* Return true if MODE1 is accessible in a register that can hold MODE2
20714 without copying. That is, all register classes that can hold MODE2
20715 can also hold MODE1. */
/* NOTE(review): extraction-mangled text -- each original line is split
   into fragments prefixed with its original line number, and
   brace/return-only lines (the trivial `return true'/`return false'
   arms) were dropped.  Code bytes below are untouched.  */
20718 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
/* Identical modes always tie.  */
20720 if (mode1
== mode2
)
/* Tieable integer modes tie with one another.  */
20723 if (ix86_tieable_integer_mode_p (mode1
)
20724 && ix86_tieable_integer_mode_p (mode2
))
20727 /* MODE2 being XFmode implies fp stack or general regs, which means we
20728 can tie any smaller floating point modes to it. Note that we do not
20729 tie this with TFmode. */
20730 if (mode2
== XFmode
)
20731 return mode1
== SFmode
|| mode1
== DFmode
;
20733 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20734 that we can tie it with SFmode. */
20735 if (mode2
== DFmode
)
20736 return mode1
== SFmode
;
20738 /* If MODE2 is only appropriate for an SSE register, then tie with
20739 any other mode acceptable to SSE registers. */
20740 if (GET_MODE_SIZE (mode2
) == 16
20741 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
20742 return (GET_MODE_SIZE (mode1
) == 16
20743 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
20745 /* If MODE2 is appropriate for an MMX register, then tie
20746 with any other mode acceptable to MMX registers. */
20747 if (GET_MODE_SIZE (mode2
) == 8
20748 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
20749 return (GET_MODE_SIZE (mode1
) == 8
20750 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
20755 /* Compute a (partial) cost for rtx X. Return true if the complete
20756 cost has been computed, and false if subexpressions should be
20757 scanned. In either case, *TOTAL contains the cost result. */
20760 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
)
20762 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
20763 enum machine_mode mode
= GET_MODE (x
);
20771 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
20773 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
20775 else if (flag_pic
&& SYMBOLIC_CONST (x
)
20777 || (!GET_CODE (x
) != LABEL_REF
20778 && (GET_CODE (x
) != SYMBOL_REF
20779 || !SYMBOL_REF_LOCAL_P (x
)))))
20786 if (mode
== VOIDmode
)
20789 switch (standard_80387_constant_p (x
))
20794 default: /* Other constants */
20799 /* Start with (MEM (SYMBOL_REF)), since that's where
20800 it'll probably end up. Add a penalty for size. */
20801 *total
= (COSTS_N_INSNS (1)
20802 + (flag_pic
!= 0 && !TARGET_64BIT
)
20803 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
20809 /* The zero extensions is often completely free on x86_64, so make
20810 it as cheap as possible. */
20811 if (TARGET_64BIT
&& mode
== DImode
20812 && GET_MODE (XEXP (x
, 0)) == SImode
)
20814 else if (TARGET_ZERO_EXTEND_WITH_AND
)
20815 *total
= ix86_cost
->add
;
20817 *total
= ix86_cost
->movzx
;
20821 *total
= ix86_cost
->movsx
;
20825 if (CONST_INT_P (XEXP (x
, 1))
20826 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
20828 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
20831 *total
= ix86_cost
->add
;
20834 if ((value
== 2 || value
== 3)
20835 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
20837 *total
= ix86_cost
->lea
;
20847 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
20849 if (CONST_INT_P (XEXP (x
, 1)))
20851 if (INTVAL (XEXP (x
, 1)) > 32)
20852 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
20854 *total
= ix86_cost
->shift_const
* 2;
20858 if (GET_CODE (XEXP (x
, 1)) == AND
)
20859 *total
= ix86_cost
->shift_var
* 2;
20861 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
20866 if (CONST_INT_P (XEXP (x
, 1)))
20867 *total
= ix86_cost
->shift_const
;
20869 *total
= ix86_cost
->shift_var
;
20874 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20876 /* ??? SSE scalar cost should be used here. */
20877 *total
= ix86_cost
->fmul
;
20880 else if (X87_FLOAT_MODE_P (mode
))
20882 *total
= ix86_cost
->fmul
;
20885 else if (FLOAT_MODE_P (mode
))
20887 /* ??? SSE vector cost should be used here. */
20888 *total
= ix86_cost
->fmul
;
20893 rtx op0
= XEXP (x
, 0);
20894 rtx op1
= XEXP (x
, 1);
20896 if (CONST_INT_P (XEXP (x
, 1)))
20898 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
20899 for (nbits
= 0; value
!= 0; value
&= value
- 1)
20903 /* This is arbitrary. */
20906 /* Compute costs correctly for widening multiplication. */
20907 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
20908 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
20909 == GET_MODE_SIZE (mode
))
20911 int is_mulwiden
= 0;
20912 enum machine_mode inner_mode
= GET_MODE (op0
);
20914 if (GET_CODE (op0
) == GET_CODE (op1
))
20915 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
20916 else if (CONST_INT_P (op1
))
20918 if (GET_CODE (op0
) == SIGN_EXTEND
)
20919 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
20922 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
20926 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
20929 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
20930 + nbits
* ix86_cost
->mult_bit
20931 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
20940 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20941 /* ??? SSE cost should be used here. */
20942 *total
= ix86_cost
->fdiv
;
20943 else if (X87_FLOAT_MODE_P (mode
))
20944 *total
= ix86_cost
->fdiv
;
20945 else if (FLOAT_MODE_P (mode
))
20946 /* ??? SSE vector cost should be used here. */
20947 *total
= ix86_cost
->fdiv
;
20949 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
20953 if (GET_MODE_CLASS (mode
) == MODE_INT
20954 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
20956 if (GET_CODE (XEXP (x
, 0)) == PLUS
20957 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
20958 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
20959 && CONSTANT_P (XEXP (x
, 1)))
20961 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
20962 if (val
== 2 || val
== 4 || val
== 8)
20964 *total
= ix86_cost
->lea
;
20965 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
20966 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
20968 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20972 else if (GET_CODE (XEXP (x
, 0)) == MULT
20973 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
20975 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
20976 if (val
== 2 || val
== 4 || val
== 8)
20978 *total
= ix86_cost
->lea
;
20979 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
20980 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20984 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
20986 *total
= ix86_cost
->lea
;
20987 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
20988 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
20989 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20996 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20998 /* ??? SSE cost should be used here. */
20999 *total
= ix86_cost
->fadd
;
21002 else if (X87_FLOAT_MODE_P (mode
))
21004 *total
= ix86_cost
->fadd
;
21007 else if (FLOAT_MODE_P (mode
))
21009 /* ??? SSE vector cost should be used here. */
21010 *total
= ix86_cost
->fadd
;
21018 if (!TARGET_64BIT
&& mode
== DImode
)
21020 *total
= (ix86_cost
->add
* 2
21021 + (rtx_cost (XEXP (x
, 0), outer_code
)
21022 << (GET_MODE (XEXP (x
, 0)) != DImode
))
21023 + (rtx_cost (XEXP (x
, 1), outer_code
)
21024 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
21030 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21032 /* ??? SSE cost should be used here. */
21033 *total
= ix86_cost
->fchs
;
21036 else if (X87_FLOAT_MODE_P (mode
))
21038 *total
= ix86_cost
->fchs
;
21041 else if (FLOAT_MODE_P (mode
))
21043 /* ??? SSE vector cost should be used here. */
21044 *total
= ix86_cost
->fchs
;
21050 if (!TARGET_64BIT
&& mode
== DImode
)
21051 *total
= ix86_cost
->add
* 2;
21053 *total
= ix86_cost
->add
;
21057 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
21058 && XEXP (XEXP (x
, 0), 1) == const1_rtx
21059 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
21060 && XEXP (x
, 1) == const0_rtx
)
21062 /* This kind of construct is implemented using test[bwl].
21063 Treat it as if we had an AND. */
21064 *total
= (ix86_cost
->add
21065 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
21066 + rtx_cost (const1_rtx
, outer_code
));
21072 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
21077 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21078 /* ??? SSE cost should be used here. */
21079 *total
= ix86_cost
->fabs
;
21080 else if (X87_FLOAT_MODE_P (mode
))
21081 *total
= ix86_cost
->fabs
;
21082 else if (FLOAT_MODE_P (mode
))
21083 /* ??? SSE vector cost should be used here. */
21084 *total
= ix86_cost
->fabs
;
21088 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21089 /* ??? SSE cost should be used here. */
21090 *total
= ix86_cost
->fsqrt
;
21091 else if (X87_FLOAT_MODE_P (mode
))
21092 *total
= ix86_cost
->fsqrt
;
21093 else if (FLOAT_MODE_P (mode
))
21094 /* ??? SSE vector cost should be used here. */
21095 *total
= ix86_cost
->fsqrt
;
21099 if (XINT (x
, 1) == UNSPEC_TP
)
21110 static int current_machopic_label_num
;
/* NOTE(review): garbled extract (split lines, missing spans such as the
   function's braces and #if TARGET_MACHO/PIC conditionals); code kept
   byte-identical, comments only added at the top.
   Emits the Darwin (Mach-O) lazy-binding stub for SYMB into FILE: the stub
   body, the binder, and the lazy pointer, using dyld_stub_binding_helper.
   Asserts !TARGET_64BIT — 32-bit only.  The two switch_to_section calls at
   21139/21141 were presumably on different #if MACHOPIC_PURE arms of a
   conditional that is missing from this extract — TODO confirm against the
   full source.  */
21112 /* Given a symbol name and its associated stub, write out the
21113 definition of the stub. */
21116 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
21118 unsigned int length
;
21119 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
21120 int label
= ++current_machopic_label_num
;
21122 /* For 64-bit we shouldn't get here. */
21123 gcc_assert (!TARGET_64BIT
);
21125 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21126 symb
= (*targetm
.strip_name_encoding
) (symb
);
21128 length
= strlen (stub
);
21129 binder_name
= alloca (length
+ 32);
21130 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
21132 length
= strlen (symb
);
21133 symbol_name
= alloca (length
+ 32);
21134 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
21136 sprintf (lazy_ptr_name
, "L%d$lz", label
);
21139 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
21141 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
21143 fprintf (file
, "%s:\n", stub
);
21144 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21148 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
21149 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
21150 fprintf (file
, "\tjmp\t*%%edx\n");
21153 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
21155 fprintf (file
, "%s:\n", binder_name
);
21159 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
21160 fprintf (file
, "\tpushl\t%%eax\n");
21163 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
21165 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
21167 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
21168 fprintf (file
, "%s:\n", lazy_ptr_name
);
21169 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21170 fprintf (file
, "\t.long %s\n", binder_name
);
/* NOTE(review): garbled extract; code kept byte-identical.
   End-of-file hook for Darwin/x86: delegates to the generic
   darwin_file_end ().  Surrounding braces are missing from this extract.  */
21174 darwin_x86_file_end (void)
21176 darwin_file_end ();
21179 #endif /* TARGET_MACHO */
/* NOTE(review): garbled extract (split lines; missing declarations of the
   locals `i' and `pos' and the function braces); code kept byte-identical.
   Fills reg_alloc_order[]: call-clobbered GPRs, then call-saved GPRs, then
   x87 stack regs (early when !TARGET_SSE_MATH, late otherwise), SSE and
   REX-SSE regs, MMX regs, and finally zero-fills the rest of the array.  */
21181 /* Order the registers for register allocator. */
21184 x86_order_regs_for_local_alloc (void)
21189 /* First allocate the local general purpose registers. */
21190 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21191 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
21192 reg_alloc_order
[pos
++] = i
;
21194 /* Global general purpose registers. */
21195 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21196 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
21197 reg_alloc_order
[pos
++] = i
;
21199 /* x87 registers come first in case we are doing FP math
21201 if (!TARGET_SSE_MATH
)
21202 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21203 reg_alloc_order
[pos
++] = i
;
21205 /* SSE registers. */
21206 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
21207 reg_alloc_order
[pos
++] = i
;
21208 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
21209 reg_alloc_order
[pos
++] = i
;
21211 /* x87 registers. */
21212 if (TARGET_SSE_MATH
)
21213 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21214 reg_alloc_order
[pos
++] = i
;
21216 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
21217 reg_alloc_order
[pos
++] = i
;
21219 /* Initialize the rest of array as we do not allocate some registers
21221 while (pos
< FIRST_PSEUDO_REGISTER
)
21222 reg_alloc_order
[pos
++] = 0;
/* NOTE(review): garbled extract (split lines; the declaration of the local
   `type', some braces and the return are missing); code kept byte-identical.
   attribute_spec handler for "ms_struct"/"gcc_struct": warns and sets
   *no_add_attrs when the attribute target is not a RECORD/UNION type or when
   the opposite attribute is already present.  */
21225 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21226 struct attribute_spec.handler. */
21228 ix86_handle_struct_attribute (tree
*node
, tree name
,
21229 tree args ATTRIBUTE_UNUSED
,
21230 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
21233 if (DECL_P (*node
))
21235 if (TREE_CODE (*node
) == TYPE_DECL
)
21236 type
= &TREE_TYPE (*node
);
21241 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
21242 || TREE_CODE (*type
) == UNION_TYPE
)))
21244 warning (OPT_Wattributes
, "%qs attribute ignored",
21245 IDENTIFIER_POINTER (name
));
21246 *no_add_attrs
= true;
21249 else if ((is_attribute_p ("ms_struct", name
)
21250 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
21251 || ((is_attribute_p ("gcc_struct", name
)
21252 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
21254 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
21255 IDENTIFIER_POINTER (name
));
21256 *no_add_attrs
= true;
/* NOTE(review): garbled extract; code kept byte-identical.
   Returns nonzero when RECORD_TYPE should use MS bitfield layout: either
   TARGET_MS_BITFIELD_LAYOUT without a "gcc_struct" attribute, or an explicit
   "ms_struct" attribute.  */
21263 ix86_ms_bitfield_layout_p (const_tree record_type
)
21265 return (TARGET_MS_BITFIELD_LAYOUT
&&
21266 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
21267 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
/* NOTE(review): garbled extract (split lines; the TARGET_64BIT conditional
   and the computation of `regno' are missing from this view); code kept
   byte-identical.
   Returns the RTL location of the `this' pointer on entry to FUNCTION:
   a DImode parameter register on 64-bit (MS or SysV register order), an
   SImode register for regparm/fastcall functions, otherwise a stack slot
   at sp+4 (sp+8 when the return value is an aggregate passed by hidden
   pointer).  */
21270 /* Returns an expression indicating where the this parameter is
21271 located on entry to the FUNCTION. */
21274 x86_this_parameter (tree function
)
21276 tree type
= TREE_TYPE (function
);
21277 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
21281 const int *parm_regs
;
21283 if (TARGET_64BIT_MS_ABI
)
21284 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
21286 parm_regs
= x86_64_int_parameter_registers
;
21287 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
21290 if (ix86_function_regparm (type
, function
) > 0
21291 && !type_has_variadic_args_p (type
))
21294 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
21296 return gen_rtx_REG (SImode
, regno
);
21299 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
/* NOTE(review): garbled extract (the `return' statements and the 64-bit
   early-out are missing from this view); code kept byte-identical.
   Predicate for TARGET_ASM_CAN_OUTPUT_MI_THUNK: on 32-bit a thunk can be
   emitted only when a scratch register is available (regparm < 3, none
   needed for vcall_offset, none needed for PIC GOT access).  */
21302 /* Determine whether x86_output_mi_thunk can succeed. */
21305 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
21306 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
21307 HOST_WIDE_INT vcall_offset
, const_tree function
)
21309 /* 64-bit can handle anything. */
21313 /* For 32-bit, everything's fine if we have one free register. */
21314 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
21317 /* Need a free register for vcall_offset. */
21321 /* Need a free register for GOT references. */
21322 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
21325 /* Otherwise ok. */
/* NOTE(review): garbled extract — many lines (braces, #if TARGET_64BIT /
   TARGET_MACHO conditionals, the computation of `regno'/`tmp' declarations)
   are missing from this view; code kept byte-identical, comments only added
   here at the top.
   TARGET_ASM_OUTPUT_MI_THUNK implementation: emits text assembly that
   (1) loads `this' into a register if needed, (2) adds the constant DELTA,
   (3) if VCALL_OFFSET is nonzero adds *(*this + vcall_offset) using a
   scratch register (R10/R11 on 64-bit, ECX or EAX for fastcall on 32-bit),
   (4) stores `this' back to its stack slot when it lived in memory, and
   (5) tail-jumps to FUNCTION — directly when it binds locally, else via
   GOTPCREL (64-bit), a Mach-O stub (Darwin), or a GOT slot loaded through
   a freshly set-up PIC register (32-bit ELF PIC).  */
21329 /* Output the assembler code for a thunk function. THUNK_DECL is the
21330 declaration for the thunk function itself, FUNCTION is the decl for
21331 the target function. DELTA is an immediate constant offset to be
21332 added to THIS. If VCALL_OFFSET is nonzero, the word at
21333 *(*this + vcall_offset) should be added to THIS. */
21336 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
21337 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
21338 HOST_WIDE_INT vcall_offset
, tree function
)
21341 rtx this_param
= x86_this_parameter (function
);
21344 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21345 pull it in now and let DELTA benefit. */
21346 if (REG_P (this_param
))
21347 this_reg
= this_param
;
21348 else if (vcall_offset
)
21350 /* Put the this parameter into %eax. */
21351 xops
[0] = this_param
;
21352 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
21353 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21356 this_reg
= NULL_RTX
;
21358 /* Adjust the this parameter by a fixed constant. */
21361 xops
[0] = GEN_INT (delta
);
21362 xops
[1] = this_reg
? this_reg
: this_param
;
21365 if (!x86_64_general_operand (xops
[0], DImode
))
21367 tmp
= gen_rtx_REG (DImode
, R10_REG
);
21369 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
21371 xops
[1] = this_param
;
21373 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
21376 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
21379 /* Adjust the this parameter by a value stored in the vtable. */
21383 tmp
= gen_rtx_REG (DImode
, R10_REG
);
21386 int tmp_regno
= 2 /* ECX */;
21387 if (lookup_attribute ("fastcall",
21388 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
21389 tmp_regno
= 0 /* EAX */;
21390 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
21393 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
21396 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
21398 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21400 /* Adjust the this parameter. */
21401 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
21402 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
21404 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
21405 xops
[0] = GEN_INT (vcall_offset
);
21407 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
21408 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
21410 xops
[1] = this_reg
;
21412 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
21414 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
21417 /* If necessary, drop THIS back to its stack slot. */
21418 if (this_reg
&& this_reg
!= this_param
)
21420 xops
[0] = this_reg
;
21421 xops
[1] = this_param
;
21422 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21425 xops
[0] = XEXP (DECL_RTL (function
), 0);
21428 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
21429 output_asm_insn ("jmp\t%P0", xops
);
21430 /* All thunks should be in the same object as their target,
21431 and thus binds_local_p should be true. */
21432 else if (TARGET_64BIT_MS_ABI
)
21433 gcc_unreachable ();
21436 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
21437 tmp
= gen_rtx_CONST (Pmode
, tmp
);
21438 tmp
= gen_rtx_MEM (QImode
, tmp
);
21440 output_asm_insn ("jmp\t%A0", xops
);
21445 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
21446 output_asm_insn ("jmp\t%P0", xops
);
21451 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
21452 tmp
= (gen_rtx_SYMBOL_REF
21454 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
21455 tmp
= gen_rtx_MEM (QImode
, tmp
);
21457 output_asm_insn ("jmp\t%0", xops
);
21460 #endif /* TARGET_MACHO */
21462 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
21463 output_set_got (tmp
, NULL_RTX
);
21466 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
21467 output_asm_insn ("jmp\t{*}%1", xops
);
/* NOTE(review): garbled extract (the TARGET_MACHO conditional guarding
   darwin_file_start is missing from this view); code kept byte-identical.
   TARGET_ASM_FILE_START hook: default preamble, then optional .version,
   .global __fltused and .intel_syntax directives as configured.  */
21473 x86_file_start (void)
21475 default_file_start ();
21477 darwin_file_start ();
21479 if (X86_FILE_START_VERSION_DIRECTIVE
)
21480 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
21481 if (X86_FILE_START_FLTUSED
)
21482 fputs ("\t.global\t__fltused\n", asm_out_file
);
21483 if (ix86_asm_dialect
== ASM_INTEL
)
21484 fputs ("\t.intel_syntax\n", asm_out_file
);
/* NOTE(review): garbled extract (the early `return computed;' for
   TARGET_64BIT/TARGET_ALIGN_DOUBLE and the final return are missing from
   this view); code kept byte-identical.
   ADJUST_FIELD_ALIGN helper: on 32-bit without -malign-double, caps the
   alignment of DFmode/DCmode and integer-class fields at 32 bits.  */
21488 x86_field_alignment (tree field
, int computed
)
21490 enum machine_mode mode
;
21491 tree type
= TREE_TYPE (field
);
21493 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
21495 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
21496 ? get_inner_array_type (type
) : type
);
21497 if (mode
== DFmode
|| mode
== DCmode
21498 || GET_MODE_CLASS (mode
) == MODE_INT
21499 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
21500 return MIN (32, computed
);
/* NOTE(review): garbled extract — the if (TARGET_64BIT) / else if (flag_pic)
   branch structure and #endif lines that select among the three emit blocks
   are missing from this view; code kept byte-identical.
   FUNCTION_PROFILER implementation: emits the mcount call sequence, with a
   profile-counter load first unless NO_PROFILE_COUNTERS; uses RIP-relative /
   GOTPCREL forms on 64-bit, @GOTOFF/@GOT via %ebx for 32-bit PIC, and plain
   absolute forms otherwise.  */
21504 /* Output assembler code to FILE to increment profiler label # LABELNO
21505 for profiling a function entry. */
21507 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
21511 #ifndef NO_PROFILE_COUNTERS
21512 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
21515 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
21516 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
21518 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
21522 #ifndef NO_PROFILE_COUNTERS
21523 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21524 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
21526 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
21530 #ifndef NO_PROFILE_COUNTERS
21531 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
21532 PROFILE_COUNT_REGISTER
);
21534 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
/* NOTE(review): garbled extract — the numeric `return' statements for each
   case (0, 2, jump-table, call = 5, etc.) are missing from this view; code
   kept byte-identical.
   Conservative lower bound on the byte size of INSN, used by the 16-byte
   jump-window padding pass below; relies only on 1-byte-insn and address
   length attributes, which are the sizes known accurately.  */
21538 /* We don't have exact information about the insn sizes, but we may assume
21539 quite safely that we are informed about all 1 byte insns and memory
21540 address sizes. This is enough to eliminate unnecessary padding in
21544 min_insn_size (rtx insn
)
21548 if (!INSN_P (insn
) || !active_insn_p (insn
))
21551 /* Discard alignments we've emit and jump instructions. */
21552 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
21553 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
21556 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
21557 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
21560 /* Important case - calls are always 5 bytes.
21561 It is common to have many calls in the row. */
21563 && symbolic_reference_mentioned_p (PATTERN (insn
))
21564 && !SIBLING_CALL_P (insn
))
21566 if (get_attr_length (insn
) <= 1)
21569 /* For normal instructions we may rely on the sizes of addresses
21570 and the presence of symbol to require 4 bytes of encoding.
21571 This is not the case for jumps where references are PC relative. */
21572 if (!JUMP_P (insn
))
21574 l
= get_attr_length_address (insn
);
21575 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
/* NOTE(review): garbled extract — the declarations of `isjump', the
   JUMP_P(insn) test that increments njumps, and the inner while loop that
   shrinks the window are missing from this view; code kept byte-identical.
   K8 workaround pass: scans the insn stream keeping a sliding window
   [START, INSN] of estimated NBYTES; when a 4th jump would land in the same
   16-byte window it emits a gen_align pad before INSN.  */
21584 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21588 ix86_avoid_jump_misspredicts (void)
21590 rtx insn
, start
= get_insns ();
21591 int nbytes
= 0, njumps
= 0;
21594 /* Look for all minimal intervals of instructions containing 4 jumps.
21595 The intervals are bounded by START and INSN. NBYTES is the total
21596 size of instructions in the interval including INSN and not including
21597 START. When the NBYTES is smaller than 16 bytes, it is possible
21598 that the end of START and INSN ends up in the same 16byte page.
21600 The smallest offset in the page INSN can start is the case where START
21601 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
21602 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
21604 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
21607 nbytes
+= min_insn_size (insn
);
21609 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
21610 INSN_UID (insn
), min_insn_size (insn
));
21612 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
21613 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
21621 start
= NEXT_INSN (start
);
21622 if ((JUMP_P (start
)
21623 && GET_CODE (PATTERN (start
)) != ADDR_VEC
21624 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
21626 njumps
--, isjump
= 1;
21629 nbytes
-= min_insn_size (start
);
21631 gcc_assert (njumps
>= 0);
21633 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
21634 INSN_UID (start
), INSN_UID (insn
), nbytes
);
21636 if (njumps
== 3 && isjump
&& nbytes
< 16)
21638 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
21641 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
21642 INSN_UID (insn
), padsize
);
21643 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
/* NOTE(review): garbled extract — the edge_iterator declaration, several
   `continue'/`replace = true' statements and the replace/else arms around
   the final emit are missing from this view; code kept byte-identical.
   Athlon/K8 workaround: for each hot RETURN ending a predecessor of the
   exit block that is reached by a jump (or follows one, or the function is
   effectively empty), replaces the plain ret with the padded
   return_internal_long form.  */
21648 /* AMD Athlon works faster
21649 when RET is not destination of conditional jump or directly preceded
21650 by other jump instruction. We avoid the penalty by inserting NOP just
21651 before the RET instructions in such cases. */
21653 ix86_pad_returns (void)
21658 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
21660 basic_block bb
= e
->src
;
21661 rtx ret
= BB_END (bb
);
21663 bool replace
= false;
21665 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
21666 || !maybe_hot_bb_p (bb
))
21668 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
21669 if (active_insn_p (prev
) || LABEL_P (prev
))
21671 if (prev
&& LABEL_P (prev
))
21676 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
21677 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
21678 && !(e
->flags
& EDGE_FALLTHRU
))
21683 prev
= prev_active_insn (ret
);
21685 && ((JUMP_P (prev
) && any_condjump_p (prev
))
21688 /* Empty functions get branch mispredict even when the jump destination
21689 is not visible to us. */
21690 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
21695 emit_insn_before (gen_return_internal_long (), ret
);
/* NOTE(review): garbled extract (the function's own header line `ix86_reorg
   (void)' and braces are missing from this view); code kept byte-identical.
   Machine-dependent reorg driver: runs the two tuning passes above when
   optimizing for speed and the respective tuning flags are set.  */
21701 /* Implement machine specific optimizations. We implement padding of returns
21702 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21706 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
21707 ix86_pad_returns ();
21708 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
21709 ix86_avoid_jump_misspredicts ();
/* NOTE(review): garbled extract (the QImode-operand check inside the loop
   and the return statements are missing from this view); code kept
   byte-identical.
   Scans INSN's extracted operands for a hard register with REGNO >= 4,
   i.e. a QImode register only addressable with a REX prefix in 64-bit
   mode.  */
21712 /* Return nonzero when QImode register that must be represented via REX prefix
21715 x86_extended_QIreg_mentioned_p (rtx insn
)
21718 extract_insn_cached (insn
);
21719 for (i
= 0; i
< recog_data
.n_operands
; i
++)
21720 if (REG_P (recog_data
.operand
[i
])
21721 && REGNO (recog_data
.operand
[i
]) >= 4)
/* NOTE(review): garbled extract (the REG_P guard / early return before
   reading REGNO is missing from this view); code kept byte-identical.
   for_each_rtx callback: nonzero when *P is a REX-prefixed integer or SSE
   hard register (r8-r15 / xmm8-xmm15).  */
21726 /* Return nonzero when P points to register encoded via REX prefix.
21727 Called via for_each_rtx. */
21729 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
21731 unsigned int regno
;
21734 regno
= REGNO (*p
);
21735 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
/* NOTE(review): garbled extract; code kept byte-identical.
   Walks INSN's pattern with extended_reg_mentioned_1 to decide whether a
   REX prefix is required.  */
21738 /* Return true when INSN mentions register that must be encoded using REX
21741 x86_extended_reg_mentioned_p (rtx insn
)
21743 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
/* NOTE(review): garbled extract — the initialization of `out', the trailing
   expand_simple_binop arguments (target/unsignedp/OPTAB_DIRECT) on two
   calls, and some emit_label placement lines are missing from this view;
   code kept byte-identical.
   Expands unsigned-int -> float conversion: non-negative inputs take the
   signed float path; negative inputs are halved (lsr 1, OR in the low bit
   for correct rounding), converted, then doubled with out = f0 + f0.  */
21746 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21747 optabs would emit if we didn't have TFmode patterns. */
21750 x86_emit_floatuns (rtx operands
[2])
21752 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
21753 enum machine_mode mode
, inmode
;
21755 inmode
= GET_MODE (operands
[1]);
21756 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
21759 in
= force_reg (inmode
, operands
[1]);
21760 mode
= GET_MODE (out
);
21761 neglab
= gen_label_rtx ();
21762 donelab
= gen_label_rtx ();
21763 f0
= gen_reg_rtx (mode
);
21765 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
21767 expand_float (out
, in
, 0);
21769 emit_jump_insn (gen_jump (donelab
));
21772 emit_label (neglab
);
21774 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
21776 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
21778 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
21780 expand_float (f0
, i0
, 0);
21782 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
21784 emit_label (donelab
);
/* NOTE(review): garbled extract — the mode switch statement, several case
   labels, `return true/false' statements, local declarations (x, tmp1,
   tmp2) and the wsmode/wvmode assignments are missing from this view; code
   kept byte-identical, comments only added at the top.
   Builds a vector TARGET with every element equal to VAL: direct
   VEC_DUPLICATE where the machine supports it; for V8HI/V16QI with SSE2,
   inserts VAL into a V4SI lane, uses punpcklwd/punpcklbw + pshufd to
   replicate, then casts back via gen_lowpart; otherwise widens VAL into
   the next wider scalar mode (shift + IOR) and recurses on the wider
   vector mode.  */
21787 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21788 with all elements equal to VAR. Return true if successful. */
21791 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
21792 rtx target
, rtx val
)
21794 enum machine_mode smode
, wsmode
, wvmode
;
21809 val
= force_reg (GET_MODE_INNER (mode
), val
);
21810 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
21811 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
21817 if (TARGET_SSE
|| TARGET_3DNOW_A
)
21819 val
= gen_lowpart (SImode
, val
);
21820 x
= gen_rtx_TRUNCATE (HImode
, val
);
21821 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
21822 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
21844 /* Extend HImode to SImode using a paradoxical SUBREG. */
21845 tmp1
= gen_reg_rtx (SImode
);
21846 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
21847 /* Insert the SImode value as low element of V4SImode vector. */
21848 tmp2
= gen_reg_rtx (V4SImode
);
21849 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
21850 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
21851 CONST0_RTX (V4SImode
),
21853 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
21854 /* Cast the V4SImode vector back to a V8HImode vector. */
21855 tmp1
= gen_reg_rtx (V8HImode
);
21856 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
21857 /* Duplicate the low short through the whole low SImode word. */
21858 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
21859 /* Cast the V8HImode vector back to a V4SImode vector. */
21860 tmp2
= gen_reg_rtx (V4SImode
);
21861 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
21862 /* Replicate the low element of the V4SImode vector. */
21863 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
21864 /* Cast the V2SImode back to V8HImode, and store in target. */
21865 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
21876 /* Extend QImode to SImode using a paradoxical SUBREG. */
21877 tmp1
= gen_reg_rtx (SImode
);
21878 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
21879 /* Insert the SImode value as low element of V4SImode vector. */
21880 tmp2
= gen_reg_rtx (V4SImode
);
21881 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
21882 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
21883 CONST0_RTX (V4SImode
),
21885 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
21886 /* Cast the V4SImode vector back to a V16QImode vector. */
21887 tmp1
= gen_reg_rtx (V16QImode
);
21888 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
21889 /* Duplicate the low byte through the whole low SImode word. */
21890 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
21891 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
21892 /* Cast the V16QImode vector back to a V4SImode vector. */
21893 tmp2
= gen_reg_rtx (V4SImode
);
21894 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
21895 /* Replicate the low element of the V4SImode vector. */
21896 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
21897 /* Cast the V2SImode back to V16QImode, and store in target. */
21898 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
21906 /* Replicate the value once into the next wider mode and recurse. */
21907 val
= convert_modes (wsmode
, smode
, val
, true);
21908 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
21909 GEN_INT (GET_MODE_BITSIZE (smode
)),
21910 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
21911 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
21913 x
= gen_reg_rtx (wvmode
);
21914 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
21915 gcc_unreachable ();
21916 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* NOTE(review): garbled extract — the mode switch, case labels, `return'
   statements, several local declarations (x, tmp, new_target) and the
   one_var != 0 guards are missing from this view; code kept byte-identical,
   comments only added at the top.
   Builds vector TARGET whose ONE_VAR element is VAR and all others zero:
   two-element modes use VEC_CONCAT with zero; V4SF/V4SI use
   VEC_DUPLICATE + VEC_MERGE against zero and, when ONE_VAR != 0, shuffle
   the value into place with pshufd (SSE2) or shufps via a V4SF temporary
   (SSE1); narrower element modes zero-extend VAR to SImode and recurse on
   the corresponding SImode vector mode.  */
21924 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21925 whose ONE_VAR element is VAR, and other elements are zero. Return true
21929 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
21930 rtx target
, rtx var
, int one_var
)
21932 enum machine_mode vsimode
;
21948 var
= force_reg (GET_MODE_INNER (mode
), var
);
21949 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
21950 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
21955 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
21956 new_target
= gen_reg_rtx (mode
);
21958 new_target
= target
;
21959 var
= force_reg (GET_MODE_INNER (mode
), var
);
21960 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
21961 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
21962 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
21965 /* We need to shuffle the value to the correct position, so
21966 create a new pseudo to store the intermediate result. */
21968 /* With SSE2, we can use the integer shuffle insns. */
21969 if (mode
!= V4SFmode
&& TARGET_SSE2
)
21971 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
21973 GEN_INT (one_var
== 1 ? 0 : 1),
21974 GEN_INT (one_var
== 2 ? 0 : 1),
21975 GEN_INT (one_var
== 3 ? 0 : 1)));
21976 if (target
!= new_target
)
21977 emit_move_insn (target
, new_target
);
21981 /* Otherwise convert the intermediate result to V4SFmode and
21982 use the SSE1 shuffle instructions. */
21983 if (mode
!= V4SFmode
)
21985 tmp
= gen_reg_rtx (V4SFmode
);
21986 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
21991 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
21993 GEN_INT (one_var
== 1 ? 0 : 1),
21994 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
21995 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
21997 if (mode
!= V4SFmode
)
21998 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
21999 else if (tmp
!= target
)
22000 emit_move_insn (target
, tmp
);
22002 else if (target
!= new_target
)
22003 emit_move_insn (target
, new_target
);
22008 vsimode
= V4SImode
;
22014 vsimode
= V2SImode
;
22020 /* Zero extend the variable element to SImode and recurse. */
22021 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
22023 x
= gen_reg_rtx (vsimode
);
22024 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
22026 gcc_unreachable ();
22028 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* NOTE(review): garbled extract — the mode switch, case labels, the
   declarations of x/const_vec, the CONST_INT check on the adjacent QImode
   element, the wmode assignments and the `return true/false' statements are
   missing from this view; code kept byte-identical, comments only added at
   the top.
   Builds TARGET from VALS where all elements except ONE_VAR are constants:
   loads a constant vector with the variable slot zeroed, then overwrites
   that slot with ix86_expand_vector_set.  QImode vectors fold the variable
   byte together with its adjacent constant byte into one HImode element
   first (shift + IOR), since a single byte cannot be set directly.  */
22036 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22037 consisting of the values in VALS. It is known that all elements
22038 except ONE_VAR are constants. Return true if successful. */
22041 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
22042 rtx target
, rtx vals
, int one_var
)
22044 rtx var
= XVECEXP (vals
, 0, one_var
);
22045 enum machine_mode wmode
;
22048 const_vec
= copy_rtx (vals
);
22049 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
22050 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
22058 /* For the two element vectors, it's just as easy to use
22059 the general case. */
22075 /* There's no way to set one QImode entry easily. Combine
22076 the variable value with its adjacent constant value, and
22077 promote to an HImode set. */
22078 x
= XVECEXP (vals
, 0, one_var
^ 1);
22081 var
= convert_modes (HImode
, QImode
, var
, true);
22082 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
22083 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
22084 x
= GEN_INT (INTVAL (x
) & 0xff);
22088 var
= convert_modes (HImode
, QImode
, var
, true);
22089 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
22091 if (x
!= const0_rtx
)
22092 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
22093 1, OPTAB_LIB_WIDEN
);
22095 x
= gen_reg_rtx (wmode
);
22096 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
22097 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
22099 emit_move_insn (target
, gen_lowpart (mode
, x
));
22106 emit_move_insn (target
, const_vec
);
22107 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
/* NOTE(review): garbled extract — the mode switch, several case labels,
   the rtvec declaration `v', the word==NULL first-iteration branch, the
   n_words==1 test and `return' statements are missing from this view; code
   kept byte-identical, comments only added at the top.
   Fully general vector construction: two-element modes emit VEC_CONCAT
   directly; V4SF/V4SI build two half-vectors recursively and concat them;
   everything else packs the elements into word_mode integers (shift + IOR
   per element), then moves the 1, 2 or 4 words into TARGET (via lowpart,
   low/high word halves of a clobbered temp, or recursion on V4SI).  */
22111 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
22112 all values variable, and none identical. */
22115 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
22116 rtx target
, rtx vals
)
22118 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
22119 rtx op0
= NULL
, op1
= NULL
;
22120 bool use_vec_concat
= false;
22126 if (!mmx_ok
&& !TARGET_SSE
)
22132 /* For the two element vectors, we always implement VEC_CONCAT. */
22133 op0
= XVECEXP (vals
, 0, 0);
22134 op1
= XVECEXP (vals
, 0, 1);
22135 use_vec_concat
= true;
22139 half_mode
= V2SFmode
;
22142 half_mode
= V2SImode
;
22148 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
22149 Recurse to load the two halves. */
22151 op0
= gen_reg_rtx (half_mode
);
22152 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
22153 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
22155 op1
= gen_reg_rtx (half_mode
);
22156 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
22157 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
22159 use_vec_concat
= true;
22170 gcc_unreachable ();
22173 if (use_vec_concat
)
22175 if (!register_operand (op0
, half_mode
))
22176 op0
= force_reg (half_mode
, op0
);
22177 if (!register_operand (op1
, half_mode
))
22178 op1
= force_reg (half_mode
, op1
);
22180 emit_insn (gen_rtx_SET (VOIDmode
, target
,
22181 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
22185 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
22186 enum machine_mode inner_mode
;
22187 rtx words
[4], shift
;
22189 inner_mode
= GET_MODE_INNER (mode
);
22190 n_elts
= GET_MODE_NUNITS (mode
);
22191 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
22192 n_elt_per_word
= n_elts
/ n_words
;
22193 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
22195 for (i
= 0; i
< n_words
; ++i
)
22197 rtx word
= NULL_RTX
;
22199 for (j
= 0; j
< n_elt_per_word
; ++j
)
22201 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
22202 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
22208 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
22209 word
, 1, OPTAB_LIB_WIDEN
);
22210 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
22211 word
, 1, OPTAB_LIB_WIDEN
);
22219 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
22220 else if (n_words
== 2)
22222 rtx tmp
= gen_reg_rtx (mode
);
22223 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
22224 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
22225 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
22226 emit_move_insn (target
, tmp
);
22228 else if (n_words
== 4)
22230 rtx tmp
= gen_reg_rtx (V4SImode
);
22231 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
22232 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
22233 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
22236 gcc_unreachable ();
/* NOTE(review): garbled extract — the declarations of i/x, the
   `all_same = false' assignment, the n_var == 0 test, and the `return'
   statements after each successful strategy are missing from this view;
   code kept byte-identical, comments only added at the top.
   Top-level vector-constructor expander: classifies VALS (how many
   non-constant elements, all-identical, all-zero) and dispatches, in order,
   to: constant-pool load, broadcast (init_duplicate), one-nonzero,
   one-variable, and finally the fully general path.  */
22240 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22241 instructions unless MMX_OK is true. */
22244 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
22246 enum machine_mode mode
= GET_MODE (target
);
22247 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22248 int n_elts
= GET_MODE_NUNITS (mode
);
22249 int n_var
= 0, one_var
= -1;
22250 bool all_same
= true, all_const_zero
= true;
22254 for (i
= 0; i
< n_elts
; ++i
)
22256 x
= XVECEXP (vals
, 0, i
);
22257 if (!CONSTANT_P (x
))
22258 n_var
++, one_var
= i
;
22259 else if (x
!= CONST0_RTX (inner_mode
))
22260 all_const_zero
= false;
22261 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
22265 /* Constants are best loaded from the constant pool. */
22268 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
22272 /* If all values are identical, broadcast the value. */
22274 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
22275 XVECEXP (vals
, 0, 0)))
22278 /* Values where only one field is non-constant are best loaded from
22279 the pool and overwritten via move later. */
22283 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
22284 XVECEXP (vals
, 0, one_var
),
22288 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
22292 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
/* NOTE(review): garbled extract — the mode switch, many case labels (the
   V4SF elt cases, V4SI elt cases, V8HI/V16QI/MMX cases), the declarations
   of tmp/op0/op1/order[], several `break'/`return' statements and the
   `if (use_vec_merge)' test before the VEC_MERGE emit are missing from this
   view; code kept byte-identical, comments only added at the top.
   Stores scalar VAL into element ELT of vector TARGET.  Strategies by mode
   and ISA level: VEC_MERGE with a duplicate (SSE4.1 or trivially mergeable
   modes); VEC_CONCAT with the extracted other element for two-element
   modes; unpcklps/shufps choreography for V4SF; pshufd element swap for
   V4SI; lowpart-to-V4SF reuse for V8HI/V16QI under SSE1; and as a last
   resort a spill to a stack temporary, scalar store, and reload.  */
22296 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
22298 enum machine_mode mode
= GET_MODE (target
);
22299 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22300 bool use_vec_merge
= false;
22309 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
22310 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
22312 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
22314 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
22315 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22321 use_vec_merge
= TARGET_SSE4_1
;
22329 /* For the two element vectors, we implement a VEC_CONCAT with
22330 the extraction of the other element. */
22332 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
22333 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
22336 op0
= val
, op1
= tmp
;
22338 op0
= tmp
, op1
= val
;
22340 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
22341 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22346 use_vec_merge
= TARGET_SSE4_1
;
22353 use_vec_merge
= true;
22357 /* tmp = target = A B C D */
22358 tmp
= copy_to_reg (target
);
22359 /* target = A A B B */
22360 emit_insn (gen_sse_unpcklps (target
, target
, target
));
22361 /* target = X A B B */
22362 ix86_expand_vector_set (false, target
, val
, 0);
22363 /* target = A X C D */
22364 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22365 GEN_INT (1), GEN_INT (0),
22366 GEN_INT (2+4), GEN_INT (3+4)));
22370 /* tmp = target = A B C D */
22371 tmp
= copy_to_reg (target
);
22372 /* tmp = X B C D */
22373 ix86_expand_vector_set (false, tmp
, val
, 0);
22374 /* target = A B X D */
22375 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22376 GEN_INT (0), GEN_INT (1),
22377 GEN_INT (0+4), GEN_INT (3+4)));
22381 /* tmp = target = A B C D */
22382 tmp
= copy_to_reg (target
);
22383 /* tmp = X B C D */
22384 ix86_expand_vector_set (false, tmp
, val
, 0);
22385 /* target = A B X D */
22386 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22387 GEN_INT (0), GEN_INT (1),
22388 GEN_INT (2+4), GEN_INT (0+4)));
22392 gcc_unreachable ();
22397 use_vec_merge
= TARGET_SSE4_1
;
22401 /* Element 0 handled by vec_merge below. */
22404 use_vec_merge
= true;
22410 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22411 store into element 0, then shuffle them back. */
22415 order
[0] = GEN_INT (elt
);
22416 order
[1] = const1_rtx
;
22417 order
[2] = const2_rtx
;
22418 order
[3] = GEN_INT (3);
22419 order
[elt
] = const0_rtx
;
22421 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
22422 order
[1], order
[2], order
[3]));
22424 ix86_expand_vector_set (false, target
, val
, 0);
22426 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
22427 order
[1], order
[2], order
[3]));
22431 /* For SSE1, we have to reuse the V4SF code. */
22432 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
22433 gen_lowpart (SFmode
, val
), elt
);
22438 use_vec_merge
= TARGET_SSE2
;
22441 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
22445 use_vec_merge
= TARGET_SSE4_1
;
22455 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
22456 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
22457 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22461 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
22463 emit_move_insn (mem
, target
);
22465 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
22466 emit_move_insn (tmp
, val
);
22468 emit_move_insn (target
, mem
);
/* NOTE(review): garbled extract — the mode switch, the elt-value case
   labels within the V4SF/V4SI cases, the declaration of tmp, `break'
   statements and the `if (use_vec_extr)' test before the final VEC_SELECT
   emit are missing from this view; code kept byte-identical, comments only
   added at the top.
   Extracts element ELT of vector VEC into scalar TARGET.  Strategies:
   direct vec_extract where supported (SSE4.1 widens coverage); shufps /
   unpckhps to rotate the wanted V4SF lane into position 0; pshufd /
   punpckhdq likewise for V4SI; V8HI/V16QI under SSE1 reuse the V4SF path
   via lowpart casts; QI/HI extracts go through a SImode ZERO_EXTEND so the
   optimizers see the zero-extension; otherwise spill VEC to a stack temp
   and load the element.  */
22473 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
22475 enum machine_mode mode
= GET_MODE (vec
);
22476 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22477 bool use_vec_extr
= false;
22490 use_vec_extr
= true;
22494 use_vec_extr
= TARGET_SSE4_1
;
22506 tmp
= gen_reg_rtx (mode
);
22507 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
22508 GEN_INT (elt
), GEN_INT (elt
),
22509 GEN_INT (elt
+4), GEN_INT (elt
+4)));
22513 tmp
= gen_reg_rtx (mode
);
22514 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
22518 gcc_unreachable ();
22521 use_vec_extr
= true;
22526 use_vec_extr
= TARGET_SSE4_1
;
22540 tmp
= gen_reg_rtx (mode
);
22541 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
22542 GEN_INT (elt
), GEN_INT (elt
),
22543 GEN_INT (elt
), GEN_INT (elt
)));
22547 tmp
= gen_reg_rtx (mode
);
22548 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
22552 gcc_unreachable ();
22555 use_vec_extr
= true;
22560 /* For SSE1, we have to reuse the V4SF code. */
22561 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
22562 gen_lowpart (V4SFmode
, vec
), elt
);
22568 use_vec_extr
= TARGET_SSE2
;
22571 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
22575 use_vec_extr
= TARGET_SSE4_1
;
22579 /* ??? Could extract the appropriate HImode element and shift. */
22586 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
22587 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
22589 /* Let the rtl optimizers know about the zero extension performed. */
22590 if (inner_mode
== QImode
|| inner_mode
== HImode
)
22592 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
22593 target
= gen_lowpart (SImode
, target
);
22596 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22600 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
22602 emit_move_insn (mem
, vec
);
22604 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
22605 emit_move_insn (target
, tmp
);
22609 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22610 pattern to reduce; DEST is the destination; IN is the input vector. */
22613 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
22615 rtx tmp1
, tmp2
, tmp3
;
22617 tmp1
= gen_reg_rtx (V4SFmode
);
22618 tmp2
= gen_reg_rtx (V4SFmode
);
22619 tmp3
= gen_reg_rtx (V4SFmode
);
22621 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
22622 emit_insn (fn (tmp2
, tmp1
, in
));
22624 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
22625 GEN_INT (1), GEN_INT (1),
22626 GEN_INT (1+4), GEN_INT (1+4)));
22627 emit_insn (fn (dest
, tmp2
, tmp3
));
22630 /* Target hook for scalar_mode_supported_p. */
22632 ix86_scalar_mode_supported_p (enum machine_mode mode
)
22634 if (DECIMAL_FLOAT_MODE_P (mode
))
22636 else if (mode
== TFmode
)
22637 return TARGET_64BIT
;
22639 return default_scalar_mode_supported_p (mode
);
22642 /* Implements target hook vector_mode_supported_p. */
22644 ix86_vector_mode_supported_p (enum machine_mode mode
)
22646 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
22648 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
22650 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
22652 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
22657 /* Target hook for c_mode_for_suffix. */
22658 static enum machine_mode
22659 ix86_c_mode_for_suffix (char suffix
)
22661 if (TARGET_64BIT
&& suffix
== 'q')
22663 if (TARGET_MMX
&& suffix
== 'w')
22669 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22671 We do this in the new i386 backend to maintain source compatibility
22672 with the old cc0-based compiler. */
22675 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
22676 tree inputs ATTRIBUTE_UNUSED
,
22679 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
22681 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
22686 /* Implements target vector targetm.asm.encode_section_info. This
22687 is not used by netware. */
22689 static void ATTRIBUTE_UNUSED
22690 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
22692 default_encode_section_info (decl
, rtl
, first
);
22694 if (TREE_CODE (decl
) == VAR_DECL
22695 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
22696 && ix86_in_large_data_p (decl
))
22697 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
22700 /* Worker function for REVERSE_CONDITION. */
22703 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
22705 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
22706 ? reverse_condition (code
)
22707 : reverse_condition_maybe_unordered (code
));
22710 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22714 output_387_reg_move (rtx insn
, rtx
*operands
)
22716 if (REG_P (operands
[0]))
22718 if (REG_P (operands
[1])
22719 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
22721 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
22722 return output_387_ffreep (operands
, 0);
22723 return "fstp\t%y0";
22725 if (STACK_TOP_P (operands
[0]))
22726 return "fld%z1\t%y1";
22729 else if (MEM_P (operands
[0]))
22731 gcc_assert (REG_P (operands
[1]));
22732 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
22733 return "fstp%z0\t%y0";
22736 /* There is no non-popping store to memory for XFmode.
22737 So if we need one, follow the store with a load. */
22738 if (GET_MODE (operands
[0]) == XFmode
)
22739 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22741 return "fst%z0\t%y0";
22748 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22749 FP status register is set. */
22752 ix86_emit_fp_unordered_jump (rtx label
)
22754 rtx reg
= gen_reg_rtx (HImode
);
22757 emit_insn (gen_x86_fnstsw_1 (reg
));
22759 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
22761 emit_insn (gen_x86_sahf_1 (reg
));
22763 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
22764 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
22768 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
22770 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22771 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
22774 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
22775 gen_rtx_LABEL_REF (VOIDmode
, label
),
22777 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
22779 emit_jump_insn (temp
);
22780 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22783 /* Output code to perform a log1p XFmode calculation. */
22785 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
22787 rtx label1
= gen_label_rtx ();
22788 rtx label2
= gen_label_rtx ();
22790 rtx tmp
= gen_reg_rtx (XFmode
);
22791 rtx tmp2
= gen_reg_rtx (XFmode
);
22793 emit_insn (gen_absxf2 (tmp
, op1
));
22794 emit_insn (gen_cmpxf (tmp
,
22795 CONST_DOUBLE_FROM_REAL_VALUE (
22796 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
22798 emit_jump_insn (gen_bge (label1
));
22800 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
22801 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
22802 emit_jump (label2
);
22804 emit_label (label1
);
22805 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
22806 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
22807 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
22808 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
22810 emit_label (label2
);
22813 /* Output code to perform a Newton-Rhapson approximation of a single precision
22814 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22816 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
22818 rtx x0
, x1
, e0
, e1
, two
;
22820 x0
= gen_reg_rtx (mode
);
22821 e0
= gen_reg_rtx (mode
);
22822 e1
= gen_reg_rtx (mode
);
22823 x1
= gen_reg_rtx (mode
);
22825 two
= CONST_DOUBLE_FROM_REAL_VALUE (dconst2
, SFmode
);
22827 if (VECTOR_MODE_P (mode
))
22828 two
= ix86_build_const_vector (SFmode
, true, two
);
22830 two
= force_reg (mode
, two
);
22832 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22834 /* x0 = 1./b estimate */
22835 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
22836 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
22839 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
22840 gen_rtx_MULT (mode
, x0
, b
)));
22842 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
22843 gen_rtx_MINUS (mode
, two
, e0
)));
22845 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
22846 gen_rtx_MULT (mode
, x0
, e1
)));
22848 emit_insn (gen_rtx_SET (VOIDmode
, res
,
22849 gen_rtx_MULT (mode
, a
, x1
)));
22852 /* Output code to perform a Newton-Rhapson approximation of a
22853 single precision floating point [reciprocal] square root. */
22855 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
22858 rtx x0
, e0
, e1
, e2
, e3
, three
, half
, zero
, mask
;
22860 x0
= gen_reg_rtx (mode
);
22861 e0
= gen_reg_rtx (mode
);
22862 e1
= gen_reg_rtx (mode
);
22863 e2
= gen_reg_rtx (mode
);
22864 e3
= gen_reg_rtx (mode
);
22866 three
= CONST_DOUBLE_FROM_REAL_VALUE (dconst3
, SFmode
);
22867 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, SFmode
);
22869 mask
= gen_reg_rtx (mode
);
22871 if (VECTOR_MODE_P (mode
))
22873 three
= ix86_build_const_vector (SFmode
, true, three
);
22874 half
= ix86_build_const_vector (SFmode
, true, half
);
22877 three
= force_reg (mode
, three
);
22878 half
= force_reg (mode
, half
);
22880 zero
= force_reg (mode
, CONST0_RTX(mode
));
22882 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22883 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22885 /* Compare a to zero. */
22886 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
22887 gen_rtx_NE (mode
, a
, zero
)));
22889 /* x0 = 1./sqrt(a) estimate */
22890 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
22891 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
22893 /* Filter out infinity. */
22894 if (VECTOR_MODE_P (mode
))
22895 emit_insn (gen_rtx_SET (VOIDmode
, gen_lowpart (V4SFmode
, x0
),
22897 gen_lowpart (V4SFmode
, x0
),
22898 gen_lowpart (V4SFmode
, mask
))));
22900 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
22901 gen_rtx_AND (mode
, x0
, mask
)));
22904 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
22905 gen_rtx_MULT (mode
, x0
, a
)));
22907 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
22908 gen_rtx_MULT (mode
, e0
, x0
)));
22910 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
22911 gen_rtx_MINUS (mode
, three
, e1
)));
22914 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
22915 gen_rtx_MULT (mode
, half
, x0
)));
22918 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
22919 gen_rtx_MULT (mode
, half
, e0
)));
22920 /* ret = e2 * e3 */
22921 emit_insn (gen_rtx_SET (VOIDmode
, res
,
22922 gen_rtx_MULT (mode
, e2
, e3
)));
22925 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22927 static void ATTRIBUTE_UNUSED
22928 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
22931 /* With Binutils 2.15, the "@unwind" marker must be specified on
22932 every occurrence of the ".eh_frame" section, not just the first
22935 && strcmp (name
, ".eh_frame") == 0)
22937 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
22938 flags
& SECTION_WRITE
? "aw" : "a");
22941 default_elf_asm_named_section (name
, flags
, decl
);
22944 /* Return the mangling of TYPE if it is an extended fundamental type. */
22946 static const char *
22947 ix86_mangle_type (const_tree type
)
22949 type
= TYPE_MAIN_VARIANT (type
);
22951 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
22952 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
22955 switch (TYPE_MODE (type
))
22958 /* __float128 is "g". */
22961 /* "long double" or __float80 is "e". */
22968 /* For 32-bit code we can save PIC register setup by using
22969 __stack_chk_fail_local hidden function instead of calling
22970 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22971 register, so it is better to call __stack_chk_fail directly. */
22974 ix86_stack_protect_fail (void)
22976 return TARGET_64BIT
22977 ? default_external_stack_protect_fail ()
22978 : default_hidden_stack_protect_fail ();
22981 /* Select a format to encode pointers in exception handling data. CODE
22982 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22983 true if the symbol may be affected by dynamic relocations.
22985 ??? All x86 object file formats are capable of representing this.
22986 After all, the relocation needed is the same as for the call insn.
22987 Whether or not a particular assembler allows us to enter such, I
22988 guess we'll have to see. */
22990 asm_preferred_eh_data_format (int code
, int global
)
22994 int type
= DW_EH_PE_sdata8
;
22996 || ix86_cmodel
== CM_SMALL_PIC
22997 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
22998 type
= DW_EH_PE_sdata4
;
22999 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
23001 if (ix86_cmodel
== CM_SMALL
23002 || (ix86_cmodel
== CM_MEDIUM
&& code
))
23003 return DW_EH_PE_udata4
;
23004 return DW_EH_PE_absptr
;
23007 /* Expand copysign from SIGN to the positive value ABS_VALUE
23008 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
23011 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
23013 enum machine_mode mode
= GET_MODE (sign
);
23014 rtx sgn
= gen_reg_rtx (mode
);
23015 if (mask
== NULL_RTX
)
23017 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
23018 if (!VECTOR_MODE_P (mode
))
23020 /* We need to generate a scalar mode mask in this case. */
23021 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
23022 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
23023 mask
= gen_reg_rtx (mode
);
23024 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
23028 mask
= gen_rtx_NOT (mode
, mask
);
23029 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
23030 gen_rtx_AND (mode
, mask
, sign
)));
23031 emit_insn (gen_rtx_SET (VOIDmode
, result
,
23032 gen_rtx_IOR (mode
, abs_value
, sgn
)));
23035 /* Expand fabs (OP0) and return a new rtx that holds the result. The
23036 mask for masking out the sign-bit is stored in *SMASK, if that is
23039 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
23041 enum machine_mode mode
= GET_MODE (op0
);
23044 xa
= gen_reg_rtx (mode
);
23045 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
23046 if (!VECTOR_MODE_P (mode
))
23048 /* We need to generate a scalar mode mask in this case. */
23049 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
23050 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
23051 mask
= gen_reg_rtx (mode
);
23052 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
23054 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
23055 gen_rtx_AND (mode
, op0
, mask
)));
23063 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
23064 swapping the operands if SWAP_OPERANDS is true. The expanded
23065 code is a forward jump to a newly created label in case the
23066 comparison is true. The generated label rtx is returned. */
23068 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
23069 bool swap_operands
)
23080 label
= gen_label_rtx ();
23081 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
23082 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23083 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
23084 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
23085 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23086 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
23087 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23088 JUMP_LABEL (tmp
) = label
;
23093 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
23094 using comparison code CODE. Operands are swapped for the comparison if
23095 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
23097 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
23098 bool swap_operands
)
23100 enum machine_mode mode
= GET_MODE (op0
);
23101 rtx mask
= gen_reg_rtx (mode
);
23110 if (mode
== DFmode
)
23111 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
23112 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
23114 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
23115 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
23120 /* Generate and return a rtx of mode MODE for 2**n where n is the number
23121 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
23123 ix86_gen_TWO52 (enum machine_mode mode
)
23125 REAL_VALUE_TYPE TWO52r
;
23128 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
23129 TWO52
= const_double_from_real_value (TWO52r
, mode
);
23130 TWO52
= force_reg (mode
, TWO52
);
23135 /* Expand SSE sequence for computing lround from OP1 storing
23138 ix86_expand_lround (rtx op0
, rtx op1
)
23140 /* C code for the stuff we're doing below:
23141 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
23144 enum machine_mode mode
= GET_MODE (op1
);
23145 const struct real_format
*fmt
;
23146 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
23149 /* load nextafter (0.5, 0.0) */
23150 fmt
= REAL_MODE_FORMAT (mode
);
23151 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
23152 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
23154 /* adj = copysign (0.5, op1) */
23155 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
23156 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
23158 /* adj = op1 + adj */
23159 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
23161 /* op0 = (imode)adj */
23162 expand_fix (op0
, adj
, 0);
23165 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
23168 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
23170 /* C code for the stuff we're doing below (for do_floor):
23172 xi -= (double)xi > op1 ? 1 : 0;
23175 enum machine_mode fmode
= GET_MODE (op1
);
23176 enum machine_mode imode
= GET_MODE (op0
);
23177 rtx ireg
, freg
, label
, tmp
;
23179 /* reg = (long)op1 */
23180 ireg
= gen_reg_rtx (imode
);
23181 expand_fix (ireg
, op1
, 0);
23183 /* freg = (double)reg */
23184 freg
= gen_reg_rtx (fmode
);
23185 expand_float (freg
, ireg
, 0);
23187 /* ireg = (freg > op1) ? ireg - 1 : ireg */
23188 label
= ix86_expand_sse_compare_and_jump (UNLE
,
23189 freg
, op1
, !do_floor
);
23190 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
23191 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
23192 emit_move_insn (ireg
, tmp
);
23194 emit_label (label
);
23195 LABEL_NUSES (label
) = 1;
23197 emit_move_insn (op0
, ireg
);
23200 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
23201 result in OPERAND0. */
23203 ix86_expand_rint (rtx operand0
, rtx operand1
)
23205 /* C code for the stuff we're doing below:
23206 xa = fabs (operand1);
23207 if (!isless (xa, 2**52))
23209 xa = xa + 2**52 - 2**52;
23210 return copysign (xa, operand1);
23212 enum machine_mode mode
= GET_MODE (operand0
);
23213 rtx res
, xa
, label
, TWO52
, mask
;
23215 res
= gen_reg_rtx (mode
);
23216 emit_move_insn (res
, operand1
);
23218 /* xa = abs (operand1) */
23219 xa
= ix86_expand_sse_fabs (res
, &mask
);
23221 /* if (!isless (xa, TWO52)) goto label; */
23222 TWO52
= ix86_gen_TWO52 (mode
);
23223 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23225 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23226 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
23228 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
23230 emit_label (label
);
23231 LABEL_NUSES (label
) = 1;
23233 emit_move_insn (operand0
, res
);
23236 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23239 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
23241 /* C code for the stuff we expand below.
23242 double xa = fabs (x), x2;
23243 if (!isless (xa, TWO52))
23245 xa = xa + TWO52 - TWO52;
23246 x2 = copysign (xa, x);
23255 enum machine_mode mode
= GET_MODE (operand0
);
23256 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
23258 TWO52
= ix86_gen_TWO52 (mode
);
23260 /* Temporary for holding the result, initialized to the input
23261 operand to ease control flow. */
23262 res
= gen_reg_rtx (mode
);
23263 emit_move_insn (res
, operand1
);
23265 /* xa = abs (operand1) */
23266 xa
= ix86_expand_sse_fabs (res
, &mask
);
23268 /* if (!isless (xa, TWO52)) goto label; */
23269 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23271 /* xa = xa + TWO52 - TWO52; */
23272 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23273 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
23275 /* xa = copysign (xa, operand1) */
23276 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
23278 /* generate 1.0 or -1.0 */
23279 one
= force_reg (mode
,
23280 const_double_from_real_value (do_floor
23281 ? dconst1
: dconstm1
, mode
));
23283 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23284 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
23285 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23286 gen_rtx_AND (mode
, one
, tmp
)));
23287 /* We always need to subtract here to preserve signed zero. */
23288 tmp
= expand_simple_binop (mode
, MINUS
,
23289 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23290 emit_move_insn (res
, tmp
);
23292 emit_label (label
);
23293 LABEL_NUSES (label
) = 1;
23295 emit_move_insn (operand0
, res
);
23298 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23301 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
23303 /* C code for the stuff we expand below.
23304 double xa = fabs (x), x2;
23305 if (!isless (xa, TWO52))
23307 x2 = (double)(long)x;
23314 if (HONOR_SIGNED_ZEROS (mode))
23315 return copysign (x2, x);
23318 enum machine_mode mode
= GET_MODE (operand0
);
23319 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
23321 TWO52
= ix86_gen_TWO52 (mode
);
23323 /* Temporary for holding the result, initialized to the input
23324 operand to ease control flow. */
23325 res
= gen_reg_rtx (mode
);
23326 emit_move_insn (res
, operand1
);
23328 /* xa = abs (operand1) */
23329 xa
= ix86_expand_sse_fabs (res
, &mask
);
23331 /* if (!isless (xa, TWO52)) goto label; */
23332 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23334 /* xa = (double)(long)x */
23335 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23336 expand_fix (xi
, res
, 0);
23337 expand_float (xa
, xi
, 0);
23340 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
23342 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23343 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
23344 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23345 gen_rtx_AND (mode
, one
, tmp
)));
23346 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
23347 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23348 emit_move_insn (res
, tmp
);
23350 if (HONOR_SIGNED_ZEROS (mode
))
23351 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
23353 emit_label (label
);
23354 LABEL_NUSES (label
) = 1;
23356 emit_move_insn (operand0
, res
);
23359 /* Expand SSE sequence for computing round from OPERAND1 storing
23360 into OPERAND0. Sequence that works without relying on DImode truncation
23361 via cvttsd2siq that is only available on 64bit targets. */
23363 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
23365 /* C code for the stuff we expand below.
23366 double xa = fabs (x), xa2, x2;
23367 if (!isless (xa, TWO52))
23369 Using the absolute value and copying back sign makes
23370 -0.0 -> -0.0 correct.
23371 xa2 = xa + TWO52 - TWO52;
23376 else if (dxa > 0.5)
23378 x2 = copysign (xa2, x);
23381 enum machine_mode mode
= GET_MODE (operand0
);
23382 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
23384 TWO52
= ix86_gen_TWO52 (mode
);
23386 /* Temporary for holding the result, initialized to the input
23387 operand to ease control flow. */
23388 res
= gen_reg_rtx (mode
);
23389 emit_move_insn (res
, operand1
);
23391 /* xa = abs (operand1) */
23392 xa
= ix86_expand_sse_fabs (res
, &mask
);
23394 /* if (!isless (xa, TWO52)) goto label; */
23395 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23397 /* xa2 = xa + TWO52 - TWO52; */
23398 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23399 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
23401 /* dxa = xa2 - xa; */
23402 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
23404 /* generate 0.5, 1.0 and -0.5 */
23405 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
23406 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
23407 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
23411 tmp
= gen_reg_rtx (mode
);
23412 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23413 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
23414 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23415 gen_rtx_AND (mode
, one
, tmp
)));
23416 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23417 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23418 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
23419 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23420 gen_rtx_AND (mode
, one
, tmp
)));
23421 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23423 /* res = copysign (xa2, operand1) */
23424 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
23426 emit_label (label
);
23427 LABEL_NUSES (label
) = 1;
23429 emit_move_insn (operand0
, res
);
23432 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23435 ix86_expand_trunc (rtx operand0
, rtx operand1
)
23437 /* C code for SSE variant we expand below.
23438 double xa = fabs (x), x2;
23439 if (!isless (xa, TWO52))
23441 x2 = (double)(long)x;
23442 if (HONOR_SIGNED_ZEROS (mode))
23443 return copysign (x2, x);
23446 enum machine_mode mode
= GET_MODE (operand0
);
23447 rtx xa
, xi
, TWO52
, label
, res
, mask
;
23449 TWO52
= ix86_gen_TWO52 (mode
);
23451 /* Temporary for holding the result, initialized to the input
23452 operand to ease control flow. */
23453 res
= gen_reg_rtx (mode
);
23454 emit_move_insn (res
, operand1
);
23456 /* xa = abs (operand1) */
23457 xa
= ix86_expand_sse_fabs (res
, &mask
);
23459 /* if (!isless (xa, TWO52)) goto label; */
23460 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23462 /* x = (double)(long)x */
23463 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23464 expand_fix (xi
, res
, 0);
23465 expand_float (res
, xi
, 0);
23467 if (HONOR_SIGNED_ZEROS (mode
))
23468 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
23470 emit_label (label
);
23471 LABEL_NUSES (label
) = 1;
23473 emit_move_insn (operand0
, res
);
23476 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23479 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
23481 enum machine_mode mode
= GET_MODE (operand0
);
23482 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
23484 /* C code for SSE variant we expand below.
23485 double xa = fabs (x), x2;
23486 if (!isless (xa, TWO52))
23488 xa2 = xa + TWO52 - TWO52;
23492 x2 = copysign (xa2, x);
23496 TWO52
= ix86_gen_TWO52 (mode
);
23498 /* Temporary for holding the result, initialized to the input
23499 operand to ease control flow. */
23500 res
= gen_reg_rtx (mode
);
23501 emit_move_insn (res
, operand1
);
23503 /* xa = abs (operand1) */
23504 xa
= ix86_expand_sse_fabs (res
, &smask
);
23506 /* if (!isless (xa, TWO52)) goto label; */
23507 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23509 /* res = xa + TWO52 - TWO52; */
23510 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23511 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
23512 emit_move_insn (res
, tmp
);
23515 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
23517 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23518 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
23519 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
23520 gen_rtx_AND (mode
, mask
, one
)));
23521 tmp
= expand_simple_binop (mode
, MINUS
,
23522 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
23523 emit_move_insn (res
, tmp
);
23525 /* res = copysign (res, operand1) */
23526 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
23528 emit_label (label
);
23529 LABEL_NUSES (label
) = 1;
23531 emit_move_insn (operand0
, res
);
23534 /* Expand SSE sequence for computing round from OPERAND1 storing
23537 ix86_expand_round (rtx operand0
, rtx operand1
)
23539 /* C code for the stuff we're doing below:
23540 double xa = fabs (x);
23541 if (!isless (xa, TWO52))
23543 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23544 return copysign (xa, x);
23546 enum machine_mode mode
= GET_MODE (operand0
);
23547 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
23548 const struct real_format
*fmt
;
23549 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
23551 /* Temporary for holding the result, initialized to the input
23552 operand to ease control flow. */
23553 res
= gen_reg_rtx (mode
);
23554 emit_move_insn (res
, operand1
);
23556 TWO52
= ix86_gen_TWO52 (mode
);
23557 xa
= ix86_expand_sse_fabs (res
, &mask
);
23558 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23560 /* load nextafter (0.5, 0.0) */
23561 fmt
= REAL_MODE_FORMAT (mode
);
23562 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
23563 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
23565 /* xa = xa + 0.5 */
23566 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
23567 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
23569 /* xa = (double)(int64_t)xa */
23570 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23571 expand_fix (xi
, xa
, 0);
23572 expand_float (xa
, xi
, 0);
23574 /* res = copysign (xa, operand1) */
23575 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
23577 emit_label (label
);
23578 LABEL_NUSES (label
) = 1;
23580 emit_move_insn (operand0
, res
);
23584 /* Table of valid machine attributes. */
23585 static const struct attribute_spec ix86_attribute_table
[] =
23587 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23588 /* Stdcall attribute says callee is responsible for popping arguments
23589 if they are not variable. */
23590 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23591 /* Fastcall attribute says callee is responsible for popping arguments
23592 if they are not variable. */
23593 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23594 /* Cdecl attribute says the callee is a normal C declaration */
23595 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23596 /* Regparm attribute specifies how many integer arguments are to be
23597 passed in registers. */
23598 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
23599 /* Sseregparm attribute says we are using x86_64 calling conventions
23600 for FP arguments. */
23601 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23602 /* force_align_arg_pointer says this function realigns the stack at entry. */
23603 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
23604 false, true, true, ix86_handle_cconv_attribute
},
23605 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23606 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
23607 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
23608 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
23610 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
23611 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
23612 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23613 SUBTARGET_ATTRIBUTE_TABLE
,
23615 { NULL
, 0, 0, false, false, false, NULL
}
/* Initialize the GCC target structure.  Each TARGET_* macro below overrides
   the default hook in target-def.h; TARGET_INITIALIZER (used at the end of
   the file) collects them into the targetm vector.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

/* Subtargets (e.g. PE/Darwin) may supply their own section-info encoder.  */
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
/* DImode data directive only exists when the assembler has .quad.  */
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

/* x86 has no alignment constraints on data, so the unaligned ops are the
   aligned ones.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

/* TLS support requires assembler support.  */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

/* The later #if wins on Windows targets; Darwin uses its own binding rule.  */
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
/* The target hook vector consumed by the middle end; TARGET_INITIALIZER
   expands to an initializer built from the TARGET_* macro overrides in
   this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;
23796 #include "gt-i386.h"