1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "tree-gimple.h"
52 #include "tm-constrs.h"
55 static int x86_builtin_vectorization_cost (bool);
/* Default stack-probe limit when the target headers do not provide one;
   -1 means "no limit / probing disabled".  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DI map to slots 0-3; every other mode falls into the
   catch-all "other" slot 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES expresses a cost in bytes on the same scale, for use in
   the size-tuned cost table below.  */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy (always libcall) for the 64-bit slot of
   cost tables describing 32-bit-only processors.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
76 struct processor_costs size_cost
= { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
129 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
130 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
131 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
145 /* Processor costs (relative to an add) */
147 struct processor_costs i386_cost
= { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
200 DUMMY_STRINGOP_ALGS
},
201 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
202 DUMMY_STRINGOP_ALGS
},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
217 struct processor_costs i486_cost
= { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
272 DUMMY_STRINGOP_ALGS
},
273 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
274 DUMMY_STRINGOP_ALGS
},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
289 struct processor_costs pentium_cost
= {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
342 DUMMY_STRINGOP_ALGS
},
343 {{libcall
, {{-1, rep_prefix_4_byte
}}},
344 DUMMY_STRINGOP_ALGS
},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
359 struct processor_costs pentiumpro_cost
= {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks inline loop is still a noticeable win, for bigger
413 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
414 more expensive startup time in CPU, but after 4K the difference is down in the noise.
416 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
417 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
418 DUMMY_STRINGOP_ALGS
},
419 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
420 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
421 DUMMY_STRINGOP_ALGS
},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
436 struct processor_costs geode_cost
= {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
490 DUMMY_STRINGOP_ALGS
},
491 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
492 DUMMY_STRINGOP_ALGS
},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
507 struct processor_costs k6_cost
= {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
563 DUMMY_STRINGOP_ALGS
},
564 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
565 DUMMY_STRINGOP_ALGS
},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
580 struct processor_costs athlon_cost
= {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with REP prefix (relative to loops)
633 compared to K8. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
636 DUMMY_STRINGOP_ALGS
},
637 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
638 DUMMY_STRINGOP_ALGS
},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
653 struct processor_costs k8_cost
= {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set number of simultaneous prefetches
699 to a large constant to reflect this (it probably is not a good idea not
700 to limit number of prefetches at all, as their execution also takes some
702 100, /* number of parallel prefetches */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has optimized REP instruction for medium sized blocks, but for very small
711 blocks it is better to use loop. For large blocks, libcall can do
712 nontemporary accesses and beat inline considerably. */
713 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
714 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
715 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
716 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
717 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
731 struct processor_costs amdfam10_cost
= {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
776 MOVD reg64, xmmreg Double FADD 3
778 MOVD reg32, xmmreg Double FADD 3
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set number of simultaneous prefetches
785 to a large constant to reflect this (it probably is not a good idea not
786 to limit number of prefetches at all, as their execution also takes some
788 100, /* number of parallel prefetches */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
798 very small blocks it is better to use loop. For large blocks, libcall can
799 do nontemporary accesses and beat inline considerably. */
800 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
801 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
802 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
803 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
804 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
819 struct processor_costs pentium4_cost
= {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
872 DUMMY_STRINGOP_ALGS
},
873 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
875 DUMMY_STRINGOP_ALGS
},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
890 struct processor_costs nocona_cost
= {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
943 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
944 {100000, unrolled_loop
}, {-1, libcall
}}}},
945 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
947 {libcall
, {{24, loop
}, {64, unrolled_loop
},
948 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
963 struct processor_costs core2_cost
= {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of loading integer registers */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1015 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1016 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1017 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1018 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1019 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1020 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1036 struct processor_costs generic64_cost
= {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration lea is 2 cycles and more. With
1039 this cost however our current implementation of synth_mult results in
1040 use of unnecessary temporary registers causing regression on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1086 is increased to perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS
,
1095 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1096 {DUMMY_STRINGOP_ALGS
,
1097 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1113 struct processor_costs generic32_cost
= {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1166 DUMMY_STRINGOP_ALGS
},
1167 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1168 DUMMY_STRINGOP_ALGS
},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect.  Initialized to pentium_cost as a
   default; presumably repointed to the table matching the selected
   -mtune CPU during option processing -- confirm in override_options.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_XXX macro is a
   single-bit mask identifying one PROCESSOR_XXX value; the compound
   masks below union related processors so a tuning entry can name a
   whole family at once.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
/* All AMD K8-class and later cores handled by this port.  */
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling for Generic64 seems like good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro base chips. */
1215 m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_CORE2
| m_GENERIC64
,
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
1219 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1224 /* X86_TUNE_USE_BIT_TEST */
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486
| m_PENT
| m_PPRO
| m_AMD_MULTIPLE
| m_K6
| m_CORE2
| m_GENERIC
,
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
| m_GENERIC
,
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1234 on simulation result. But after P4 was made, no performance benefit
1235 was observed with branch hints. It also increases the code size.
1236 As a result, icc never generates branch hints. */
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1244 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE
| m_PPRO
| m_PENT4
| m_NOCONA
1249 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on Generic32 compilation setting as well. However
1253 in current implementation the partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and is in conflict with partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2
| m_GENERIC
,
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386
| m_486
| m_K6_GEODE
,
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO
| m_AMD_MULTIPLE
| m_PENT
| m_CORE2
| m_GENERIC
),
1270 /* X86_TUNE_USE_MOV0 */
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1285 /* X86_TUNE_READ_MODIFY */
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_AMD_MULTIPLE
| m_CORE2
1290 | m_GENERIC
/* | m_PENT4 ? */,
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT
| m_486
| m_386
),
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386
| m_PENT4
| m_NOCONA
,
1298 /* X86_TUNE_QIMODE_MATH */
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls was more effective. */
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE
| m_PPRO
| m_K6_GEODE
| m_386
1318 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE
| m_PPRO
| m_386
| m_486
1325 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1330 | m_GENERIC
| m_GEODE
),
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here in between PPro/Pentium4 based chips that thread 128bit
1337 SSE registers as single units versus K8 based chips that divide SSE
1338 registers to two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 shows that disabling this option on P4 brings over 20% SPECfp regression,
1342 while enabling it on K8 brings roughly 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just lower part of scalar values in proper format leaving the
1352 upper part undefined. */
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO
| m_PENT4
| m_NOCONA
,
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1370 /* X86_TUNE_SHIFT1 */
1373 /* X86_TUNE_USE_FFREEP */
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO
| m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO
| m_AMD_MULTIPLE
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1389 /* X86_TUNE_USE_BT */
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4
| m_NOCONA
| m_GENERIC
),
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE
| m_CORE2
| m_GENERIC
,
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1408 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1417 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1433 from integer to FP. */
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386
| m_486
| m_PENT
| m_K6
),
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Bitmask (of the m_XXX tuning masks above) of processors for which
   accumulating outgoing arguments is preferred; presumably consulted
   when choosing the -maccumulate-outgoing-args default -- confirm at
   the use site.  */
static const unsigned int x86_accumulate_outgoing_args
  = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Bitmask of processors on which the extended ("fancy") x87 math
   instructions can always be assumed available; NOTE(review): likely
   feeds the TARGET_USE_FANCY_MATH_387 default -- confirm at the use
   site.  */
static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;
/* User-forced string-operation algorithm; no_stringop means no override
   is in effect.  Presumably set from a -mstringop-strategy style option
   -- confirm in the option handling code.  */
static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The initializer macros come from i386.h; each expands to a braced
   list of register-name strings indexed by hard register number.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1477 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1479 /* ax, dx, cx, bx */
1480 AREG
, DREG
, CREG
, BREG
,
1481 /* si, di, bp, sp */
1482 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1484 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1485 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1491 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1494 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1497 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1498 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1499 /* SSE REX registers */
1500 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1504 /* The "default" register map used in 32bit mode. */
1506 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1517 static int const x86_64_int_parameter_registers
[6] =
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1523 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1529 static int const x86_64_int_return_registers
[4] =
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1600 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
/* First operand of the pending comparison.  */
1614 rtx ix86_compare_op0
= NULL_RTX
;
/* Second operand of the pending comparison.  */
1615 rtx ix86_compare_op1
= NULL_RTX
;
/* NOTE(review): presumably a flags-register rtx for a comparison that has
   already been emitted, used in place of op0/op1 when non-null -- confirm
   against the patterns in i386.md.  */
1616 rtx ix86_compare_emitted
= NULL_RTX
;
1618 /* Size of the register save area. */
/* Integer regs occupy UNITS_PER_WORD bytes each; SSE regs 16 bytes each.  */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1621 /* Define the structure for the machine field in struct function. */
1623 struct stack_local_entry
GTY(())
1625 unsigned short mode
;
1628 struct stack_local_entry
*next
;
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1655 HOST_WIDE_INT frame
;
1657 int outgoing_arguments_size
;
1660 HOST_WIDE_INT to_allocate
;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset
;
1663 HOST_WIDE_INT hard_frame_pointer_offset
;
1664 HOST_WIDE_INT stack_pointer_offset
;
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov
;
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel
;
/* Assembler dialect selected by -masm=; AT&T syntax is the default.  */
1674 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* TLS dialect selected by -mtls-dialect=; GNU is the default.  */
1676 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath
;
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune
;
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch
;
1687 /* true if sse prefetch instruction is not NOOP. */
1688 int x86_prefetch_sse
;
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm
;
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer
;
/* Attribute name recognized for forcing argument-pointer alignment.  */
1695 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary
;
1700 /* Values 1-5: see jump.c */
/* Defaults to the tuned processor's cost-table value in override_options
   and may be overridden there by -mbranch-cost=.  */
1701 int ix86_branch_cost
;
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1706 int ix86_section_threshold
= 65536;
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix
[16];
/* Number of bytes of internal_label_prefix actually in use.  */
1710 int internal_label_prefix_len
;
1712 /* Fence to use after loop using movnt. */
1715 /* Register class used for passing given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1718 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1725 X86_64_INTEGER_CLASS
,
1726 X86_64_INTEGERSI_CLASS
,
1733 X86_64_COMPLEX_X87_CLASS
,
1736 static const char * const x86_64_reg_class_name
[] =
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
/* NOTE(review): presumably the maximum number of register classes an
   argument can span in the x86-64 ABI classification -- confirm at the
   classification use sites.  */
1742 #define MAX_CLASSES 4
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
/* Nonzero once ext_80387_constants_table has been initialized.  */
1746 static bool ext_80387_constants_init
= 0;
/* Forward declarations of helpers defined later in this file.  */
1749 static struct machine_function
* ix86_init_machine_status (void);
1750 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
1751 static int ix86_function_regparm (const_tree
, const_tree
);
1752 static void ix86_compute_frame_layout (struct ix86_frame
*);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags
= TARGET_64BIT_DEFAULT
| TARGET_SUBTARGET_ISA_DEFAULT
;
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
/* Updated alongside ix86_isa_flags in ix86_handle_option.  */
1768 static int ix86_isa_flags_explicit
;
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
/* Each *_UNSET mask lists the ISAs that must also be disabled when the
   named ISA is disabled.  The definitions chain, so disabling e.g. SSE
   transitively disables SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A.  */
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
/* NOTE(review): SSE5_UNSET repeats the 3DNOW masks and looks copy-pasted
   from MMX_UNSET above -- confirm this is intentional.  */
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1800 /* Vectorization library interface and handlers. */
/* Hook returning the vector-library variant of a builtin, or NULL when no
   vectorization library is in use.  */
1801 tree (*ix86_veclib_handler
)(enum built_in_function
, tree
, tree
) = NULL
;
/* Handler implementing the ACML vector-library ABI.  */
1802 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
1804 /* Implement TARGET_HANDLE_OPTION. */
1807 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1812 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX
;
1815 ix86_isa_flags
&= ~OPTION_MASK_ISA_MMX_UNSET
;
1816 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_UNSET
;
1821 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW
;
1824 ix86_isa_flags
&= ~OPTION_MASK_ISA_3DNOW_UNSET
;
1825 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_UNSET
;
1833 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE
;
1836 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE_UNSET
;
1837 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_UNSET
;
1842 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2
;
1845 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE2_UNSET
;
1846 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_UNSET
;
1851 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3
;
1854 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE3_UNSET
;
1855 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_UNSET
;
1860 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3
;
1863 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSSE3_UNSET
;
1864 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_UNSET
;
1869 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1
;
1872 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_1_UNSET
;
1873 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_UNSET
;
1878 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2
;
1881 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_2_UNSET
;
1882 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_UNSET
;
1887 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4
;
1888 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4
;
1892 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_UNSET
;
1893 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_UNSET
;
1897 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A
;
1900 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4A_UNSET
;
1901 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_UNSET
;
1906 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE5
;
1909 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE5_UNSET
;
1910 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE5_UNSET
;
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1929 override_options (void)
1932 int ix86_tune_defaulted
= 0;
1933 int ix86_arch_specified
= 0;
1934 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1941 const struct processor_costs
*cost
; /* Processor costs */
1942 const int align_loop
; /* Default alignments. */
1943 const int align_loop_max_skip
;
1944 const int align_jump
;
1945 const int align_jump_max_skip
;
1946 const int align_func
;
1948 const processor_target_table
[PROCESSOR_max
] =
1950 {&i386_cost
, 4, 3, 4, 3, 4},
1951 {&i486_cost
, 16, 15, 16, 15, 16},
1952 {&pentium_cost
, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
1954 {&geode_cost
, 0, 0, 0, 0, 0},
1955 {&k6_cost
, 32, 7, 32, 7, 32},
1956 {&athlon_cost
, 16, 7, 16, 7, 16},
1957 {&pentium4_cost
, 0, 0, 0, 0, 0},
1958 {&k8_cost
, 16, 7, 16, 7, 16},
1959 {&nocona_cost
, 0, 0, 0, 0, 0},
1960 {&core2_cost
, 16, 10, 16, 10, 16},
1961 {&generic32_cost
, 16, 7, 16, 7, 16},
1962 {&generic64_cost
, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost
, 32, 24, 32, 7, 32}
1966 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
1997 PTA_PREFETCH_SSE
= 1 << 4,
1999 PTA_3DNOW_A
= 1 << 6,
2003 PTA_POPCNT
= 1 << 10,
2005 PTA_SSE4A
= 1 << 12,
2006 PTA_NO_SAHF
= 1 << 13,
2007 PTA_SSE4_1
= 1 << 14,
2008 PTA_SSE4_2
= 1 << 15,
2014 const char *const name
; /* processor name or nickname. */
2015 const enum processor_type processor
;
2016 const unsigned /*enum pta_flags*/ flags
;
2018 const processor_alias_table
[] =
2020 {"i386", PROCESSOR_I386
, 0},
2021 {"i486", PROCESSOR_I486
, 0},
2022 {"i586", PROCESSOR_PENTIUM
, 0},
2023 {"pentium", PROCESSOR_PENTIUM
, 0},
2024 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
2025 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
2026 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
2027 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
2028 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2029 {"i686", PROCESSOR_PENTIUMPRO
, 0},
2030 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
2031 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
2032 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2033 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2034 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2035 {"pentium4", PROCESSOR_PENTIUM4
, PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2036 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2037 {"prescott", PROCESSOR_NOCONA
, PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2038 {"nocona", PROCESSOR_NOCONA
, (PTA_64BIT
2039 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2040 | PTA_CX16
| PTA_NO_SAHF
)},
2041 {"core2", PROCESSOR_CORE2
, (PTA_64BIT
2042 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2045 {"geode", PROCESSOR_GEODE
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2046 |PTA_PREFETCH_SSE
)},
2047 {"k6", PROCESSOR_K6
, PTA_MMX
},
2048 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2049 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2050 {"athlon", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2051 | PTA_PREFETCH_SSE
)},
2052 {"athlon-tbird", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2053 | PTA_PREFETCH_SSE
)},
2054 {"athlon-4", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2056 {"athlon-xp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2058 {"athlon-mp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2060 {"x86-64", PROCESSOR_K8
, (PTA_64BIT
2061 | PTA_MMX
| PTA_SSE
| PTA_SSE2
2063 {"k8", PROCESSOR_K8
, (PTA_64BIT
2064 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2065 | PTA_SSE
| PTA_SSE2
2067 {"k8-sse3", PROCESSOR_K8
, (PTA_64BIT
2068 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2069 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2071 {"opteron", PROCESSOR_K8
, (PTA_64BIT
2072 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2073 | PTA_SSE
| PTA_SSE2
2075 {"opteron-sse3", PROCESSOR_K8
, (PTA_64BIT
2076 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2077 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2079 {"athlon64", PROCESSOR_K8
, (PTA_64BIT
2080 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2081 | PTA_SSE
| PTA_SSE2
2083 {"athlon64-sse3", PROCESSOR_K8
, (PTA_64BIT
2084 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2085 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2087 {"athlon-fx", PROCESSOR_K8
, (PTA_64BIT
2088 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2089 | PTA_SSE
| PTA_SSE2
2091 {"amdfam10", PROCESSOR_AMDFAM10
, (PTA_64BIT
2092 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2093 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2095 | PTA_CX16
| PTA_ABM
)},
2096 {"barcelona", PROCESSOR_AMDFAM10
, (PTA_64BIT
2097 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2098 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2100 | PTA_CX16
| PTA_ABM
)},
2101 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
2102 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
2105 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
2107 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2108 SUBTARGET_OVERRIDE_OPTIONS
;
2111 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2112 SUBSUBTARGET_OVERRIDE_OPTIONS
;
2115 /* -fPIC is the default for x86_64. */
2116 if (TARGET_MACHO
&& TARGET_64BIT
)
2119 /* Set the default values for switches whose default depends on TARGET_64BIT
2120 in case they weren't overwritten by command line options. */
2123 /* Mach-O doesn't support omitting the frame pointer for now. */
2124 if (flag_omit_frame_pointer
== 2)
2125 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
2126 if (flag_asynchronous_unwind_tables
== 2)
2127 flag_asynchronous_unwind_tables
= 1;
2128 if (flag_pcc_struct_return
== 2)
2129 flag_pcc_struct_return
= 0;
2133 if (flag_omit_frame_pointer
== 2)
2134 flag_omit_frame_pointer
= 0;
2135 if (flag_asynchronous_unwind_tables
== 2)
2136 flag_asynchronous_unwind_tables
= 0;
2137 if (flag_pcc_struct_return
== 2)
2138 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2141 /* Need to check -mtune=generic first. */
2142 if (ix86_tune_string
)
2144 if (!strcmp (ix86_tune_string
, "generic")
2145 || !strcmp (ix86_tune_string
, "i686")
2146 /* As special support for cross compilers we read -mtune=native
2147 as -mtune=generic. With native compilers we won't see the
2148 -mtune=native, as it was changed by the driver. */
2149 || !strcmp (ix86_tune_string
, "native"))
2152 ix86_tune_string
= "generic64";
2154 ix86_tune_string
= "generic32";
2156 else if (!strncmp (ix86_tune_string
, "generic", 7))
2157 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2161 if (ix86_arch_string
)
2162 ix86_tune_string
= ix86_arch_string
;
2163 if (!ix86_tune_string
)
2165 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2166 ix86_tune_defaulted
= 1;
2169 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2170 need to use a sensible tune option. */
2171 if (!strcmp (ix86_tune_string
, "generic")
2172 || !strcmp (ix86_tune_string
, "x86-64")
2173 || !strcmp (ix86_tune_string
, "i686"))
2176 ix86_tune_string
= "generic64";
2178 ix86_tune_string
= "generic32";
2181 if (ix86_stringop_string
)
2183 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2184 stringop_alg
= rep_prefix_1_byte
;
2185 else if (!strcmp (ix86_stringop_string
, "libcall"))
2186 stringop_alg
= libcall
;
2187 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2188 stringop_alg
= rep_prefix_4_byte
;
2189 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2190 stringop_alg
= rep_prefix_8_byte
;
2191 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2192 stringop_alg
= loop_1_byte
;
2193 else if (!strcmp (ix86_stringop_string
, "loop"))
2194 stringop_alg
= loop
;
2195 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2196 stringop_alg
= unrolled_loop
;
2198 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2200 if (!strcmp (ix86_tune_string
, "x86-64"))
2201 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2202 "-mtune=generic instead as appropriate.");
2204 if (!ix86_arch_string
)
2205 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2207 ix86_arch_specified
= 1;
2209 if (!strcmp (ix86_arch_string
, "generic"))
2210 error ("generic CPU can be used only for -mtune= switch");
2211 if (!strncmp (ix86_arch_string
, "generic", 7))
2212 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2214 if (ix86_cmodel_string
!= 0)
2216 if (!strcmp (ix86_cmodel_string
, "small"))
2217 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2218 else if (!strcmp (ix86_cmodel_string
, "medium"))
2219 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2220 else if (!strcmp (ix86_cmodel_string
, "large"))
2221 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2223 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2224 else if (!strcmp (ix86_cmodel_string
, "32"))
2225 ix86_cmodel
= CM_32
;
2226 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2227 ix86_cmodel
= CM_KERNEL
;
2229 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2233 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2234 use of rip-relative addressing. This eliminates fixups that
2235 would otherwise be needed if this object is to be placed in a
2236 DLL, and is essentially just as efficient as direct addressing. */
2237 if (TARGET_64BIT_MS_ABI
)
2238 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
2239 else if (TARGET_64BIT
)
2240 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2242 ix86_cmodel
= CM_32
;
2244 if (ix86_asm_string
!= 0)
2247 && !strcmp (ix86_asm_string
, "intel"))
2248 ix86_asm_dialect
= ASM_INTEL
;
2249 else if (!strcmp (ix86_asm_string
, "att"))
2250 ix86_asm_dialect
= ASM_ATT
;
2252 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2254 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2255 error ("code model %qs not supported in the %s bit mode",
2256 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2257 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
2258 sorry ("%i-bit mode not compiled in",
2259 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
2261 for (i
= 0; i
< pta_size
; i
++)
2262 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2264 ix86_arch
= processor_alias_table
[i
].processor
;
2265 /* Default cpu tuning to the architecture. */
2266 ix86_tune
= ix86_arch
;
2268 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2269 error ("CPU you selected does not support x86-64 "
2272 if (processor_alias_table
[i
].flags
& PTA_MMX
2273 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
2274 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2275 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2276 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
2277 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
2278 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2279 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
2280 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
2281 if (processor_alias_table
[i
].flags
& PTA_SSE
2282 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
2283 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2284 if (processor_alias_table
[i
].flags
& PTA_SSE2
2285 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
2286 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2287 if (processor_alias_table
[i
].flags
& PTA_SSE3
2288 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
2289 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2290 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2291 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
2292 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2293 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
2294 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
2295 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2296 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
2297 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
2298 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
2299 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2300 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
2301 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2302 if (processor_alias_table
[i
].flags
& PTA_SSE5
2303 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE5
))
2304 ix86_isa_flags
|= OPTION_MASK_ISA_SSE5
;
2306 if (processor_alias_table
[i
].flags
& PTA_ABM
)
2308 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2309 x86_cmpxchg16b
= true;
2310 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
))
2312 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
2313 x86_prefetch_sse
= true;
2314 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2321 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2323 ix86_arch_mask
= 1u << ix86_arch
;
2324 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2325 ix86_arch_features
[i
] &= ix86_arch_mask
;
2327 for (i
= 0; i
< pta_size
; i
++)
2328 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2330 ix86_tune
= processor_alias_table
[i
].processor
;
2331 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2333 if (ix86_tune_defaulted
)
2335 ix86_tune_string
= "x86-64";
2336 for (i
= 0; i
< pta_size
; i
++)
2337 if (! strcmp (ix86_tune_string
,
2338 processor_alias_table
[i
].name
))
2340 ix86_tune
= processor_alias_table
[i
].processor
;
2343 error ("CPU you selected does not support x86-64 "
2346 /* Intel CPUs have always interpreted SSE prefetch instructions as
2347 NOPs; so, we can enable SSE prefetch instructions even when
2348 -mtune (rather than -march) points us to a processor that has them.
2349 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2350 higher processors. */
2352 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
2353 x86_prefetch_sse
= true;
2357 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2359 ix86_tune_mask
= 1u << ix86_tune
;
2360 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2361 ix86_tune_features
[i
] &= ix86_tune_mask
;
2364 ix86_cost
= &size_cost
;
2366 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2368 /* Arrange to set up i386_stack_locals for all functions. */
2369 init_machine_status
= ix86_init_machine_status
;
2371 /* Validate -mregparm= value. */
2372 if (ix86_regparm_string
)
2375 warning (0, "-mregparm is ignored in 64-bit mode");
2376 i
= atoi (ix86_regparm_string
);
2377 if (i
< 0 || i
> REGPARM_MAX
)
2378 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2383 ix86_regparm
= REGPARM_MAX
;
2385 /* If the user has provided any of the -malign-* options,
2386 warn and use that value only if -falign-* is not set.
2387 Remove this code in GCC 3.2 or later. */
2388 if (ix86_align_loops_string
)
2390 warning (0, "-malign-loops is obsolete, use -falign-loops");
2391 if (align_loops
== 0)
2393 i
= atoi (ix86_align_loops_string
);
2394 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2395 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2397 align_loops
= 1 << i
;
2401 if (ix86_align_jumps_string
)
2403 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2404 if (align_jumps
== 0)
2406 i
= atoi (ix86_align_jumps_string
);
2407 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2408 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2410 align_jumps
= 1 << i
;
2414 if (ix86_align_funcs_string
)
2416 warning (0, "-malign-functions is obsolete, use -falign-functions");
2417 if (align_functions
== 0)
2419 i
= atoi (ix86_align_funcs_string
);
2420 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2421 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2423 align_functions
= 1 << i
;
2427 /* Default align_* from the processor table. */
2428 if (align_loops
== 0)
2430 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2431 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2433 if (align_jumps
== 0)
2435 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2436 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2438 if (align_functions
== 0)
2440 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2443 /* Validate -mbranch-cost= value, or provide default. */
2444 ix86_branch_cost
= ix86_cost
->branch_cost
;
2445 if (ix86_branch_cost_string
)
2447 i
= atoi (ix86_branch_cost_string
);
2449 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2451 ix86_branch_cost
= i
;
2453 if (ix86_section_threshold_string
)
2455 i
= atoi (ix86_section_threshold_string
);
2457 error ("-mlarge-data-threshold=%d is negative", i
);
2459 ix86_section_threshold
= i
;
2462 if (ix86_tls_dialect_string
)
2464 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2465 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2466 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2467 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2468 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2469 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2471 error ("bad value (%s) for -mtls-dialect= switch",
2472 ix86_tls_dialect_string
);
2475 if (ix87_precision_string
)
2477 i
= atoi (ix87_precision_string
);
2478 if (i
!= 32 && i
!= 64 && i
!= 80)
2479 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2484 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
2486 /* Enable by default the SSE and MMX builtins. Do allow the user to
2487 explicitly disable any of these. In particular, disabling SSE and
2488 MMX for kernel code is extremely useful. */
2489 if (!ix86_arch_specified
)
2491 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
2492 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
2495 warning (0, "-mrtd is ignored in 64bit mode");
2499 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
2501 if (!ix86_arch_specified
)
2503 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
2505 /* i386 ABI does not specify red zone. It still makes sense to use it
2506 when programmer takes care to stack from being destroyed. */
2507 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2508 target_flags
|= MASK_NO_RED_ZONE
;
2511 /* Keep nonleaf frame pointers. */
2512 if (flag_omit_frame_pointer
)
2513 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2514 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2515 flag_omit_frame_pointer
= 1;
2517 /* If we're doing fast math, we don't care about comparison order
2518 wrt NaNs. This lets us use a shorter comparison sequence. */
2519 if (flag_finite_math_only
)
2520 target_flags
&= ~MASK_IEEE_FP
;
2522 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2523 since the insns won't need emulation. */
2524 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2525 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2527 /* Likewise, if the target doesn't have a 387, or we've specified
2528 software floating point, don't use 387 inline intrinsics. */
2530 target_flags
|= MASK_NO_FANCY_MATH_387
;
2532 /* Turn on SSE4A bultins for -msse5. */
2534 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2536 /* Turn on SSE4.1 builtins for -msse4.2. */
2538 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2540 /* Turn on SSSE3 builtins for -msse4.1. */
2542 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2544 /* Turn on SSE3 builtins for -mssse3. */
2546 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2548 /* Turn on SSE3 builtins for -msse4a. */
2550 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2552 /* Turn on SSE2 builtins for -msse3. */
2554 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2556 /* Turn on SSE builtins for -msse2. */
2558 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2560 /* Turn on MMX builtins for -msse. */
2563 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
2564 x86_prefetch_sse
= true;
2567 /* Turn on MMX builtins for 3Dnow. */
2569 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2572 if (TARGET_SSE4_2
|| TARGET_ABM
)
2575 /* Validate -mpreferred-stack-boundary= value, or provide default.
2576 The default of 128 bits is for Pentium III's SSE __m128. We can't
2577 change it because of optimize_size. Otherwise, we can't mix object
2578 files compiled with -Os and -On. */
2579 ix86_preferred_stack_boundary
= 128;
2580 if (ix86_preferred_stack_boundary_string
)
2582 i
= atoi (ix86_preferred_stack_boundary_string
);
2583 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2584 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2585 TARGET_64BIT
? 4 : 2);
2587 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2590 /* Accept -msseregparm only if at least SSE support is enabled. */
2591 if (TARGET_SSEREGPARM
2593 error ("-msseregparm used without SSE enabled");
2595 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2596 if (ix86_fpmath_string
!= 0)
2598 if (! strcmp (ix86_fpmath_string
, "387"))
2599 ix86_fpmath
= FPMATH_387
;
2600 else if (! strcmp (ix86_fpmath_string
, "sse"))
2604 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2605 ix86_fpmath
= FPMATH_387
;
2608 ix86_fpmath
= FPMATH_SSE
;
2610 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2611 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2615 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2616 ix86_fpmath
= FPMATH_387
;
2618 else if (!TARGET_80387
)
2620 warning (0, "387 instruction set disabled, using SSE arithmetics");
2621 ix86_fpmath
= FPMATH_SSE
;
2624 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
2627 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2630 /* If the i387 is disabled, then do not return values in it. */
2632 target_flags
&= ~MASK_FLOAT_RETURNS
;
2634 /* Use external vectorized library in vectorizing intrinsics. */
2635 if (ix86_veclibabi_string
)
2637 if (strcmp (ix86_veclibabi_string
, "acml") == 0)
2638 ix86_veclib_handler
= ix86_veclibabi_acml
;
2640 error ("unknown vectorization library ABI type (%s) for "
2641 "-mveclibabi= switch", ix86_veclibabi_string
);
2644 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2645 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2647 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2649 /* ??? Unwind info is not correct around the CFG unless either a frame
2650 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2651 unwind info generation to be aware of the CFG and propagating states
2653 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2654 || flag_exceptions
|| flag_non_call_exceptions
)
2655 && flag_omit_frame_pointer
2656 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2658 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2659 warning (0, "unwind tables currently require either a frame pointer "
2660 "or -maccumulate-outgoing-args for correctness");
2661 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2664 /* For sane SSE instruction set generation we need fcomi instruction.
2665 It is safe to enable all CMOVE instructions. */
2669 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2672 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2673 p
= strchr (internal_label_prefix
, 'X');
2674 internal_label_prefix_len
= p
- internal_label_prefix
;
2678 /* When scheduling description is not available, disable scheduler pass
2679 so it won't slow down the compilation and make x87 code slower. */
2680 if (!TARGET_SCHEDULE
)
2681 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2683 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2684 set_param_value ("simultaneous-prefetches",
2685 ix86_cost
->simultaneous_prefetches
);
2686 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2687 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2688 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE
))
2689 set_param_value ("l1-cache-size", ix86_cost
->l1_cache_size
);
2690 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE
))
2691 set_param_value ("l2-cache-size", ix86_cost
->l2_cache_size
);
2693 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2694 can be optimized to ap = __builtin_next_arg (0). */
2695 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
2696 targetm
.expand_builtin_va_start
= NULL
;
2699 /* Return true if this goes in large data/bss. */
2702 ix86_in_large_data_p (tree exp
)
2704 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2707 /* Functions are never large data. */
2708 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2711 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2713 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2714 if (strcmp (section
, ".ldata") == 0
2715 || strcmp (section
, ".lbss") == 0)
2721 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2723 /* If this is an incomplete type with size 0, then we can't put it
2724 in data because it might be too big when completed. */
2725 if (!size
|| size
> ix86_section_threshold
)
2732 /* Switch to the appropriate section for output of DECL.
2733 DECL is either a `VAR_DECL' node or a constant of some sort.
2734 RELOC indicates whether forming the initial value of DECL requires
2735 link-time relocations. */
2737 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2741 x86_64_elf_select_section (tree decl
, int reloc
,
2742 unsigned HOST_WIDE_INT align
)
2744 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2745 && ix86_in_large_data_p (decl
))
2747 const char *sname
= NULL
;
2748 unsigned int flags
= SECTION_WRITE
;
2749 switch (categorize_decl_for_section (decl
, reloc
))
2754 case SECCAT_DATA_REL
:
2755 sname
= ".ldata.rel";
2757 case SECCAT_DATA_REL_LOCAL
:
2758 sname
= ".ldata.rel.local";
2760 case SECCAT_DATA_REL_RO
:
2761 sname
= ".ldata.rel.ro";
2763 case SECCAT_DATA_REL_RO_LOCAL
:
2764 sname
= ".ldata.rel.ro.local";
2768 flags
|= SECTION_BSS
;
2771 case SECCAT_RODATA_MERGE_STR
:
2772 case SECCAT_RODATA_MERGE_STR_INIT
:
2773 case SECCAT_RODATA_MERGE_CONST
:
2777 case SECCAT_SRODATA
:
2784 /* We don't split these for medium model. Place them into
2785 default sections and hope for best. */
2790 /* We might get called with string constants, but get_named_section
2791 doesn't like them as they are not DECLs. Also, we need to set
2792 flags in that case. */
2794 return get_section (sname
, flags
, NULL
);
2795 return get_named_section (decl
, sname
, reloc
);
2798 return default_elf_select_section (decl
, reloc
, align
);
2801 /* Build up a unique section name, expressed as a
2802 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2803 RELOC indicates whether the initial value of EXP requires
2804 link-time relocations. */
2806 static void ATTRIBUTE_UNUSED
2807 x86_64_elf_unique_section (tree decl
, int reloc
)
2809 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2810 && ix86_in_large_data_p (decl
))
2812 const char *prefix
= NULL
;
2813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2814 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2816 switch (categorize_decl_for_section (decl
, reloc
))
2819 case SECCAT_DATA_REL
:
2820 case SECCAT_DATA_REL_LOCAL
:
2821 case SECCAT_DATA_REL_RO
:
2822 case SECCAT_DATA_REL_RO_LOCAL
:
2823 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2826 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2829 case SECCAT_RODATA_MERGE_STR
:
2830 case SECCAT_RODATA_MERGE_STR_INIT
:
2831 case SECCAT_RODATA_MERGE_CONST
:
2832 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2834 case SECCAT_SRODATA
:
2841 /* We don't split these for medium model. Place them into
2842 default sections and hope for best. */
2850 plen
= strlen (prefix
);
2852 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2853 name
= targetm
.strip_name_encoding (name
);
2854 nlen
= strlen (name
);
2856 string
= (char *) alloca (nlen
+ plen
+ 1);
2857 memcpy (string
, prefix
, plen
);
2858 memcpy (string
+ plen
, name
, nlen
+ 1);
2860 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2864 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects above the section threshold go into .largecomm so the
     medium-model large-data area is used for them.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2889 /* Utility function for targets to use in implementing
2890 ASM_OUTPUT_ALIGNED_BSS. */
2893 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2894 const char *name
, unsigned HOST_WIDE_INT size
,
2897 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2898 && size
> (unsigned int)ix86_section_threshold
)
2899 switch_to_section (get_named_section (decl
, ".lbss", 0));
2901 switch_to_section (bss_section
);
2902 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2903 #ifdef ASM_DECLARE_OBJECT_NAME
2904 last_assemble_variable_decl
= decl
;
2905 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2907 /* Standard thing is just output label for the object. */
2908 ASM_OUTPUT_LABEL (file
, name
);
2909 #endif /* ASM_DECLARE_OBJECT_NAME */
2910 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2914 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2916 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2917 make the problem with not enough registers even worse. */
2918 #ifdef INSN_SCHEDULING
2920 flag_schedule_insns
= 0;
2924 /* The Darwin libraries never set errno, so we might as well
2925 avoid calling them when that's the only reason we would. */
2926 flag_errno_math
= 0;
2928 /* The default values of these switches depend on the TARGET_64BIT
2929 that is not known at this moment. Mark these values with 2 and
2930 let user the to override these. In case there is no command line option
2931 specifying them, we will set the defaults in override_options. */
2933 flag_omit_frame_pointer
= 2;
2934 flag_pcc_struct_return
= 2;
2935 flag_asynchronous_unwind_tables
= 2;
2936 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2937 SUBTARGET_OPTIMIZATION_OPTIONS
;
2941 /* Decide whether we can make a sibling call to a function. DECL is the
2942 declaration of the function being targeted by the call and EXP is the
2943 CALL_EXPR representing the call. */
2946 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2951 /* If we are generating position-independent code, we cannot sibcall
2952 optimize any indirect call, or a direct call to a global function,
2953 as the PLT requires %ebx be live. */
2954 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2961 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2962 if (POINTER_TYPE_P (func
))
2963 func
= TREE_TYPE (func
);
2966 /* Check that the return value locations are the same. Like
2967 if we are returning floats on the 80387 register stack, we cannot
2968 make a sibcall from a function that doesn't return a float to a
2969 function that does or, conversely, from a function that does return
2970 a float to a function that doesn't; the necessary stack adjustment
2971 would not be executed. This is also the place we notice
2972 differences in the return value ABI. Note that it is ok for one
2973 of the functions to have void return type as long as the return
2974 value of the other is passed in a register. */
2975 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2976 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2978 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2980 if (!rtx_equal_p (a
, b
))
2983 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2985 else if (!rtx_equal_p (a
, b
))
2988 /* If this call is indirect, we'll need to be able to use a call-clobbered
2989 register for the address of the target function. Make sure that all
2990 such registers are not used for passing parameters. */
2991 if (!decl
&& !TARGET_64BIT
)
2995 /* We're looking at the CALL_EXPR, we need the type of the function. */
2996 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2997 type
= TREE_TYPE (type
); /* pointer type */
2998 type
= TREE_TYPE (type
); /* function type */
3000 if (ix86_function_regparm (type
, NULL
) >= 3)
3002 /* ??? Need to count the actual number of registers to be used,
3003 not the possible number of registers. Fix later. */
3008 /* Dllimport'd functions are also called indirectly. */
3009 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3010 && decl
&& DECL_DLLIMPORT_P (decl
)
3011 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
3014 /* If we forced aligned the stack, then sibcalling would unalign the
3015 stack, which may break the called function. */
3016 if (cfun
->machine
->force_align_arg_pointer
)
3019 /* Otherwise okay. That also includes certain types of indirect calls. */
3023 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3024 calling convention attributes;
3025 arguments as in struct attribute_spec.handler. */
3028 ix86_handle_cconv_attribute (tree
*node
, tree name
,
3030 int flags ATTRIBUTE_UNUSED
,
3033 if (TREE_CODE (*node
) != FUNCTION_TYPE
3034 && TREE_CODE (*node
) != METHOD_TYPE
3035 && TREE_CODE (*node
) != FIELD_DECL
3036 && TREE_CODE (*node
) != TYPE_DECL
)
3038 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
3039 IDENTIFIER_POINTER (name
));
3040 *no_add_attrs
= true;
3044 /* Can combine regparm with all attributes but fastcall. */
3045 if (is_attribute_p ("regparm", name
))
3049 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3051 error ("fastcall and regparm attributes are not compatible");
3054 cst
= TREE_VALUE (args
);
3055 if (TREE_CODE (cst
) != INTEGER_CST
)
3057 warning (OPT_Wattributes
,
3058 "%qs attribute requires an integer constant argument",
3059 IDENTIFIER_POINTER (name
));
3060 *no_add_attrs
= true;
3062 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
3064 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
3065 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
3066 *no_add_attrs
= true;
3070 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3071 TYPE_ATTRIBUTES (*node
))
3072 && compare_tree_int (cst
, REGPARM_MAX
-1))
3074 error ("%s functions limited to %d register parameters",
3075 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
3083 /* Do not warn when emulating the MS ABI. */
3084 if (!TARGET_64BIT_MS_ABI
)
3085 warning (OPT_Wattributes
, "%qs attribute ignored",
3086 IDENTIFIER_POINTER (name
));
3087 *no_add_attrs
= true;
3091 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3092 if (is_attribute_p ("fastcall", name
))
3094 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3096 error ("fastcall and cdecl attributes are not compatible");
3098 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3100 error ("fastcall and stdcall attributes are not compatible");
3102 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
3104 error ("fastcall and regparm attributes are not compatible");
3108 /* Can combine stdcall with fastcall (redundant), regparm and
3110 else if (is_attribute_p ("stdcall", name
))
3112 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3114 error ("stdcall and cdecl attributes are not compatible");
3116 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3118 error ("stdcall and fastcall attributes are not compatible");
3122 /* Can combine cdecl with regparm and sseregparm. */
3123 else if (is_attribute_p ("cdecl", name
))
3125 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3127 error ("stdcall and cdecl attributes are not compatible");
3129 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3131 error ("fastcall and cdecl attributes are not compatible");
3135 /* Can combine sseregparm with all attributes. */
3140 /* Return 0 if the attributes for two types are incompatible, 1 if they
3141 are compatible, and 2 if they are nearly compatible (which causes a
3142 warning to be generated). */
3145 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
3147 /* Check for mismatch of non-default calling convention. */
3148 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
3150 if (TREE_CODE (type1
) != FUNCTION_TYPE
3151 && TREE_CODE (type1
) != METHOD_TYPE
)
3154 /* Check for mismatched fastcall/regparm types. */
3155 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
3156 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
3157 || (ix86_function_regparm (type1
, NULL
)
3158 != ix86_function_regparm (type2
, NULL
)))
3161 /* Check for mismatched sseregparm types. */
3162 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
3163 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
3166 /* Check for mismatched return types (cdecl vs stdcall). */
3167 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
3168 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
3174 /* Return the regparm value for a function with the indicated TYPE and DECL.
3175 DECL may be NULL when calling function indirectly
3176 or considering a libcall. */
3179 ix86_function_regparm (const_tree type
, const_tree decl
)
3182 int regparm
= ix86_regparm
;
3187 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
3189 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
3191 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
3194 /* Use register calling convention for local functions when possible. */
3195 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
3196 && flag_unit_at_a_time
&& !profile_flag
)
3198 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3199 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3202 int local_regparm
, globals
= 0, regno
;
3205 /* Make sure no regparm register is taken by a
3206 fixed register or global register variable. */
3207 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3208 if (global_regs
[local_regparm
] || fixed_regs
[local_regparm
])
3211 /* We can't use regparm(3) for nested functions as these use
3212 static chain pointer in third argument. */
3213 if (local_regparm
== 3
3214 && (decl_function_context (decl
)
3215 || ix86_force_align_arg_pointer
)
3216 && !DECL_NO_STATIC_CHAIN (decl
))
3219 /* If the function realigns its stackpointer, the prologue will
3220 clobber %ecx. If we've already generated code for the callee,
3221 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3222 scanning the attributes for the self-realigning property. */
3223 f
= DECL_STRUCT_FUNCTION (decl
);
3224 if (local_regparm
== 3
3225 && (f
? !!f
->machine
->force_align_arg_pointer
3226 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
3227 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3230 /* Each global register variable or fixed register usage
3231 increases register pressure, so less registers should be
3232 used for argument passing. This functionality can be
3233 overriden by explicit regparm value. */
3234 for (regno
= 0; regno
< 6; regno
++)
3235 if (global_regs
[regno
] || fixed_regs
[regno
])
3238 = globals
< local_regparm
? local_regparm
- globals
: 0;
3240 if (local_regparm
> regparm
)
3241 regparm
= local_regparm
;
3248 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3249 DFmode (2) arguments in SSE registers for a function with the
3250 indicated TYPE and DECL. DECL may be NULL when calling function
3251 indirectly or considering a libcall. Otherwise return 0. */
3254 ix86_function_sseregparm (const_tree type
, const_tree decl
)
3256 gcc_assert (!TARGET_64BIT
);
3258 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3259 by the sseregparm attribute. */
3260 if (TARGET_SSEREGPARM
3261 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3266 error ("Calling %qD with attribute sseregparm without "
3267 "SSE/SSE2 enabled", decl
);
3269 error ("Calling %qT with attribute sseregparm without "
3270 "SSE/SSE2 enabled", type
);
3277 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3278 (and DFmode for SSE2) arguments in SSE registers. */
3279 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3281 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3282 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3284 return TARGET_SSE2
? 2 : 1;
3290 /* Return true if EAX is live at the start of the function. Used by
3291 ix86_expand_prologue to determine if we need special help before
3292 calling allocate_stack_worker. */
3295 ix86_eax_live_at_start_p (void)
3297 /* Cheat. Don't bother working forward from ix86_function_regparm
3298 to the function type to whether an actual argument is located in
3299 eax. Instead just look at cfg info, which is still close enough
3300 to correct at this point. This gives false positives for broken
3301 functions that might use uninitialized data that happens to be
3302 allocated in eax, but who cares? */
3303 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
3306 /* Value is the number of bytes of arguments automatically
3307 popped when returning from a subroutine call.
3308 FUNDECL is the declaration node of the function (as a tree),
3309 FUNTYPE is the data type of the function (as a tree),
3310 or for a library call it is an identifier node for the subroutine name.
3311 SIZE is the number of bytes of arguments passed on the stack.
3313 On the 80386, the RTD insn may be used to pop them if the number
3314 of args is fixed, but if the number is variable then the caller
3315 must pop them all. RTD can't be used for library calls now
3316 because the library is compiled with the Unix compiler.
3317 Use of RTD is a selectable option, since it is incompatible with
3318 standard Unix calling sequences. If the option is not selected,
3319 the caller must always pop the args.
3321 The attribute stdcall is equivalent to RTD on a per module basis. */
3324 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3328 /* None of the 64-bit ABIs pop arguments. */
3332 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3334 /* Cdecl functions override -mrtd, and never pop the stack. */
3335 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
3337 /* Stdcall and fastcall functions will pop the stack if not
3339 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3340 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3343 if (rtd
&& ! stdarg_p (funtype
))
3347 /* Lose any fake structure return argument if it is passed on the stack. */
3348 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3349 && !KEEP_AGGREGATE_RETURN_POINTER
)
3351 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3353 return GET_MODE_SIZE (Pmode
);
3359 /* Argument support functions. */
3361 /* Return true when register may be used to pass function parameters. */
3363 ix86_function_arg_regno_p (int regno
)
3366 const int *parm_regs
;
3371 return (regno
< REGPARM_MAX
3372 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3374 return (regno
< REGPARM_MAX
3375 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3376 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3377 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3378 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3383 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3388 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3389 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3393 /* RAX is used as hidden argument to va_arg functions. */
3394 if (!TARGET_64BIT_MS_ABI
&& regno
== AX_REG
)
3397 if (TARGET_64BIT_MS_ABI
)
3398 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3400 parm_regs
= x86_64_int_parameter_registers
;
3401 for (i
= 0; i
< REGPARM_MAX
; i
++)
3402 if (regno
== parm_regs
[i
])
3407 /* Return if we do not know how to pass TYPE solely in registers. */
3410 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
3412 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3415 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3416 The layout_type routine is crafty and tries to trick us into passing
3417 currently unsupported vector types on the stack by using TImode. */
3418 return (!TARGET_64BIT
&& mode
== TImode
3419 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3422 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3423 for a call to a function whose data type is FNTYPE.
3424 For a library call, FNTYPE is 0. */
3427 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3428 tree fntype
, /* tree ptr for function decl */
3429 rtx libname
, /* SYMBOL_REF of library name or 0 */
3432 memset (cum
, 0, sizeof (*cum
));
3434 /* Set up the number of registers to use for passing arguments. */
3435 cum
->nregs
= ix86_regparm
;
3437 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3439 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3440 cum
->warn_sse
= true;
3441 cum
->warn_mmx
= true;
3442 cum
->maybe_vaarg
= (fntype
3443 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
3448 /* If there are variable arguments, then we won't pass anything
3449 in registers in 32-bit mode. */
3450 if (cum
->maybe_vaarg
)
3460 /* Use ecx and edx registers if function has fastcall attribute,
3461 else look for regparm information. */
3464 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3470 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3473 /* Set up the number of SSE registers used for passing SFmode
3474 and DFmode arguments. Warn for mismatching ABI. */
3475 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3479 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3480 But in the case of vector types, it is some vector mode.
3482 When we have only some of our vector isa extensions enabled, then there
3483 are some modes for which vector_mode_supported_p is false. For these
3484 modes, the generic vector support in gcc will choose some non-vector mode
3485 in order to implement the type. By computing the natural mode, we'll
3486 select the proper ABI location for the operand and not depend on whatever
3487 the middle-end decides to do with these vector types. */
3489 static enum machine_mode
3490 type_natural_mode (const_tree type
)
3492 enum machine_mode mode
= TYPE_MODE (type
);
3494 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3496 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3497 if ((size
== 8 || size
== 16)
3498 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3499 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3501 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3503 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3504 mode
= MIN_MODE_VECTOR_FLOAT
;
3506 mode
= MIN_MODE_VECTOR_INT
;
3508 /* Get the mode which has this inner mode and number of units. */
3509 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3510 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3511 && GET_MODE_INNER (mode
) == innermode
)
3521 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3522 this may not agree with the mode that the type system has chosen for the
3523 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3524 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3527 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3532 if (orig_mode
!= BLKmode
)
3533 tmp
= gen_rtx_REG (orig_mode
, regno
);
3536 tmp
= gen_rtx_REG (mode
, regno
);
3537 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3538 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3544 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3545 of this code is to classify each 8bytes of incoming argument by the register
3546 class and assign registers accordingly. */
3548 /* Return the union class of CLASS1 and CLASS2.
3549 See the x86-64 PS ABI for details. */
3551 static enum x86_64_reg_class
3552 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3554 /* Rule #1: If both classes are equal, this is the resulting class. */
3555 if (class1
== class2
)
3558 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3560 if (class1
== X86_64_NO_CLASS
)
3562 if (class2
== X86_64_NO_CLASS
)
3565 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3566 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3567 return X86_64_MEMORY_CLASS
;
3569 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3570 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3571 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3572 return X86_64_INTEGERSI_CLASS
;
3573 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3574 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3575 return X86_64_INTEGER_CLASS
;
3577 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3579 if (class1
== X86_64_X87_CLASS
3580 || class1
== X86_64_X87UP_CLASS
3581 || class1
== X86_64_COMPLEX_X87_CLASS
3582 || class2
== X86_64_X87_CLASS
3583 || class2
== X86_64_X87UP_CLASS
3584 || class2
== X86_64_COMPLEX_X87_CLASS
)
3585 return X86_64_MEMORY_CLASS
;
3587 /* Rule #6: Otherwise class SSE is used. */
3588 return X86_64_SSE_CLASS
;
/* NOTE(review): this function is a lossy extraction -- the `3591'-style
   numbers are embedded original line numbers, logical lines are split
   across physical lines, and several original lines are absent.  Verify
   every claim below against a pristine copy of i386.c before editing.
   Per its own doc comment, this implements the x86-64 psABI recursive
   classification of an argument into per-eightbyte register classes.  */
3591 /* Classify the argument of type TYPE and mode MODE.
3592 CLASSES will be filled by the register class used to pass each word
3593 of the operand. The number of words is returned. In case the parameter
3594 should be passed in memory, 0 is returned. As a special case for zero
3595 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3597 BIT_OFFSET is used internally for handling records and specifies offset
3598 of the offset in bits modulo 256 to avoid overflow cases.
3600 See the x86-64 PS ABI for details.
3604 classify_argument (enum machine_mode mode
, const_tree type
,
3605 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
/* Size in bytes: BLKmode aggregates use the tree size, others the mode size.  */
3607 HOST_WIDE_INT bytes
=
3608 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3609 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3611 /* Variable sized entities are always passed/returned in memory. */
3615 if (mode
!= VOIDmode
3616 && targetm
.calls
.must_pass_in_stack (mode
, type
))
/* Aggregates: classify each word, merging field classes recursively.  */
3619 if (type
&& AGGREGATE_TYPE_P (type
))
3623 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3625 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3629 for (i
= 0; i
< words
; i
++)
3630 classes
[i
] = X86_64_NO_CLASS
;
3632 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3633 signalize memory class, so handle it as special case. */
3636 classes
[0] = X86_64_NO_CLASS
;
3640 /* Classify each field of record and merge classes. */
3641 switch (TREE_CODE (type
))
3644 /* And now merge the fields of structure. */
3645 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3647 if (TREE_CODE (field
) == FIELD_DECL
)
3651 if (TREE_TYPE (field
) == error_mark_node
)
3654 /* Bitfields are always classified as integer. Handle them
3655 early, since later code would consider them to be
3656 misaligned integers. */
3657 if (DECL_BIT_FIELD (field
))
3659 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3660 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3661 + tree_low_cst (DECL_SIZE (field
), 0)
3664 merge_classes (X86_64_INTEGER_CLASS
,
/* Non-bitfield member: recurse with the field's own bit position.  */
3669 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3670 TREE_TYPE (field
), subclasses
,
3671 (int_bit_position (field
)
3672 + bit_offset
) % 256);
3675 for (i
= 0; i
< num
; i
++)
3678 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3680 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3688 /* Arrays are handled as small records. */
3691 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3692 TREE_TYPE (type
), subclasses
, bit_offset
);
3696 /* The partial classes are now full classes. */
3697 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3698 subclasses
[0] = X86_64_SSE_CLASS
;
3699 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3700 subclasses
[0] = X86_64_INTEGER_CLASS
;
/* Replicate the element classification across all array words.  */
3702 for (i
= 0; i
< words
; i
++)
3703 classes
[i
] = subclasses
[i
% num
];
3708 case QUAL_UNION_TYPE
:
3709 /* Unions are similar to RECORD_TYPE but offset is always 0.
3711 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3713 if (TREE_CODE (field
) == FIELD_DECL
)
3717 if (TREE_TYPE (field
) == error_mark_node
)
3720 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3721 TREE_TYPE (field
), subclasses
,
3725 for (i
= 0; i
< num
; i
++)
3726 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3735 /* Final merger cleanup. */
3736 for (i
= 0; i
< words
; i
++)
3738 /* If one class is MEMORY, everything should be passed in
3740 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3743 /* The X86_64_SSEUP_CLASS should be always preceded by
3744 X86_64_SSE_CLASS. */
3745 if (classes
[i
] == X86_64_SSEUP_CLASS
3746 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3747 classes
[i
] = X86_64_SSE_CLASS
;
3749 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3750 if (classes
[i
] == X86_64_X87UP_CLASS
3751 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3752 classes
[i
] = X86_64_SSE_CLASS
;
3757 /* Compute alignment needed. We align all types to natural boundaries with
3758 exception of XFmode that is aligned to 64bits. */
3759 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3761 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3764 mode_alignment
= 128;
3765 else if (mode
== XCmode
)
3766 mode_alignment
= 256;
3767 if (COMPLEX_MODE_P (mode
))
3768 mode_alignment
/= 2;
3769 /* Misaligned fields are always returned in memory. */
3770 if (bit_offset
% mode_alignment
)
3774 /* for V1xx modes, just use the base mode */
3775 if (VECTOR_MODE_P (mode
)
3776 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3777 mode
= GET_MODE_INNER (mode
);
3779 /* Classification of atomic types. */
/* NOTE(review): the switch/case labels selecting among the mode cases
   below were lost in extraction; the assignments show only the class
   patterns (SSE, SSE+SSEUP, INTEGER[SI], X87(+UP), COMPLEX_X87, ...).  */
3784 classes
[0] = X86_64_SSE_CLASS
;
3787 classes
[0] = X86_64_SSE_CLASS
;
3788 classes
[1] = X86_64_SSEUP_CLASS
;
3797 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3798 classes
[0] = X86_64_INTEGERSI_CLASS
;
3800 classes
[0] = X86_64_INTEGER_CLASS
;
3804 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3809 if (!(bit_offset
% 64))
3810 classes
[0] = X86_64_SSESF_CLASS
;
3812 classes
[0] = X86_64_SSE_CLASS
;
3815 classes
[0] = X86_64_SSEDF_CLASS
;
3818 classes
[0] = X86_64_X87_CLASS
;
3819 classes
[1] = X86_64_X87UP_CLASS
;
3822 classes
[0] = X86_64_SSE_CLASS
;
3823 classes
[1] = X86_64_SSEUP_CLASS
;
3826 classes
[0] = X86_64_SSE_CLASS
;
3829 classes
[0] = X86_64_SSEDF_CLASS
;
3830 classes
[1] = X86_64_SSEDF_CLASS
;
3833 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3836 /* This modes is larger than 16 bytes. */
3844 classes
[0] = X86_64_SSE_CLASS
;
3845 classes
[1] = X86_64_SSEUP_CLASS
;
3851 classes
[0] = X86_64_SSE_CLASS
;
3857 gcc_assert (VECTOR_MODE_P (mode
));
3862 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3864 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3865 classes
[0] = X86_64_INTEGERSI_CLASS
;
3867 classes
[0] = X86_64_INTEGER_CLASS
;
3868 classes
[1] = X86_64_INTEGER_CLASS
;
3869 return 1 + (bytes
> 8);
/* NOTE(review): lossy extraction -- embedded original line numbers,
   split lines, missing statements (the per-case register counting code
   is gone).  Per the visible doc comment: counts INT/SSE registers
   needed by walking the classification from classify_argument; returns
   0 when the parameter goes in memory.  Verify against pristine i386.c.  */
3873 /* Examine the argument and return set number of register required in each
3874 class. Return 0 iff parameter should be passed in memory. */
3876 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
3877 int *int_nregs
, int *sse_nregs
)
3879 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3880 int n
= classify_argument (mode
, type
, regclass
, 0);
3886 for (n
--; n
>= 0; n
--)
3887 switch (regclass
[n
])
3889 case X86_64_INTEGER_CLASS
:
3890 case X86_64_INTEGERSI_CLASS
:
3893 case X86_64_SSE_CLASS
:
3894 case X86_64_SSESF_CLASS
:
3895 case X86_64_SSEDF_CLASS
:
3898 case X86_64_NO_CLASS
:
3899 case X86_64_SSEUP_CLASS
:
3901 case X86_64_X87_CLASS
:
3902 case X86_64_X87UP_CLASS
:
3906 case X86_64_COMPLEX_X87_CLASS
:
/* x87 complex values occupy 2 return slots but 0 argument registers.  */
3907 return in_return
? 2 : 0;
3908 case X86_64_MEMORY_CLASS
:
/* NOTE(review): lossy extraction (embedded line numbers, split and
   missing lines).  Builds the rtx describing where an argument/return
   value lives: a single REG for the simple one/two-class cases, else a
   PARALLEL of EXPR_LISTs, one entry per classified eightbyte.  Also
   emits once-only errors when the ABI demands SSE/x87 registers that
   the user disabled.  Verify against pristine i386.c before editing.  */
3914 /* Construct container for the argument used by GCC interface. See
3915 FUNCTION_ARG for the detailed description. */
3918 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3919 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
3920 const int *intreg
, int sse_regno
)
3922 /* The following variables hold the static issued_error state. */
3923 static bool issued_sse_arg_error
;
3924 static bool issued_sse_ret_error
;
3925 static bool issued_x87_ret_error
;
3927 enum machine_mode tmpmode
;
3929 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3930 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3934 int needed_sseregs
, needed_intregs
;
3935 rtx exp
[MAX_CLASSES
];
3938 n
= classify_argument (mode
, type
, regclass
, 0);
3941 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
/* Not enough free registers of the required kinds -> memory.  */
3944 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3947 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3948 some less clueful developer tries to use floating-point anyway. */
3949 if (needed_sseregs
&& !TARGET_SSE
)
3953 if (!issued_sse_ret_error
)
3955 error ("SSE register return with SSE disabled");
3956 issued_sse_ret_error
= true;
3959 else if (!issued_sse_arg_error
)
3961 error ("SSE register argument with SSE disabled");
3962 issued_sse_arg_error
= true;
3967 /* Likewise, error if the ABI requires us to return values in the
3968 x87 registers and the user specified -mno-80387. */
3969 if (!TARGET_80387
&& in_return
)
3970 for (i
= 0; i
< n
; i
++)
3971 if (regclass
[i
] == X86_64_X87_CLASS
3972 || regclass
[i
] == X86_64_X87UP_CLASS
3973 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
3975 if (!issued_x87_ret_error
)
3977 error ("x87 register return with x87 disabled");
3978 issued_x87_ret_error
= true;
3983 /* First construct simple cases. Avoid SCmode, since we want to use
3984 single register to pass this type. */
3985 if (n
== 1 && mode
!= SCmode
)
3986 switch (regclass
[0])
3988 case X86_64_INTEGER_CLASS
:
3989 case X86_64_INTEGERSI_CLASS
:
3990 return gen_rtx_REG (mode
, intreg
[0]);
3991 case X86_64_SSE_CLASS
:
3992 case X86_64_SSESF_CLASS
:
3993 case X86_64_SSEDF_CLASS
:
3994 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3995 case X86_64_X87_CLASS
:
3996 case X86_64_COMPLEX_X87_CLASS
:
3997 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3998 case X86_64_NO_CLASS
:
3999 /* Zero sized array, struct or class. */
/* Two-word fast paths: whole value in one SSE, x87, or register pair.  */
4004 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
4005 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
4006 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
4009 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
4010 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
4011 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
4012 && regclass
[1] == X86_64_INTEGER_CLASS
4013 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
4014 && intreg
[0] + 1 == intreg
[1])
4015 return gen_rtx_REG (mode
, intreg
[0]);
4017 /* Otherwise figure out the entries of the PARALLEL. */
4018 for (i
= 0; i
< n
; i
++)
4020 switch (regclass
[i
])
4022 case X86_64_NO_CLASS
:
4024 case X86_64_INTEGER_CLASS
:
4025 case X86_64_INTEGERSI_CLASS
:
4026 /* Merge TImodes on aligned occasions here too. */
4027 if (i
* 8 + 8 > bytes
)
4028 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
4029 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
4033 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4034 if (tmpmode
== BLKmode
)
4036 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4037 gen_rtx_REG (tmpmode
, *intreg
),
4041 case X86_64_SSESF_CLASS
:
4042 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4043 gen_rtx_REG (SFmode
,
4044 SSE_REGNO (sse_regno
)),
4048 case X86_64_SSEDF_CLASS
:
4049 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4050 gen_rtx_REG (DFmode
,
4051 SSE_REGNO (sse_regno
)),
4055 case X86_64_SSE_CLASS
:
4056 if (i
< n
- 1 && regclass
[i
+ 1] == X86_64_SSEUP_CLASS
)
4060 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4061 gen_rtx_REG (tmpmode
,
4062 SSE_REGNO (sse_regno
)),
4064 if (tmpmode
== TImode
)
4073 /* Empty aligned struct, union or class. */
4077 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
4078 for (i
= 0; i
< nexps
; i
++)
4079 XVECEXP (ret
, 0, i
) = exp
[i
];
/* NOTE(review): lossy extraction -- the mode dispatch (the switch/if
   selecting the integer, SSE, and MMX branches) is missing; only the
   counter updates survive.  Advances the 32-bit CUMULATIVE_ARGS
   counters (words/nregs/regno, sse_*, mmx_*) past one argument.
   Verify against pristine i386.c before editing.  */
4083 /* Update the data in CUM to advance over an argument of mode MODE
4084 and data type TYPE. (TYPE is null for libcalls where that information
4085 may not be available.) */
4088 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4089 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
4105 cum
->words
+= words
;
4106 cum
->nregs
-= words
;
4107 cum
->regno
+= words
;
4109 if (cum
->nregs
<= 0)
4117 if (cum
->float_in_sse
< 2)
4120 if (cum
->float_in_sse
< 1)
4131 if (!type
|| !AGGREGATE_TYPE_P (type
))
4133 cum
->sse_words
+= words
;
4134 cum
->sse_nregs
-= 1;
4135 cum
->sse_regno
+= 1;
4136 if (cum
->sse_nregs
<= 0)
4148 if (!type
|| !AGGREGATE_TYPE_P (type
))
4150 cum
->mmx_words
+= words
;
4151 cum
->mmx_nregs
-= 1;
4152 cum
->mmx_regno
+= 1;
4153 if (cum
->mmx_nregs
<= 0)
/* NOTE(review): lossy extraction (embedded line numbers, split lines).
   64-bit SysV advance: if the argument fits the remaining INT/SSE
   registers (per examine_argument) consume them, otherwise it goes on
   the stack and only cum->words advances.  Verify against pristine
   i386.c before editing.  */
4164 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4165 tree type
, HOST_WIDE_INT words
)
4167 int int_nregs
, sse_nregs
;
4169 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
4170 cum
->words
+= words
;
4171 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
4173 cum
->nregs
-= int_nregs
;
4174 cum
->sse_nregs
-= sse_nregs
;
4175 cum
->regno
+= int_nregs
;
4176 cum
->sse_regno
+= sse_nregs
;
4179 cum
->words
+= words
;
/* NOTE(review): lossy extraction; the branch guarded by the visible
   "Otherwise" comment is missing.  MS 64-bit ABI advance: asserts the
   by-value size is 1/2/4/8 bytes (larger objects are passed indirect)
   and bumps cum->words.  Verify against pristine i386.c.  */
4183 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
4184 HOST_WIDE_INT words
)
4186 /* Otherwise, this should be passed indirect. */
4187 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
4189 cum
->words
+= words
;
/* NOTE(review): lossy extraction (embedded line numbers, split lines,
   some lines missing).  Top-level FUNCTION_ARG_ADVANCE dispatcher:
   computes bytes/words for the argument, then delegates to the
   ms_64 / 64 / 32 worker matching the target ABI.  */
4198 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4199 tree type
, int named ATTRIBUTE_UNUSED
)
4201 HOST_WIDE_INT bytes
, words
;
4203 if (mode
== BLKmode
)
4204 bytes
= int_size_in_bytes (type
);
4206 bytes
= GET_MODE_SIZE (mode
);
4207 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4210 mode
= type_natural_mode (type
);
4212 if (TARGET_64BIT_MS_ABI
)
4213 function_arg_advance_ms_64 (cum
, bytes
, words
);
4214 else if (TARGET_64BIT
)
4215 function_arg_advance_64 (cum
, mode
, type
, words
);
4217 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
/* NOTE(review): lossy extraction -- the mode switch that selects the
   integer / SSE / MMX branches is missing, as are the fastcall
   conditions.  Per the surviving doc comment: decides the register (or
   stack) for one 32-bit-ABI argument; warns once when an SSE/MMX vector
   argument is used without the corresponding ISA enabled.  Verify
   against pristine i386.c before editing.  */
4220 /* Define where to put the arguments to a function.
4221 Value is zero to push the argument on the stack,
4222 or a hard register in which to store the argument.
4224 MODE is the argument's machine mode.
4225 TYPE is the data type of the argument (as a tree).
4226 This is null for libcalls where that information may
4228 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4229 the preceding args and about the function being called.
4230 NAMED is nonzero if this argument is a named parameter
4231 (otherwise it is an extra parameter matching an ellipsis). */
4234 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4235 enum machine_mode orig_mode
, tree type
,
4236 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
4238 static bool warnedsse
, warnedmmx
;
4240 /* Avoid the AL settings for the Unix64 ABI. */
4241 if (mode
== VOIDmode
)
4257 if (words
<= cum
->nregs
)
4259 int regno
= cum
->regno
;
4261 /* Fastcall allocates the first two DWORD (SImode) or
4262 smaller arguments to ECX and EDX if it isn't an
4268 || (type
&& AGGREGATE_TYPE_P (type
)))
4271 /* ECX not EAX is the first allocated register. */
4272 if (regno
== AX_REG
)
4275 return gen_rtx_REG (mode
, regno
);
4280 if (cum
->float_in_sse
< 2)
4283 if (cum
->float_in_sse
< 1)
4293 if (!type
|| !AGGREGATE_TYPE_P (type
))
4295 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4298 warning (0, "SSE vector argument without SSE enabled "
4302 return gen_reg_or_parallel (mode
, orig_mode
,
4303 cum
->sse_regno
+ FIRST_SSE_REG
)
;
4311 if (!type
|| !AGGREGATE_TYPE_P (type
))
4313 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4316 warning (0, "MMX vector argument without MMX enabled "
4320 return gen_reg_or_parallel (mode
, orig_mode
,
4321 cum
->mmx_regno
+ FIRST_MMX_REG
);
/* NOTE(review): lossy extraction; the tail of the GEN_INT expression
   and the trailing construct_container arguments are missing.  64-bit
   SysV argument lookup: VOIDmode is the hidden AL count of SSE
   registers used by a varargs call; everything else is delegated to
   construct_container.  Verify against pristine i386.c.  */
4330 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4331 enum machine_mode orig_mode
, tree type
)
4333 /* Handle a hidden AL argument containing number of registers
4334 for varargs x86-64 functions. */
4335 if (mode
== VOIDmode
)
4336 return GEN_INT (cum
->maybe_vaarg
4337 ? (cum
->sse_nregs
< 0
4342 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4344 &x86_64_int_parameter_registers
[cum
->regno
],
/* NOTE(review): lossy extraction (embedded line numbers, split and
   missing lines, e.g. the `named' test guarding the dual-register
   PARALLEL).  MS 64-bit ABI argument lookup: integer register from the
   ms_abi table, SF/DF in the matching SSE register when TARGET_SSE,
   and unnamed float args in both an SSE and an integer register via a
   two-entry PARALLEL.  Verify against pristine i386.c.  */
4349 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4350 enum machine_mode orig_mode
, int named
)
4354 /* Avoid the AL settings for the Unix64 ABI. */
4355 if (mode
== VOIDmode
)
4358 /* If we've run out of registers, it goes on the stack. */
4359 if (cum
->nregs
== 0)
4362 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
4364 /* Only floating point modes are passed in anything but integer regs. */
4365 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
4368 regno
= cum
->regno
+ FIRST_SSE_REG
;
4373 /* Unnamed floating parameters are passed in both the
4374 SSE and integer registers. */
4375 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
4376 t2
= gen_rtx_REG (mode
, regno
);
4377 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
4378 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
4379 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
4383 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
/* NOTE(review): lossy extraction (embedded line numbers, split lines).
   Top-level FUNCTION_ARG dispatcher: normalizes vector types to their
   natural mode, computes bytes/words, then delegates to the ms_64 /
   64 / 32 worker matching the target ABI.  */
4387 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
4388 tree type
, int named
)
4390 enum machine_mode mode
= omode
;
4391 HOST_WIDE_INT bytes
, words
;
4393 if (mode
== BLKmode
)
4394 bytes
= int_size_in_bytes (type
);
4396 bytes
= GET_MODE_SIZE (mode
);
4397 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4399 /* To simplify the code below, represent vector types with a vector mode
4400 even if MMX/SSE are not active. */
4401 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4402 mode
= type_natural_mode (type
);
4404 if (TARGET_64BIT_MS_ABI
)
4405 return function_arg_ms_64 (cum
, mode
, omode
, named
);
4406 else if (TARGET_64BIT
)
4407 return function_arg_64 (cum
, mode
, omode
, type
);
4409 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
/* NOTE(review): lossy extraction -- several return statements and the
   early `type == NULL' handling are missing.  Per the surviving doc
   comment: decides whether an argument is passed by invisible
   reference.  Visible rules: MS 64-bit ABI passes arrays and any
   aggregate whose size is not 1/2/4/8 bytes by reference, as well as
   modes wider than 8 bytes; 64-bit SysV passes variable-sized types
   (int_size_in_bytes == -1) by reference.  Verify against pristine
   i386.c.  */
4412 /* A C expression that indicates when an argument must be passed by
4413 reference. If nonzero for an argument, a copy of that argument is
4414 made in memory and a pointer to the argument is passed instead of
4415 the argument itself. The pointer is passed in whatever way is
4416 appropriate for passing a pointer to that type. */
4419 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4420 enum machine_mode mode ATTRIBUTE_UNUSED
,
4421 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4423 if (TARGET_64BIT_MS_ABI
)
4427 /* Arrays are passed by reference. */
4428 if (TREE_CODE (type
) == ARRAY_TYPE
)
4431 if (AGGREGATE_TYPE_P (type
))
4433 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4434 are passed by reference. */
4435 int el2
= exact_log2 (int_size_in_bytes (type
));
4436 return !(el2
>= 0 && el2
<= 3);
4440 /* __m128 is passed by reference. */
4441 /* ??? How to handle complex? For now treat them as structs,
4442 and pass them by reference if they're too large. */
4443 if (GET_MODE_SIZE (mode
) > 8)
4446 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
/* NOTE(review): lossy extraction -- return statements and some case
   labels are missing.  Per the visible doc comment: recursive predicate
   used by the 32-bit argument-passing code to detect a 128-bit-aligned
   SSE vector anywhere inside TYPE (walking record/union fields and
   array element types).  Verify against pristine i386.c.  */
4452 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4453 ABI. Only called if TARGET_SSE. */
4455 contains_128bit_aligned_vector_p (tree type
)
4457 enum machine_mode mode
= TYPE_MODE (type
);
4458 if (SSE_REG_MODE_P (mode
)
4459 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4461 if (TYPE_ALIGN (type
) < 128)
4464 if (AGGREGATE_TYPE_P (type
))
4466 /* Walk the aggregates recursively. */
4467 switch (TREE_CODE (type
))
4471 case QUAL_UNION_TYPE
:
4475 /* Walk all the structure fields. */
4476 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4478 if (TREE_CODE (field
) == FIELD_DECL
4479 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4486 /* Just for use if some languages passes arrays by value. */
4487 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
/* NOTE(review): lossy extraction -- the conditions distinguishing the
   64-bit path and the type/no-type branches are missing.  Per the
   visible doc comment: returns the argument alignment in bits, starting
   from the type or mode alignment, clamped up to PARM_BOUNDARY, and
   demoted back to PARM_BOUNDARY for non-SSE modes or aggregates without
   a 128-bit-aligned vector member.  Verify against pristine i386.c.  */
4498 /* Gives the alignment boundary, in bits, of an argument with the
4499 specified mode and type. */
4502 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4506 align
= TYPE_ALIGN (type
);
4508 align
= GET_MODE_ALIGNMENT (mode
);
4509 if (align
< PARM_BOUNDARY
)
4510 align
= PARM_BOUNDARY
;
4513 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4514 make an exception for SSE modes since these require 128bit
4517 The handling here differs from field_alignment. ICC aligns MMX
4518 arguments to 4 byte boundaries, while structure fields are aligned
4519 to 8 byte boundaries. */
4521 align
= PARM_BOUNDARY
;
4524 if (!SSE_REG_MODE_P (mode
))
4525 align
= PARM_BOUNDARY
;
4529 if (!contains_128bit_aligned_vector_p (type
))
4530 align
= PARM_BOUNDARY
;
/* NOTE(review): lossy extraction -- the switch head and the other case
   labels/returns are missing; only the FIRST_FLOAT_REG case and a
   TARGET_MACHO/TARGET_64BIT test survive.  Per the visible doc comment:
   predicate for registers that may hold a function's return value.
   Verify against pristine i386.c.  */
4538 /* Return true if N is a possible register number of function value. */
4541 ix86_function_value_regno_p (int regno
)
4548 case FIRST_FLOAT_REG
:
4549 if (TARGET_64BIT_MS_ABI
)
4551 return TARGET_FLOAT_RETURNS_IN_80387
;
4557 if (TARGET_MACHO
|| TARGET_64BIT
)
/* NOTE(review): lossy extraction (embedded line numbers, split lines,
   a few missing, e.g. the default `regno = AX_REG'-style assignment
   implied by the "%eax" comment).  32-bit return-register selection:
   8-byte vectors -> MM0 (when MMX), TImode/16-byte vectors -> XMM0
   (when SSE), x87 floats -> %st(0), otherwise %eax; SF/DF overridden to
   %xmm0 for sseregparm-qualified functions.  Verify against pristine
   i386.c.  */
4565 /* Define how to find the value returned by a function.
4566 VALTYPE is the data type of the value (as a tree).
4567 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4568 otherwise, FUNC is 0. */
4571 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4572 const_tree fntype
, const_tree fn
)
4576 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4577 we normally prevent this case when mmx is not available. However
4578 some ABIs may require the result to be returned like DImode. */
4579 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4580 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4582 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4583 we prevent this case when sse is not available. However some ABIs
4584 may require the result to be returned like integer TImode. */
4585 else if (mode
== TImode
4586 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4587 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4589 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4590 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4591 regno
= FIRST_FLOAT_REG
;
4593 /* Most things go in %eax. */
4596 /* Override FP return register with %xmm0 for local functions when
4597 SSE math is enabled or for functions with sseregparm attribute. */
4598 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4600 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4601 if ((sse_level
>= 1 && mode
== SFmode
)
4602 || (sse_level
== 2 && mode
== DFmode
))
4603 regno
= FIRST_SSE_REG
;
4606 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): lossy extraction -- the parameter list is cut short,
   and the mode tests selecting the SSE / x87 / AX fast paths are
   missing.  64-bit SysV return-value lookup: libcalls (NULL valtype)
   get a direct REG; typed values go through construct_container with
   the return-register tables, falling back to %rax for zero-sized
   structures.  Verify against pristine i386.c.  */
4610 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4615 /* Handle libcalls, which don't provide a type node. */
4616 if (valtype
== NULL
)
4628 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4631 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4635 return gen_rtx_REG (mode
, AX_REG
);
4639 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4640 REGPARM_MAX
, SSE_REGPARM_MAX
,
4641 x86_64_int_return_registers
, 0);
4643 /* For zero sized structures, construct_container returns NULL, but we
4644 need to keep rest of compiler happy by returning meaningful value. */
4646 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
/* NOTE(review): lossy extraction -- the guard around the SSE cases
   (presumably a TARGET_SSE test) is missing.  MS 64-bit ABI return
   register: default %rax; SF/DF and 16-byte/vector modes in %xmm0.
   Verify against pristine i386.c.  */
4652 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
4654 unsigned int regno
= AX_REG
;
4658 if (mode
== SFmode
|| mode
== DFmode
)
4659 regno
= FIRST_SSE_REG
;
4660 else if (VECTOR_MODE_P (mode
) || GET_MODE_SIZE (mode
) == 16)
4661 regno
= FIRST_SSE_REG
;
4664 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): lossy extraction (embedded line numbers, split lines;
   the `fn = NULL' initialisation branch appears to be missing).
   Common worker: splits FNTYPE_OR_DECL into decl vs. type, then
   dispatches to the ms_64 / 64 / 32 return-value routine per ABI.  */
4668 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
4669 enum machine_mode orig_mode
, enum machine_mode mode
)
4671 const_tree fn
, fntype
;
4674 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4675 fn
= fntype_or_decl
;
4676 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4678 if (TARGET_64BIT_MS_ABI
)
4679 return function_value_ms_64 (orig_mode
, mode
);
4680 else if (TARGET_64BIT
)
4681 return function_value_64 (orig_mode
, mode
, valtype
);
4683 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
/* NOTE(review): lossy extraction (embedded line numbers, split lines).
   TARGET_FUNCTION_VALUE hook: normalizes VALTYPE to its natural mode
   and defers to ix86_function_value_1.  */
4687 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
4688 bool outgoing ATTRIBUTE_UNUSED
)
4690 enum machine_mode mode
, orig_mode
;
4692 orig_mode
= TYPE_MODE (valtype
);
4693 mode
= type_natural_mode (valtype
);
4694 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
/* NOTE(review): lossy extraction (embedded line numbers, split lines).
   LIBCALL_VALUE hook: like ix86_function_value but with no type node,
   so both valtype and fntype_or_decl are NULL.  */
4698 ix86_libcall_value (enum machine_mode mode
)
4700 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
/* NOTE(review): lossy extraction -- several returns and size tests are
   missing.  32-bit RETURN_IN_MEMORY rules visible here: small
   MS-compatible aggregates (<= 8 bytes) and vectors that fit the
   available MMX (8-byte) / SSE (larger) register are NOT returned in
   memory.  Verify against pristine i386.c.  */
4703 /* Return true iff type is returned in memory. */
4706 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
4710 if (mode
== BLKmode
)
4713 size
= int_size_in_bytes (type
);
4715 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4718 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4720 /* User-created vectors small enough to fit in EAX. */
4724 /* MMX/3dNow values are returned in MM0,
4725 except when it doesn't exits. */
4727 return (TARGET_MMX
? 0 : 1);
4729 /* SSE values are returned in XMM0, except when it doesn't exist. */
4731 return (TARGET_SSE
? 0 : 1);
/* NOTE(review): lossy extraction (embedded line numbers, split lines).
   64-bit SysV: a value is returned in memory exactly when
   examine_argument reports no register classification for it.  */
4746 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
4748 int needed_intregs
, needed_sseregs
;
4749 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
/* NOTE(review): lossy extraction (embedded line numbers, split lines).
   MS 64-bit ABI: 16-byte non-complex vectors come back in xmm0;
   otherwise only sizes 1/2/4/8 (and nothing complex) avoid memory.  */
4753 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
4755 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4757 /* __m128 and friends are returned in xmm0. */
4758 if (!COMPLEX_MODE_P (mode
) && size
== 16 && VECTOR_MODE_P (mode
))
4761 /* Otherwise, the size must be exactly in [1248]. But not for complex. */
4762 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8)
4763 || COMPLEX_MODE_P (mode
);
/* NOTE(review): lossy extraction (embedded line numbers, split lines).
   TARGET_RETURN_IN_MEMORY dispatcher: natural-mode the type, then
   delegate to the ms_64 / 64 / 32 predicate per ABI.  */
4767 ix86_return_in_memory (const_tree type
)
4769 const enum machine_mode mode
= type_natural_mode (type
);
4771 if (TARGET_64BIT_MS_ABI
)
4772 return return_in_memory_ms_64 (type
, mode
);
4773 else if (TARGET_64BIT
)
4774 return return_in_memory_64 (type
, mode
);
4776 return return_in_memory_32 (type
, mode
);
/* NOTE(review): lossy extraction -- most of the size tests and all
   return statements are missing; only the skeleton survives.  Per the
   visible doc comment: Solaris 10 variant of ix86_return_in_memory
   that returns 8-byte vectors in memory even when MMX is available.
   Verify against pristine i386.c before editing.  */
4779 /* Return false iff TYPE is returned in memory. This version is used
4780 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4781 but differs notably in that when MMX is available, 8-byte vectors
4782 are returned in memory, rather than in MMX registers. */
4785 ix86_sol10_return_in_memory (const_tree type
)
4788 enum machine_mode mode
= type_natural_mode (type
);
4791 return return_in_memory_64 (type
, mode
);
4793 if (mode
== BLKmode
)
4796 size
= int_size_in_bytes (type
);
4798 if (VECTOR_MODE_P (mode
))
4800 /* Return in memory only if MMX registers *are* available. This
4801 seems backwards, but it is consistent with the existing
4808 else if (mode
== TImode
)
4810 else if (mode
== XFmode
)
/* NOTE(review): lossy extraction -- the warning-flag assignments, the
   message continuation strings, and the final return are missing.  Per
   the visible doc comment: STRUCT_VALUE_RTX hook used as the place to
   emit one-time "SSE/MMX vector return without SSE/MMX enabled"
   warnings, since it fires only for real callers/callees.  Verify
   against pristine i386.c before editing.  */
4816 /* When returning SSE vector types, we have a choice of either
4817 (1) being abi incompatible with a -march switch, or
4818 (2) generating an error.
4819 Given no good solution, I think the safest thing is one warning.
4820 The user won't be able to use -Werror, but....
4822 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4823 called in response to actually generating a caller or callee that
4824 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4825 via aggregate_value_p for general type probing from tree-ssa. */
4828 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4830 static bool warnedsse
, warnedmmx
;
4832 if (!TARGET_64BIT
&& type
)
4834 /* Look at the return type of the function, not the function type. */
4835 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4837 if (!TARGET_SSE
&& !warnedsse
)
4840 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4843 warning (0, "SSE vector return without SSE enabled "
4848 if (!TARGET_MMX
&& !warnedmmx
)
4850 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4853 warning (0, "MMX vector return without MMX enabled "
/* NOTE(review): lossy extraction (embedded line numbers, split lines;
   the field types of overflow_arg_area/reg_save_area are cut off).
   Builds the va_list type: a plain char* for 32-bit and the MS 64-bit
   ABI; for 64-bit SysV, a one-element array of a __va_list_tag record
   with gp_offset, fp_offset, overflow_arg_area and reg_save_area
   fields, matching the psABI layout.  */
4863 /* Create the va_list data type. */
4866 ix86_build_builtin_va_list (void)
4868 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4870 /* For i386 we use plain pointer to argument area. */
4871 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4872 return build_pointer_type (char_type_node
);
4874 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4875 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4877 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4878 unsigned_type_node
);
4879 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4880 unsigned_type_node
);
4881 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4883 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4886 va_list_gpr_counter_field
= f_gpr
;
4887 va_list_fpr_counter_field
= f_fpr
;
4889 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4890 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4891 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4892 DECL_FIELD_CONTEXT (f_sav
) = record
;
4894 TREE_CHAIN (record
) = type_decl
;
4895 TYPE_NAME (record
) = type_decl
;
4896 TYPE_FIELDS (record
) = f_gpr
;
4897 TREE_CHAIN (f_gpr
) = f_fpr
;
4898 TREE_CHAIN (f_fpr
) = f_ovf
;
4899 TREE_CHAIN (f_ovf
) = f_sav
;
4901 layout_type (record
);
4903 /* The correct type is an array type of one element. */
4904 return build_array_type (record
, build_index_type (size_zero_node
));
/* NOTE(review): lossy extraction -- local declarations, loop guards
   and several operands of the emitted RTL are missing.  64-bit SysV
   varargs prologue: saves unused named-GP-parameter registers into the
   register save area, then (when SSE regs and va_list_fpr_size are in
   play) emits the sse_prologue_save computed-jump sequence that stores
   only the SSE registers actually used, as described by the comments
   below.  Verify against pristine i386.c before editing.  */
4907 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4910 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4920 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4923 /* Indicate to allocate space on the stack for varargs save area. */
4924 ix86_save_varrargs_registers
= 1;
4925 /* We need 16-byte stack alignment to save SSE registers. If user
4926 asked for lower preferred_stack_boundary, lets just hope that he knows
4927 what he is doing and won't varargs SSE values.
4929 We also may end up assuming that only 64bit values are stored in SSE
4930 register let some floating point program work. */
4931 if (ix86_preferred_stack_boundary
>= 128)
4932 cfun
->stack_alignment_needed
= 128;
4934 save_area
= frame_pointer_rtx
;
4935 set
= get_varargs_alias_set ();
4937 for (i
= cum
->regno
;
4939 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4942 mem
= gen_rtx_MEM (Pmode
,
4943 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4944 MEM_NOTRAP_P (mem
) = 1;
4945 set_mem_alias_set (mem
, set
);
4946 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4947 x86_64_int_parameter_registers
[i
]));
4950 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4952 /* Now emit code to save SSE registers. The AX parameter contains number
4953 of SSE parameter registers used to call this function. We use
4954 sse_prologue_save insn template that produces computed jump across
4955 SSE saves. We need some preparation work to get this working. */
4957 label
= gen_label_rtx ();
4958 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4960 /* Compute address to jump to :
4961 label - 5*eax + nnamed_sse_arguments*5 */
4962 tmp_reg
= gen_reg_rtx (Pmode
);
4963 nsse_reg
= gen_reg_rtx (Pmode
);
4964 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, AX_REG
)));
4965 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4966 gen_rtx_MULT (Pmode
, nsse_reg
,
4971 gen_rtx_CONST (DImode
,
4972 gen_rtx_PLUS (DImode
,
4974 GEN_INT (cum
->sse_regno
* 4))));
4976 emit_move_insn (nsse_reg
, label_ref
);
4977 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4979 /* Compute address of memory block we save into. We always use pointer
4980 pointing 127 bytes after first byte to store - this is needed to keep
4981 instruction size limited by 4 bytes. */
4982 tmp_reg
= gen_reg_rtx (Pmode
);
4983 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4984 plus_constant (save_area
,
4985 8 * REGPARM_MAX
+ 127)));
4986 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4987 MEM_NOTRAP_P (mem
) = 1;
4988 set_mem_alias_set (mem
, set
);
4989 set_mem_align (mem
, BITS_PER_WORD
);
4991 /* And finally do the dirty job! */
4992 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4993 GEN_INT (cum
->sse_regno
), label
));
/* NOTE(review): lossy extraction -- local declarations and the loop
   body's enclosing condition are missing.  MS 64-bit ABI varargs
   prologue: spills each remaining integer parameter register into its
   home slot in the caller-provided argument area.  Verify against
   pristine i386.c.  */
4998 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
5000 alias_set_type set
= get_varargs_alias_set ();
5003 for (i
= cum
->regno
; i
< REGPARM_MAX
; i
++)
5007 mem
= gen_rtx_MEM (Pmode
,
5008 plus_constant (virtual_incoming_args_rtx
,
5009 i
* UNITS_PER_WORD
));
5010 MEM_NOTRAP_P (mem
) = 1;
5011 set_mem_alias_set (mem
, set
);
5013 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
5014 emit_move_insn (mem
, reg
);
/* NOTE(review): lossy extraction -- the trailing parameter (no_rtl),
   the 64-bit-only early return, and the next_cum initialisation are
   missing.  TARGET_SETUP_INCOMING_VARARGS hook: advances a scratch
   CUMULATIVE_ARGS past the last named argument (stdarg only), then
   dispatches to the MS or SysV 64-bit worker.  Verify against pristine
   i386.c.  */
5019 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5020 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
5023 CUMULATIVE_ARGS next_cum
;
5026 /* This argument doesn't appear to be used anymore. Which is good,
5027 because the old code here didn't suppress rtl generation. */
5028 gcc_assert (!no_rtl
);
5033 fntype
= TREE_TYPE (current_function_decl
);
5035 /* For varargs, we do not want to skip the dummy va_dcl argument.
5036 For stdargs, we do want to skip the last named argument. */
5038 if (stdarg_p (fntype
))
5039 function_arg_advance (&next_cum
, mode
, type
, 1);
5041 if (TARGET_64BIT_MS_ABI
)
5042 setup_incoming_varargs_ms_64 (&next_cum
);
5044 setup_incoming_varargs_64 (&next_cum
);
5047 /* Implement va_start. */
5050 ix86_va_start (tree valist
, rtx nextarg
)
5052 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
5053 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
5054 tree gpr
, fpr
, ovf
, sav
, t
;
5057 /* Only 64bit target needs something special. */
5058 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
5060 std_expand_builtin_va_start (valist
, nextarg
);
5064 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
5065 f_fpr
= TREE_CHAIN (f_gpr
);
5066 f_ovf
= TREE_CHAIN (f_fpr
);
5067 f_sav
= TREE_CHAIN (f_ovf
);
5069 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
5070 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
5071 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
5072 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
5073 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
5075 /* Count number of gp and fp argument registers used. */
5076 words
= current_function_args_info
.words
;
5077 n_gpr
= current_function_args_info
.regno
;
5078 n_fpr
= current_function_args_info
.sse_regno
;
5080 if (cfun
->va_list_gpr_size
)
5082 type
= TREE_TYPE (gpr
);
5083 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
5084 build_int_cst (type
, n_gpr
* 8));
5085 TREE_SIDE_EFFECTS (t
) = 1;
5086 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5089 if (cfun
->va_list_fpr_size
)
5091 type
= TREE_TYPE (fpr
);
5092 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
5093 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
5094 TREE_SIDE_EFFECTS (t
) = 1;
5095 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5098 /* Find the overflow area. */
5099 type
= TREE_TYPE (ovf
);
5100 t
= make_tree (type
, virtual_incoming_args_rtx
);
5102 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
5103 size_int (words
* UNITS_PER_WORD
));
5104 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
5105 TREE_SIDE_EFFECTS (t
) = 1;
5106 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5108 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
5110 /* Find the register save area.
5111 Prologue of the function save it right above stack frame. */
5112 type
= TREE_TYPE (sav
);
5113 t
= make_tree (type
, frame_pointer_rtx
);
5114 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
5115 TREE_SIDE_EFFECTS (t
) = 1;
5116 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5120 /* Implement va_arg. */
5123 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
5125 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
5126 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
5127 tree gpr
, fpr
, ovf
, sav
, t
;
5129 tree lab_false
, lab_over
= NULL_TREE
;
5134 enum machine_mode nat_mode
;
5136 /* Only 64bit target needs something special. */
5137 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
5138 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
5140 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
5141 f_fpr
= TREE_CHAIN (f_gpr
);
5142 f_ovf
= TREE_CHAIN (f_fpr
);
5143 f_sav
= TREE_CHAIN (f_ovf
);
5145 valist
= build_va_arg_indirect_ref (valist
);
5146 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
5147 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
5148 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
5149 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
5151 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
5153 type
= build_pointer_type (type
);
5154 size
= int_size_in_bytes (type
);
5155 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5157 nat_mode
= type_natural_mode (type
);
5158 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
5159 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
5161 /* Pull the value out of the saved registers. */
5163 addr
= create_tmp_var (ptr_type_node
, "addr");
5164 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
5168 int needed_intregs
, needed_sseregs
;
5170 tree int_addr
, sse_addr
;
5172 lab_false
= create_artificial_label ();
5173 lab_over
= create_artificial_label ();
5175 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
5177 need_temp
= (!REG_P (container
)
5178 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
5179 || TYPE_ALIGN (type
) > 128));
5181 /* In case we are passing structure, verify that it is consecutive block
5182 on the register save area. If not we need to do moves. */
5183 if (!need_temp
&& !REG_P (container
))
5185 /* Verify that all registers are strictly consecutive */
5186 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
5190 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
5192 rtx slot
= XVECEXP (container
, 0, i
);
5193 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
5194 || INTVAL (XEXP (slot
, 1)) != i
* 16)
5202 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
5204 rtx slot
= XVECEXP (container
, 0, i
);
5205 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
5206 || INTVAL (XEXP (slot
, 1)) != i
* 8)
5218 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
5219 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
5220 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
5221 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
5224 /* First ensure that we fit completely in registers. */
5227 t
= build_int_cst (TREE_TYPE (gpr
),
5228 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
5229 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
5230 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5231 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5232 gimplify_and_add (t
, pre_p
);
5236 t
= build_int_cst (TREE_TYPE (fpr
),
5237 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
5239 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
5240 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5241 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5242 gimplify_and_add (t
, pre_p
);
5245 /* Compute index to start of area used for integer regs. */
5248 /* int_addr = gpr + sav; */
5249 t
= fold_convert (sizetype
, gpr
);
5250 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5251 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
5252 gimplify_and_add (t
, pre_p
);
5256 /* sse_addr = fpr + sav; */
5257 t
= fold_convert (sizetype
, fpr
);
5258 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5259 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
5260 gimplify_and_add (t
, pre_p
);
5265 tree temp
= create_tmp_var (type
, "va_arg_tmp");
5268 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
5269 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5270 gimplify_and_add (t
, pre_p
);
5272 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
5274 rtx slot
= XVECEXP (container
, 0, i
);
5275 rtx reg
= XEXP (slot
, 0);
5276 enum machine_mode mode
= GET_MODE (reg
);
5277 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
5278 tree addr_type
= build_pointer_type (piece_type
);
5281 tree dest_addr
, dest
;
5283 if (SSE_REGNO_P (REGNO (reg
)))
5285 src_addr
= sse_addr
;
5286 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
5290 src_addr
= int_addr
;
5291 src_offset
= REGNO (reg
) * 8;
5293 src_addr
= fold_convert (addr_type
, src_addr
);
5294 src_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, src_addr
,
5295 size_int (src_offset
));
5296 src
= build_va_arg_indirect_ref (src_addr
);
5298 dest_addr
= fold_convert (addr_type
, addr
);
5299 dest_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, dest_addr
,
5300 size_int (INTVAL (XEXP (slot
, 1))));
5301 dest
= build_va_arg_indirect_ref (dest_addr
);
5303 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
5304 gimplify_and_add (t
, pre_p
);
5310 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
5311 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
5312 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
5313 gimplify_and_add (t
, pre_p
);
5317 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
5318 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
5319 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
5320 gimplify_and_add (t
, pre_p
);
5323 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
5324 gimplify_and_add (t
, pre_p
);
5326 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
5327 append_to_statement_list (t
, pre_p
);
5330 /* ... otherwise out of the overflow area. */
5332 /* Care for on-stack alignment if needed. */
5333 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
5334 || integer_zerop (TYPE_SIZE (type
)))
5338 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
5339 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
5340 size_int (align
- 1));
5341 t
= fold_convert (sizetype
, t
);
5342 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5344 t
= fold_convert (TREE_TYPE (ovf
), t
);
5346 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
5348 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5349 gimplify_and_add (t2
, pre_p
);
5351 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
,
5352 size_int (rsize
* UNITS_PER_WORD
));
5353 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
5354 gimplify_and_add (t
, pre_p
);
5358 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
5359 append_to_statement_list (t
, pre_p
);
5362 ptrtype
= build_pointer_type (type
);
5363 addr
= fold_convert (ptrtype
, addr
);
5366 addr
= build_va_arg_indirect_ref (addr
);
5367 return build_va_arg_indirect_ref (addr
);
5370 /* Return nonzero if OPNUM's MEM should be matched
5371 in movabs* patterns. */
5374 ix86_check_movabs (rtx insn
, int opnum
)
5378 set
= PATTERN (insn
);
5379 if (GET_CODE (set
) == PARALLEL
)
5380 set
= XVECEXP (set
, 0, 0);
5381 gcc_assert (GET_CODE (set
) == SET
);
5382 mem
= XEXP (set
, opnum
);
5383 while (GET_CODE (mem
) == SUBREG
)
5384 mem
= SUBREG_REG (mem
);
5385 gcc_assert (MEM_P (mem
));
5386 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5389 /* Initialize the table of extra 80387 mathematical constants. */
5392 init_ext_80387_constants (void)
5394 static const char * cst
[5] =
5396 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5397 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5398 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5399 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5400 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5404 for (i
= 0; i
< 5; i
++)
5406 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5407 /* Ensure each constant is rounded to XFmode precision. */
5408 real_convert (&ext_80387_constants_table
[i
],
5409 XFmode
, &ext_80387_constants_table
[i
]);
5412 ext_80387_constants_init
= 1;
5415 /* Return true if the constant is something that can be loaded with
5416 a special instruction. */
5419 standard_80387_constant_p (rtx x
)
5421 enum machine_mode mode
= GET_MODE (x
);
5425 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
5428 if (x
== CONST0_RTX (mode
))
5430 if (x
== CONST1_RTX (mode
))
5433 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5435 /* For XFmode constants, try to find a special 80387 instruction when
5436 optimizing for size or on those CPUs that benefit from them. */
5438 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5442 if (! ext_80387_constants_init
)
5443 init_ext_80387_constants ();
5445 for (i
= 0; i
< 5; i
++)
5446 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5450 /* Load of the constant -0.0 or -1.0 will be split as
5451 fldz;fchs or fld1;fchs sequence. */
5452 if (real_isnegzero (&r
))
5454 if (real_identical (&r
, &dconstm1
))
5460 /* Return the opcode of the special instruction to be used to load
5464 standard_80387_constant_opcode (rtx x
)
5466 switch (standard_80387_constant_p (x
))
5490 /* Return the CONST_DOUBLE representing the 80387 constant that is
5491 loaded by the specified special instruction. The argument IDX
5492 matches the return value from standard_80387_constant_p. */
5495 standard_80387_constant_rtx (int idx
)
5499 if (! ext_80387_constants_init
)
5500 init_ext_80387_constants ();
5516 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5520 /* Return 1 if mode is a valid mode for sse. */
5522 standard_sse_mode_p (enum machine_mode mode
)
5539 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5542 standard_sse_constant_p (rtx x
)
5544 enum machine_mode mode
= GET_MODE (x
);
5546 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5548 if (vector_all_ones_operand (x
, mode
)
5549 && standard_sse_mode_p (mode
))
5550 return TARGET_SSE2
? 2 : -1;
5555 /* Return the opcode of the special instruction to be used to load
5559 standard_sse_constant_opcode (rtx insn
, rtx x
)
5561 switch (standard_sse_constant_p (x
))
5564 if (get_attr_mode (insn
) == MODE_V4SF
)
5565 return "xorps\t%0, %0";
5566 else if (get_attr_mode (insn
) == MODE_V2DF
)
5567 return "xorpd\t%0, %0";
5569 return "pxor\t%0, %0";
5571 return "pcmpeqd\t%0, %0";
5576 /* Returns 1 if OP contains a symbol reference */
5579 symbolic_reference_mentioned_p (rtx op
)
5584 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5587 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5588 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5594 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5595 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5599 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5606 /* Return 1 if it is appropriate to emit `ret' instructions in the
5607 body of a function. Do this only if the epilogue is simple, needing a
5608 couple of insns. Prior to reloading, we can't tell how many registers
5609 must be saved, so return 0 then. Return 0 if there is no frame
5610 marker to de-allocate. */
5613 ix86_can_use_return_insn_p (void)
5615 struct ix86_frame frame
;
5617 if (! reload_completed
|| frame_pointer_needed
)
5620 /* Don't allow more than 32 pop, since that's all we can do
5621 with one instruction. */
5622 if (current_function_pops_args
5623 && current_function_args_size
>= 32768)
5626 ix86_compute_frame_layout (&frame
);
5627 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5630 /* Value should be nonzero if functions must have frame pointers.
5631 Zero means the frame pointer need not be set up (and parms may
5632 be accessed via the stack pointer) in functions that seem suitable. */
5635 ix86_frame_pointer_required (void)
5637 /* If we accessed previous frames, then the generated code expects
5638 to be able to access the saved ebp value in our frame. */
5639 if (cfun
->machine
->accesses_prev_frame
)
5642 /* Several x86 os'es need a frame pointer for other reasons,
5643 usually pertaining to setjmp. */
5644 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5647 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5648 the frame pointer by default. Turn it back on now if we've not
5649 got a leaf function. */
5650 if (TARGET_OMIT_LEAF_FRAME_POINTER
5651 && (!current_function_is_leaf
5652 || ix86_current_function_calls_tls_descriptor
))
5655 if (current_function_profile
)
5661 /* Record that the current function accesses previous call frames. */
5664 ix86_setup_frame_addresses (void)
5666 cfun
->machine
->accesses_prev_frame
= 1;
5669 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5670 # define USE_HIDDEN_LINKONCE 1
5672 # define USE_HIDDEN_LINKONCE 0
5675 static int pic_labels_used
;
5677 /* Fills in the label name that should be used for a pc thunk for
5678 the given register. */
5681 get_pc_thunk_name (char name
[32], unsigned int regno
)
5683 gcc_assert (!TARGET_64BIT
);
5685 if (USE_HIDDEN_LINKONCE
)
5686 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5688 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5692 /* This function generates code for -fpic that loads %ebx with
5693 the return address of the caller and then returns. */
5696 ix86_file_end (void)
5701 for (regno
= 0; regno
< 8; ++regno
)
5705 if (! ((pic_labels_used
>> regno
) & 1))
5708 get_pc_thunk_name (name
, regno
);
5713 switch_to_section (darwin_sections
[text_coal_section
]);
5714 fputs ("\t.weak_definition\t", asm_out_file
);
5715 assemble_name (asm_out_file
, name
);
5716 fputs ("\n\t.private_extern\t", asm_out_file
);
5717 assemble_name (asm_out_file
, name
);
5718 fputs ("\n", asm_out_file
);
5719 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5723 if (USE_HIDDEN_LINKONCE
)
5727 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5729 TREE_PUBLIC (decl
) = 1;
5730 TREE_STATIC (decl
) = 1;
5731 DECL_ONE_ONLY (decl
) = 1;
5733 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5734 switch_to_section (get_named_section (decl
, NULL
, 0));
5736 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5737 fputs ("\t.hidden\t", asm_out_file
);
5738 assemble_name (asm_out_file
, name
);
5739 fputc ('\n', asm_out_file
);
5740 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5744 switch_to_section (text_section
);
5745 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5748 xops
[0] = gen_rtx_REG (SImode
, regno
);
5749 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5750 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5751 output_asm_insn ("ret", xops
);
5754 if (NEED_INDICATE_EXEC_STACK
)
5755 file_end_indicate_exec_stack ();
5758 /* Emit code for the SET_GOT patterns. */
5761 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5767 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5769 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5770 xops
[2] = gen_rtx_MEM (Pmode
,
5771 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5772 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5774 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5775 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5776 an unadorned address. */
5777 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5778 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5779 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5783 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5785 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5787 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5790 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5792 output_asm_insn ("call\t%a2", xops
);
5795 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5796 is what will be referenced by the Mach-O PIC subsystem. */
5798 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5801 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5802 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5805 output_asm_insn ("pop{l}\t%0", xops
);
5810 get_pc_thunk_name (name
, REGNO (dest
));
5811 pic_labels_used
|= 1 << REGNO (dest
);
5813 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5814 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5815 output_asm_insn ("call\t%X2", xops
);
5816 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5817 is what will be referenced by the Mach-O PIC subsystem. */
5820 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5822 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5823 CODE_LABEL_NUMBER (label
));
5830 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5831 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5833 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5838 /* Generate an "push" pattern for input ARG. */
5843 return gen_rtx_SET (VOIDmode
,
5845 gen_rtx_PRE_DEC (Pmode
,
5846 stack_pointer_rtx
)),
5850 /* Return >= 0 if there is an unused call-clobbered register available
5851 for the entire function. */
5854 ix86_select_alt_pic_regnum (void)
5856 if (current_function_is_leaf
&& !current_function_profile
5857 && !ix86_current_function_calls_tls_descriptor
)
5860 for (i
= 2; i
>= 0; --i
)
5861 if (!df_regs_ever_live_p (i
))
5865 return INVALID_REGNUM
;
5868 /* Return 1 if we need to save REGNO. */
5870 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5872 if (pic_offset_table_rtx
5873 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5874 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
5875 || current_function_profile
5876 || current_function_calls_eh_return
5877 || current_function_uses_const_pool
))
5879 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5884 if (current_function_calls_eh_return
&& maybe_eh_return
)
5889 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5890 if (test
== INVALID_REGNUM
)
5897 if (cfun
->machine
->force_align_arg_pointer
5898 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5901 return (df_regs_ever_live_p (regno
)
5902 && !call_used_regs
[regno
]
5903 && !fixed_regs
[regno
]
5904 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5907 /* Return number of registers to be saved on the stack. */
5910 ix86_nsaved_regs (void)
5915 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5916 if (ix86_save_reg (regno
, true))
5921 /* Return the offset between two registers, one to be eliminated, and the other
5922 its replacement, at the start of a routine. */
5925 ix86_initial_elimination_offset (int from
, int to
)
5927 struct ix86_frame frame
;
5928 ix86_compute_frame_layout (&frame
);
5930 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5931 return frame
.hard_frame_pointer_offset
;
5932 else if (from
== FRAME_POINTER_REGNUM
5933 && to
== HARD_FRAME_POINTER_REGNUM
)
5934 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5937 gcc_assert (to
== STACK_POINTER_REGNUM
);
5939 if (from
== ARG_POINTER_REGNUM
)
5940 return frame
.stack_pointer_offset
;
5942 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5943 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5947 /* Fill structure ix86_frame about frame of currently computed function. */
5950 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5952 HOST_WIDE_INT total_size
;
5953 unsigned int stack_alignment_needed
;
5954 HOST_WIDE_INT offset
;
5955 unsigned int preferred_alignment
;
5956 HOST_WIDE_INT size
= get_frame_size ();
5958 frame
->nregs
= ix86_nsaved_regs ();
5961 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5962 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5964 /* During reload iteration the amount of registers saved can change.
5965 Recompute the value as needed. Do not recompute when amount of registers
5966 didn't change as reload does multiple calls to the function and does not
5967 expect the decision to change within single iteration. */
5969 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5971 int count
= frame
->nregs
;
5973 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5974 /* The fast prologue uses move instead of push to save registers. This
5975 is significantly longer, but also executes faster as modern hardware
5976 can execute the moves in parallel, but can't do that for push/pop.
5978 Be careful about choosing what prologue to emit: When function takes
5979 many instructions to execute we may use slow version as well as in
5980 case function is known to be outside hot spot (this is known with
5981 feedback only). Weight the size of function by number of registers
5982 to save as it is cheap to use one or two push instructions but very
5983 slow to use many of them. */
5985 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5986 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5987 || (flag_branch_probabilities
5988 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5989 cfun
->machine
->use_fast_prologue_epilogue
= false;
5991 cfun
->machine
->use_fast_prologue_epilogue
5992 = !expensive_function_p (count
);
5994 if (TARGET_PROLOGUE_USING_MOVE
5995 && cfun
->machine
->use_fast_prologue_epilogue
)
5996 frame
->save_regs_using_mov
= true;
5998 frame
->save_regs_using_mov
= false;
6001 /* Skip return address and saved base pointer. */
6002 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
6004 frame
->hard_frame_pointer_offset
= offset
;
6006 /* Do some sanity checking of stack_alignment_needed and
6007 preferred_alignment, since i386 port is the only using those features
6008 that may break easily. */
6010 gcc_assert (!size
|| stack_alignment_needed
);
6011 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
6012 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
6013 gcc_assert (stack_alignment_needed
6014 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
6016 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
6017 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
6019 /* Register save area */
6020 offset
+= frame
->nregs
* UNITS_PER_WORD
;
6023 if (ix86_save_varrargs_registers
)
6025 offset
+= X86_64_VARARGS_SIZE
;
6026 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
6029 frame
->va_arg_size
= 0;
6031 /* Align start of frame for local function. */
6032 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
6033 & -stack_alignment_needed
) - offset
;
6035 offset
+= frame
->padding1
;
6037 /* Frame pointer points here. */
6038 frame
->frame_pointer_offset
= offset
;
6042 /* Add outgoing arguments area. Can be skipped if we eliminated
6043 all the function calls as dead code.
6044 Skipping is however impossible when function calls alloca. Alloca
6045 expander assumes that last current_function_outgoing_args_size
6046 of stack frame are unused. */
6047 if (ACCUMULATE_OUTGOING_ARGS
6048 && (!current_function_is_leaf
|| current_function_calls_alloca
6049 || ix86_current_function_calls_tls_descriptor
))
6051 offset
+= current_function_outgoing_args_size
;
6052 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
6055 frame
->outgoing_arguments_size
= 0;
6057 /* Align stack boundary. Only needed if we're calling another function
6059 if (!current_function_is_leaf
|| current_function_calls_alloca
6060 || ix86_current_function_calls_tls_descriptor
)
6061 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
6062 & -preferred_alignment
) - offset
;
6064 frame
->padding2
= 0;
6066 offset
+= frame
->padding2
;
6068 /* We've reached end of stack frame. */
6069 frame
->stack_pointer_offset
= offset
;
6071 /* Size prologue needs to allocate. */
6072 frame
->to_allocate
=
6073 (size
+ frame
->padding1
+ frame
->padding2
6074 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
6076 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
6077 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
6078 frame
->save_regs_using_mov
= false;
6080 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
6081 && current_function_is_leaf
6082 && !ix86_current_function_calls_tls_descriptor
)
6084 frame
->red_zone_size
= frame
->to_allocate
;
6085 if (frame
->save_regs_using_mov
)
6086 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
6087 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
6088 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
6091 frame
->red_zone_size
= 0;
6092 frame
->to_allocate
-= frame
->red_zone_size
;
6093 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
6095 fprintf (stderr
, "\n");
6096 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
6097 fprintf (stderr
, "size: %ld\n", (long)size
);
6098 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
6099 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
6100 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
6101 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
6102 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
6103 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
6104 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
6105 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
6106 (long)frame
->hard_frame_pointer_offset
);
6107 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
6108 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
6109 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
6110 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
6114 /* Emit code to save registers in the prologue. */
6117 ix86_emit_save_regs (void)
6122 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
6123 if (ix86_save_reg (regno
, true))
6125 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
6126 RTX_FRAME_RELATED_P (insn
) = 1;
6130 /* Emit code to save registers using MOV insns. First register
6131 is restored from POINTER + OFFSET. */
6133 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
6138 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6139 if (ix86_save_reg (regno
, true))
6141 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
6143 gen_rtx_REG (Pmode
, regno
));
6144 RTX_FRAME_RELATED_P (insn
) = 1;
6145 offset
+= UNITS_PER_WORD
;
6149 /* Expand prologue or epilogue stack adjustment.
6150 The pattern exist to put a dependency on all ebp-based memory accesses.
6151 STYLE should be negative if instructions should be marked as frame related,
6152 zero if %r11 register is live and cannot be freely used and positive
6156 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
6161 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
6162 else if (x86_64_immediate_operand (offset
, DImode
))
6163 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
6167 /* r11 is used by indirect sibcall return as well, set before the
6168 epilogue and used after the epilogue. ATM indirect sibcall
6169 shouldn't be used together with huge frame sizes in one
6170 function because of the frame_size check in sibcall.c. */
6172 r11
= gen_rtx_REG (DImode
, R11_REG
);
6173 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
6175 RTX_FRAME_RELATED_P (insn
) = 1;
6176 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
6180 RTX_FRAME_RELATED_P (insn
) = 1;
6183 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6186 ix86_internal_arg_pointer (void)
6188 bool has_force_align_arg_pointer
=
6189 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
6190 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
6191 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6192 && DECL_NAME (current_function_decl
)
6193 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
6194 && DECL_FILE_SCOPE_P (current_function_decl
))
6195 || ix86_force_align_arg_pointer
6196 || has_force_align_arg_pointer
)
6198 /* Nested functions can't realign the stack due to a register
6200 if (DECL_CONTEXT (current_function_decl
)
6201 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
6203 if (ix86_force_align_arg_pointer
)
6204 warning (0, "-mstackrealign ignored for nested functions");
6205 if (has_force_align_arg_pointer
)
6206 error ("%s not supported for nested functions",
6207 ix86_force_align_arg_pointer_string
);
6208 return virtual_incoming_args_rtx
;
6210 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, CX_REG
);
6211 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
6214 return virtual_incoming_args_rtx
;
6217 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6218 This is called from dwarf2out.c to emit call frame instructions
6219 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6221 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
6223 rtx unspec
= SET_SRC (pattern
);
6224 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
6228 case UNSPEC_REG_SAVE
:
6229 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
6230 SET_DEST (pattern
));
6232 case UNSPEC_DEF_CFA
:
6233 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
6234 INTVAL (XVECEXP (unspec
, 0, 0)));
6241 /* Expand the prologue into a bunch of separate insns. */
6244 ix86_expand_prologue (void)
6248 struct ix86_frame frame
;
6249 HOST_WIDE_INT allocate
;
6251 ix86_compute_frame_layout (&frame
);
6253 if (cfun
->machine
->force_align_arg_pointer
)
6257 /* Grab the argument pointer. */
6258 x
= plus_constant (stack_pointer_rtx
, 4);
6259 y
= cfun
->machine
->force_align_arg_pointer
;
6260 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
6261 RTX_FRAME_RELATED_P (insn
) = 1;
6263 /* The unwind info consists of two parts: install the fafp as the cfa,
6264 and record the fafp as the "save register" of the stack pointer.
6265 The later is there in order that the unwinder can see where it
6266 should restore the stack pointer across the and insn. */
6267 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
6268 x
= gen_rtx_SET (VOIDmode
, y
, x
);
6269 RTX_FRAME_RELATED_P (x
) = 1;
6270 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
6272 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
6273 RTX_FRAME_RELATED_P (y
) = 1;
6274 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
6275 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6276 REG_NOTES (insn
) = x
;
6278 /* Align the stack. */
6279 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
6282 /* And here we cheat like madmen with the unwind info. We force the
6283 cfa register back to sp+4, which is exactly what it was at the
6284 start of the function. Re-pushing the return address results in
6285 the return at the same spot relative to the cfa, and thus is
6286 correct wrt the unwind info. */
6287 x
= cfun
->machine
->force_align_arg_pointer
;
6288 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
6289 insn
= emit_insn (gen_push (x
));
6290 RTX_FRAME_RELATED_P (insn
) = 1;
6293 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
6294 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
6295 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6296 REG_NOTES (insn
) = x
;
6299 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6300 slower on all targets. Also sdb doesn't like it. */
6302 if (frame_pointer_needed
)
6304 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
6305 RTX_FRAME_RELATED_P (insn
) = 1;
6307 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
6308 RTX_FRAME_RELATED_P (insn
) = 1;
6311 allocate
= frame
.to_allocate
;
6313 if (!frame
.save_regs_using_mov
)
6314 ix86_emit_save_regs ();
6316 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
6318 /* When using red zone we may start register saving before allocating
6319 the stack frame saving one cycle of the prologue. */
6320 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
6321 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
6322 : stack_pointer_rtx
,
6323 -frame
.nregs
* UNITS_PER_WORD
);
6327 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
6328 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6329 GEN_INT (-allocate
), -1);
6332 /* Only valid for Win32. */
6333 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
6337 gcc_assert (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
);
6339 if (TARGET_64BIT_MS_ABI
)
6342 eax_live
= ix86_eax_live_at_start_p ();
6346 emit_insn (gen_push (eax
));
6347 allocate
-= UNITS_PER_WORD
;
6350 emit_move_insn (eax
, GEN_INT (allocate
));
6353 insn
= gen_allocate_stack_worker_64 (eax
);
6355 insn
= gen_allocate_stack_worker_32 (eax
);
6356 insn
= emit_insn (insn
);
6357 RTX_FRAME_RELATED_P (insn
) = 1;
6358 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
6359 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
6360 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
6361 t
, REG_NOTES (insn
));
6365 if (frame_pointer_needed
)
6366 t
= plus_constant (hard_frame_pointer_rtx
,
6369 - frame
.nregs
* UNITS_PER_WORD
);
6371 t
= plus_constant (stack_pointer_rtx
, allocate
);
6372 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
6376 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6378 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6379 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6381 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6382 -frame
.nregs
* UNITS_PER_WORD
);
6385 pic_reg_used
= false;
6386 if (pic_offset_table_rtx
6387 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
6388 || current_function_profile
))
6390 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6392 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6393 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
6395 pic_reg_used
= true;
6402 if (ix86_cmodel
== CM_LARGE_PIC
)
6404 rtx tmp_reg
= gen_rtx_REG (DImode
, R11_REG
);
6405 rtx label
= gen_label_rtx ();
6407 LABEL_PRESERVE_P (label
) = 1;
6408 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6409 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6410 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6411 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6412 pic_offset_table_rtx
, tmp_reg
));
6415 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6418 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6421 /* Prevent function calls from being scheduled before the call to mcount.
6422 In the pic_reg_used case, make sure that the got load isn't deleted. */
6423 if (current_function_profile
)
6426 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
6427 emit_insn (gen_blockage ());
6431 /* Emit code to restore saved registers using MOV insns. First register
6432 is restored from POINTER + OFFSET. */
6434 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6435 int maybe_eh_return
)
6438 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6440 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6441 if (ix86_save_reg (regno
, maybe_eh_return
))
6443 /* Ensure that adjust_address won't be forced to produce pointer
6444 out of range allowed by x86-64 instruction set. */
6445 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6449 r11
= gen_rtx_REG (DImode
, R11_REG
);
6450 emit_move_insn (r11
, GEN_INT (offset
));
6451 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6452 base_address
= gen_rtx_MEM (Pmode
, r11
);
6455 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6456 adjust_address (base_address
, Pmode
, offset
));
6457 offset
+= UNITS_PER_WORD
;
6461 /* Restore function stack, frame, and registers. */
6464 ix86_expand_epilogue (int style
)
6467 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6468 struct ix86_frame frame
;
6469 HOST_WIDE_INT offset
;
6471 ix86_compute_frame_layout (&frame
);
6473 /* Calculate start of saved registers relative to ebp. Special care
6474 must be taken for the normal return case of a function using
6475 eh_return: the eax and edx registers are marked as saved, but not
6476 restored along this path. */
6477 offset
= frame
.nregs
;
6478 if (current_function_calls_eh_return
&& style
!= 2)
6480 offset
*= -UNITS_PER_WORD
;
6482 /* If we're only restoring one register and sp is not valid then
6483 using a move instruction to restore the register since it's
6484 less work than reloading sp and popping the register.
6486 The default code result in stack adjustment using add/lea instruction,
6487 while this code results in LEAVE instruction (or discrete equivalent),
6488 so it is profitable in some other cases as well. Especially when there
6489 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6490 and there is exactly one register to pop. This heuristic may need some
6491 tuning in future. */
6492 if ((!sp_valid
&& frame
.nregs
<= 1)
6493 || (TARGET_EPILOGUE_USING_MOVE
6494 && cfun
->machine
->use_fast_prologue_epilogue
6495 && (frame
.nregs
> 1 || frame
.to_allocate
))
6496 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6497 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6498 && cfun
->machine
->use_fast_prologue_epilogue
6499 && frame
.nregs
== 1)
6500 || current_function_calls_eh_return
)
6502 /* Restore registers. We can use ebp or esp to address the memory
6503 locations. If both are available, default to ebp, since offsets
6504 are known to be small. Only exception is esp pointing directly to the
6505 end of block of saved registers, where we may simplify addressing
6508 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6509 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6510 frame
.to_allocate
, style
== 2);
6512 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6513 offset
, style
== 2);
6515 /* eh_return epilogues need %ecx added to the stack pointer. */
6518 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6520 if (frame_pointer_needed
)
6522 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6523 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6524 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6526 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6527 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6529 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6534 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6535 tmp
= plus_constant (tmp
, (frame
.to_allocate
6536 + frame
.nregs
* UNITS_PER_WORD
));
6537 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6540 else if (!frame_pointer_needed
)
6541 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6542 GEN_INT (frame
.to_allocate
6543 + frame
.nregs
* UNITS_PER_WORD
),
6545 /* If not an i386, mov & pop is faster than "leave". */
6546 else if (TARGET_USE_LEAVE
|| optimize_size
6547 || !cfun
->machine
->use_fast_prologue_epilogue
)
6548 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6551 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6552 hard_frame_pointer_rtx
,
6555 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6557 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6562 /* First step is to deallocate the stack frame so that we can
6563 pop the registers. */
6566 gcc_assert (frame_pointer_needed
);
6567 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6568 hard_frame_pointer_rtx
,
6569 GEN_INT (offset
), style
);
6571 else if (frame
.to_allocate
)
6572 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6573 GEN_INT (frame
.to_allocate
), style
);
6575 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6576 if (ix86_save_reg (regno
, false))
6579 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6581 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6583 if (frame_pointer_needed
)
6585 /* Leave results in shorter dependency chains on CPUs that are
6586 able to grok it fast. */
6587 if (TARGET_USE_LEAVE
)
6588 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6589 else if (TARGET_64BIT
)
6590 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6592 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6596 if (cfun
->machine
->force_align_arg_pointer
)
6598 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6599 cfun
->machine
->force_align_arg_pointer
,
6603 /* Sibcall epilogues don't want a return instruction. */
6607 if (current_function_pops_args
&& current_function_args_size
)
6609 rtx popc
= GEN_INT (current_function_pops_args
);
6611 /* i386 can only pop 64K bytes. If asked to pop more, pop
6612 return address, do explicit add, and jump indirectly to the
6615 if (current_function_pops_args
>= 65536)
6617 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
6619 /* There is no "pascal" calling convention in any 64bit ABI. */
6620 gcc_assert (!TARGET_64BIT
);
6622 emit_insn (gen_popsi1 (ecx
));
6623 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6624 emit_jump_insn (gen_return_indirect_internal (ecx
));
6627 emit_jump_insn (gen_return_pop_internal (popc
));
6630 emit_jump_insn (gen_return_internal ());
6633 /* Reset from the function's potential modifications. */
6636 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6637 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6639 if (pic_offset_table_rtx
)
6640 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
6642 /* Mach-O doesn't support labels at the end of objects, so if
6643 it looks like we might want one, insert a NOP. */
6645 rtx insn
= get_last_insn ();
6648 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
6649 insn
= PREV_INSN (insn
);
6653 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
6654 fputs ("\tnop\n", file
);
6660 /* Extract the parts of an RTL expression that is a valid memory address
6661 for an instruction. Return 0 if the structure of the address is
6662 grossly off. Return -1 if the address contains ASHIFT, so it is not
6663 strictly valid, but still used for computing length of lea instruction. */
6666 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6668 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6669 rtx base_reg
, index_reg
;
6670 HOST_WIDE_INT scale
= 1;
6671 rtx scale_rtx
= NULL_RTX
;
6673 enum ix86_address_seg seg
= SEG_DEFAULT
;
6675 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6677 else if (GET_CODE (addr
) == PLUS
)
6687 addends
[n
++] = XEXP (op
, 1);
6690 while (GET_CODE (op
) == PLUS
);
6695 for (i
= n
; i
>= 0; --i
)
6698 switch (GET_CODE (op
))
6703 index
= XEXP (op
, 0);
6704 scale_rtx
= XEXP (op
, 1);
6708 if (XINT (op
, 1) == UNSPEC_TP
6709 && TARGET_TLS_DIRECT_SEG_REFS
6710 && seg
== SEG_DEFAULT
)
6711 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6740 else if (GET_CODE (addr
) == MULT
)
6742 index
= XEXP (addr
, 0); /* index*scale */
6743 scale_rtx
= XEXP (addr
, 1);
6745 else if (GET_CODE (addr
) == ASHIFT
)
6749 /* We're called for lea too, which implements ashift on occasion. */
6750 index
= XEXP (addr
, 0);
6751 tmp
= XEXP (addr
, 1);
6752 if (!CONST_INT_P (tmp
))
6754 scale
= INTVAL (tmp
);
6755 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6761 disp
= addr
; /* displacement */
6763 /* Extract the integral value of scale. */
6766 if (!CONST_INT_P (scale_rtx
))
6768 scale
= INTVAL (scale_rtx
);
6771 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6772 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6774 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6775 if (base_reg
&& index_reg
&& scale
== 1
6776 && (index_reg
== arg_pointer_rtx
6777 || index_reg
== frame_pointer_rtx
6778 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6781 tmp
= base
, base
= index
, index
= tmp
;
6782 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6785 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6786 if ((base_reg
== hard_frame_pointer_rtx
6787 || base_reg
== frame_pointer_rtx
6788 || base_reg
== arg_pointer_rtx
) && !disp
)
6791 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6792 Avoid this by transforming to [%esi+0]. */
6793 if (TARGET_K6
&& !optimize_size
6794 && base_reg
&& !index_reg
&& !disp
6796 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6799 /* Special case: encode reg+reg instead of reg*2. */
6800 if (!base
&& index
&& scale
&& scale
== 2)
6801 base
= index
, base_reg
= index_reg
, scale
= 1;
6803 /* Special case: scaling cannot be encoded without base or displacement. */
6804 if (!base
&& !disp
&& index
&& scale
!= 1)
6816 /* Return cost of the memory address x.
6817 For i386, it is better to use a complex address than let gcc copy
6818 the address into a reg and make a new pseudo. But not if the address
6819 requires to two regs - that would mean more pseudos with longer
6822 ix86_address_cost (rtx x
)
6824 struct ix86_address parts
;
6826 int ok
= ix86_decompose_address (x
, &parts
);
6830 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6831 parts
.base
= SUBREG_REG (parts
.base
);
6832 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6833 parts
.index
= SUBREG_REG (parts
.index
);
6835 /* Attempt to minimize number of registers in the address. */
6837 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6839 && (!REG_P (parts
.index
)
6840 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6844 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6846 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6847 && parts
.base
!= parts
.index
)
6850 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6851 since it's predecode logic can't detect the length of instructions
6852 and it degenerates to vector decoded. Increase cost of such
6853 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6854 to split such addresses or even refuse such addresses at all.
6856 Following addressing modes are affected:
6861 The first and last case may be avoidable by explicitly coding the zero in
6862 memory address, but I don't have AMD-K6 machine handy to check this
6866 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6867 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6868 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6874 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6875 this is used for to form addresses to local data when -fPIC is in
6879 darwin_local_data_pic (rtx disp
)
6881 if (GET_CODE (disp
) == MINUS
)
6883 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6884 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6885 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6887 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6888 if (! strcmp (sym_name
, "<pic base>"))
6896 /* Determine if a given RTX is a valid constant. We already know this
6897 satisfies CONSTANT_P. */
6900 legitimate_constant_p (rtx x
)
6902 switch (GET_CODE (x
))
6907 if (GET_CODE (x
) == PLUS
)
6909 if (!CONST_INT_P (XEXP (x
, 1)))
6914 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6917 /* Only some unspecs are valid as "constants". */
6918 if (GET_CODE (x
) == UNSPEC
)
6919 switch (XINT (x
, 1))
6924 return TARGET_64BIT
;
6927 x
= XVECEXP (x
, 0, 0);
6928 return (GET_CODE (x
) == SYMBOL_REF
6929 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6931 x
= XVECEXP (x
, 0, 0);
6932 return (GET_CODE (x
) == SYMBOL_REF
6933 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6938 /* We must have drilled down to a symbol. */
6939 if (GET_CODE (x
) == LABEL_REF
)
6941 if (GET_CODE (x
) != SYMBOL_REF
)
6946 /* TLS symbols are never valid. */
6947 if (SYMBOL_REF_TLS_MODEL (x
))
6950 /* DLLIMPORT symbols are never valid. */
6951 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6952 && SYMBOL_REF_DLLIMPORT_P (x
))
6957 if (GET_MODE (x
) == TImode
6958 && x
!= CONST0_RTX (TImode
)
6964 if (x
== CONST0_RTX (GET_MODE (x
)))
6972 /* Otherwise we handle everything else in the move patterns. */
6976 /* Determine if it's legal to put X into the constant pool. This
6977 is not possible for the address of thread-local symbols, which
6978 is checked above. */
6981 ix86_cannot_force_const_mem (rtx x
)
6983 /* We can always put integral constants and vectors in memory. */
6984 switch (GET_CODE (x
))
6994 return !legitimate_constant_p (x
);
6997 /* Determine if a given RTX is a valid constant address. */
7000 constant_address_p (rtx x
)
7002 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
7005 /* Nonzero if the constant value X is a legitimate general operand
7006 when generating PIC code. It is given that flag_pic is on and
7007 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7010 legitimate_pic_operand_p (rtx x
)
7014 switch (GET_CODE (x
))
7017 inner
= XEXP (x
, 0);
7018 if (GET_CODE (inner
) == PLUS
7019 && CONST_INT_P (XEXP (inner
, 1)))
7020 inner
= XEXP (inner
, 0);
7022 /* Only some unspecs are valid as "constants". */
7023 if (GET_CODE (inner
) == UNSPEC
)
7024 switch (XINT (inner
, 1))
7029 return TARGET_64BIT
;
7031 x
= XVECEXP (inner
, 0, 0);
7032 return (GET_CODE (x
) == SYMBOL_REF
7033 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
7041 return legitimate_pic_address_disp_p (x
);
7048 /* Determine if a given CONST RTX is a valid memory displacement
7052 legitimate_pic_address_disp_p (rtx disp
)
7056 /* In 64bit mode we can allow direct addresses of symbols and labels
7057 when they are not dynamic symbols. */
7060 rtx op0
= disp
, op1
;
7062 switch (GET_CODE (disp
))
7068 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
7070 op0
= XEXP (XEXP (disp
, 0), 0);
7071 op1
= XEXP (XEXP (disp
, 0), 1);
7072 if (!CONST_INT_P (op1
)
7073 || INTVAL (op1
) >= 16*1024*1024
7074 || INTVAL (op1
) < -16*1024*1024)
7076 if (GET_CODE (op0
) == LABEL_REF
)
7078 if (GET_CODE (op0
) != SYMBOL_REF
)
7083 /* TLS references should always be enclosed in UNSPEC. */
7084 if (SYMBOL_REF_TLS_MODEL (op0
))
7086 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
7087 && ix86_cmodel
!= CM_LARGE_PIC
)
7095 if (GET_CODE (disp
) != CONST
)
7097 disp
= XEXP (disp
, 0);
7101 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7102 of GOT tables. We should not need these anyway. */
7103 if (GET_CODE (disp
) != UNSPEC
7104 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
7105 && XINT (disp
, 1) != UNSPEC_GOTOFF
7106 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
7109 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
7110 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
7116 if (GET_CODE (disp
) == PLUS
)
7118 if (!CONST_INT_P (XEXP (disp
, 1)))
7120 disp
= XEXP (disp
, 0);
7124 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
7127 if (GET_CODE (disp
) != UNSPEC
)
7130 switch (XINT (disp
, 1))
7135 /* We need to check for both symbols and labels because VxWorks loads
7136 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7138 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7139 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
7141 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7142 While ABI specify also 32bit relocation but we don't produce it in
7143 small PIC model at all. */
7144 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7145 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
7147 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
7149 case UNSPEC_GOTTPOFF
:
7150 case UNSPEC_GOTNTPOFF
:
7151 case UNSPEC_INDNTPOFF
:
7154 disp
= XVECEXP (disp
, 0, 0);
7155 return (GET_CODE (disp
) == SYMBOL_REF
7156 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
7158 disp
= XVECEXP (disp
, 0, 0);
7159 return (GET_CODE (disp
) == SYMBOL_REF
7160 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
7162 disp
= XVECEXP (disp
, 0, 0);
7163 return (GET_CODE (disp
) == SYMBOL_REF
7164 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
7170 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7171 memory address for an instruction. The MODE argument is the machine mode
7172 for the MEM expression that wants to use this address.
7174 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7175 convert common non-canonical forms to canonical form so that they will
7179 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
7180 rtx addr
, int strict
)
7182 struct ix86_address parts
;
7183 rtx base
, index
, disp
;
7184 HOST_WIDE_INT scale
;
7185 const char *reason
= NULL
;
7186 rtx reason_rtx
= NULL_RTX
;
7188 if (ix86_decompose_address (addr
, &parts
) <= 0)
7190 reason
= "decomposition failed";
7195 index
= parts
.index
;
7197 scale
= parts
.scale
;
7199 /* Validate base register.
7201 Don't allow SUBREG's that span more than a word here. It can lead to spill
7202 failures when the base is one word out of a two word structure, which is
7203 represented internally as a DImode int. */
7212 else if (GET_CODE (base
) == SUBREG
7213 && REG_P (SUBREG_REG (base
))
7214 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
7216 reg
= SUBREG_REG (base
);
7219 reason
= "base is not a register";
7223 if (GET_MODE (base
) != Pmode
)
7225 reason
= "base is not in Pmode";
7229 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
7230 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
7232 reason
= "base is not valid";
7237 /* Validate index register.
7239 Don't allow SUBREG's that span more than a word here -- same as above. */
7248 else if (GET_CODE (index
) == SUBREG
7249 && REG_P (SUBREG_REG (index
))
7250 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
7252 reg
= SUBREG_REG (index
);
7255 reason
= "index is not a register";
7259 if (GET_MODE (index
) != Pmode
)
7261 reason
= "index is not in Pmode";
7265 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
7266 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
7268 reason
= "index is not valid";
7273 /* Validate scale factor. */
7276 reason_rtx
= GEN_INT (scale
);
7279 reason
= "scale without index";
7283 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
7285 reason
= "scale is not a valid multiplier";
7290 /* Validate displacement. */
7295 if (GET_CODE (disp
) == CONST
7296 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
7297 switch (XINT (XEXP (disp
, 0), 1))
7299 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7300 used. While ABI specify also 32bit relocations, we don't produce
7301 them at all and use IP relative instead. */
7304 gcc_assert (flag_pic
);
7306 goto is_legitimate_pic
;
7307 reason
= "64bit address unspec";
7310 case UNSPEC_GOTPCREL
:
7311 gcc_assert (flag_pic
);
7312 goto is_legitimate_pic
;
7314 case UNSPEC_GOTTPOFF
:
7315 case UNSPEC_GOTNTPOFF
:
7316 case UNSPEC_INDNTPOFF
:
7322 reason
= "invalid address unspec";
7326 else if (SYMBOLIC_CONST (disp
)
7330 && MACHOPIC_INDIRECT
7331 && !machopic_operand_p (disp
)
7337 if (TARGET_64BIT
&& (index
|| base
))
7339 /* foo@dtpoff(%rX) is ok. */
7340 if (GET_CODE (disp
) != CONST
7341 || GET_CODE (XEXP (disp
, 0)) != PLUS
7342 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7343 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7344 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7345 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7347 reason
= "non-constant pic memory reference";
7351 else if (! legitimate_pic_address_disp_p (disp
))
7353 reason
= "displacement is an invalid pic construct";
7357 /* This code used to verify that a symbolic pic displacement
7358 includes the pic_offset_table_rtx register.
7360 While this is good idea, unfortunately these constructs may
7361 be created by "adds using lea" optimization for incorrect
7370 This code is nonsensical, but results in addressing
7371 GOT table with pic_offset_table_rtx base. We can't
7372 just refuse it easily, since it gets matched by
7373 "addsi3" pattern, that later gets split to lea in the
7374 case output register differs from input. While this
7375 can be handled by separate addsi pattern for this case
7376 that never results in lea, this seems to be easier and
7377 correct fix for crash to disable this test. */
7379 else if (GET_CODE (disp
) != LABEL_REF
7380 && !CONST_INT_P (disp
)
7381 && (GET_CODE (disp
) != CONST
7382 || !legitimate_constant_p (disp
))
7383 && (GET_CODE (disp
) != SYMBOL_REF
7384 || !legitimate_constant_p (disp
)))
7386 reason
= "displacement is not constant";
7389 else if (TARGET_64BIT
7390 && !x86_64_immediate_operand (disp
, VOIDmode
))
7392 reason
= "displacement is out of range";
7397 /* Everything looks valid. */
7404 /* Return a unique alias set for the GOT. */
7406 static alias_set_type
7407 ix86_GOT_alias_set (void)
7409 static alias_set_type set
= -1;
7411 set
= new_alias_set ();
7415 /* Return a legitimate reference for ORIG (an address) using the
7416 register REG. If REG is 0, a new pseudo is generated.
7418 There are two types of references that must be handled:
7420 1. Global data references must load the address from the GOT, via
7421 the PIC reg. An insn is emitted to do this load, and the reg is
7424 2. Static data references, constant pool addresses, and code labels
7425 compute the address as an offset from the GOT, whose base is in
7426 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7427 differentiate them from global data objects. The returned
7428 address is the PIC reg + an unspec constant.
7430 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7431 reg also appears in the address. */
7434 legitimize_pic_address (rtx orig
, rtx reg
)
7441 if (TARGET_MACHO
&& !TARGET_64BIT
)
7444 reg
= gen_reg_rtx (Pmode
);
7445 /* Use the generic Mach-O PIC machinery. */
7446 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7450 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7452 else if (TARGET_64BIT
7453 && ix86_cmodel
!= CM_SMALL_PIC
7454 && gotoff_operand (addr
, Pmode
))
7457 /* This symbol may be referenced via a displacement from the PIC
7458 base address (@GOTOFF). */
7460 if (reload_in_progress
)
7461 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7462 if (GET_CODE (addr
) == CONST
)
7463 addr
= XEXP (addr
, 0);
7464 if (GET_CODE (addr
) == PLUS
)
7466 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7468 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7471 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7472 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7474 tmpreg
= gen_reg_rtx (Pmode
);
7477 emit_move_insn (tmpreg
, new_rtx
);
7481 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7482 tmpreg
, 1, OPTAB_DIRECT
);
7485 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7487 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7489 /* This symbol may be referenced via a displacement from the PIC
7490 base address (@GOTOFF). */
7492 if (reload_in_progress
)
7493 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7494 if (GET_CODE (addr
) == CONST
)
7495 addr
= XEXP (addr
, 0);
7496 if (GET_CODE (addr
) == PLUS
)
7498 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7500 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7503 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7504 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7505 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7509 emit_move_insn (reg
, new_rtx
);
7513 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7514 /* We can't use @GOTOFF for text labels on VxWorks;
7515 see gotoff_operand. */
7516 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7518 /* Given that we've already handled dllimport variables separately
7519 in legitimize_address, and all other variables should satisfy
7520 legitimate_pic_address_disp_p, we should never arrive here. */
7521 gcc_assert (!TARGET_64BIT_MS_ABI
);
7523 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7525 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7526 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7527 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7528 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7531 reg
= gen_reg_rtx (Pmode
);
7532 /* Use directly gen_movsi, otherwise the address is loaded
7533 into register for CSE. We don't want to CSE this addresses,
7534 instead we CSE addresses from the GOT table, so skip this. */
7535 emit_insn (gen_movsi (reg
, new_rtx
));
7540 /* This symbol must be referenced via a load from the
7541 Global Offset Table (@GOT). */
7543 if (reload_in_progress
)
7544 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7545 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7546 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7548 new_rtx
= force_reg (Pmode
, new_rtx
);
7549 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7550 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7551 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7554 reg
= gen_reg_rtx (Pmode
);
7555 emit_move_insn (reg
, new_rtx
);
7561 if (CONST_INT_P (addr
)
7562 && !x86_64_immediate_operand (addr
, VOIDmode
))
7566 emit_move_insn (reg
, addr
);
7570 new_rtx
= force_reg (Pmode
, addr
);
7572 else if (GET_CODE (addr
) == CONST
)
7574 addr
= XEXP (addr
, 0);
7576 /* We must match stuff we generate before. Assume the only
7577 unspecs that can get here are ours. Not that we could do
7578 anything with them anyway.... */
7579 if (GET_CODE (addr
) == UNSPEC
7580 || (GET_CODE (addr
) == PLUS
7581 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7583 gcc_assert (GET_CODE (addr
) == PLUS
);
7585 if (GET_CODE (addr
) == PLUS
)
7587 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7589 /* Check first to see if this is a constant offset from a @GOTOFF
7590 symbol reference. */
7591 if (gotoff_operand (op0
, Pmode
)
7592 && CONST_INT_P (op1
))
7596 if (reload_in_progress
)
7597 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7598 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7600 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
7601 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7602 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7606 emit_move_insn (reg
, new_rtx
);
7612 if (INTVAL (op1
) < -16*1024*1024
7613 || INTVAL (op1
) >= 16*1024*1024)
7615 if (!x86_64_immediate_operand (op1
, Pmode
))
7616 op1
= force_reg (Pmode
, op1
);
7617 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7623 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7624 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
7625 base
== reg
? NULL_RTX
: reg
);
7627 if (CONST_INT_P (new_rtx
))
7628 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
7631 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
7633 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
7634 new_rtx
= XEXP (new_rtx
, 1);
7636 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
/* NOTE(review): this chunk is a lossy extraction -- logical source lines are
   split across physical lines and several original lines (declarations,
   braces, the !to_reg early return) are missing, so the text below is not
   compilable as-is; it is preserved byte-for-byte.  */
/* Builds the thread-pointer rtx (unspec:P [(const_int 0)] UNSPEC_TP) and,
   presumably when TO_REG is nonzero, copies it into a fresh pseudo via an
   explicit SET insn -- TODO confirm against the missing control flow.  */
7644 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
7647 get_thread_pointer (int to_reg
)
7651   tp
 = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7655   reg
 = gen_reg_rtx (Pmode
);
7656   insn
 = gen_rtx_SET (VOIDmode
, reg
, tp
);
7657   insn
 = emit_insn (insn
);
/* NOTE(review): lossy extraction -- logical lines are shattered across
   physical lines and many original lines (switch head, braces, returns,
   some case labels) are missing; the text is preserved byte-for-byte and
   is not compilable as-is.  Comments below are hedged review notes only.  */
/* Legitimizes a TLS symbol X for access model MODEL.  The visible code
   handles the four ELF TLS models (global-dynamic, local-dynamic,
   initial-exec, local-exec), building the appropriate UNSPEC_* relocations
   and, for GNU2 (TLSDESC-style) TLS, adding the thread pointer explicitly.
   Exact per-branch behavior cannot be fully confirmed here -- TODO verify
   against an intact copy of i386.c.  */
7662 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
7663 false if we expect this to be used for a memory address and true if
7664 we expect to load the address into a register.  */
7667 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7669   rtx dest
, base
, off
, pic
, tp
;
/* Global-dynamic: call __tls_get_addr (64-bit via rax) or the 32-bit
   pattern; GNU2 TLS adds the thread pointer to the result.  */
7674     case TLS_MODEL_GLOBAL_DYNAMIC
:
7675       dest
 = gen_reg_rtx (Pmode
);
7676       tp
 = TARGET_GNU2_TLS
 ? get_thread_pointer (1) : 0;
7678       if (TARGET_64BIT
 && ! TARGET_GNU2_TLS
)
7680 	  rtx rax
 = gen_rtx_REG (Pmode
, AX_REG
), insns
;
7683 	  emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7684 	  insns
 = get_insns ();
7687 	  CONST_OR_PURE_CALL_P (insns
) = 1;
7688 	  emit_libcall_block (insns
, dest
, rax
, x
);
7690       else if (TARGET_64BIT
 && TARGET_GNU2_TLS
)
7691 	emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7693 	emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7695       if (TARGET_GNU2_TLS
)
7697 	  dest
 = force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7699 	  set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
/* Local-dynamic: one __tls_get_addr call for the module base, then a
   @DTPOFF offset per symbol.  */
7703     case TLS_MODEL_LOCAL_DYNAMIC
:
7704       base
 = gen_reg_rtx (Pmode
);
7705       tp
 = TARGET_GNU2_TLS
 ? get_thread_pointer (1) : 0;
7707       if (TARGET_64BIT
 && ! TARGET_GNU2_TLS
)
7709 	  rtx rax
 = gen_rtx_REG (Pmode
, AX_REG
), insns
, note
;
7712 	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7713 	  insns
 = get_insns ();
7716 	  note
 = gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7717 	  note
 = gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7718 	  CONST_OR_PURE_CALL_P (insns
) = 1;
7719 	  emit_libcall_block (insns
, base
, rax
, note
);
7721       else if (TARGET_64BIT
 && TARGET_GNU2_TLS
)
7722 	emit_insn (gen_tls_local_dynamic_base_64 (base
));
7724 	emit_insn (gen_tls_local_dynamic_base_32 (base
));
7726       if (TARGET_GNU2_TLS
)
7728 	  rtx x
 = ix86_tls_module_base ();
7730 	  set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7731 			       gen_rtx_MINUS (Pmode
, x
, tp
));
7734       off
 = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7735       off
 = gen_rtx_CONST (Pmode
, off
);
7737       dest
 = force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7739       if (TARGET_GNU2_TLS
)
7741 	  dest
 = force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7743 	  set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
/* Initial-exec: load the TP offset from the GOT (@GOTTPOFF family),
   selecting the unspec by PIC mode and GNU vs. Sun TLS flavor.  */
7748     case TLS_MODEL_INITIAL_EXEC
:
7752 	  type
 = UNSPEC_GOTNTPOFF
;
7756 	  if (reload_in_progress
)
7757 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7758 	  pic
 = pic_offset_table_rtx
;
7759 	  type
 = TARGET_ANY_GNU_TLS
 ? UNSPEC_GOTNTPOFF
 : UNSPEC_GOTTPOFF
;
7761       else if (!TARGET_ANY_GNU_TLS
)
7763 	  pic
 = gen_reg_rtx (Pmode
);
7764 	  emit_insn (gen_set_got (pic
));
7765 	  type
 = UNSPEC_GOTTPOFF
;
7770 	  type
 = UNSPEC_INDNTPOFF
;
7773       off
 = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7774       off
 = gen_rtx_CONST (Pmode
, off
);
7776 	off
 = gen_rtx_PLUS (Pmode
, pic
, off
);
7777       off
 = gen_const_mem (Pmode
, off
);
7778       set_mem_alias_set (off
, ix86_GOT_alias_set ());
7780       if (TARGET_64BIT
 || TARGET_ANY_GNU_TLS
)
7782           base
 = get_thread_pointer (for_mov
 || !TARGET_TLS_DIRECT_SEG_REFS
);
7783 	  off
 = force_reg (Pmode
, off
);
7784 	  return gen_rtx_PLUS (Pmode
, base
, off
);
7788 	  base
 = get_thread_pointer (true);
7789 	  dest
 = gen_reg_rtx (Pmode
);
7790 	  emit_insn (gen_subsi3 (dest
, base
, off
));
/* Local-exec: the offset is a link-time constant (@NTPOFF/@TPOFF).  */
7794     case TLS_MODEL_LOCAL_EXEC
:
7795       off
 = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7796 			    (TARGET_64BIT
 || TARGET_ANY_GNU_TLS
)
7797 			    ? UNSPEC_NTPOFF
 : UNSPEC_TPOFF
);
7798       off
 = gen_rtx_CONST (Pmode
, off
);
7800       if (TARGET_64BIT
 || TARGET_ANY_GNU_TLS
)
7802 	  base
 = get_thread_pointer (for_mov
 || !TARGET_TLS_DIRECT_SEG_REFS
);
7803 	  return gen_rtx_PLUS (Pmode
, base
, off
);
7807 	  base
 = get_thread_pointer (true);
7808 	  dest
 = gen_reg_rtx (Pmode
);
7809 	  emit_insn (gen_subsi3 (dest
, base
, off
));
/* NOTE(review): lossy extraction -- logical lines split across physical
   lines; several original lines (locals, the early hash-table hit return,
   braces) are missing.  Text preserved byte-for-byte; not compilable.  */
/* Maps DECL to a lazily-created artificial VAR_DECL whose RTL is a
   const MEM of the "*__imp_..." / "*__imp__..." symbol, caching results
   in the GC-managed hash table dllimport_map.  The prefix choice keys off
   FASTCALL_PREFIX on the first character of the stripped assembler name.  */
7820 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7823 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7824   htab_t dllimport_map
;
7827 get_dllimport_decl (tree decl
)
7829   struct tree_map
 *h
, in
;
7833   size_t namelen
, prefixlen
;
7839     dllimport_map
 = htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7841   in
.hash
 = htab_hash_pointer (decl
);
7842   in
.base
.from
 = decl
;
7843   loc
 = htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7844   h
 = (struct tree_map
 *) *loc
;
/* Cache miss (presumably guarded by a missing "if (h) return h->to;") --
   allocate and populate a new map entry.  TODO confirm.  */
7848   *loc
 = h
 = GGC_NEW (struct tree_map
);
7850   h
->base
.from
 = decl
;
7851   h
->to
 = to
 = build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7852   DECL_ARTIFICIAL (to
) = 1;
7853   DECL_IGNORED_P (to
) = 1;
7854   DECL_EXTERNAL (to
) = 1;
7855   TREE_READONLY (to
) = 1;
7857   name
 = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7858   name
 = targetm
.strip_name_encoding (name
);
7859   prefix
 = name
[0] == FASTCALL_PREFIX
 ?  "*__imp_": "*__imp__";
7860   namelen
 = strlen (name
);
7861   prefixlen
 = strlen (prefix
);
7862   imp_name
 = (char *) alloca (namelen
 + prefixlen
 + 1);
7863   memcpy (imp_name
, prefix
, prefixlen
);
7864   memcpy (imp_name
 + prefixlen
, name
, namelen
 + 1);
7866   name
 = ggc_alloc_string (imp_name
, namelen
 + prefixlen
);
7867   rtl
 = gen_rtx_SYMBOL_REF (Pmode
, name
);
7868   SET_SYMBOL_REF_DECL (rtl
, to
);
7869   SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7871   rtl
 = gen_const_mem (Pmode
, rtl
);
7872   set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7874   SET_DECL_RTL (to
, rtl
);
7875   SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
/* NOTE(review): lossy extraction -- lines shattered and some originals
   (locals, the return) missing.  Text preserved byte-for-byte.  */
/* Rewrites SYMBOL (which must carry a SYMBOL_REF_DECL) into the DECL_RTL
   of its cached __imp_ indirection decl; when want_reg, presumably forces
   the result into a register -- TODO confirm the missing guard.  */
7880 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
7881 true if we require the result be a register.  */
7884 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
7889   gcc_assert (SYMBOL_REF_DECL (symbol
));
7890   imp_decl
 = get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
7892   x
 = DECL_RTL (imp_decl
);
7894     x
 = force_reg (Pmode
, x
);
/* NOTE(review): lossy extraction -- logical lines split across physical
   lines; many original lines (braces, "changed = 1" updates, returns)
   are missing.  Text preserved byte-for-byte; not compilable as-is.  */
/* Target hook to canonicalize an illegitimate address X of mode MODE.
   Visible transformations: TLS and dllimport symbol legitimization,
   PIC symbol legitimization, shift-by-0..3 -> multiply canonicalization,
   PLUS reassociation so the MULT term comes first, and forcing of
   non-register subterms into registers.  Exact fallthrough/return
   structure cannot be confirmed from the surviving lines.  */
7898 /* Try machine-dependent ways of modifying an illegitimate address
7899 to be legitimate.  If we find one, return the new, valid address.
7900 This macro is used in only one place: `memory_address' in explow.c.
7902 OLDX is the address as it was before break_out_memory_refs was called.
7903 In some cases it is useful to look at this to decide what needs to be done.
7905 MODE and WIN are passed so that this macro can use
7906 GO_IF_LEGITIMATE_ADDRESS.
7908 It is always safe for this macro to do nothing.  It exists to recognize
7909 opportunities to optimize the output.
7911 For the 80386, we handle X+REG by loading X into a register R and
7912 using R+REG.  R will go in a general reg and indexing will be used.
7913 However, if REG is a broken-out memory address or multiplication,
7914 nothing needs to be done because REG can certainly go in a general reg.
7916 When -fpic is used, special handling is needed for symbolic references.
7917 See comments by legitimize_pic_address in i386.c for details.  */
7920 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7925   log
 = GET_CODE (x
) == SYMBOL_REF
 ? SYMBOL_REF_TLS_MODEL (x
) : 0;
7927     return legitimize_tls_address (x
, (enum tls_model
) log
, false);
7928   if (GET_CODE (x
) == CONST
7929       && GET_CODE (XEXP (x
, 0)) == PLUS
7930       && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7931       && (log
 = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7933       rtx t
 = legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
7934 				     (enum tls_model
) log
, false);
7935       return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7938   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7940       if (GET_CODE (x
) == SYMBOL_REF
 && SYMBOL_REF_DLLIMPORT_P (x
))
7941 	return legitimize_dllimport_symbol (x
, true);
7942       if (GET_CODE (x
) == CONST
7943 	  && GET_CODE (XEXP (x
, 0)) == PLUS
7944 	  && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7945 	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7947 	  rtx t
 = legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7948 	  return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7952   if (flag_pic
 && SYMBOLIC_CONST (x
))
7953     return legitimize_pic_address (x
, 0);
7955   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7956   if (GET_CODE (x
) == ASHIFT
7957       && CONST_INT_P (XEXP (x
, 1))
7958       && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7961       log
 = INTVAL (XEXP (x
, 1));
7962       x
 = gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7963 			GEN_INT (1 << log
));
7966   if (GET_CODE (x
) == PLUS
)
7968       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7970       if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7971 	  && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7972 	  && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7975 	  log
 = INTVAL (XEXP (XEXP (x
, 0), 1));
7976 	  XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7977 				      force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7978 				      GEN_INT (1 << log
));
7981       if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7982 	  && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7983 	  && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7986 	  log
 = INTVAL (XEXP (XEXP (x
, 1), 1));
7987 	  XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7988 				      force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7989 				      GEN_INT (1 << log
));
7992       /* Put multiply first if it isn't already.  */
7993       if (GET_CODE (XEXP (x
, 1)) == MULT
)
7995 	  rtx tmp
 = XEXP (x
, 0);
7996 	  XEXP (x
, 0) = XEXP (x
, 1);
8001       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8002 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
8003 	 created by virtual register instantiation, register elimination, and
8004 	 similar optimizations.  */
8005       if (GET_CODE (XEXP (x
, 0)) == MULT
 && GET_CODE (XEXP (x
, 1)) == PLUS
)
8008 	  x
 = gen_rtx_PLUS (Pmode
,
8009 			    gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8010 					  XEXP (XEXP (x
, 1), 0)),
8011 			    XEXP (XEXP (x
, 1), 1));
8015 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8016 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
8017       else if (GET_CODE (x
) == PLUS
 && GET_CODE (XEXP (x
, 0)) == PLUS
8018 	       && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8019 	       && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
8020 	       && CONSTANT_P (XEXP (x
, 1)))
8023 	  rtx other
 = NULL_RTX
;
8025 	  if (CONST_INT_P (XEXP (x
, 1)))
8027 	      constant
 = XEXP (x
, 1);
8028 	      other
 = XEXP (XEXP (XEXP (x
, 0), 1), 1);
8030 	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
8032 	      constant
 = XEXP (XEXP (XEXP (x
, 0), 1), 1);
8033 	      other
 = XEXP (x
, 1);
8041 	      x
 = gen_rtx_PLUS (Pmode
,
8042 				gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
8043 					      XEXP (XEXP (XEXP (x
, 0), 1), 0)),
8044 				plus_constant (other
, INTVAL (constant
)));
8048       if (changed
 && legitimate_address_p (mode
, x
, FALSE
))
8051       if (GET_CODE (XEXP (x
, 0)) == MULT
)
8054 	  XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
8057       if (GET_CODE (XEXP (x
, 1)) == MULT
)
8060 	  XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
8064 	  && REG_P (XEXP (x
, 1))
8065 	  && REG_P (XEXP (x
, 0)))
8068       if (flag_pic
 && SYMBOLIC_CONST (XEXP (x
, 1)))
8071 	  x
 = legitimize_pic_address (x
, 0);
8074       if (changed
 && legitimate_address_p (mode
, x
, FALSE
))
8077       if (REG_P (XEXP (x
, 0)))
8079 	  rtx temp
 = gen_reg_rtx (Pmode
);
8080 	  rtx val
  = force_operand (XEXP (x
, 1), temp
);
8082 	    emit_move_insn (temp
, val
);
8088       else if (REG_P (XEXP (x
, 1)))
8090 	  rtx temp
 = gen_reg_rtx (Pmode
);
8091 	  rtx val
  = force_operand (XEXP (x
, 0), temp
);
8093 	    emit_move_insn (temp
, val
);
/* NOTE(review): lossy extraction -- logical lines split across physical
   lines; the switch's case labels (SYMBOL_REF, LABEL_REF, CONST_INT,
   PLUS, MINUS, UNSPEC, several UNSPEC_* labels) and breaks are largely
   missing.  Text preserved byte-for-byte; not compilable as-is.  */
/* Recursively prints a PIC constant expression to FILE, emitting the
   appropriate relocation suffix (@GOT, @GOTOFF, @PLT, @GOTPCREL, the
   TLS @*TPOFF/@DTPOFF family) after the symbol, per the UNSPEC tag.  */
8103 /* Print an integer constant expression in assembler syntax.  Addition
8104 and subtraction are the only arithmetic that may appear in these
8105 expressions.  FILE is the stdio stream to write to, X is the rtx, and
8106 CODE is the operand print code from the output string.  */
8109 output_pic_addr_const (FILE *file
, rtx x
, int code
)
8113   switch (GET_CODE (x
))
8116       gcc_assert (flag_pic
);
8121       if (! TARGET_MACHO
 || TARGET_64BIT
)
8122 	output_addr_const (file
, x
);
8125 	  const char *name
 = XSTR (x
, 0);
8127 	  /* Mark the decl as referenced so that cgraph will
8128 	     output the function.  */
8129 	  if (SYMBOL_REF_DECL (x
))
8130 	    mark_decl_referenced (SYMBOL_REF_DECL (x
));
8133 	  if (MACHOPIC_INDIRECT
8134 	      && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
8135 	    name
 = machopic_indirection_name (x
, /*stub_p=*/true);
8137 	  assemble_name (file
, name
);
8139       if (!TARGET_MACHO
 && !TARGET_64BIT_MS_ABI
8140 	  && code
 == 'P' && ! SYMBOL_REF_LOCAL_P (x
))
8141 	fputs ("@PLT", file
);
8148       ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
8149       assemble_name (asm_out_file
, buf
);
8153       fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8157       /* This used to output parentheses around the expression,
8158 	 but that does not work on the 386 (either ATT or BSD assembler).  */
8159       output_pic_addr_const (file
, XEXP (x
, 0), code
);
8163       if (GET_MODE (x
) == VOIDmode
)
8165 	  /* We can use %d if the number is <32 bits and positive.  */
8166 	  if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
8167 	    fprintf (file
, "0x%lx%08lx",
8168 		     (unsigned long) CONST_DOUBLE_HIGH (x
),
8169 		     (unsigned long) CONST_DOUBLE_LOW (x
));
8171 	    fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
8174 	/* We can't handle floating point constants;
8175 	   PRINT_OPERAND must handle them.  */
8176 	output_operand_lossage ("floating constant misused");
8180       /* Some assemblers need integer constants to appear first.  */
8181       if (CONST_INT_P (XEXP (x
, 0)))
8183 	  output_pic_addr_const (file
, XEXP (x
, 0), code
);
8185 	  output_pic_addr_const (file
, XEXP (x
, 1), code
);
8189 	  gcc_assert (CONST_INT_P (XEXP (x
, 1)));
8190 	  output_pic_addr_const (file
, XEXP (x
, 1), code
);
8192 	  output_pic_addr_const (file
, XEXP (x
, 0), code
);
8198       putc (ASSEMBLER_DIALECT
 == ASM_INTEL
 ? '(' : '[', file
);
8199       output_pic_addr_const (file
, XEXP (x
, 0), code
);
8201       output_pic_addr_const (file
, XEXP (x
, 1), code
);
8203       putc (ASSEMBLER_DIALECT
 == ASM_INTEL
 ? ')' : ']', file
);
8207       gcc_assert (XVECLEN (x
, 0) == 1);
8208       output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
8209       switch (XINT (x
, 1))
8212 	  fputs ("@GOT", file
);
8215 	  fputs ("@GOTOFF", file
);
8218 	  fputs ("@PLTOFF", file
);
8220 	case UNSPEC_GOTPCREL
:
8221 	  fputs (ASSEMBLER_DIALECT
 == ASM_ATT
 ?
8222 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
8224 	case UNSPEC_GOTTPOFF
:
8225 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
8226 	  fputs ("@GOTTPOFF", file
);
8229 	  fputs ("@TPOFF", file
);
8233 	  fputs ("@TPOFF", file
);
8235 	  fputs ("@NTPOFF", file
);
8238 	  fputs ("@DTPOFF", file
);
8240 	case UNSPEC_GOTNTPOFF
:
8242 	    fputs (ASSEMBLER_DIALECT
 == ASM_ATT
 ?
8243 		   "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file
);
8245 	    fputs ("@GOTNTPOFF", file
);
8247 	case UNSPEC_INDNTPOFF
:
8248 	  fputs ("@INDNTPOFF", file
);
8251 	  output_operand_lossage ("invalid UNSPEC as operand");
8257       output_operand_lossage ("invalid expression as operand");
/* NOTE(review): lossy extraction -- lines shattered; the switch on SIZE
   and its case labels are missing.  Text preserved byte-for-byte.  */
/* Emits a DTP-relative DWARF relocation for X: an ASM_LONG directive,
   the constant, an "@DTPOFF" suffix, and (presumably for size 8 on
   32-bit hosts) a trailing ", 0" upper word -- TODO confirm the missing
   size dispatch.  */
8261 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8262 We need to emit DTP-relative relocations.  */
8264 static void ATTRIBUTE_UNUSED
8265 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8267   fputs (ASM_LONG
, file
);
8268   output_addr_const (file
, x
);
8269   fputs ("@DTPOFF", file
);
8275       fputs (", 0", file
);
/* NOTE(review): lossy extraction -- lines shattered; the initial
   assignment of x from orig_x, several returns of orig_x, and braces
   are missing.  Text preserved byte-for-byte; not compilable as-is.  */
/* Undoes PIC legitimization for debug output: strips @GOTPCREL wrappers
   on 64-bit, and on 32-bit peels pic-register + optional index + @GOT/
   @GOTOFF UNSPEC (plus an optional const offset) back to the bare
   symbol, re-adding const_addend and reg_addend to the result.  */
8282 /* In the name of slightly smaller debug output, and to cater to
8283 general assembler lossage, recognize PIC+GOTOFF and turn it back
8284 into a direct symbol reference.
8286 On Darwin, this is necessary to avoid a crash, because Darwin
8287 has a different PIC label for each routine but the DWARF debugging
8288 information is not associated with any particular routine, so it's
8289 necessary to remove references to the PIC label from RTL stored by
8290 the DWARF output code.  */
8293 ix86_delegitimize_address (rtx orig_x
)
8296   /* reg_addend is NULL or a multiple of some register.  */
8297   rtx reg_addend
 = NULL_RTX
;
8298   /* const_addend is NULL or a const_int.  */
8299   rtx const_addend
 = NULL_RTX
;
8300   /* This is the result, or NULL.  */
8301   rtx result
 = NULL_RTX
;
8308       if (GET_CODE (x
) != CONST
8309 	  || GET_CODE (XEXP (x
, 0)) != UNSPEC
8310 	  || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
8313       return XVECEXP (XEXP (x
, 0), 0, 0);
8316   if (GET_CODE (x
) != PLUS
8317       || GET_CODE (XEXP (x
, 1)) != CONST
)
8320   if (REG_P (XEXP (x
, 0))
8321       && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8322     /* %ebx + GOT/GOTOFF */
8324   else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
8326       /* %ebx + %reg * scale + GOT/GOTOFF */
8327       reg_addend
 = XEXP (x
, 0);
8328       if (REG_P (XEXP (reg_addend
, 0))
8329 	  && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8330 	reg_addend
 = XEXP (reg_addend
, 1);
8331       else if (REG_P (XEXP (reg_addend
, 1))
8332 	       && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
8333 	reg_addend
 = XEXP (reg_addend
, 0);
8336       if (!REG_P (reg_addend
)
8337 	  && GET_CODE (reg_addend
) != MULT
8338 	  && GET_CODE (reg_addend
) != ASHIFT
)
8344   x
 = XEXP (XEXP (x
, 1), 0);
8345   if (GET_CODE (x
) == PLUS
8346       && CONST_INT_P (XEXP (x
, 1)))
8348       const_addend
 = XEXP (x
, 1);
8352   if (GET_CODE (x
) == UNSPEC
8353       && ((XINT (x
, 1) == UNSPEC_GOT
 && MEM_P (orig_x
))
8354 	  || (XINT (x
, 1) == UNSPEC_GOTOFF
 && !MEM_P (orig_x
))))
8355     result
 = XVECEXP (x
, 0, 0);
8357   if (TARGET_MACHO
 && darwin_local_data_pic (x
)
8359     result
 = XEXP (x
, 0);
8365     result
 = gen_rtx_PLUS (Pmode
, result
, const_addend
);
8367     result
 = gen_rtx_PLUS (Pmode
, reg_addend
, result
);
/* NOTE(review): lossy extraction -- lines shattered; the TARGET_64BIT
   guard, initial "term = XEXP (x, 0)" and the returns of x/term are
   missing.  Text preserved byte-for-byte; not compilable as-is.  */
/* For alias analysis: strips the @GOTPCREL UNSPEC wrapper (64-bit path)
   or falls back to ix86_delegitimize_address, returning the underlying
   SYMBOL_REF/LABEL_REF base term, else X itself -- TODO confirm the
   missing returns.  */
8371 /* If X is a machine specific address (i.e. a symbol or label being
8372 referenced as a displacement from the GOT implemented using an
8373 UNSPEC), then return the base term.  Otherwise return X.  */
8376 ix86_find_base_term (rtx x
)
8382       if (GET_CODE (x
) != CONST
)
8385       if (GET_CODE (term
) == PLUS
8386 	  && (CONST_INT_P (XEXP (term
, 1))
8387 	      || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8388 	term
 = XEXP (term
, 0);
8389       if (GET_CODE (term
) != UNSPEC
8390 	  || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8393       term
 = XVECEXP (term
, 0, 0);
8395       if (GET_CODE (term
) != SYMBOL_REF
8396 	  && GET_CODE (term
) != LABEL_REF
)
8402   term
 = ix86_delegitimize_address (x
);
8404   if (GET_CODE (term
) != SYMBOL_REF
8405       && GET_CODE (term
) != LABEL_REF
)
/* NOTE(review): lossy extraction -- the switch on CODE and most case
   labels (EQ, NE, GT, LT, GE, LE, ORDERED, UNORDERED...) are missing,
   so which suffix belongs to which rtx code cannot be confirmed here.
   Text preserved byte-for-byte; not compilable as-is.  */
/* Writes the condition-code mnemonic suffix for CODE in MODE to the
   output file, optionally REVERSEd; FP compare codes are first mapped
   to integer codes via ix86_fp_comparison_codes.  */
8412 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8417   if (mode
 == CCFPmode
 || mode
 == CCFPUmode
)
8419       enum rtx_code second_code
, bypass_code
;
8420       ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8421       gcc_assert (bypass_code
 == UNKNOWN
 && second_code
 == UNKNOWN
);
8422       code
 = ix86_fp_compare_code_to_integer (code
);
8426     code
 = reverse_condition (code
);
8477       gcc_assert (mode
 == CCmode
 || mode
 == CCNOmode
 || mode
 == CCGCmode
);
8481 	  /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8482 	     Those same assemblers have the same but opposite lossage on cmov.  */
8484 	  suffix
 = fp
 ? "nbe" : "a";
8485 	  else if (mode
 == CCCmode
)
8508       gcc_assert (mode
 == CCmode
 || mode
 == CCCmode
);
8530       gcc_assert (mode
 == CCmode
 || mode
 == CCCmode
);
8531       suffix
 = fp
 ? "nb" : "ae";
8534       gcc_assert (mode
 == CCmode
 || mode
 == CCGCmode
 || mode
 == CCNOmode
);
8541       else if (mode
 == CCCmode
)
8542 	suffix
 = fp
 ? "nb" : "ae";
8547       suffix
 = fp
 ? "u" : "p";
8550       suffix
 = fp
 ? "nu" : "np";
8555   fputs (suffix
, file
);
/* NOTE(review): lossy extraction -- the switch on the size code, many
   case labels/breaks, and some assignments are missing.  Text preserved
   byte-for-byte; not compilable as-is.  */
/* Prints the assembler name of register X to FILE, with CODE selecting
   a size/naming override as described in the comment below; REX
   registers use the r8..r15 naming with b/w/d suffixes.  */
8558 /* Print the name of register X to FILE based on its machine mode and number.
8559 If CODE is 'w', pretend the mode is HImode.
8560 If CODE is 'b', pretend the mode is QImode.
8561 If CODE is 'k', pretend the mode is SImode.
8562 If CODE is 'q', pretend the mode is DImode.
8563 If CODE is 'h', pretend the reg is the 'high' byte register.
8564 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
8567 print_reg (rtx x
, int code
, FILE *file
)
8569   gcc_assert (x
 == pc_rtx
8570 	      || (REGNO (x
) != ARG_POINTER_REGNUM
8571 		  && REGNO (x
) != FRAME_POINTER_REGNUM
8572 		  && REGNO (x
) != FLAGS_REG
8573 		  && REGNO (x
) != FPSR_REG
8574 		  && REGNO (x
) != FPCR_REG
));
8576   if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8581       gcc_assert (TARGET_64BIT
);
8582       fputs ("rip", file
);
8586   if (code
 == 'w' || MMX_REG_P (x
))
8588   else if (code
 == 'b')
8590   else if (code
 == 'k')
8592   else if (code
 == 'q')
8594   else if (code
 == 'y')
8596   else if (code
 == 'h')
8599     code
 = GET_MODE_SIZE (GET_MODE (x
));
8601   /* Irritatingly, AMD extended registers use different naming convention
8602      from the normal registers.  */
8603   if (REX_INT_REG_P (x
))
8605       gcc_assert (TARGET_64BIT
);
8609 	  error ("extended registers have no high halves");
8612 	  fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
8615 	  fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
8618 	  fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
8621 	  fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
8624 	  error ("unsupported operand size for extended register");
8632       if (STACK_TOP_P (x
))
8634 	  fputs ("st(0)", file
);
8641       if (! ANY_FP_REG_P (x
))
8642 	putc (code
 == 8 && TARGET_64BIT
 ? 'r' : 'e', file
);
8647       fputs (hi_reg_name
[REGNO (x
)], file
);
8650       if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8652       fputs (qi_reg_name
[REGNO (x
)], file
);
8655       if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8657       fputs (qi_high_reg_name
[REGNO (x
)], file
);
/* NOTE(review): lossy extraction -- the "rtx x = *px" local and the
   return statements are missing.  Text preserved byte-for-byte.  */
/* for_each_rtx callback: records the name of the first local-dynamic
   TLS SYMBOL_REF found into cfun->machine->some_ld_name.  */
8664 /* Locate some local-dynamic symbol still in use by this function
8665 so that we can print its name in some tls_local_dynamic_base
8669 get_some_local_dynamic_name_1 (rtx
 *px
, void *data ATTRIBUTE_UNUSED
)
8673   if (GET_CODE (x
) == SYMBOL_REF
8674       && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8676       cfun
->machine
->some_ld_name
 = XSTR (x
, 0);
/* NOTE(review): lossy extraction -- the insn declaration, the INSN_P
   test on the loop body, and the trailing gcc_unreachable/return are
   missing.  Text preserved byte-for-byte.  */
/* Returns a cached local-dynamic TLS symbol name for the current
   function, scanning the insn stream on first use.  */
8684 get_some_local_dynamic_name (void)
8688   if (cfun
->machine
->some_ld_name
)
8689     return cfun
->machine
->some_ld_name
;
8691   for (insn
 = get_insns (); insn
; insn
 = NEXT_INSN (insn
))
8693 	&& for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8694       return cfun
->machine
->some_ld_name
;
/* NOTE(review): lossy extraction -- the big switch on CODE and most of
   its case labels, breaks, putc calls and locals are missing, so the
   mapping from print code to action below is only partially visible.
   Text preserved byte-for-byte; not compilable as-is.  */
/* Master operand printer: dispatches on the %-code (documented in the
   comment block below), then on the operand class (REG, MEM, SF/DF/XF
   CONST_DOUBLE, CONST_VECTOR, immediates/symbolic constants).  */
8700 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8701 C -- print opcode suffix for set/cmov insn.
8702 c -- like C, but print reversed condition
8703 F,f -- likewise, but for floating-point.
8704 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8706 R -- print the prefix for register names.
8707 z -- print the opcode suffix for the size of the current operand.
8708 * -- print a star (in certain assembler syntax)
8709 A -- print an absolute memory reference.
8710 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8711 s -- print a shift double count, followed by the assemblers argument
8713 b -- print the QImode name of the register for the indicated operand.
8714 %b0 would print %al if operands[0] is reg 0.
8715 w -- likewise, print the HImode name of the register.
8716 k -- likewise, print the SImode name of the register.
8717 q -- likewise, print the DImode name of the register.
8718 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8719 y -- print "st(0)" instead of "st" as a register.
8720 D -- print condition for SSE cmp instruction.
8721 P -- if PIC, print an @PLT suffix.
8722 X -- don't print any sort of PIC '@' suffix for a symbol.
8723 & -- print some in-use local-dynamic symbol name.
8724 H -- print a memory address offset by 8; used for sse high-parts
8725 Y -- print condition for SSE5 com* instruction.
8726 + -- print a branch hint as 'cs' or 'ds' prefix
8727 ; -- print a semicolon (after prefixes due to bug in older gas).
8731 print_operand (FILE *file
, rtx x
, int code
)
8738 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8743 	  assemble_name (file
, get_some_local_dynamic_name ());
8747 	  switch (ASSEMBLER_DIALECT
)
8754 	      /* Intel syntax. For absolute addresses, registers should not
8755 		 be surrounded by braces.  */
8759 		  PRINT_OPERAND (file
, x
, 0);
8769 	  PRINT_OPERAND (file
, x
, 0);
8774 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8779 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8784 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8789 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8794 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8799 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8804 	  /* 387 opcodes don't get size suffixes if the operands are
8806 	  if (STACK_REG_P (x
))
8809 	  /* Likewise if using Intel opcodes.  */
8810 	  if (ASSEMBLER_DIALECT
 == ASM_INTEL
)
8813 	  /* This is the size of op from size of operand.  */
8814 	  switch (GET_MODE_SIZE (GET_MODE (x
)))
8823 #ifdef HAVE_GAS_FILDS_FISTS
8833 	      if (GET_MODE (x
) == SFmode
)
8848 	      if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8850 #ifdef GAS_MNEMONICS
8876 	  if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8878 	      PRINT_OPERAND (file
, x
, 0);
8884 	  /* Little bit of braindamage here.  The SSE compare instructions
8885 	     does use completely different names for the comparisons that the
8886 	     fp conditional moves.  */
8887 	  switch (GET_CODE (x
))
8902 	      fputs ("unord", file
);
8906 	      fputs ("neq", file
);
8910 	      fputs ("nlt", file
);
8914 	      fputs ("nle", file
);
8917 	      fputs ("ord", file
);
8924 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8925 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8927 	      switch (GET_MODE (x
))
8929 		case HImode
: putc ('w', file
); break;
8931 		case SFmode
: putc ('l', file
); break;
8933 		case DFmode
: putc ('q', file
); break;
8934 		default: gcc_unreachable ();
8941 	  put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8944 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8945 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8948 	  put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8951 	  /* Like above, but reverse condition */
8953 	  /* Check to see if argument to %c is really a constant
8954 	     and not a condition code which needs to be reversed.  */
8955 	  if (!COMPARISON_P (x
))
8957 	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8960 	  put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8963 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8964 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
8967 	  put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8971 	  /* It doesn't actually matter what mode we use here, as we're
8972 	     only going to use this for printing.  */
8973 	  x
 = adjust_address_nv (x
, DImode
, 8);
8980 	    if (!optimize
 || optimize_size
 || !TARGET_BRANCH_PREDICTION_HINTS
)
8983 	    x
 = find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8986 		int pred_val
 = INTVAL (XEXP (x
, 0));
8988 		if (pred_val
 < REG_BR_PROB_BASE
 * 45 / 100
8989 		    || pred_val
 > REG_BR_PROB_BASE
 * 55 / 100)
8991 		    int taken
 = pred_val
 > REG_BR_PROB_BASE
 / 2;
8992 		    int cputaken
 = final_forward_branch_p (current_output_insn
) == 0;
8994 		    /* Emit hints only in the case default branch prediction
8995 		       heuristics would fail.  */
8996 		    if (taken
 != cputaken
)
8998 			/* We use 3e (DS) prefix for taken branches and
8999 			   2e (CS) prefix for not taken branches.  */
9001 			  fputs ("ds ; ", file
);
9003 			  fputs ("cs ; ", file
);
9011 	    switch (GET_CODE (x
))
9014 		fputs ("neq", file
);
9021 		fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
9025 		fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
9036 		fputs ("unord", file
);
9039 		fputs ("ord", file
);
9042 		fputs ("ueq", file
);
9045 		fputs ("nlt", file
);
9048 		fputs ("nle", file
);
9051 		fputs ("ule", file
);
9054 		fputs ("ult", file
);
9057 		fputs ("une", file
);
9066 	    fputs (" ; ", file
);
9073 	  output_operand_lossage ("invalid operand code '%c'", code
);
9078     print_reg (x
, code
, file
);
9082       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
9083       if (ASSEMBLER_DIALECT
 == ASM_INTEL
 && code
 != 'X' && code
 != 'P'
9084 	  && GET_MODE (x
) != BLKmode
)
9087 	  switch (GET_MODE_SIZE (GET_MODE (x
)))
9089 	    case 1: size
 = "BYTE"; break;
9090 	    case 2: size
 = "WORD"; break;
9091 	    case 4: size
 = "DWORD"; break;
9092 	    case 8: size
 = "QWORD"; break;
9093 	    case 12: size
 = "XWORD"; break;
9095 	      if (GET_MODE (x
) == XFmode
)
9104 	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
9107 	  else if (code
 == 'w')
9109 	  else if (code
 == 'k')
9113 	  fputs (" PTR ", file
);
9117       /* Avoid (%rip) for call operands.  */
9118       if (CONSTANT_ADDRESS_P (x
) && code
 == 'P'
9119 	  && !CONST_INT_P (x
))
9120 	output_addr_const (file
, x
);
9121       else if (this_is_asm_operands
 && ! address_operand (x
, VOIDmode
))
9122 	output_operand_lossage ("invalid constraints for operand");
9127   else if (GET_CODE (x
) == CONST_DOUBLE
 && GET_MODE (x
) == SFmode
)
9132       REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9133       REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
9135       if (ASSEMBLER_DIALECT
 == ASM_ATT
)
9137       fprintf (file
, "0x%08lx", l
);
9140   /* These float cases don't actually occur as immediate operands.  */
9141   else if (GET_CODE (x
) == CONST_DOUBLE
 && GET_MODE (x
) == DFmode
)
9145       real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9146       fprintf (file
, "%s", dstr
);
9149   else if (GET_CODE (x
) == CONST_DOUBLE
9150 	   && GET_MODE (x
) == XFmode
)
9154       real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9155       fprintf (file
, "%s", dstr
);
9160       /* We have patterns that allow zero sets of memory, for instance.
9161 	 In 64-bit mode, we should probably support all 8-byte vectors,
9162 	 since we can in fact encode that into an immediate.  */
9163       if (GET_CODE (x
) == CONST_VECTOR
)
9165 	  gcc_assert (x
 == CONST0_RTX (GET_MODE (x
)));
9171       if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
9173 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
9176       else if (GET_CODE (x
) == CONST
 || GET_CODE (x
) == SYMBOL_REF
9177 	       || GET_CODE (x
) == LABEL_REF
)
9179 	  if (ASSEMBLER_DIALECT
 == ASM_ATT
)
9182 	    fputs ("OFFSET FLAT:", file
);
9185       if (CONST_INT_P (x
))
9186 	fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
9188 	output_pic_addr_const (file
, x
, code
);
9190 	output_addr_const (file
, x
);
9194 /* Print a memory operand whose address is ADDR. */
9197 print_operand_address (FILE *file
, rtx addr
)
9199 struct ix86_address parts
;
9200 rtx base
, index
, disp
;
9202 int ok
= ix86_decompose_address (addr
, &parts
);
9207 index
= parts
.index
;
9209 scale
= parts
.scale
;
9217 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9219 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
9225 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9226 if (TARGET_64BIT
&& !base
&& !index
)
9230 if (GET_CODE (disp
) == CONST
9231 && GET_CODE (XEXP (disp
, 0)) == PLUS
9232 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9233 symbol
= XEXP (XEXP (disp
, 0), 0);
9235 if (GET_CODE (symbol
) == LABEL_REF
9236 || (GET_CODE (symbol
) == SYMBOL_REF
9237 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
9240 if (!base
&& !index
)
9242 /* Displacement only requires special attention. */
9244 if (CONST_INT_P (disp
))
9246 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
9247 fputs ("ds:", file
);
9248 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
9251 output_pic_addr_const (file
, disp
, 0);
9253 output_addr_const (file
, disp
);
9257 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9262 output_pic_addr_const (file
, disp
, 0);
9263 else if (GET_CODE (disp
) == LABEL_REF
)
9264 output_asm_label (disp
);
9266 output_addr_const (file
, disp
);
9271 print_reg (base
, 0, file
);
9275 print_reg (index
, 0, file
);
9277 fprintf (file
, ",%d", scale
);
9283 rtx offset
= NULL_RTX
;
9287 /* Pull out the offset of a symbol; print any symbol itself. */
9288 if (GET_CODE (disp
) == CONST
9289 && GET_CODE (XEXP (disp
, 0)) == PLUS
9290 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9292 offset
= XEXP (XEXP (disp
, 0), 1);
9293 disp
= gen_rtx_CONST (VOIDmode
,
9294 XEXP (XEXP (disp
, 0), 0));
9298 output_pic_addr_const (file
, disp
, 0);
9299 else if (GET_CODE (disp
) == LABEL_REF
)
9300 output_asm_label (disp
);
9301 else if (CONST_INT_P (disp
))
9304 output_addr_const (file
, disp
);
9310 print_reg (base
, 0, file
);
9313 if (INTVAL (offset
) >= 0)
9315 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9319 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9326 print_reg (index
, 0, file
);
9328 fprintf (file
, "*%d", scale
);
9336 output_addr_const_extra (FILE *file
, rtx x
)
9340 if (GET_CODE (x
) != UNSPEC
)
9343 op
= XVECEXP (x
, 0, 0);
9344 switch (XINT (x
, 1))
9346 case UNSPEC_GOTTPOFF
:
9347 output_addr_const (file
, op
);
9348 /* FIXME: This might be @TPOFF in Sun ld. */
9349 fputs ("@GOTTPOFF", file
);
9352 output_addr_const (file
, op
);
9353 fputs ("@TPOFF", file
);
9356 output_addr_const (file
, op
);
9358 fputs ("@TPOFF", file
);
9360 fputs ("@NTPOFF", file
);
9363 output_addr_const (file
, op
);
9364 fputs ("@DTPOFF", file
);
9366 case UNSPEC_GOTNTPOFF
:
9367 output_addr_const (file
, op
);
9369 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
9370 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file
);
9372 fputs ("@GOTNTPOFF", file
);
9374 case UNSPEC_INDNTPOFF
:
9375 output_addr_const (file
, op
);
9376 fputs ("@INDNTPOFF", file
);
9386 /* Split one or more DImode RTL references into pairs of SImode
9387 references. The RTL can be REG, offsettable MEM, integer constant, or
9388 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9389 split and "num" is its length. lo_half and hi_half are output arrays
9390 that parallel "operands". */
9393 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9397 rtx op
= operands
[num
];
9399 /* simplify_subreg refuse to split volatile memory addresses,
9400 but we still have to handle it. */
9403 lo_half
[num
] = adjust_address (op
, SImode
, 0);
9404 hi_half
[num
] = adjust_address (op
, SImode
, 4);
9408 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
9409 GET_MODE (op
) == VOIDmode
9410 ? DImode
: GET_MODE (op
), 0);
9411 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
9412 GET_MODE (op
) == VOIDmode
9413 ? DImode
: GET_MODE (op
), 4);
9417 /* Split one or more TImode RTL references into pairs of DImode
9418 references. The RTL can be REG, offsettable MEM, integer constant, or
9419 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9420 split and "num" is its length. lo_half and hi_half are output arrays
9421 that parallel "operands". */
9424 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9428 rtx op
= operands
[num
];
9430 /* simplify_subreg refuse to split volatile memory addresses, but we
9431 still have to handle it. */
9434 lo_half
[num
] = adjust_address (op
, DImode
, 0);
9435 hi_half
[num
] = adjust_address (op
, DImode
, 8);
9439 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
9440 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
9445 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9446 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9447 is the expression of the binary operation. The output may either be
9448 emitted here, or returned to the caller, like all output_* functions.
9450 There is no guarantee that the operands are the same mode, as they
9451 might be within FLOAT or FLOAT_EXTEND expressions. */
9453 #ifndef SYSV386_COMPAT
9454 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9455 wants to fix the assemblers because that causes incompatibility
9456 with gcc. No-one wants to fix gcc because that causes
9457 incompatibility with assemblers... You can use the option of
9458 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9459 #define SYSV386_COMPAT 1
9463 output_387_binary_op (rtx insn
, rtx
*operands
)
9465 static char buf
[30];
9468 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
9470 #ifdef ENABLE_CHECKING
9471 /* Even if we do not want to check the inputs, this documents input
9472 constraints. Which helps in understanding the following code. */
9473 if (STACK_REG_P (operands
[0])
9474 && ((REG_P (operands
[1])
9475 && REGNO (operands
[0]) == REGNO (operands
[1])
9476 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
9477 || (REG_P (operands
[2])
9478 && REGNO (operands
[0]) == REGNO (operands
[2])
9479 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
9480 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
9483 gcc_assert (is_sse
);
9486 switch (GET_CODE (operands
[3]))
9489 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9490 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9498 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9499 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9507 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9508 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9516 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9517 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9531 if (GET_MODE (operands
[0]) == SFmode
)
9532 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9534 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9539 switch (GET_CODE (operands
[3]))
9543 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9545 rtx temp
= operands
[2];
9546 operands
[2] = operands
[1];
9550 /* know operands[0] == operands[1]. */
9552 if (MEM_P (operands
[2]))
9558 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9560 if (STACK_TOP_P (operands
[0]))
9561 /* How is it that we are storing to a dead operand[2]?
9562 Well, presumably operands[1] is dead too. We can't
9563 store the result to st(0) as st(0) gets popped on this
9564 instruction. Instead store to operands[2] (which I
9565 think has to be st(1)). st(1) will be popped later.
9566 gcc <= 2.8.1 didn't have this check and generated
9567 assembly code that the Unixware assembler rejected. */
9568 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9570 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9574 if (STACK_TOP_P (operands
[0]))
9575 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9577 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9582 if (MEM_P (operands
[1]))
9588 if (MEM_P (operands
[2]))
9594 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9597 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9598 derived assemblers, confusingly reverse the direction of
9599 the operation for fsub{r} and fdiv{r} when the
9600 destination register is not st(0). The Intel assembler
9601 doesn't have this brain damage. Read !SYSV386_COMPAT to
9602 figure out what the hardware really does. */
9603 if (STACK_TOP_P (operands
[0]))
9604 p
= "{p\t%0, %2|rp\t%2, %0}";
9606 p
= "{rp\t%2, %0|p\t%0, %2}";
9608 if (STACK_TOP_P (operands
[0]))
9609 /* As above for fmul/fadd, we can't store to st(0). */
9610 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9612 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9617 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9620 if (STACK_TOP_P (operands
[0]))
9621 p
= "{rp\t%0, %1|p\t%1, %0}";
9623 p
= "{p\t%1, %0|rp\t%0, %1}";
9625 if (STACK_TOP_P (operands
[0]))
9626 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9628 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9633 if (STACK_TOP_P (operands
[0]))
9635 if (STACK_TOP_P (operands
[1]))
9636 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9638 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9641 else if (STACK_TOP_P (operands
[1]))
9644 p
= "{\t%1, %0|r\t%0, %1}";
9646 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9652 p
= "{r\t%2, %0|\t%0, %2}";
9654 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9667 /* Return needed mode for entity in optimize_mode_switching pass. */
9670 ix86_mode_needed (int entity
, rtx insn
)
9672 enum attr_i387_cw mode
;
9674 /* The mode UNINITIALIZED is used to store control word after a
9675 function call or ASM pattern. The mode ANY specify that function
9676 has no requirements on the control word and make no changes in the
9677 bits we are interested in. */
9680 || (NONJUMP_INSN_P (insn
)
9681 && (asm_noperands (PATTERN (insn
)) >= 0
9682 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9683 return I387_CW_UNINITIALIZED
;
9685 if (recog_memoized (insn
) < 0)
9688 mode
= get_attr_i387_cw (insn
);
9693 if (mode
== I387_CW_TRUNC
)
9698 if (mode
== I387_CW_FLOOR
)
9703 if (mode
== I387_CW_CEIL
)
9708 if (mode
== I387_CW_MASK_PM
)
9719 /* Output code to initialize control word copies used by trunc?f?i and
9720 rounding patterns. CURRENT_MODE is set to current control word,
9721 while NEW_MODE is set to new control word. */
9724 emit_i387_cw_initialization (int mode
)
9726 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9729 enum ix86_stack_slot slot
;
9731 rtx reg
= gen_reg_rtx (HImode
);
9733 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9734 emit_move_insn (reg
, copy_rtx (stored_mode
));
9736 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9741 /* round toward zero (truncate) */
9742 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9743 slot
= SLOT_CW_TRUNC
;
9747 /* round down toward -oo */
9748 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9749 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9750 slot
= SLOT_CW_FLOOR
;
9754 /* round up toward +oo */
9755 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9756 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9757 slot
= SLOT_CW_CEIL
;
9760 case I387_CW_MASK_PM
:
9761 /* mask precision exception for nearbyint() */
9762 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9763 slot
= SLOT_CW_MASK_PM
;
9775 /* round toward zero (truncate) */
9776 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9777 slot
= SLOT_CW_TRUNC
;
9781 /* round down toward -oo */
9782 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9783 slot
= SLOT_CW_FLOOR
;
9787 /* round up toward +oo */
9788 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9789 slot
= SLOT_CW_CEIL
;
9792 case I387_CW_MASK_PM
:
9793 /* mask precision exception for nearbyint() */
9794 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9795 slot
= SLOT_CW_MASK_PM
;
9803 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9805 new_mode
= assign_386_stack_local (HImode
, slot
);
9806 emit_move_insn (new_mode
, reg
);
9809 /* Output code for INSN to convert a float to a signed int. OPERANDS
9810 are the insn operands. The output may be [HSD]Imode and the input
9811 operand may be [SDX]Fmode. */
9814 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9816 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9817 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9818 int round_mode
= get_attr_i387_cw (insn
);
9820 /* Jump through a hoop or two for DImode, since the hardware has no
9821 non-popping instruction. We used to do this a different way, but
9822 that was somewhat fragile and broke with post-reload splitters. */
9823 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9824 output_asm_insn ("fld\t%y1", operands
);
9826 gcc_assert (STACK_TOP_P (operands
[1]));
9827 gcc_assert (MEM_P (operands
[0]));
9828 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9831 output_asm_insn ("fisttp%z0\t%0", operands
);
9834 if (round_mode
!= I387_CW_ANY
)
9835 output_asm_insn ("fldcw\t%3", operands
);
9836 if (stack_top_dies
|| dimode_p
)
9837 output_asm_insn ("fistp%z0\t%0", operands
);
9839 output_asm_insn ("fist%z0\t%0", operands
);
9840 if (round_mode
!= I387_CW_ANY
)
9841 output_asm_insn ("fldcw\t%2", operands
);
9847 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9848 have the values zero or one, indicates the ffreep insn's operand
9849 from the OPERANDS array. */
9852 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9854 if (TARGET_USE_FFREEP
)
9855 #if HAVE_AS_IX86_FFREEP
9856 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9859 static char retval
[] = ".word\t0xc_df";
9860 int regno
= REGNO (operands
[opno
]);
9862 gcc_assert (FP_REGNO_P (regno
));
9864 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9869 return opno
? "fstp\t%y1" : "fstp\t%y0";
9873 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9874 should be used. UNORDERED_P is true when fucom should be used. */
9877 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9880 rtx cmp_op0
, cmp_op1
;
9881 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9885 cmp_op0
= operands
[0];
9886 cmp_op1
= operands
[1];
9890 cmp_op0
= operands
[1];
9891 cmp_op1
= operands
[2];
9896 if (GET_MODE (operands
[0]) == SFmode
)
9898 return "ucomiss\t{%1, %0|%0, %1}";
9900 return "comiss\t{%1, %0|%0, %1}";
9903 return "ucomisd\t{%1, %0|%0, %1}";
9905 return "comisd\t{%1, %0|%0, %1}";
9908 gcc_assert (STACK_TOP_P (cmp_op0
));
9910 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9912 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9916 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9917 return output_387_ffreep (operands
, 1);
9920 return "ftst\n\tfnstsw\t%0";
9923 if (STACK_REG_P (cmp_op1
)
9925 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9926 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9928 /* If both the top of the 387 stack dies, and the other operand
9929 is also a stack register that dies, then this must be a
9930 `fcompp' float compare */
9934 /* There is no double popping fcomi variant. Fortunately,
9935 eflags is immune from the fstp's cc clobbering. */
9937 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9939 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9940 return output_387_ffreep (operands
, 0);
9945 return "fucompp\n\tfnstsw\t%0";
9947 return "fcompp\n\tfnstsw\t%0";
9952 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9954 static const char * const alt
[16] =
9956 "fcom%z2\t%y2\n\tfnstsw\t%0",
9957 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9958 "fucom%z2\t%y2\n\tfnstsw\t%0",
9959 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9961 "ficom%z2\t%y2\n\tfnstsw\t%0",
9962 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9966 "fcomi\t{%y1, %0|%0, %y1}",
9967 "fcomip\t{%y1, %0|%0, %y1}",
9968 "fucomi\t{%y1, %0|%0, %y1}",
9969 "fucomip\t{%y1, %0|%0, %y1}",
9980 mask
= eflags_p
<< 3;
9981 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9982 mask
|= unordered_p
<< 1;
9983 mask
|= stack_top_dies
;
9985 gcc_assert (mask
< 16);
9994 ix86_output_addr_vec_elt (FILE *file
, int value
)
9996 const char *directive
= ASM_LONG
;
10000 directive
= ASM_QUAD
;
10002 gcc_assert (!TARGET_64BIT
);
10005 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
10009 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
10011 const char *directive
= ASM_LONG
;
10014 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
10015 directive
= ASM_QUAD
;
10017 gcc_assert (!TARGET_64BIT
);
10019 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10020 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
10021 fprintf (file
, "%s%s%d-%s%d\n",
10022 directive
, LPREFIX
, value
, LPREFIX
, rel
);
10023 else if (HAVE_AS_GOTOFF_IN_DATA
)
10024 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
10026 else if (TARGET_MACHO
)
10028 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
10029 machopic_output_function_base_name (file
);
10030 fprintf(file
, "\n");
10034 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
10035 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
10038 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10042 ix86_expand_clear (rtx dest
)
10046 /* We play register width games, which are only valid after reload. */
10047 gcc_assert (reload_completed
);
10049 /* Avoid HImode and its attendant prefix byte. */
10050 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
10051 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
10052 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
10054 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10055 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
10057 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10058 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
10064 /* X is an unchanging MEM. If it is a constant pool reference, return
10065 the constant pool rtx, else NULL. */
10068 maybe_get_pool_constant (rtx x
)
10070 x
= ix86_delegitimize_address (XEXP (x
, 0));
10072 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
10073 return get_pool_constant (x
);
10079 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
10082 enum tls_model model
;
10087 if (GET_CODE (op1
) == SYMBOL_REF
)
10089 model
= SYMBOL_REF_TLS_MODEL (op1
);
10092 op1
= legitimize_tls_address (op1
, model
, true);
10093 op1
= force_operand (op1
, op0
);
10097 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10098 && SYMBOL_REF_DLLIMPORT_P (op1
))
10099 op1
= legitimize_dllimport_symbol (op1
, false);
10101 else if (GET_CODE (op1
) == CONST
10102 && GET_CODE (XEXP (op1
, 0)) == PLUS
10103 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
10105 rtx addend
= XEXP (XEXP (op1
, 0), 1);
10106 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
10109 model
= SYMBOL_REF_TLS_MODEL (symbol
);
10111 tmp
= legitimize_tls_address (symbol
, model
, true);
10112 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10113 && SYMBOL_REF_DLLIMPORT_P (symbol
))
10114 tmp
= legitimize_dllimport_symbol (symbol
, true);
10118 tmp
= force_operand (tmp
, NULL
);
10119 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
10120 op0
, 1, OPTAB_DIRECT
);
10126 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
10128 if (TARGET_MACHO
&& !TARGET_64BIT
)
10133 rtx temp
= ((reload_in_progress
10134 || ((op0
&& REG_P (op0
))
10136 ? op0
: gen_reg_rtx (Pmode
));
10137 op1
= machopic_indirect_data_reference (op1
, temp
);
10138 op1
= machopic_legitimize_pic_address (op1
, mode
,
10139 temp
== op1
? 0 : temp
);
10141 else if (MACHOPIC_INDIRECT
)
10142 op1
= machopic_indirect_data_reference (op1
, 0);
10150 op1
= force_reg (Pmode
, op1
);
10151 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
10153 rtx reg
= !can_create_pseudo_p () ? op0
: NULL_RTX
;
10154 op1
= legitimize_pic_address (op1
, reg
);
10163 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
10164 || !push_operand (op0
, mode
))
10166 op1
= force_reg (mode
, op1
);
10168 if (push_operand (op0
, mode
)
10169 && ! general_no_elim_operand (op1
, mode
))
10170 op1
= copy_to_mode_reg (mode
, op1
);
10172 /* Force large constants in 64bit compilation into register
10173 to get them CSEed. */
10174 if (can_create_pseudo_p ()
10175 && (mode
== DImode
) && TARGET_64BIT
10176 && immediate_operand (op1
, mode
)
10177 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
10178 && !register_operand (op0
, mode
)
10180 op1
= copy_to_mode_reg (mode
, op1
);
10182 if (can_create_pseudo_p ()
10183 && FLOAT_MODE_P (mode
)
10184 && GET_CODE (op1
) == CONST_DOUBLE
)
10186 /* If we are loading a floating point constant to a register,
10187 force the value to memory now, since we'll get better code
10188 out the back end. */
10190 op1
= validize_mem (force_const_mem (mode
, op1
));
10191 if (!register_operand (op0
, mode
))
10193 rtx temp
= gen_reg_rtx (mode
);
10194 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
10195 emit_move_insn (op0
, temp
);
10201 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
10205 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
10207 rtx op0
= operands
[0], op1
= operands
[1];
10208 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
10210 /* Force constants other than zero into memory. We do not know how
10211 the instructions used to build constants modify the upper 64 bits
10212 of the register, once we have that information we may be able
10213 to handle some of them more efficiently. */
10214 if (can_create_pseudo_p ()
10215 && register_operand (op0
, mode
)
10216 && (CONSTANT_P (op1
)
10217 || (GET_CODE (op1
) == SUBREG
10218 && CONSTANT_P (SUBREG_REG (op1
))))
10219 && standard_sse_constant_p (op1
) <= 0)
10220 op1
= validize_mem (force_const_mem (mode
, op1
));
10222 /* TDmode values are passed as TImode on the stack. TImode values
10223 are moved via xmm registers, and moving them to stack can result in
10224 unaligned memory access. Use ix86_expand_vector_move_misalign()
10225 if memory operand is not aligned correctly. */
10226 if (can_create_pseudo_p ()
10227 && (mode
== TImode
) && !TARGET_64BIT
10228 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
10229 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
10233 /* ix86_expand_vector_move_misalign() does not like constants ... */
10234 if (CONSTANT_P (op1
)
10235 || (GET_CODE (op1
) == SUBREG
10236 && CONSTANT_P (SUBREG_REG (op1
))))
10237 op1
= validize_mem (force_const_mem (mode
, op1
));
10239 /* ... nor both arguments in memory. */
10240 if (!register_operand (op0
, mode
)
10241 && !register_operand (op1
, mode
))
10242 op1
= force_reg (mode
, op1
);
10244 tmp
[0] = op0
; tmp
[1] = op1
;
10245 ix86_expand_vector_move_misalign (mode
, tmp
);
10249 /* Make operand1 a register if it isn't already. */
10250 if (can_create_pseudo_p ()
10251 && !register_operand (op0
, mode
)
10252 && !register_operand (op1
, mode
))
10254 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
10258 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
10261 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10262 straight to ix86_expand_vector_move. */
10263 /* Code generation for scalar reg-reg moves of single and double precision data:
10264 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10268 if (x86_sse_partial_reg_dependency == true)
10273 Code generation for scalar loads of double precision data:
10274 if (x86_sse_split_regs == true)
10275 movlpd mem, reg (gas syntax)
10279 Code generation for unaligned packed loads of single precision data
10280 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10281 if (x86_sse_unaligned_move_optimal)
10284 if (x86_sse_partial_reg_dependency == true)
10296 Code generation for unaligned packed loads of double precision data
10297 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10298 if (x86_sse_unaligned_move_optimal)
10301 if (x86_sse_split_regs == true)
10314 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
10323 /* If we're optimizing for size, movups is the smallest. */
10326 op0
= gen_lowpart (V4SFmode
, op0
);
10327 op1
= gen_lowpart (V4SFmode
, op1
);
10328 emit_insn (gen_sse_movups (op0
, op1
));
10332 /* ??? If we have typed data, then it would appear that using
10333 movdqu is the only way to get unaligned data loaded with
10335 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10337 op0
= gen_lowpart (V16QImode
, op0
);
10338 op1
= gen_lowpart (V16QImode
, op1
);
10339 emit_insn (gen_sse2_movdqu (op0
, op1
));
10343 if (TARGET_SSE2
&& mode
== V2DFmode
)
10347 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10349 op0
= gen_lowpart (V2DFmode
, op0
);
10350 op1
= gen_lowpart (V2DFmode
, op1
);
10351 emit_insn (gen_sse2_movupd (op0
, op1
));
10355 /* When SSE registers are split into halves, we can avoid
10356 writing to the top half twice. */
10357 if (TARGET_SSE_SPLIT_REGS
)
10359 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10364 /* ??? Not sure about the best option for the Intel chips.
10365 The following would seem to satisfy; the register is
10366 entirely cleared, breaking the dependency chain. We
10367 then store to the upper half, with a dependency depth
10368 of one. A rumor has it that Intel recommends two movsd
10369 followed by an unpacklpd, but this is unconfirmed. And
10370 given that the dependency depth of the unpacklpd would
10371 still be one, I'm not sure why this would be better. */
10372 zero
= CONST0_RTX (V2DFmode
);
10375 m
= adjust_address (op1
, DFmode
, 0);
10376 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
10377 m
= adjust_address (op1
, DFmode
, 8);
10378 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
10382 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10384 op0
= gen_lowpart (V4SFmode
, op0
);
10385 op1
= gen_lowpart (V4SFmode
, op1
);
10386 emit_insn (gen_sse_movups (op0
, op1
));
10390 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
10391 emit_move_insn (op0
, CONST0_RTX (mode
));
10393 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10395 if (mode
!= V4SFmode
)
10396 op0
= gen_lowpart (V4SFmode
, op0
);
10397 m
= adjust_address (op1
, V2SFmode
, 0);
10398 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
10399 m
= adjust_address (op1
, V2SFmode
, 8);
10400 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
10403 else if (MEM_P (op0
))
10405 /* If we're optimizing for size, movups is the smallest. */
10408 op0
= gen_lowpart (V4SFmode
, op0
);
10409 op1
= gen_lowpart (V4SFmode
, op1
);
10410 emit_insn (gen_sse_movups (op0
, op1
));
10414 /* ??? Similar to above, only less clear because of quote
10415 typeless stores unquote. */
10416 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
10417 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10419 op0
= gen_lowpart (V16QImode
, op0
);
10420 op1
= gen_lowpart (V16QImode
, op1
);
10421 emit_insn (gen_sse2_movdqu (op0
, op1
));
10425 if (TARGET_SSE2
&& mode
== V2DFmode
)
10427 m
= adjust_address (op0
, DFmode
, 0);
10428 emit_insn (gen_sse2_storelpd (m
, op1
));
10429 m
= adjust_address (op0
, DFmode
, 8);
10430 emit_insn (gen_sse2_storehpd (m
, op1
));
10434 if (mode
!= V4SFmode
)
10435 op1
= gen_lowpart (V4SFmode
, op1
);
10436 m
= adjust_address (op0
, V2SFmode
, 0);
10437 emit_insn (gen_sse_storelps (m
, op1
));
10438 m
= adjust_address (op0
, V2SFmode
, 8);
10439 emit_insn (gen_sse_storehps (m
, op1
));
10443 gcc_unreachable ();
10446 /* Expand a push in MODE. This is some mode for which we do not support
10447 proper push instructions, at least from the registers that we expect
10448 the value to live in. */
10451 ix86_expand_push (enum machine_mode mode
, rtx x
)
10455 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
10456 GEN_INT (-GET_MODE_SIZE (mode
)),
10457 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
10458 if (tmp
!= stack_pointer_rtx
)
10459 emit_move_insn (stack_pointer_rtx
, tmp
);
10461 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
10462 emit_move_insn (tmp
, x
);
10465 /* Helper function of ix86_fixup_binary_operands to canonicalize
10466 operand order. Returns true if the operands should be swapped. */
10469 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
10472 rtx dst
= operands
[0];
10473 rtx src1
= operands
[1];
10474 rtx src2
= operands
[2];
10476 /* If the operation is not commutative, we can't do anything. */
10477 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
10480 /* Highest priority is that src1 should match dst. */
10481 if (rtx_equal_p (dst
, src1
))
10483 if (rtx_equal_p (dst
, src2
))
10486 /* Next highest priority is that immediate constants come second. */
10487 if (immediate_operand (src2
, mode
))
10489 if (immediate_operand (src1
, mode
))
10492 /* Lowest priority is that memory references should come second. */
10502 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10503 destination to use for the operation. If different from the true
10504 destination in operands[0], a copy operation will be required. */
10507 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
10510 rtx dst
= operands
[0];
10511 rtx src1
= operands
[1];
10512 rtx src2
= operands
[2];
10514 /* Canonicalize operand order. */
10515 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10522 /* Both source operands cannot be in memory. */
10523 if (MEM_P (src1
) && MEM_P (src2
))
10525 /* Optimization: Only read from memory once. */
10526 if (rtx_equal_p (src1
, src2
))
10528 src2
= force_reg (mode
, src2
);
10532 src2
= force_reg (mode
, src2
);
10535 /* If the destination is memory, and we do not have matching source
10536 operands, do things in registers. */
10537 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10538 dst
= gen_reg_rtx (mode
);
10540 /* Source 1 cannot be a constant. */
10541 if (CONSTANT_P (src1
))
10542 src1
= force_reg (mode
, src1
);
10544 /* Source 1 cannot be a non-matching memory. */
10545 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10546 src1
= force_reg (mode
, src1
);
10548 operands
[1] = src1
;
10549 operands
[2] = src2
;
10553 /* Similarly, but assume that the destination has already been
10554 set up properly. */
10557 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10558 enum machine_mode mode
, rtx operands
[])
10560 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10561 gcc_assert (dst
== operands
[0]);
10564 /* Attempt to expand a binary operator. Make the expansion closer to the
10565 actual machine, then just general_operand, which will allow 3 separate
10566 memory references (one output, two input) in a single insn. */
10569 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10572 rtx src1
, src2
, dst
, op
, clob
;
10574 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10575 src1
= operands
[1];
10576 src2
= operands
[2];
10578 /* Emit the instruction. */
10580 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10581 if (reload_in_progress
)
10583 /* Reload doesn't know about the flags register, and doesn't know that
10584 it doesn't want to clobber it. We can only do this with PLUS. */
10585 gcc_assert (code
== PLUS
);
10590 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10591 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10594 /* Fix up the destination if needed. */
10595 if (dst
!= operands
[0])
10596 emit_move_insn (operands
[0], dst
);
10599 /* Return TRUE or FALSE depending on whether the binary operator meets the
10600 appropriate constraints. */
10603 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10606 rtx dst
= operands
[0];
10607 rtx src1
= operands
[1];
10608 rtx src2
= operands
[2];
10610 /* Both source operands cannot be in memory. */
10611 if (MEM_P (src1
) && MEM_P (src2
))
10614 /* Canonicalize operand order for commutative operators. */
10615 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10622 /* If the destination is memory, we must have a matching source operand. */
10623 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10626 /* Source 1 cannot be a constant. */
10627 if (CONSTANT_P (src1
))
10630 /* Source 1 cannot be a non-matching memory. */
10631 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10637 /* Attempt to expand a unary operator. Make the expansion closer to the
10638 actual machine, then just general_operand, which will allow 2 separate
10639 memory references (one output, one input) in a single insn. */
10642 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10645 int matching_memory
;
10646 rtx src
, dst
, op
, clob
;
10651 /* If the destination is memory, and we do not have matching source
10652 operands, do things in registers. */
10653 matching_memory
= 0;
10656 if (rtx_equal_p (dst
, src
))
10657 matching_memory
= 1;
10659 dst
= gen_reg_rtx (mode
);
10662 /* When source operand is memory, destination must match. */
10663 if (MEM_P (src
) && !matching_memory
)
10664 src
= force_reg (mode
, src
);
10666 /* Emit the instruction. */
10668 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10669 if (reload_in_progress
|| code
== NOT
)
10671 /* Reload doesn't know about the flags register, and doesn't know that
10672 it doesn't want to clobber it. */
10673 gcc_assert (code
== NOT
);
10678 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10679 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10682 /* Fix up the destination if needed. */
10683 if (dst
!= operands
[0])
10684 emit_move_insn (operands
[0], dst
);
10687 /* Return TRUE or FALSE depending on whether the unary operator meets the
10688 appropriate constraints. */
10691 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10692 enum machine_mode mode ATTRIBUTE_UNUSED
,
10693 rtx operands
[2] ATTRIBUTE_UNUSED
)
10695 /* If one of operands is memory, source and destination must match. */
10696 if ((MEM_P (operands
[0])
10697 || MEM_P (operands
[1]))
10698 && ! rtx_equal_p (operands
[0], operands
[1]))
10703 /* Post-reload splitter for converting an SF or DFmode value in an
10704 SSE register into an unsigned SImode. */
10707 ix86_split_convert_uns_si_sse (rtx operands
[])
10709 enum machine_mode vecmode
;
10710 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10712 large
= operands
[1];
10713 zero_or_two31
= operands
[2];
10714 input
= operands
[3];
10715 two31
= operands
[4];
10716 vecmode
= GET_MODE (large
);
10717 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10719 /* Load up the value into the low element. We must ensure that the other
10720 elements are valid floats -- zero is the easiest such value. */
10723 if (vecmode
== V4SFmode
)
10724 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10726 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10730 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10731 emit_move_insn (value
, CONST0_RTX (vecmode
));
10732 if (vecmode
== V4SFmode
)
10733 emit_insn (gen_sse_movss (value
, value
, input
));
10735 emit_insn (gen_sse2_movsd (value
, value
, input
));
10738 emit_move_insn (large
, two31
);
10739 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10741 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10742 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10744 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10745 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10747 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10748 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10750 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10751 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10753 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10754 if (vecmode
== V4SFmode
)
10755 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10757 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10760 emit_insn (gen_xorv4si3 (value
, value
, large
));
10763 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10764 Expects the 64-bit DImode to be supplied in a pair of integral
10765 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10766 -mfpmath=sse, !optimize_size only. */
10769 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10771 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10772 rtx int_xmm
, fp_xmm
;
10773 rtx biases
, exponents
;
10776 int_xmm
= gen_reg_rtx (V4SImode
);
10777 if (TARGET_INTER_UNIT_MOVES
)
10778 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10779 else if (TARGET_SSE_SPLIT_REGS
)
10781 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10782 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10786 x
= gen_reg_rtx (V2DImode
);
10787 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10788 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10791 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10792 gen_rtvec (4, GEN_INT (0x43300000UL
),
10793 GEN_INT (0x45300000UL
),
10794 const0_rtx
, const0_rtx
));
10795 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10797 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10798 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10800 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10801 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10802 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10803 (0x1.0p84 + double(fp_value_hi_xmm)).
10804 Note these exponents differ by 32. */
10806 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10808 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10809 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10810 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10811 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10812 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10813 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10814 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10815 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10816 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10818 /* Add the upper and lower DFmode values together. */
10820 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10823 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10824 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10825 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10828 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10831 /* Convert an unsigned SImode value into a DFmode. Only currently used
10832 for SSE, but applicable anywhere. */
10835 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10837 REAL_VALUE_TYPE TWO31r
;
10840 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10841 NULL
, 1, OPTAB_DIRECT
);
10843 fp
= gen_reg_rtx (DFmode
);
10844 emit_insn (gen_floatsidf2 (fp
, x
));
10846 real_ldexp (&TWO31r
, &dconst1
, 31);
10847 x
= const_double_from_real_value (TWO31r
, DFmode
);
10849 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10851 emit_move_insn (target
, x
);
10854 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10855 32-bit mode; otherwise we have a direct convert instruction. */
10858 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10860 REAL_VALUE_TYPE TWO32r
;
10861 rtx fp_lo
, fp_hi
, x
;
10863 fp_lo
= gen_reg_rtx (DFmode
);
10864 fp_hi
= gen_reg_rtx (DFmode
);
10866 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10868 real_ldexp (&TWO32r
, &dconst1
, 32);
10869 x
= const_double_from_real_value (TWO32r
, DFmode
);
10870 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10872 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10874 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10877 emit_move_insn (target
, x
);
10880 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10881 For x86_32, -mfpmath=sse, !optimize_size only. */
10883 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10885 REAL_VALUE_TYPE ONE16r
;
10886 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10888 real_ldexp (&ONE16r
, &dconst1
, 16);
10889 x
= const_double_from_real_value (ONE16r
, SFmode
);
10890 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10891 NULL
, 0, OPTAB_DIRECT
);
10892 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10893 NULL
, 0, OPTAB_DIRECT
);
10894 fp_hi
= gen_reg_rtx (SFmode
);
10895 fp_lo
= gen_reg_rtx (SFmode
);
10896 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10897 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10898 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10900 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10902 if (!rtx_equal_p (target
, fp_hi
))
10903 emit_move_insn (target
, fp_hi
);
10906 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10907 then replicate the value for all elements of the vector
10911 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10918 v
= gen_rtvec (4, value
, value
, value
, value
);
10919 return gen_rtx_CONST_VECTOR (V4SImode
, v
);
10923 v
= gen_rtvec (2, value
, value
);
10924 return gen_rtx_CONST_VECTOR (V2DImode
, v
);
10928 v
= gen_rtvec (4, value
, value
, value
, value
);
10930 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10931 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10932 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10936 v
= gen_rtvec (2, value
, value
);
10938 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10939 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10942 gcc_unreachable ();
10946 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10947 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10948 for an SSE register. If VECT is true, then replicate the mask for
10949 all elements of the vector register. If INVERT is true, then create
10950 a mask excluding the sign bit. */
10953 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10955 enum machine_mode vec_mode
, imode
;
10956 HOST_WIDE_INT hi
, lo
;
10961 /* Find the sign bit, sign extended to 2*HWI. */
10967 vec_mode
= (mode
== SImode
) ? V4SImode
: V4SFmode
;
10968 lo
= 0x80000000, hi
= lo
< 0;
10974 vec_mode
= (mode
== DImode
) ? V2DImode
: V2DFmode
;
10975 if (HOST_BITS_PER_WIDE_INT
>= 64)
10976 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10978 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10984 vec_mode
= VOIDmode
;
10985 gcc_assert (HOST_BITS_PER_WIDE_INT
>= 64);
10986 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
10990 gcc_unreachable ();
10994 lo
= ~lo
, hi
= ~hi
;
10996 /* Force this value into the low part of a fp vector constant. */
10997 mask
= immed_double_const (lo
, hi
, imode
);
10998 mask
= gen_lowpart (mode
, mask
);
11000 if (vec_mode
== VOIDmode
)
11001 return force_reg (mode
, mask
);
11003 v
= ix86_build_const_vector (mode
, vect
, mask
);
11004 return force_reg (vec_mode
, v
);
11007 /* Generate code for floating point ABS or NEG. */
11010 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
11013 rtx mask
, set
, use
, clob
, dst
, src
;
11014 bool matching_memory
;
11015 bool use_sse
= false;
11016 bool vector_mode
= VECTOR_MODE_P (mode
);
11017 enum machine_mode elt_mode
= mode
;
11021 elt_mode
= GET_MODE_INNER (mode
);
11024 else if (mode
== TFmode
)
11026 else if (TARGET_SSE_MATH
)
11027 use_sse
= SSE_FLOAT_MODE_P (mode
);
11029 /* NEG and ABS performed with SSE use bitwise mask operations.
11030 Create the appropriate mask now. */
11032 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
11039 /* If the destination is memory, and we don't have matching source
11040 operands or we're using the x87, do things in registers. */
11041 matching_memory
= false;
11044 if (use_sse
&& rtx_equal_p (dst
, src
))
11045 matching_memory
= true;
11047 dst
= gen_reg_rtx (mode
);
11049 if (MEM_P (src
) && !matching_memory
)
11050 src
= force_reg (mode
, src
);
11054 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
11055 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
11060 set
= gen_rtx_fmt_e (code
, mode
, src
);
11061 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
11064 use
= gen_rtx_USE (VOIDmode
, mask
);
11065 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
11066 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
11067 gen_rtvec (3, set
, use
, clob
)));
11073 if (dst
!= operands
[0])
11074 emit_move_insn (operands
[0], dst
);
11077 /* Expand a copysign operation. Special case operand 0 being a constant. */
11080 ix86_expand_copysign (rtx operands
[])
11082 enum machine_mode mode
, vmode
;
11083 rtx dest
, op0
, op1
, mask
, nmask
;
11085 dest
= operands
[0];
11089 mode
= GET_MODE (dest
);
11090 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
11092 if (GET_CODE (op0
) == CONST_DOUBLE
)
11094 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
11096 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
11097 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
11099 if (mode
== SFmode
|| mode
== DFmode
)
11101 if (op0
== CONST0_RTX (mode
))
11102 op0
= CONST0_RTX (vmode
);
11107 if (mode
== SFmode
)
11108 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
11109 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
11111 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
11112 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
11116 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11118 if (mode
== SFmode
)
11119 copysign_insn
= gen_copysignsf3_const
;
11120 else if (mode
== DFmode
)
11121 copysign_insn
= gen_copysigndf3_const
;
11123 copysign_insn
= gen_copysigntf3_const
;
11125 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
11129 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
11131 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
11132 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11134 if (mode
== SFmode
)
11135 copysign_insn
= gen_copysignsf3_var
;
11136 else if (mode
== DFmode
)
11137 copysign_insn
= gen_copysigndf3_var
;
11139 copysign_insn
= gen_copysigntf3_var
;
11141 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
11145 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11146 be a constant, and so has already been expanded into a vector constant. */
11149 ix86_split_copysign_const (rtx operands
[])
11151 enum machine_mode mode
, vmode
;
11152 rtx dest
, op0
, op1
, mask
, x
;
11154 dest
= operands
[0];
11157 mask
= operands
[3];
11159 mode
= GET_MODE (dest
);
11160 vmode
= GET_MODE (mask
);
11162 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
11163 x
= gen_rtx_AND (vmode
, dest
, mask
);
11164 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11166 if (op0
!= CONST0_RTX (vmode
))
11168 x
= gen_rtx_IOR (vmode
, dest
, op0
);
11169 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11173 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11174 so we have to do two masks. */
11177 ix86_split_copysign_var (rtx operands
[])
11179 enum machine_mode mode
, vmode
;
11180 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
11182 dest
= operands
[0];
11183 scratch
= operands
[1];
11186 nmask
= operands
[4];
11187 mask
= operands
[5];
11189 mode
= GET_MODE (dest
);
11190 vmode
= GET_MODE (mask
);
11192 if (rtx_equal_p (op0
, op1
))
11194 /* Shouldn't happen often (it's useless, obviously), but when it does
11195 we'd generate incorrect code if we continue below. */
11196 emit_move_insn (dest
, op0
);
11200 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
11202 gcc_assert (REGNO (op1
) == REGNO (scratch
));
11204 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11205 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11208 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11209 x
= gen_rtx_NOT (vmode
, dest
);
11210 x
= gen_rtx_AND (vmode
, x
, op0
);
11211 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11215 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
11217 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11219 else /* alternative 2,4 */
11221 gcc_assert (REGNO (mask
) == REGNO (scratch
));
11222 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
11223 x
= gen_rtx_AND (vmode
, scratch
, op1
);
11225 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11227 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
11229 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11230 x
= gen_rtx_AND (vmode
, dest
, nmask
);
11232 else /* alternative 3,4 */
11234 gcc_assert (REGNO (nmask
) == REGNO (dest
));
11236 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11237 x
= gen_rtx_AND (vmode
, dest
, op0
);
11239 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11242 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
11243 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11246 /* Return TRUE or FALSE depending on whether the first SET in INSN
11247 has source and destination with matching CC modes, and that the
11248 CC mode is at least as constrained as REQ_MODE. */
11251 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
11254 enum machine_mode set_mode
;
11256 set
= PATTERN (insn
);
11257 if (GET_CODE (set
) == PARALLEL
)
11258 set
= XVECEXP (set
, 0, 0);
11259 gcc_assert (GET_CODE (set
) == SET
);
11260 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
11262 set_mode
= GET_MODE (SET_DEST (set
));
11266 if (req_mode
!= CCNOmode
11267 && (req_mode
!= CCmode
11268 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
11272 if (req_mode
== CCGCmode
)
11276 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
11280 if (req_mode
== CCZmode
)
11287 gcc_unreachable ();
11290 return (GET_MODE (SET_SRC (set
)) == set_mode
);
11293 /* Generate insn patterns to do an integer compare of OPERANDS. */
11296 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
11298 enum machine_mode cmpmode
;
11301 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
11302 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
11304 /* This is very simple, but making the interface the same as in the
11305 FP case makes the rest of the code easier. */
11306 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
11307 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
11309 /* Return the test that should be put into the flags user, i.e.
11310 the bcc, scc, or cmov instruction. */
11311 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
11314 /* Figure out whether to use ordered or unordered fp comparisons.
11315 Return the appropriate mode to use. */
11318 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
11320 /* ??? In order to make all comparisons reversible, we do all comparisons
11321 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11322 all forms trapping and nontrapping comparisons, we can make inequality
11323 comparisons trapping again, since it results in better code when using
11324 FCOM based compares. */
11325 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
11329 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
11331 enum machine_mode mode
= GET_MODE (op0
);
11333 if (SCALAR_FLOAT_MODE_P (mode
))
11335 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11336 return ix86_fp_compare_mode (code
);
11341 /* Only zero flag is needed. */
11342 case EQ
: /* ZF=0 */
11343 case NE
: /* ZF!=0 */
11345 /* Codes needing carry flag. */
11346 case GEU
: /* CF=0 */
11347 case LTU
: /* CF=1 */
11348 /* Detect overflow checks. They need just the carry flag. */
11349 if (GET_CODE (op0
) == PLUS
11350 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11354 case GTU
: /* CF=0 & ZF=0 */
11355 case LEU
: /* CF=1 | ZF=1 */
11356 /* Detect overflow checks. They need just the carry flag. */
11357 if (GET_CODE (op0
) == MINUS
11358 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11362 /* Codes possibly doable only with sign flag when
11363 comparing against zero. */
11364 case GE
: /* SF=OF or SF=0 */
11365 case LT
: /* SF<>OF or SF=1 */
11366 if (op1
== const0_rtx
)
11369 /* For other cases Carry flag is not required. */
11371 /* Codes doable only with sign flag when comparing
11372 against zero, but we miss jump instruction for it
11373 so we need to use relational tests against overflow
11374 that thus needs to be zero. */
11375 case GT
: /* ZF=0 & SF=OF */
11376 case LE
: /* ZF=1 | SF<>OF */
11377 if (op1
== const0_rtx
)
11381 /* strcmp pattern do (use flags) and combine may ask us for proper
11386 gcc_unreachable ();
11390 /* Return the fixed registers used for condition codes. */
11393 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
11400 /* If two condition code modes are compatible, return a condition code
11401 mode which is compatible with both. Otherwise, return
11404 static enum machine_mode
11405 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
11410 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
11413 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
11414 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
11420 gcc_unreachable ();
11450 /* These are only compatible with themselves, which we already
11456 /* Split comparison code CODE into comparisons we can do using branch
11457 instructions. BYPASS_CODE is comparison code for branch that will
11458 branch around FIRST_CODE and SECOND_CODE. If some of branches
11459 is not required, set value to UNKNOWN.
11460 We never require more than two branches. */
11463 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
11464 enum rtx_code
*first_code
,
11465 enum rtx_code
*second_code
)
11467 *first_code
= code
;
11468 *bypass_code
= UNKNOWN
;
11469 *second_code
= UNKNOWN
;
11471 /* The fcomi comparison sets flags as follows:
11481 case GT
: /* GTU - CF=0 & ZF=0 */
11482 case GE
: /* GEU - CF=0 */
11483 case ORDERED
: /* PF=0 */
11484 case UNORDERED
: /* PF=1 */
11485 case UNEQ
: /* EQ - ZF=1 */
11486 case UNLT
: /* LTU - CF=1 */
11487 case UNLE
: /* LEU - CF=1 | ZF=1 */
11488 case LTGT
: /* EQ - ZF=0 */
11490 case LT
: /* LTU - CF=1 - fails on unordered */
11491 *first_code
= UNLT
;
11492 *bypass_code
= UNORDERED
;
11494 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
11495 *first_code
= UNLE
;
11496 *bypass_code
= UNORDERED
;
11498 case EQ
: /* EQ - ZF=1 - fails on unordered */
11499 *first_code
= UNEQ
;
11500 *bypass_code
= UNORDERED
;
11502 case NE
: /* NE - ZF=0 - fails on unordered */
11503 *first_code
= LTGT
;
11504 *second_code
= UNORDERED
;
11506 case UNGE
: /* GEU - CF=0 - fails on unordered */
11508 *second_code
= UNORDERED
;
11510 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
11512 *second_code
= UNORDERED
;
11515 gcc_unreachable ();
11517 if (!TARGET_IEEE_FP
)
11519 *second_code
= UNKNOWN
;
11520 *bypass_code
= UNKNOWN
;
11524 /* Return cost of comparison done fcom + arithmetics operations on AX.
11525 All following functions do use number of instructions as a cost metrics.
11526 In future this should be tweaked to compute bytes for optimize_size and
11527 take into account performance of various instructions on various CPUs. */
11529 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
11531 if (!TARGET_IEEE_FP
)
11533 /* The cost of code output by ix86_expand_fp_compare. */
11557 gcc_unreachable ();
11561 /* Return cost of comparison done using fcomi operation.
11562 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11564 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
11566 enum rtx_code bypass_code
, first_code
, second_code
;
11567 /* Return arbitrarily high cost when instruction is not supported - this
11568 prevents gcc from using it. */
11571 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11572 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
11575 /* Return cost of comparison done using sahf operation.
11576 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11578 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
11580 enum rtx_code bypass_code
, first_code
, second_code
;
11581 /* Return arbitrarily high cost when instruction is not preferred - this
11582 avoids gcc from using it. */
11583 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11585 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11586 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11589 /* Compute cost of the comparison done using any method.
11590 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11592 ix86_fp_comparison_cost (enum rtx_code code
)
11594 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11597 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11598 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11600 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11601 if (min
> sahf_cost
)
11603 if (min
> fcomi_cost
)
11608 /* Return true if we should use an FCOMI instruction for this
11612 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11614 enum rtx_code swapped_code
= swap_condition (code
);
11616 return ((ix86_fp_comparison_cost (code
)
11617 == ix86_fp_comparison_fcomi_cost (code
))
11618 || (ix86_fp_comparison_cost (swapped_code
)
11619 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11622 /* Swap, force into registers, or otherwise massage the two operands
11623 to a fp comparison. The operands are updated in place; the new
11624 comparison code is returned. */
11626 static enum rtx_code
11627 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11629 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11630 rtx op0
= *pop0
, op1
= *pop1
;
11631 enum machine_mode op_mode
= GET_MODE (op0
);
11632 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11634 /* All of the unordered compare instructions only work on registers.
11635 The same is true of the fcomi compare instructions. The XFmode
11636 compare instructions require registers except when comparing
11637 against zero or when converting operand 1 from fixed point to
11641 && (fpcmp_mode
== CCFPUmode
11642 || (op_mode
== XFmode
11643 && ! (standard_80387_constant_p (op0
) == 1
11644 || standard_80387_constant_p (op1
) == 1)
11645 && GET_CODE (op1
) != FLOAT
)
11646 || ix86_use_fcomi_compare (code
)))
11648 op0
= force_reg (op_mode
, op0
);
11649 op1
= force_reg (op_mode
, op1
);
11653 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11654 things around if they appear profitable, otherwise force op0
11655 into a register. */
11657 if (standard_80387_constant_p (op0
) == 0
11659 && ! (standard_80387_constant_p (op1
) == 0
11663 tmp
= op0
, op0
= op1
, op1
= tmp
;
11664 code
= swap_condition (code
);
11668 op0
= force_reg (op_mode
, op0
);
11670 if (CONSTANT_P (op1
))
11672 int tmp
= standard_80387_constant_p (op1
);
11674 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11678 op1
= force_reg (op_mode
, op1
);
11681 op1
= force_reg (op_mode
, op1
);
11685 /* Try to rearrange the comparison to make it cheaper. */
11686 if (ix86_fp_comparison_cost (code
)
11687 > ix86_fp_comparison_cost (swap_condition (code
))
11688 && (REG_P (op1
) || can_create_pseudo_p ()))
11691 tmp
= op0
, op0
= op1
, op1
= tmp
;
11692 code
= swap_condition (code
);
11694 op0
= force_reg (op_mode
, op0
);
11702 /* Convert comparison codes we use to represent FP comparison to integer
11703 code that will result in proper branch. Return UNKNOWN if no such code
11707 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11736 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11739 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11740 rtx
*second_test
, rtx
*bypass_test
)
11742 enum machine_mode fpcmp_mode
, intcmp_mode
;
11744 int cost
= ix86_fp_comparison_cost (code
);
11745 enum rtx_code bypass_code
, first_code
, second_code
;
11747 fpcmp_mode
= ix86_fp_compare_mode (code
);
11748 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11751 *second_test
= NULL_RTX
;
11753 *bypass_test
= NULL_RTX
;
11755 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11757 /* Do fcomi/sahf based test when profitable. */
11758 if (ix86_fp_comparison_arithmetics_cost (code
) > cost
11759 && (bypass_code
== UNKNOWN
|| bypass_test
)
11760 && (second_code
== UNKNOWN
|| second_test
))
11762 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11763 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11769 gcc_assert (TARGET_SAHF
);
11772 scratch
= gen_reg_rtx (HImode
);
11773 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11775 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
11778 /* The FP codes work out to act like unsigned. */
11779 intcmp_mode
= fpcmp_mode
;
11781 if (bypass_code
!= UNKNOWN
)
11782 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11783 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11785 if (second_code
!= UNKNOWN
)
11786 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11787 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11792 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11793 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11794 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11796 scratch
= gen_reg_rtx (HImode
);
11797 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11799 /* In the unordered case, we have to check C2 for NaN's, which
11800 doesn't happen to work out to anything nice combination-wise.
11801 So do some bit twiddling on the value we've got in AH to come
11802 up with an appropriate set of condition codes. */
11804 intcmp_mode
= CCNOmode
;
11809 if (code
== GT
|| !TARGET_IEEE_FP
)
11811 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11816 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11817 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11818 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11819 intcmp_mode
= CCmode
;
11825 if (code
== LT
&& TARGET_IEEE_FP
)
11827 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11828 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11829 intcmp_mode
= CCmode
;
11834 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11840 if (code
== GE
|| !TARGET_IEEE_FP
)
11842 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11847 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11848 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11855 if (code
== LE
&& TARGET_IEEE_FP
)
11857 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11858 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11859 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11860 intcmp_mode
= CCmode
;
11865 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11871 if (code
== EQ
&& TARGET_IEEE_FP
)
11873 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11874 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11875 intcmp_mode
= CCmode
;
11880 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11887 if (code
== NE
&& TARGET_IEEE_FP
)
11889 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11890 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11896 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11902 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11906 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11911 gcc_unreachable ();
11915 /* Return the test that should be put into the flags user, i.e.
11916 the bcc, scc, or cmov instruction. */
11917 return gen_rtx_fmt_ee (code
, VOIDmode
,
11918 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11923 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11926 op0
= ix86_compare_op0
;
11927 op1
= ix86_compare_op1
;
11930 *second_test
= NULL_RTX
;
11932 *bypass_test
= NULL_RTX
;
11934 if (ix86_compare_emitted
)
11936 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11937 ix86_compare_emitted
= NULL_RTX
;
11939 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11941 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11942 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11943 second_test
, bypass_test
);
11946 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11951 /* Return true if the CODE will result in nontrivial jump sequence. */
11953 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11955 enum rtx_code bypass_code
, first_code
, second_code
;
11958 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11959 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11963 ix86_expand_branch (enum rtx_code code
, rtx label
)
11967 /* If we have emitted a compare insn, go straight to simple.
11968 ix86_expand_compare won't emit anything if ix86_compare_emitted
11970 if (ix86_compare_emitted
)
11973 switch (GET_MODE (ix86_compare_op0
))
11979 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11980 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11981 gen_rtx_LABEL_REF (VOIDmode
, label
),
11983 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11992 enum rtx_code bypass_code
, first_code
, second_code
;
11994 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11995 &ix86_compare_op1
);
11997 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11999 /* Check whether we will use the natural sequence with one jump. If
12000 so, we can expand jump early. Otherwise delay expansion by
12001 creating compound insn to not confuse optimizers. */
12002 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
)
12004 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
12005 gen_rtx_LABEL_REF (VOIDmode
, label
),
12006 pc_rtx
, NULL_RTX
, NULL_RTX
);
12010 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
12011 ix86_compare_op0
, ix86_compare_op1
);
12012 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
12013 gen_rtx_LABEL_REF (VOIDmode
, label
),
12015 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
12017 use_fcomi
= ix86_use_fcomi_compare (code
);
12018 vec
= rtvec_alloc (3 + !use_fcomi
);
12019 RTVEC_ELT (vec
, 0) = tmp
;
12021 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FPSR_REG
));
12023 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FLAGS_REG
));
12026 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
12028 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
12037 /* Expand DImode branch into multiple compare+branch. */
12039 rtx lo
[2], hi
[2], label2
;
12040 enum rtx_code code1
, code2
, code3
;
12041 enum machine_mode submode
;
12043 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
12045 tmp
= ix86_compare_op0
;
12046 ix86_compare_op0
= ix86_compare_op1
;
12047 ix86_compare_op1
= tmp
;
12048 code
= swap_condition (code
);
12050 if (GET_MODE (ix86_compare_op0
) == DImode
)
12052 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
12053 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
12058 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
12059 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
12063 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12064 avoid two branches. This costs one extra insn, so disable when
12065 optimizing for size. */
12067 if ((code
== EQ
|| code
== NE
)
12069 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
12074 if (hi
[1] != const0_rtx
)
12075 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
12076 NULL_RTX
, 0, OPTAB_WIDEN
);
12079 if (lo
[1] != const0_rtx
)
12080 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
12081 NULL_RTX
, 0, OPTAB_WIDEN
);
12083 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
12084 NULL_RTX
, 0, OPTAB_WIDEN
);
12086 ix86_compare_op0
= tmp
;
12087 ix86_compare_op1
= const0_rtx
;
12088 ix86_expand_branch (code
, label
);
12092 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12093 op1 is a constant and the low word is zero, then we can just
12094 examine the high word. */
12096 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
12099 case LT
: case LTU
: case GE
: case GEU
:
12100 ix86_compare_op0
= hi
[0];
12101 ix86_compare_op1
= hi
[1];
12102 ix86_expand_branch (code
, label
);
12108 /* Otherwise, we need two or three jumps. */
12110 label2
= gen_label_rtx ();
12113 code2
= swap_condition (code
);
12114 code3
= unsigned_condition (code
);
12118 case LT
: case GT
: case LTU
: case GTU
:
12121 case LE
: code1
= LT
; code2
= GT
; break;
12122 case GE
: code1
= GT
; code2
= LT
; break;
12123 case LEU
: code1
= LTU
; code2
= GTU
; break;
12124 case GEU
: code1
= GTU
; code2
= LTU
; break;
12126 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
12127 case NE
: code2
= UNKNOWN
; break;
12130 gcc_unreachable ();
12135 * if (hi(a) < hi(b)) goto true;
12136 * if (hi(a) > hi(b)) goto false;
12137 * if (lo(a) < lo(b)) goto true;
12141 ix86_compare_op0
= hi
[0];
12142 ix86_compare_op1
= hi
[1];
12144 if (code1
!= UNKNOWN
)
12145 ix86_expand_branch (code1
, label
);
12146 if (code2
!= UNKNOWN
)
12147 ix86_expand_branch (code2
, label2
);
12149 ix86_compare_op0
= lo
[0];
12150 ix86_compare_op1
= lo
[1];
12151 ix86_expand_branch (code3
, label
);
12153 if (code2
!= UNKNOWN
)
12154 emit_label (label2
);
12159 gcc_unreachable ();
12163 /* Split branch based on floating point condition. */
12165 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
12166 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
12168 rtx second
, bypass
;
12169 rtx label
= NULL_RTX
;
12171 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
12174 if (target2
!= pc_rtx
)
12177 code
= reverse_condition_maybe_unordered (code
);
12182 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
12183 tmp
, &second
, &bypass
);
12185 /* Remove pushed operand from stack. */
12187 ix86_free_from_memory (GET_MODE (pushed
));
12189 if (split_branch_probability
>= 0)
12191 /* Distribute the probabilities across the jumps.
12192 Assume the BYPASS and SECOND to be always test
12194 probability
= split_branch_probability
;
12196 /* Value of 1 is low enough to make no need for probability
12197 to be updated. Later we may run some experiments and see
12198 if unordered values are more frequent in practice. */
12200 bypass_probability
= 1;
12202 second_probability
= 1;
12204 if (bypass
!= NULL_RTX
)
12206 label
= gen_label_rtx ();
12207 i
= emit_jump_insn (gen_rtx_SET
12209 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12211 gen_rtx_LABEL_REF (VOIDmode
,
12214 if (bypass_probability
>= 0)
12216 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12217 GEN_INT (bypass_probability
),
12220 i
= emit_jump_insn (gen_rtx_SET
12222 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12223 condition
, target1
, target2
)));
12224 if (probability
>= 0)
12226 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12227 GEN_INT (probability
),
12229 if (second
!= NULL_RTX
)
12231 i
= emit_jump_insn (gen_rtx_SET
12233 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
12235 if (second_probability
>= 0)
12237 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12238 GEN_INT (second_probability
),
12241 if (label
!= NULL_RTX
)
12242 emit_label (label
);
12246 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
12248 rtx ret
, tmp
, tmpreg
, equiv
;
12249 rtx second_test
, bypass_test
;
12251 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
12252 return 0; /* FAIL */
12254 gcc_assert (GET_MODE (dest
) == QImode
);
12256 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12257 PUT_MODE (ret
, QImode
);
12262 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
12263 if (bypass_test
|| second_test
)
12265 rtx test
= second_test
;
12267 rtx tmp2
= gen_reg_rtx (QImode
);
12270 gcc_assert (!second_test
);
12271 test
= bypass_test
;
12273 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
12275 PUT_MODE (test
, QImode
);
12276 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
12279 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
12281 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
12284 /* Attach a REG_EQUAL note describing the comparison result. */
12285 if (ix86_compare_op0
&& ix86_compare_op1
)
12287 equiv
= simplify_gen_relational (code
, QImode
,
12288 GET_MODE (ix86_compare_op0
),
12289 ix86_compare_op0
, ix86_compare_op1
);
12290 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
12293 return 1; /* DONE */
12296 /* Expand comparison setting or clearing carry flag. Return true when
12297 successful and set pop for the operation. */
12299 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
12301 enum machine_mode mode
=
12302 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
12304 /* Do not handle DImode compares that go through special path. */
12305 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
12308 if (SCALAR_FLOAT_MODE_P (mode
))
12310 rtx second_test
= NULL
, bypass_test
= NULL
;
12311 rtx compare_op
, compare_seq
;
12313 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
12315 /* Shortcut: following common codes never translate
12316 into carry flag compares. */
12317 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
12318 || code
== ORDERED
|| code
== UNORDERED
)
12321 /* These comparisons require zero flag; swap operands so they won't. */
12322 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
12323 && !TARGET_IEEE_FP
)
12328 code
= swap_condition (code
);
12331 /* Try to expand the comparison and verify that we end up with
12332 carry flag based comparison. This fails to be true only when
12333 we decide to expand comparison using arithmetic that is not
12334 too common scenario. */
12336 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
12337 &second_test
, &bypass_test
);
12338 compare_seq
= get_insns ();
12341 if (second_test
|| bypass_test
)
12344 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12345 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12346 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
12348 code
= GET_CODE (compare_op
);
12350 if (code
!= LTU
&& code
!= GEU
)
12353 emit_insn (compare_seq
);
12358 if (!INTEGRAL_MODE_P (mode
))
12367 /* Convert a==0 into (unsigned)a<1. */
12370 if (op1
!= const0_rtx
)
12373 code
= (code
== EQ
? LTU
: GEU
);
12376 /* Convert a>b into b<a or a>=b-1. */
12379 if (CONST_INT_P (op1
))
12381 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
12382 /* Bail out on overflow. We still can swap operands but that
12383 would force loading of the constant into register. */
12384 if (op1
== const0_rtx
12385 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
12387 code
= (code
== GTU
? GEU
: LTU
);
12394 code
= (code
== GTU
? LTU
: GEU
);
12398 /* Convert a>=0 into (unsigned)a<0x80000000. */
12401 if (mode
== DImode
|| op1
!= const0_rtx
)
12403 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12404 code
= (code
== LT
? GEU
: LTU
);
12408 if (mode
== DImode
|| op1
!= constm1_rtx
)
12410 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12411 code
= (code
== LE
? GEU
: LTU
);
12417 /* Swapping operands may cause constant to appear as first operand. */
12418 if (!nonimmediate_operand (op0
, VOIDmode
))
12420 if (!can_create_pseudo_p ())
12422 op0
= force_reg (mode
, op0
);
12424 ix86_compare_op0
= op0
;
12425 ix86_compare_op1
= op1
;
12426 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
12427 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
12432 ix86_expand_int_movcc (rtx operands
[])
12434 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
12435 rtx compare_seq
, compare_op
;
12436 rtx second_test
, bypass_test
;
12437 enum machine_mode mode
= GET_MODE (operands
[0]);
12438 bool sign_bit_compare_p
= false;;
12441 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12442 compare_seq
= get_insns ();
12445 compare_code
= GET_CODE (compare_op
);
12447 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
12448 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
12449 sign_bit_compare_p
= true;
12451 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12452 HImode insns, we'd be swallowed in word prefix ops. */
12454 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
12455 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
12456 && CONST_INT_P (operands
[2])
12457 && CONST_INT_P (operands
[3]))
12459 rtx out
= operands
[0];
12460 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
12461 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
12462 HOST_WIDE_INT diff
;
12465 /* Sign bit compares are better done using shifts than we do by using
12467 if (sign_bit_compare_p
12468 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12469 ix86_compare_op1
, &compare_op
))
12471 /* Detect overlap between destination and compare sources. */
12474 if (!sign_bit_compare_p
)
12476 bool fpcmp
= false;
12478 compare_code
= GET_CODE (compare_op
);
12480 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12481 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12484 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
12487 /* To simplify rest of code, restrict to the GEU case. */
12488 if (compare_code
== LTU
)
12490 HOST_WIDE_INT tmp
= ct
;
12493 compare_code
= reverse_condition (compare_code
);
12494 code
= reverse_condition (code
);
12499 PUT_CODE (compare_op
,
12500 reverse_condition_maybe_unordered
12501 (GET_CODE (compare_op
)));
12503 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12507 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
12508 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
12509 tmp
= gen_reg_rtx (mode
);
12511 if (mode
== DImode
)
12512 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
12514 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
12518 if (code
== GT
|| code
== GE
)
12519 code
= reverse_condition (code
);
12522 HOST_WIDE_INT tmp
= ct
;
12527 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
12528 ix86_compare_op1
, VOIDmode
, 0, -1);
12541 tmp
= expand_simple_binop (mode
, PLUS
,
12543 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12554 tmp
= expand_simple_binop (mode
, IOR
,
12556 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12558 else if (diff
== -1 && ct
)
12568 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12570 tmp
= expand_simple_binop (mode
, PLUS
,
12571 copy_rtx (tmp
), GEN_INT (cf
),
12572 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12580 * andl cf - ct, dest
12590 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12593 tmp
= expand_simple_binop (mode
, AND
,
12595 gen_int_mode (cf
- ct
, mode
),
12596 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12598 tmp
= expand_simple_binop (mode
, PLUS
,
12599 copy_rtx (tmp
), GEN_INT (ct
),
12600 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12603 if (!rtx_equal_p (tmp
, out
))
12604 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
12606 return 1; /* DONE */
12611 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12614 tmp
= ct
, ct
= cf
, cf
= tmp
;
12617 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12619 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12621 /* We may be reversing unordered compare to normal compare, that
12622 is not valid in general (we may convert non-trapping condition
12623 to trapping one), however on i386 we currently emit all
12624 comparisons unordered. */
12625 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12626 code
= reverse_condition_maybe_unordered (code
);
12630 compare_code
= reverse_condition (compare_code
);
12631 code
= reverse_condition (code
);
12635 compare_code
= UNKNOWN
;
12636 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12637 && CONST_INT_P (ix86_compare_op1
))
12639 if (ix86_compare_op1
== const0_rtx
12640 && (code
== LT
|| code
== GE
))
12641 compare_code
= code
;
12642 else if (ix86_compare_op1
== constm1_rtx
)
12646 else if (code
== GT
)
12651 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12652 if (compare_code
!= UNKNOWN
12653 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12654 && (cf
== -1 || ct
== -1))
12656 /* If lea code below could be used, only optimize
12657 if it results in a 2 insn sequence. */
12659 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12660 || diff
== 3 || diff
== 5 || diff
== 9)
12661 || (compare_code
== LT
&& ct
== -1)
12662 || (compare_code
== GE
&& cf
== -1))
12665 * notl op1 (if necessary)
12673 code
= reverse_condition (code
);
12676 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12677 ix86_compare_op1
, VOIDmode
, 0, -1);
12679 out
= expand_simple_binop (mode
, IOR
,
12681 out
, 1, OPTAB_DIRECT
);
12682 if (out
!= operands
[0])
12683 emit_move_insn (operands
[0], out
);
12685 return 1; /* DONE */
12690 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12691 || diff
== 3 || diff
== 5 || diff
== 9)
12692 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12694 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12700 * lea cf(dest*(ct-cf)),dest
12704 * This also catches the degenerate setcc-only case.
12710 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12711 ix86_compare_op1
, VOIDmode
, 0, 1);
12714 /* On x86_64 the lea instruction operates on Pmode, so we need
12715 to get arithmetics done in proper mode to match. */
12717 tmp
= copy_rtx (out
);
12721 out1
= copy_rtx (out
);
12722 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12726 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12732 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12735 if (!rtx_equal_p (tmp
, out
))
12738 out
= force_operand (tmp
, copy_rtx (out
));
12740 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12742 if (!rtx_equal_p (out
, operands
[0]))
12743 emit_move_insn (operands
[0], copy_rtx (out
));
12745 return 1; /* DONE */
12749 * General case: Jumpful:
12750 * xorl dest,dest cmpl op1, op2
12751 * cmpl op1, op2 movl ct, dest
12752 * setcc dest jcc 1f
12753 * decl dest movl cf, dest
12754 * andl (cf-ct),dest 1:
12757 * Size 20. Size 14.
12759 * This is reasonably steep, but branch mispredict costs are
12760 * high on modern cpus, so consider failing only if optimizing
12764 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12765 && BRANCH_COST
>= 2)
12769 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12774 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12776 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12778 /* We may be reversing unordered compare to normal compare,
12779 that is not valid in general (we may convert non-trapping
12780 condition to trapping one), however on i386 we currently
12781 emit all comparisons unordered. */
12782 code
= reverse_condition_maybe_unordered (code
);
12786 code
= reverse_condition (code
);
12787 if (compare_code
!= UNKNOWN
)
12788 compare_code
= reverse_condition (compare_code
);
12792 if (compare_code
!= UNKNOWN
)
12794 /* notl op1 (if needed)
12799 For x < 0 (resp. x <= -1) there will be no notl,
12800 so if possible swap the constants to get rid of the
12802 True/false will be -1/0 while code below (store flag
12803 followed by decrement) is 0/-1, so the constants need
12804 to be exchanged once more. */
12806 if (compare_code
== GE
|| !cf
)
12808 code
= reverse_condition (code
);
12813 HOST_WIDE_INT tmp
= cf
;
12818 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12819 ix86_compare_op1
, VOIDmode
, 0, -1);
12823 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12824 ix86_compare_op1
, VOIDmode
, 0, 1);
12826 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12827 copy_rtx (out
), 1, OPTAB_DIRECT
);
12830 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12831 gen_int_mode (cf
- ct
, mode
),
12832 copy_rtx (out
), 1, OPTAB_DIRECT
);
12834 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12835 copy_rtx (out
), 1, OPTAB_DIRECT
);
12836 if (!rtx_equal_p (out
, operands
[0]))
12837 emit_move_insn (operands
[0], copy_rtx (out
));
12839 return 1; /* DONE */
12843 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12845 /* Try a few things more with specific constants and a variable. */
12848 rtx var
, orig_out
, out
, tmp
;
12850 if (BRANCH_COST
<= 2)
12851 return 0; /* FAIL */
12853 /* If one of the two operands is an interesting constant, load a
12854 constant with the above and mask it in with a logical operation. */
12856 if (CONST_INT_P (operands
[2]))
12859 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12860 operands
[3] = constm1_rtx
, op
= and_optab
;
12861 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12862 operands
[3] = const0_rtx
, op
= ior_optab
;
12864 return 0; /* FAIL */
12866 else if (CONST_INT_P (operands
[3]))
12869 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12870 operands
[2] = constm1_rtx
, op
= and_optab
;
12871 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12872 operands
[2] = const0_rtx
, op
= ior_optab
;
12874 return 0; /* FAIL */
12877 return 0; /* FAIL */
12879 orig_out
= operands
[0];
12880 tmp
= gen_reg_rtx (mode
);
12883 /* Recurse to get the constant loaded. */
12884 if (ix86_expand_int_movcc (operands
) == 0)
12885 return 0; /* FAIL */
12887 /* Mask in the interesting variable. */
12888 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12890 if (!rtx_equal_p (out
, orig_out
))
12891 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12893 return 1; /* DONE */
12897 * For comparison with above,
12907 if (! nonimmediate_operand (operands
[2], mode
))
12908 operands
[2] = force_reg (mode
, operands
[2]);
12909 if (! nonimmediate_operand (operands
[3], mode
))
12910 operands
[3] = force_reg (mode
, operands
[3]);
12912 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12914 rtx tmp
= gen_reg_rtx (mode
);
12915 emit_move_insn (tmp
, operands
[3]);
12918 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12920 rtx tmp
= gen_reg_rtx (mode
);
12921 emit_move_insn (tmp
, operands
[2]);
12925 if (! register_operand (operands
[2], VOIDmode
)
12927 || ! register_operand (operands
[3], VOIDmode
)))
12928 operands
[2] = force_reg (mode
, operands
[2]);
12931 && ! register_operand (operands
[3], VOIDmode
))
12932 operands
[3] = force_reg (mode
, operands
[3]);
12934 emit_insn (compare_seq
);
12935 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12936 gen_rtx_IF_THEN_ELSE (mode
,
12937 compare_op
, operands
[2],
12940 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12941 gen_rtx_IF_THEN_ELSE (mode
,
12943 copy_rtx (operands
[3]),
12944 copy_rtx (operands
[0]))));
12946 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12947 gen_rtx_IF_THEN_ELSE (mode
,
12949 copy_rtx (operands
[2]),
12950 copy_rtx (operands
[0]))));
12952 return 1; /* DONE */
12955 /* Swap, force into registers, or otherwise massage the two operands
12956 to an sse comparison with a mask result. Thus we differ a bit from
12957 ix86_prepare_fp_compare_args which expects to produce a flags result.
12959 The DEST operand exists to help determine whether to commute commutative
12960 operators. The POP0/POP1 operands are updated in place. The new
12961 comparison code is returned, or UNKNOWN if not implementable. */
12963 static enum rtx_code
12964 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12965 rtx
*pop0
, rtx
*pop1
)
12973 /* We have no LTGT as an operator. We could implement it with
12974 NE & ORDERED, but this requires an extra temporary. It's
12975 not clear that it's worth it. */
12982 /* These are supported directly. */
12989 /* For commutative operators, try to canonicalize the destination
12990 operand to be first in the comparison - this helps reload to
12991 avoid extra moves. */
12992 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
13000 /* These are not supported directly. Swap the comparison operands
13001 to transform into something that is supported. */
13005 code
= swap_condition (code
);
13009 gcc_unreachable ();
13015 /* Detect conditional moves that exactly match min/max operational
13016 semantics. Note that this is IEEE safe, as long as we don't
13017 interchange the operands.
13019 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13020 and TRUE if the operation is successful and instructions are emitted. */
13023 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
13024 rtx cmp_op1
, rtx if_true
, rtx if_false
)
13026 enum machine_mode mode
;
13032 else if (code
== UNGE
)
13035 if_true
= if_false
;
13041 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
13043 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
13048 mode
= GET_MODE (dest
);
13050 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13051 but MODE may be a vector mode and thus not appropriate. */
13052 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
13054 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
13057 if_true
= force_reg (mode
, if_true
);
13058 v
= gen_rtvec (2, if_true
, if_false
);
13059 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
13063 code
= is_min
? SMIN
: SMAX
;
13064 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
13067 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
13071 /* Expand an sse vector comparison. Return the register with the result. */
13074 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
13075 rtx op_true
, rtx op_false
)
13077 enum machine_mode mode
= GET_MODE (dest
);
13080 cmp_op0
= force_reg (mode
, cmp_op0
);
13081 if (!nonimmediate_operand (cmp_op1
, mode
))
13082 cmp_op1
= force_reg (mode
, cmp_op1
);
13085 || reg_overlap_mentioned_p (dest
, op_true
)
13086 || reg_overlap_mentioned_p (dest
, op_false
))
13087 dest
= gen_reg_rtx (mode
);
13089 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
13090 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13095 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13096 operations. This is used for both scalar and vector conditional moves. */
13099 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
13101 enum machine_mode mode
= GET_MODE (dest
);
13106 rtx pcmov
= gen_rtx_SET (mode
, dest
,
13107 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
13112 else if (op_false
== CONST0_RTX (mode
))
13114 op_true
= force_reg (mode
, op_true
);
13115 x
= gen_rtx_AND (mode
, cmp
, op_true
);
13116 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13118 else if (op_true
== CONST0_RTX (mode
))
13120 op_false
= force_reg (mode
, op_false
);
13121 x
= gen_rtx_NOT (mode
, cmp
);
13122 x
= gen_rtx_AND (mode
, x
, op_false
);
13123 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13127 op_true
= force_reg (mode
, op_true
);
13128 op_false
= force_reg (mode
, op_false
);
13130 t2
= gen_reg_rtx (mode
);
13132 t3
= gen_reg_rtx (mode
);
13136 x
= gen_rtx_AND (mode
, op_true
, cmp
);
13137 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
13139 x
= gen_rtx_NOT (mode
, cmp
);
13140 x
= gen_rtx_AND (mode
, x
, op_false
);
13141 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
13143 x
= gen_rtx_IOR (mode
, t3
, t2
);
13144 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13148 /* Expand a floating-point conditional move. Return true if successful. */
13151 ix86_expand_fp_movcc (rtx operands
[])
13153 enum machine_mode mode
= GET_MODE (operands
[0]);
13154 enum rtx_code code
= GET_CODE (operands
[1]);
13155 rtx tmp
, compare_op
, second_test
, bypass_test
;
13157 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
13159 enum machine_mode cmode
;
13161 /* Since we've no cmove for sse registers, don't force bad register
13162 allocation just to gain access to it. Deny movcc when the
13163 comparison mode doesn't match the move mode. */
13164 cmode
= GET_MODE (ix86_compare_op0
);
13165 if (cmode
== VOIDmode
)
13166 cmode
= GET_MODE (ix86_compare_op1
);
13170 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13172 &ix86_compare_op1
);
13173 if (code
== UNKNOWN
)
13176 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
13177 ix86_compare_op1
, operands
[2],
13181 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
13182 ix86_compare_op1
, operands
[2], operands
[3]);
13183 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
13187 /* The floating point conditional move instructions don't directly
13188 support conditions resulting from a signed integer comparison. */
13190 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13192 /* The floating point conditional move instructions don't directly
13193 support signed integer comparisons. */
13195 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
13197 gcc_assert (!second_test
&& !bypass_test
);
13198 tmp
= gen_reg_rtx (QImode
);
13199 ix86_expand_setcc (code
, tmp
);
13201 ix86_compare_op0
= tmp
;
13202 ix86_compare_op1
= const0_rtx
;
13203 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13205 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
13207 tmp
= gen_reg_rtx (mode
);
13208 emit_move_insn (tmp
, operands
[3]);
13211 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
13213 tmp
= gen_reg_rtx (mode
);
13214 emit_move_insn (tmp
, operands
[2]);
13218 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13219 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
13220 operands
[2], operands
[3])));
13222 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13223 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
13224 operands
[3], operands
[0])));
13226 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13227 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
13228 operands
[2], operands
[0])));
13233 /* Expand a floating-point vector conditional move; a vcond operation
13234 rather than a movcc operation. */
13237 ix86_expand_fp_vcond (rtx operands
[])
13239 enum rtx_code code
= GET_CODE (operands
[3]);
13242 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13243 &operands
[4], &operands
[5]);
13244 if (code
== UNKNOWN
)
13247 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
13248 operands
[5], operands
[1], operands
[2]))
13251 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
13252 operands
[1], operands
[2]);
13253 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
13257 /* Expand a signed/unsigned integral vector conditional move. */
13260 ix86_expand_int_vcond (rtx operands
[])
13262 enum machine_mode mode
= GET_MODE (operands
[0]);
13263 enum rtx_code code
= GET_CODE (operands
[3]);
13264 bool negate
= false;
13267 cop0
= operands
[4];
13268 cop1
= operands
[5];
13270 /* Canonicalize the comparison to EQ, GT, GTU. */
13281 code
= reverse_condition (code
);
13287 code
= reverse_condition (code
);
13293 code
= swap_condition (code
);
13294 x
= cop0
, cop0
= cop1
, cop1
= x
;
13298 gcc_unreachable ();
13301 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13302 if (mode
== V2DImode
)
13307 /* SSE4.1 supports EQ. */
13308 if (!TARGET_SSE4_1
)
13314 /* SSE4.2 supports GT/GTU. */
13315 if (!TARGET_SSE4_2
)
13320 gcc_unreachable ();
13324 /* Unsigned parallel compare is not supported by the hardware. Play some
13325 tricks to turn this into a signed comparison against 0. */
13328 cop0
= force_reg (mode
, cop0
);
13337 /* Perform a parallel modulo subtraction. */
13338 t1
= gen_reg_rtx (mode
);
13339 emit_insn ((mode
== V4SImode
13341 : gen_subv2di3
) (t1
, cop0
, cop1
));
13343 /* Extract the original sign bit of op0. */
13344 mask
= ix86_build_signbit_mask (GET_MODE_INNER (mode
),
13346 t2
= gen_reg_rtx (mode
);
13347 emit_insn ((mode
== V4SImode
13349 : gen_andv2di3
) (t2
, cop0
, mask
));
13351 /* XOR it back into the result of the subtraction. This results
13352 in the sign bit set iff we saw unsigned underflow. */
13353 x
= gen_reg_rtx (mode
);
13354 emit_insn ((mode
== V4SImode
13356 : gen_xorv2di3
) (x
, t1
, t2
));
13364 /* Perform a parallel unsigned saturating subtraction. */
13365 x
= gen_reg_rtx (mode
);
13366 emit_insn (gen_rtx_SET (VOIDmode
, x
,
13367 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
13374 gcc_unreachable ();
13378 cop1
= CONST0_RTX (mode
);
13381 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
13382 operands
[1+negate
], operands
[2-negate
]);
13384 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
13385 operands
[2-negate
]);
13389 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13390 true if we should do zero extension, else sign extension. HIGH_P is
13391 true if we want the N/2 high elements, else the low elements. */
13394 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13396 enum machine_mode imode
= GET_MODE (operands
[1]);
13397 rtx (*unpack
)(rtx
, rtx
, rtx
);
13404 unpack
= gen_vec_interleave_highv16qi
;
13406 unpack
= gen_vec_interleave_lowv16qi
;
13410 unpack
= gen_vec_interleave_highv8hi
;
13412 unpack
= gen_vec_interleave_lowv8hi
;
13416 unpack
= gen_vec_interleave_highv4si
;
13418 unpack
= gen_vec_interleave_lowv4si
;
13421 gcc_unreachable ();
13424 dest
= gen_lowpart (imode
, operands
[0]);
13427 se
= force_reg (imode
, CONST0_RTX (imode
));
13429 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
13430 operands
[1], pc_rtx
, pc_rtx
);
13432 emit_insn (unpack (dest
, operands
[1], se
));
13435 /* This function performs the same task as ix86_expand_sse_unpack,
13436 but with SSE4.1 instructions. */
13439 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13441 enum machine_mode imode
= GET_MODE (operands
[1]);
13442 rtx (*unpack
)(rtx
, rtx
);
13449 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
13451 unpack
= gen_sse4_1_extendv8qiv8hi2
;
13455 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
13457 unpack
= gen_sse4_1_extendv4hiv4si2
;
13461 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
13463 unpack
= gen_sse4_1_extendv2siv2di2
;
13466 gcc_unreachable ();
13469 dest
= operands
[0];
13472 /* Shift higher 8 bytes to lower 8 bytes. */
13473 src
= gen_reg_rtx (imode
);
13474 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode
, src
),
13475 gen_lowpart (TImode
, operands
[1]),
13481 emit_insn (unpack (dest
, src
));
13484 /* This function performs the same task as ix86_expand_sse_unpack,
13485 but with amdfam15 instructions. */
13487 #define PPERM_SRC 0x00 /* copy source */
13488 #define PPERM_INVERT 0x20 /* invert source */
13489 #define PPERM_REVERSE 0x40 /* bit reverse source */
13490 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13491 #define PPERM_ZERO 0x80 /* all 0's */
13492 #define PPERM_ONES 0xa0 /* all 1's */
13493 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13494 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13496 #define PPERM_SRC1 0x00 /* use first source byte */
13497 #define PPERM_SRC2 0x10 /* use second source byte */
13500 ix86_expand_sse5_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13502 enum machine_mode imode
= GET_MODE (operands
[1]);
13503 int pperm_bytes
[16];
13505 int h
= (high_p
) ? 8 : 0;
13508 rtvec v
= rtvec_alloc (16);
13511 rtx op0
= operands
[0], op1
= operands
[1];
13516 vs
= rtvec_alloc (8);
13517 h2
= (high_p
) ? 8 : 0;
13518 for (i
= 0; i
< 8; i
++)
13520 pperm_bytes
[2*i
+0] = PPERM_SRC
| PPERM_SRC2
| i
| h
;
13521 pperm_bytes
[2*i
+1] = ((unsigned_p
)
13523 : PPERM_SIGN
| PPERM_SRC2
| i
| h
);
13526 for (i
= 0; i
< 16; i
++)
13527 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13529 for (i
= 0; i
< 8; i
++)
13530 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13532 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13533 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13535 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0
, op1
, p
, x
));
13537 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0
, op1
, p
, x
));
13541 vs
= rtvec_alloc (4);
13542 h2
= (high_p
) ? 4 : 0;
13543 for (i
= 0; i
< 4; i
++)
13545 sign_extend
= ((unsigned_p
)
13547 : PPERM_SIGN
| PPERM_SRC2
| ((2*i
) + 1 + h
));
13548 pperm_bytes
[4*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 0 + h
);
13549 pperm_bytes
[4*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 1 + h
);
13550 pperm_bytes
[4*i
+2] = sign_extend
;
13551 pperm_bytes
[4*i
+3] = sign_extend
;
13554 for (i
= 0; i
< 16; i
++)
13555 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13557 for (i
= 0; i
< 4; i
++)
13558 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13560 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13561 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13563 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0
, op1
, p
, x
));
13565 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0
, op1
, p
, x
));
13569 vs
= rtvec_alloc (2);
13570 h2
= (high_p
) ? 2 : 0;
13571 for (i
= 0; i
< 2; i
++)
13573 sign_extend
= ((unsigned_p
)
13575 : PPERM_SIGN
| PPERM_SRC2
| ((4*i
) + 3 + h
));
13576 pperm_bytes
[8*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 0 + h
);
13577 pperm_bytes
[8*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 1 + h
);
13578 pperm_bytes
[8*i
+2] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 2 + h
);
13579 pperm_bytes
[8*i
+3] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 3 + h
);
13580 pperm_bytes
[8*i
+4] = sign_extend
;
13581 pperm_bytes
[8*i
+5] = sign_extend
;
13582 pperm_bytes
[8*i
+6] = sign_extend
;
13583 pperm_bytes
[8*i
+7] = sign_extend
;
13586 for (i
= 0; i
< 16; i
++)
13587 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13589 for (i
= 0; i
< 2; i
++)
13590 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13592 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13593 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13595 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0
, op1
, p
, x
));
13597 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0
, op1
, p
, x
));
13601 gcc_unreachable ();
13607 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13608 next narrower integer vector type */
13610 ix86_expand_sse5_pack (rtx operands
[3])
13612 enum machine_mode imode
= GET_MODE (operands
[0]);
13613 int pperm_bytes
[16];
13615 rtvec v
= rtvec_alloc (16);
13617 rtx op0
= operands
[0];
13618 rtx op1
= operands
[1];
13619 rtx op2
= operands
[2];
13624 for (i
= 0; i
< 8; i
++)
13626 pperm_bytes
[i
+0] = PPERM_SRC
| PPERM_SRC1
| (i
*2);
13627 pperm_bytes
[i
+8] = PPERM_SRC
| PPERM_SRC2
| (i
*2);
13630 for (i
= 0; i
< 16; i
++)
13631 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13633 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13634 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0
, op1
, op2
, x
));
13638 for (i
= 0; i
< 4; i
++)
13640 pperm_bytes
[(2*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 0);
13641 pperm_bytes
[(2*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 1);
13642 pperm_bytes
[(2*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 0);
13643 pperm_bytes
[(2*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 1);
13646 for (i
= 0; i
< 16; i
++)
13647 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13649 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13650 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0
, op1
, op2
, x
));
13654 for (i
= 0; i
< 2; i
++)
13656 pperm_bytes
[(4*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 0);
13657 pperm_bytes
[(4*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 1);
13658 pperm_bytes
[(4*i
)+2] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 2);
13659 pperm_bytes
[(4*i
)+3] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 3);
13660 pperm_bytes
[(4*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 0);
13661 pperm_bytes
[(4*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 1);
13662 pperm_bytes
[(4*i
)+10] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 2);
13663 pperm_bytes
[(4*i
)+11] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 3);
13666 for (i
= 0; i
< 16; i
++)
13667 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13669 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13670 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0
, op1
, op2
, x
));
13674 gcc_unreachable ();
13680 /* Expand conditional increment or decrement using adb/sbb instructions.
13681 The default case using setcc followed by the conditional move can be
13682 done by generic code. */
13684 ix86_expand_int_addcc (rtx operands
[])
13686 enum rtx_code code
= GET_CODE (operands
[1]);
13688 rtx val
= const0_rtx
;
13689 bool fpcmp
= false;
13690 enum machine_mode mode
= GET_MODE (operands
[0]);
13692 if (operands
[3] != const1_rtx
13693 && operands
[3] != constm1_rtx
)
13695 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
13696 ix86_compare_op1
, &compare_op
))
13698 code
= GET_CODE (compare_op
);
13700 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
13701 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
13704 code
= ix86_fp_compare_code_to_integer (code
);
13711 PUT_CODE (compare_op
,
13712 reverse_condition_maybe_unordered
13713 (GET_CODE (compare_op
)));
13715 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
13717 PUT_MODE (compare_op
, mode
);
13719 /* Construct either adc or sbb insn. */
13720 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
13722 switch (GET_MODE (operands
[0]))
13725 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13728 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13731 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13734 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13737 gcc_unreachable ();
13742 switch (GET_MODE (operands
[0]))
13745 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13748 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13751 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13754 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13757 gcc_unreachable ();
13760 return 1; /* DONE */
13764 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13765 works for floating pointer parameters and nonoffsetable memories.
13766 For pushes, it returns just stack offsets; the values will be saved
13767 in the right order. Maximally three parts are generated. */
13770 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
13775 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
13777 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
13779 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
13780 gcc_assert (size
>= 2 && size
<= 3);
13782 /* Optimize constant pool reference to immediates. This is used by fp
13783 moves, that force all constants to memory to allow combining. */
13784 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
13786 rtx tmp
= maybe_get_pool_constant (operand
);
13791 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
13793 /* The only non-offsetable memories we handle are pushes. */
13794 int ok
= push_operand (operand
, VOIDmode
);
13798 operand
= copy_rtx (operand
);
13799 PUT_MODE (operand
, Pmode
);
13800 parts
[0] = parts
[1] = parts
[2] = operand
;
13804 if (GET_CODE (operand
) == CONST_VECTOR
)
13806 enum machine_mode imode
= int_mode_for_mode (mode
);
13807 /* Caution: if we looked through a constant pool memory above,
13808 the operand may actually have a different mode now. That's
13809 ok, since we want to pun this all the way back to an integer. */
13810 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
13811 gcc_assert (operand
!= NULL
);
13817 if (mode
== DImode
)
13818 split_di (&operand
, 1, &parts
[0], &parts
[1]);
13821 if (REG_P (operand
))
13823 gcc_assert (reload_completed
);
13824 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
13825 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
13827 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
13829 else if (offsettable_memref_p (operand
))
13831 operand
= adjust_address (operand
, SImode
, 0);
13832 parts
[0] = operand
;
13833 parts
[1] = adjust_address (operand
, SImode
, 4);
13835 parts
[2] = adjust_address (operand
, SImode
, 8);
13837 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13842 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13846 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
13847 parts
[2] = gen_int_mode (l
[2], SImode
);
13850 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
13853 gcc_unreachable ();
13855 parts
[1] = gen_int_mode (l
[1], SImode
);
13856 parts
[0] = gen_int_mode (l
[0], SImode
);
13859 gcc_unreachable ();
13864 if (mode
== TImode
)
13865 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
13866 if (mode
== XFmode
|| mode
== TFmode
)
13868 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
13869 if (REG_P (operand
))
13871 gcc_assert (reload_completed
);
13872 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
13873 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
13875 else if (offsettable_memref_p (operand
))
13877 operand
= adjust_address (operand
, DImode
, 0);
13878 parts
[0] = operand
;
13879 parts
[1] = adjust_address (operand
, upper_mode
, 8);
13881 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13886 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13887 real_to_target (l
, &r
, mode
);
13889 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13890 if (HOST_BITS_PER_WIDE_INT
>= 64)
13893 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13894 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13897 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13899 if (upper_mode
== SImode
)
13900 parts
[1] = gen_int_mode (l
[2], SImode
);
13901 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13904 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13905 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13908 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13911 gcc_unreachable ();
13918 /* Emit insns to perform a move or push of DI, DF, and XF values.
13919 Return false when normal moves are needed; true when all required
13920 insns have been emitted. Operands 2-4 contain the input values
13921 int the correct order; operands 5-7 contain the output values. */
13924 ix86_split_long_move (rtx operands
[])
13929 int collisions
= 0;
13930 enum machine_mode mode
= GET_MODE (operands
[0]);
13932 /* The DFmode expanders may ask us to move double.
13933 For 64bit target this is single move. By hiding the fact
13934 here we simplify i386.md splitters. */
13935 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13937 /* Optimize constant pool reference to immediates. This is used by
13938 fp moves, that force all constants to memory to allow combining. */
13940 if (MEM_P (operands
[1])
13941 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13942 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13943 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13944 if (push_operand (operands
[0], VOIDmode
))
13946 operands
[0] = copy_rtx (operands
[0]);
13947 PUT_MODE (operands
[0], Pmode
);
13950 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13951 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13952 emit_move_insn (operands
[0], operands
[1]);
13956 /* The only non-offsettable memory we handle is push. */
13957 if (push_operand (operands
[0], VOIDmode
))
13960 gcc_assert (!MEM_P (operands
[0])
13961 || offsettable_memref_p (operands
[0]));
13963 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13964 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13966 /* When emitting push, take care for source operands on the stack. */
13967 if (push
&& MEM_P (operands
[1])
13968 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13971 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13972 XEXP (part
[1][2], 0));
13973 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13974 XEXP (part
[1][1], 0));
13977 /* We need to do copy in the right order in case an address register
13978 of the source overlaps the destination. */
13979 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13981 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13983 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13986 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13989 /* Collision in the middle part can be handled by reordering. */
13990 if (collisions
== 1 && nparts
== 3
13991 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13994 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13995 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13998 /* If there are more collisions, we can't handle it by reordering.
13999 Do an lea to the last part and use only one colliding move. */
14000 else if (collisions
> 1)
14006 base
= part
[0][nparts
- 1];
14008 /* Handle the case when the last part isn't valid for lea.
14009 Happens in 64-bit mode storing the 12-byte XFmode. */
14010 if (GET_MODE (base
) != Pmode
)
14011 base
= gen_rtx_REG (Pmode
, REGNO (base
));
14013 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
14014 part
[1][0] = replace_equiv_address (part
[1][0], base
);
14015 part
[1][1] = replace_equiv_address (part
[1][1],
14016 plus_constant (base
, UNITS_PER_WORD
));
14018 part
[1][2] = replace_equiv_address (part
[1][2],
14019 plus_constant (base
, 8));
14029 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
14030 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
14031 emit_move_insn (part
[0][2], part
[1][2]);
14036 /* In 64bit mode we don't have 32bit push available. In case this is
14037 register, it is OK - we will just use larger counterpart. We also
14038 retype memory - these comes from attempt to avoid REX prefix on
14039 moving of second half of TFmode value. */
14040 if (GET_MODE (part
[1][1]) == SImode
)
14042 switch (GET_CODE (part
[1][1]))
14045 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
14049 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
14053 gcc_unreachable ();
14056 if (GET_MODE (part
[1][0]) == SImode
)
14057 part
[1][0] = part
[1][1];
14060 emit_move_insn (part
[0][1], part
[1][1]);
14061 emit_move_insn (part
[0][0], part
[1][0]);
14065 /* Choose correct order to not overwrite the source before it is copied. */
14066 if ((REG_P (part
[0][0])
14067 && REG_P (part
[1][1])
14068 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
14070 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
14072 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
14076 operands
[2] = part
[0][2];
14077 operands
[3] = part
[0][1];
14078 operands
[4] = part
[0][0];
14079 operands
[5] = part
[1][2];
14080 operands
[6] = part
[1][1];
14081 operands
[7] = part
[1][0];
14085 operands
[2] = part
[0][1];
14086 operands
[3] = part
[0][0];
14087 operands
[5] = part
[1][1];
14088 operands
[6] = part
[1][0];
14095 operands
[2] = part
[0][0];
14096 operands
[3] = part
[0][1];
14097 operands
[4] = part
[0][2];
14098 operands
[5] = part
[1][0];
14099 operands
[6] = part
[1][1];
14100 operands
[7] = part
[1][2];
14104 operands
[2] = part
[0][0];
14105 operands
[3] = part
[0][1];
14106 operands
[5] = part
[1][0];
14107 operands
[6] = part
[1][1];
14111 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14114 if (CONST_INT_P (operands
[5])
14115 && operands
[5] != const0_rtx
14116 && REG_P (operands
[2]))
14118 if (CONST_INT_P (operands
[6])
14119 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
14120 operands
[6] = operands
[2];
14123 && CONST_INT_P (operands
[7])
14124 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
14125 operands
[7] = operands
[2];
14129 && CONST_INT_P (operands
[6])
14130 && operands
[6] != const0_rtx
14131 && REG_P (operands
[3])
14132 && CONST_INT_P (operands
[7])
14133 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
14134 operands
[7] = operands
[3];
14137 emit_move_insn (operands
[2], operands
[5]);
14138 emit_move_insn (operands
[3], operands
[6]);
14140 emit_move_insn (operands
[4], operands
[7]);
14145 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14146 left shift by a constant, either using a single shift or
14147 a sequence of add instructions. */
14150 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
14154 emit_insn ((mode
== DImode
14156 : gen_adddi3
) (operand
, operand
, operand
));
14158 else if (!optimize_size
14159 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
14162 for (i
=0; i
<count
; i
++)
14164 emit_insn ((mode
== DImode
14166 : gen_adddi3
) (operand
, operand
, operand
));
14170 emit_insn ((mode
== DImode
14172 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
14176 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14178 rtx low
[2], high
[2];
14180 const int single_width
= mode
== DImode
? 32 : 64;
14182 if (CONST_INT_P (operands
[2]))
14184 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14185 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14187 if (count
>= single_width
)
14189 emit_move_insn (high
[0], low
[1]);
14190 emit_move_insn (low
[0], const0_rtx
);
14192 if (count
> single_width
)
14193 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
14197 if (!rtx_equal_p (operands
[0], operands
[1]))
14198 emit_move_insn (operands
[0], operands
[1]);
14199 emit_insn ((mode
== DImode
14201 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
14202 ix86_expand_ashl_const (low
[0], count
, mode
);
14207 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14209 if (operands
[1] == const1_rtx
)
14211 /* Assuming we've chosen a QImode capable registers, then 1 << N
14212 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14213 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
14215 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
14217 ix86_expand_clear (low
[0]);
14218 ix86_expand_clear (high
[0]);
14219 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
14221 d
= gen_lowpart (QImode
, low
[0]);
14222 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
14223 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
14224 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
14226 d
= gen_lowpart (QImode
, high
[0]);
14227 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
14228 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
14229 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
14232 /* Otherwise, we can get the same results by manually performing
14233 a bit extract operation on bit 5/6, and then performing the two
14234 shifts. The two methods of getting 0/1 into low/high are exactly
14235 the same size. Avoiding the shift in the bit extract case helps
14236 pentium4 a bit; no one else seems to care much either way. */
14241 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
14242 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
14244 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
14245 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
14247 emit_insn ((mode
== DImode
14249 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
14250 emit_insn ((mode
== DImode
14252 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
14253 emit_move_insn (low
[0], high
[0]);
14254 emit_insn ((mode
== DImode
14256 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
14259 emit_insn ((mode
== DImode
14261 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
14262 emit_insn ((mode
== DImode
14264 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
14268 if (operands
[1] == constm1_rtx
)
14270 /* For -1 << N, we can avoid the shld instruction, because we
14271 know that we're shifting 0...31/63 ones into a -1. */
14272 emit_move_insn (low
[0], constm1_rtx
);
14274 emit_move_insn (high
[0], low
[0]);
14276 emit_move_insn (high
[0], constm1_rtx
);
14280 if (!rtx_equal_p (operands
[0], operands
[1]))
14281 emit_move_insn (operands
[0], operands
[1]);
14283 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14284 emit_insn ((mode
== DImode
14286 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
14289 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
14291 if (TARGET_CMOVE
&& scratch
)
14293 ix86_expand_clear (scratch
);
14294 emit_insn ((mode
== DImode
14295 ? gen_x86_shift_adj_1
14296 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
14299 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
14303 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14305 rtx low
[2], high
[2];
14307 const int single_width
= mode
== DImode
? 32 : 64;
14309 if (CONST_INT_P (operands
[2]))
14311 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14312 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14314 if (count
== single_width
* 2 - 1)
14316 emit_move_insn (high
[0], high
[1]);
14317 emit_insn ((mode
== DImode
14319 : gen_ashrdi3
) (high
[0], high
[0],
14320 GEN_INT (single_width
- 1)));
14321 emit_move_insn (low
[0], high
[0]);
14324 else if (count
>= single_width
)
14326 emit_move_insn (low
[0], high
[1]);
14327 emit_move_insn (high
[0], low
[0]);
14328 emit_insn ((mode
== DImode
14330 : gen_ashrdi3
) (high
[0], high
[0],
14331 GEN_INT (single_width
- 1)));
14332 if (count
> single_width
)
14333 emit_insn ((mode
== DImode
14335 : gen_ashrdi3
) (low
[0], low
[0],
14336 GEN_INT (count
- single_width
)));
14340 if (!rtx_equal_p (operands
[0], operands
[1]))
14341 emit_move_insn (operands
[0], operands
[1]);
14342 emit_insn ((mode
== DImode
14344 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14345 emit_insn ((mode
== DImode
14347 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14352 if (!rtx_equal_p (operands
[0], operands
[1]))
14353 emit_move_insn (operands
[0], operands
[1]);
14355 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14357 emit_insn ((mode
== DImode
14359 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14360 emit_insn ((mode
== DImode
14362 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
14364 if (TARGET_CMOVE
&& scratch
)
14366 emit_move_insn (scratch
, high
[0]);
14367 emit_insn ((mode
== DImode
14369 : gen_ashrdi3
) (scratch
, scratch
,
14370 GEN_INT (single_width
- 1)));
14371 emit_insn ((mode
== DImode
14372 ? gen_x86_shift_adj_1
14373 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14377 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
14382 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14384 rtx low
[2], high
[2];
14386 const int single_width
= mode
== DImode
? 32 : 64;
14388 if (CONST_INT_P (operands
[2]))
14390 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14391 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14393 if (count
>= single_width
)
14395 emit_move_insn (low
[0], high
[1]);
14396 ix86_expand_clear (high
[0]);
14398 if (count
> single_width
)
14399 emit_insn ((mode
== DImode
14401 : gen_lshrdi3
) (low
[0], low
[0],
14402 GEN_INT (count
- single_width
)));
14406 if (!rtx_equal_p (operands
[0], operands
[1]))
14407 emit_move_insn (operands
[0], operands
[1]);
14408 emit_insn ((mode
== DImode
14410 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14411 emit_insn ((mode
== DImode
14413 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14418 if (!rtx_equal_p (operands
[0], operands
[1]))
14419 emit_move_insn (operands
[0], operands
[1]);
14421 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14423 emit_insn ((mode
== DImode
14425 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14426 emit_insn ((mode
== DImode
14428 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
14430 /* Heh. By reversing the arguments, we can reuse this pattern. */
14431 if (TARGET_CMOVE
&& scratch
)
14433 ix86_expand_clear (scratch
);
14434 emit_insn ((mode
== DImode
14435 ? gen_x86_shift_adj_1
14436 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14440 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
14444 /* Predict just emitted jump instruction to be taken with probability PROB. */
14446 predict_jump (int prob
)
14448 rtx insn
= get_last_insn ();
14449 gcc_assert (JUMP_P (insn
));
14451 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
14456 /* Helper function for the string operations below. Dest VARIABLE whether
14457 it is aligned to VALUE bytes. If true, jump to the label. */
14459 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
14461 rtx label
= gen_label_rtx ();
14462 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
14463 if (GET_MODE (variable
) == DImode
)
14464 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
14466 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
14467 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
14470 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
14472 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14476 /* Adjust COUNTER by the VALUE. */
14478 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
14480 if (GET_MODE (countreg
) == DImode
)
14481 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
14483 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
14486 /* Zero extend possibly SImode EXP to Pmode register. */
14488 ix86_zero_extend_to_Pmode (rtx exp
)
14491 if (GET_MODE (exp
) == VOIDmode
)
14492 return force_reg (Pmode
, exp
);
14493 if (GET_MODE (exp
) == Pmode
)
14494 return copy_to_mode_reg (Pmode
, exp
);
14495 r
= gen_reg_rtx (Pmode
);
14496 emit_insn (gen_zero_extendsidi2 (r
, exp
));
14500 /* Divide COUNTREG by SCALE. */
14502 scale_counter (rtx countreg
, int scale
)
14505 rtx piece_size_mask
;
14509 if (CONST_INT_P (countreg
))
14510 return GEN_INT (INTVAL (countreg
) / scale
);
14511 gcc_assert (REG_P (countreg
));
14513 piece_size_mask
= GEN_INT (scale
- 1);
14514 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
14515 GEN_INT (exact_log2 (scale
)),
14516 NULL
, 1, OPTAB_DIRECT
);
14520 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14521 DImode for constant loop counts. */
14523 static enum machine_mode
14524 counter_mode (rtx count_exp
)
14526 if (GET_MODE (count_exp
) != VOIDmode
)
14527 return GET_MODE (count_exp
);
14528 if (GET_CODE (count_exp
) != CONST_INT
)
14530 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
/* NOTE(review): this region of the extract is garbled -- each original
   statement is split across several lines and some intermediate source
   lines (braces, declarations, else/if skeletons) are missing.  Code
   tokens below are preserved verbatim; only comments were added.
   Purpose (from the header comment and visible calls): emit an unrolled
   copy loop (srcptr non-NULL) or set loop (srcptr NULL) moving
   GET_MODE_SIZE (mode) * unroll bytes per iteration.  */
14535 /* When SRCPTR is non-NULL, output simple loop to move memory
14536 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14537 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14538 equivalent loop to set memory by VALUE (supposed to be in MODE).
14540 The size is rounded down to whole number of chunk size moved at once.
14541 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14545 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
14546 rtx destptr
, rtx srcptr
, rtx value
,
14547 rtx count
, enum machine_mode mode
, int unroll
,
14550 rtx out_label
, top_label
, iter
, tmp
;
/* Counter mode is chosen from the count expression (see counter_mode).  */
14551 enum machine_mode iter_mode
= counter_mode (count
);
14552 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
14553 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
14559 top_label
= gen_label_rtx ();
14560 out_label
= gen_label_rtx ();
14561 iter
= gen_reg_rtx (iter_mode
);
/* SIZE = COUNT rounded down to a whole number of chunks.  */
14563 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
14564 NULL
, 1, OPTAB_DIRECT
);
14565 /* Those two should combine. */
14566 if (piece_size
== const1_rtx
)
14568 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
14570 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
14572 emit_move_insn (iter
, const0_rtx
);
14574 emit_label (top_label
);
14576 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
14577 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
14578 destmem
= change_address (destmem
, mode
, x_addr
);
14582 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
14583 srcmem
= change_address (srcmem
, mode
, y_addr
);
14585 /* When unrolling for chips that reorder memory reads and writes,
14586 we can save registers by using single temporary.
14587 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" below disables this single-temporary branch
   unconditionally -- presumably deliberate dead code; confirm upstream.  */
14588 if (!TARGET_64BIT
&& 0)
14590 for (i
= 0; i
< unroll
; i
++)
14595 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14597 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14599 emit_move_insn (destmem
, srcmem
);
/* Load all chunks into temporaries first, then store them, so reads and
   writes do not interleave.  */
14605 gcc_assert (unroll
<= 4);
14606 for (i
= 0; i
< unroll
; i
++)
14608 tmpreg
[i
] = gen_reg_rtx (mode
);
14612 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14614 emit_move_insn (tmpreg
[i
], srcmem
);
14616 for (i
= 0; i
< unroll
; i
++)
14621 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14623 emit_move_insn (destmem
, tmpreg
[i
]);
/* Memset path: store VALUE into each chunk.  */
14628 for (i
= 0; i
< unroll
; i
++)
14632 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14633 emit_move_insn (destmem
, value
);
/* Advance the iterator by one unrolled chunk and loop while ITER < SIZE.  */
14636 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
14637 true, OPTAB_LIB_WIDEN
);
14639 emit_move_insn (iter
, tmp
);
14641 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
/* Branch-probability heuristics based on the expected block size.  */
14643 if (expected_size
!= -1)
14645 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
14646 if (expected_size
== 0)
14648 else if (expected_size
> REG_BR_PROB_BASE
)
14649 predict_jump (REG_BR_PROB_BASE
- 1);
14651 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
14654 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
/* After the loop, advance the destination (and source) pointers past the
   copied region.  */
14655 iter
= ix86_zero_extend_to_Pmode (iter
);
14656 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
14657 true, OPTAB_LIB_WIDEN
);
14658 if (tmp
!= destptr
)
14659 emit_move_insn (destptr
, tmp
);
14662 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
14663 true, OPTAB_LIB_WIDEN
);
14665 emit_move_insn (srcptr
, tmp
);
14667 emit_label (out_label
);
/* NOTE(review): garbled extract -- statements split across lines, some
   source lines missing (the final arguments of the gen_rep_mov call are
   cut off).  Tokens preserved verbatim; only comments added.
   Purpose: emit a "rep; movs{b,l,q}" block copy; MODE selects the element
   size, the count is pre-scaled by scale_counter.  */
14670 /* Output "rep; mov" instruction.
14671 Arguments have same meaning as for previous function */
14673 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
14674 rtx destptr
, rtx srcptr
,
14676 enum machine_mode mode
)
14682 /* If the size is known, it is shorter to use rep movs. */
14683 if (mode
== QImode
&& CONST_INT_P (count
)
14684 && !(INTVAL (count
) & 3))
/* Normalize the MEMs to BLKmode at the raw pointers so aliasing info
   matches what rep;movs actually touches.  */
14687 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14688 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14689 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
14690 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
14691 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
/* Build the final-pointer expressions: ptr + (count << log2(size)) for
   wide modes, ptr + count for byte mode.  */
14692 if (mode
!= QImode
)
14694 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14695 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14696 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14697 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14698 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14699 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
14703 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14704 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
/* NOTE(review): the argument list below is visibly truncated in this
   extract (destexp/srcexp arguments missing) -- confirm against upstream.  */
14706 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
/* NOTE(review): garbled extract -- statements split across lines, the
   function's return type, braces and declarations are missing.  Tokens
   preserved verbatim; only comments added.
   Purpose: emit a "rep; stos{b,l,q}" block set of VALUE; MODE selects the
   element size, the count is pre-scaled by scale_counter.  */
14710 /* Output "rep; stos" instruction.
14711 Arguments have same meaning as for previous function */
14713 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
14715 enum machine_mode mode
)
/* Normalize the destination MEM to BLKmode at the raw pointer.  */
14720 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14721 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
/* rep;stos stores the low MODE part of the value register.  */
14722 value
= force_reg (mode
, gen_lowpart (mode
, value
));
14723 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
/* Final destination pointer: ptr + (count << log2(size)), or ptr + count
   for byte mode.  */
14724 if (mode
!= QImode
)
14726 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14727 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14728 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14731 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14732 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
14736 emit_strmov (rtx destmem
, rtx srcmem
,
14737 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
14739 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
14740 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
14741 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
/* NOTE(review): garbled extract -- statements split across lines; the
   switch/if skeleton (case labels, braces, else branches) of the constant
   path is missing.  Tokens preserved verbatim; only comments added.
   Purpose: copy the remaining COUNT & (MAX_SIZE - 1) tail bytes either by
   a compile-time-selected sequence of moves (constant count) or by runtime
   alignment tests / a byte loop.  */
14744 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14746 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
14747 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
/* Constant count: emit exactly the moves implied by the low bits.  */
14750 if (CONST_INT_P (count
))
14752 HOST_WIDE_INT countval
= INTVAL (count
);
14755 if ((countval
& 0x10) && max_size
> 16)
14759 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14760 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
14763 gcc_unreachable ();
14766 if ((countval
& 0x08) && max_size
> 8)
14769 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14772 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14773 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
14777 if ((countval
& 0x04) && max_size
> 4)
14779 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14782 if ((countval
& 0x02) && max_size
> 2)
14784 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
14787 if ((countval
& 0x01) && max_size
> 1)
14789 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
/* Large variable tails: mask the count and fall back to a byte loop.  */
14796 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
14797 count
, 1, OPTAB_DIRECT
);
14798 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
14799 count
, QImode
, 1, 4);
14803 /* When there are stringops, we can cheaply increase dest and src pointers.
14804 Otherwise we save code size by maintaining offset (zero is readily
14805 available from preceding rep operation) and using x86 addressing modes.
/* Stringop path: each tested size uses an auto-advancing strmov.  */
14807 if (TARGET_SINGLE_STRINGOP
)
14811 rtx label
= ix86_expand_aligntest (count
, 4, true);
14812 src
= change_address (srcmem
, SImode
, srcptr
);
14813 dest
= change_address (destmem
, SImode
, destptr
);
14814 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14815 emit_label (label
);
14816 LABEL_NUSES (label
) = 1;
14820 rtx label
= ix86_expand_aligntest (count
, 2, true);
14821 src
= change_address (srcmem
, HImode
, srcptr
);
14822 dest
= change_address (destmem
, HImode
, destptr
);
14823 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14824 emit_label (label
);
14825 LABEL_NUSES (label
) = 1;
14829 rtx label
= ix86_expand_aligntest (count
, 1, true);
14830 src
= change_address (srcmem
, QImode
, srcptr
);
14831 dest
= change_address (destmem
, QImode
, destptr
);
14832 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14833 emit_label (label
);
14834 LABEL_NUSES (label
) = 1;
/* Non-stringop path: keep a running OFFSET register and use plain moves
   through reg+offset addresses.  */
14839 rtx offset
= force_reg (Pmode
, const0_rtx
);
14844 rtx label
= ix86_expand_aligntest (count
, 4, true);
14845 src
= change_address (srcmem
, SImode
, srcptr
);
14846 dest
= change_address (destmem
, SImode
, destptr
);
14847 emit_move_insn (dest
, src
);
14848 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
14849 true, OPTAB_LIB_WIDEN
);
14851 emit_move_insn (offset
, tmp
);
14852 emit_label (label
);
14853 LABEL_NUSES (label
) = 1;
14857 rtx label
= ix86_expand_aligntest (count
, 2, true);
14858 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14859 src
= change_address (srcmem
, HImode
, tmp
);
14860 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14861 dest
= change_address (destmem
, HImode
, tmp
);
14862 emit_move_insn (dest
, src
);
14863 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
14864 true, OPTAB_LIB_WIDEN
);
14866 emit_move_insn (offset
, tmp
);
14867 emit_label (label
);
14868 LABEL_NUSES (label
) = 1;
14872 rtx label
= ix86_expand_aligntest (count
, 1, true);
14873 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14874 src
= change_address (srcmem
, QImode
, tmp
);
14875 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14876 dest
= change_address (destmem
, QImode
, tmp
);
14877 emit_move_insn (dest
, src
);
14878 emit_label (label
);
14879 LABEL_NUSES (label
) = 1;
14884 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14886 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
14887 rtx count
, int max_size
)
14890 expand_simple_binop (counter_mode (count
), AND
, count
,
14891 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14892 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14893 gen_lowpart (QImode
, value
), count
, QImode
,
/* NOTE(review): garbled extract -- statements split across lines; the
   switch/if skeleton of the constant path and several declarations are
   missing.  Tokens preserved verbatim; only comments added.
   Purpose: store the remaining COUNT & (MAX_SIZE - 1) tail bytes of VALUE,
   either as a compile-time-selected store sequence (constant count) or via
   runtime alignment tests; falls back to
   expand_setmem_epilogue_via_loop for large variable tails.  */
14897 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14899 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
/* Constant count: emit exactly the stores implied by the low bits.  */
14903 if (CONST_INT_P (count
))
14905 HOST_WIDE_INT countval
= INTVAL (count
);
14908 if ((countval
& 0x10) && max_size
> 16)
14912 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14913 emit_insn (gen_strset (destptr
, dest
, value
));
14914 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14915 emit_insn (gen_strset (destptr
, dest
, value
));
14918 gcc_unreachable ();
14921 if ((countval
& 0x08) && max_size
> 8)
14925 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14926 emit_insn (gen_strset (destptr
, dest
, value
));
14930 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14931 emit_insn (gen_strset (destptr
, dest
, value
));
14932 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14933 emit_insn (gen_strset (destptr
, dest
, value
));
14937 if ((countval
& 0x04) && max_size
> 4)
14939 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14940 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14943 if ((countval
& 0x02) && max_size
> 2)
14945 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14946 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14949 if ((countval
& 0x01) && max_size
> 1)
14951 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14952 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
/* Large variable tails go through the byte loop helper.  */
14959 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
/* Variable count: runtime alignment tests, one per power of two.  */
14964 rtx label
= ix86_expand_aligntest (count
, 16, true);
14967 dest
= change_address (destmem
, DImode
, destptr
);
14968 emit_insn (gen_strset (destptr
, dest
, value
));
14969 emit_insn (gen_strset (destptr
, dest
, value
));
14973 dest
= change_address (destmem
, SImode
, destptr
);
14974 emit_insn (gen_strset (destptr
, dest
, value
));
14975 emit_insn (gen_strset (destptr
, dest
, value
));
14976 emit_insn (gen_strset (destptr
, dest
, value
));
14977 emit_insn (gen_strset (destptr
, dest
, value
));
14979 emit_label (label
);
14980 LABEL_NUSES (label
) = 1;
14984 rtx label
= ix86_expand_aligntest (count
, 8, true);
14987 dest
= change_address (destmem
, DImode
, destptr
);
14988 emit_insn (gen_strset (destptr
, dest
, value
));
14992 dest
= change_address (destmem
, SImode
, destptr
);
14993 emit_insn (gen_strset (destptr
, dest
, value
));
14994 emit_insn (gen_strset (destptr
, dest
, value
));
14996 emit_label (label
);
14997 LABEL_NUSES (label
) = 1;
15001 rtx label
= ix86_expand_aligntest (count
, 4, true);
15002 dest
= change_address (destmem
, SImode
, destptr
);
15003 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
15004 emit_label (label
);
15005 LABEL_NUSES (label
) = 1;
15009 rtx label
= ix86_expand_aligntest (count
, 2, true);
15010 dest
= change_address (destmem
, HImode
, destptr
);
15011 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
15012 emit_label (label
);
15013 LABEL_NUSES (label
) = 1;
15017 rtx label
= ix86_expand_aligntest (count
, 1, true);
15018 dest
= change_address (destmem
, QImode
, destptr
);
15019 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
15020 emit_label (label
);
15021 LABEL_NUSES (label
) = 1;
15025 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15026 DESIRED_ALIGNMENT. */
15028 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
15029 rtx destptr
, rtx srcptr
, rtx count
,
15030 int align
, int desired_alignment
)
15032 if (align
<= 1 && desired_alignment
> 1)
15034 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
15035 srcmem
= change_address (srcmem
, QImode
, srcptr
);
15036 destmem
= change_address (destmem
, QImode
, destptr
);
15037 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15038 ix86_adjust_counter (count
, 1);
15039 emit_label (label
);
15040 LABEL_NUSES (label
) = 1;
15042 if (align
<= 2 && desired_alignment
> 2)
15044 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
15045 srcmem
= change_address (srcmem
, HImode
, srcptr
);
15046 destmem
= change_address (destmem
, HImode
, destptr
);
15047 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15048 ix86_adjust_counter (count
, 2);
15049 emit_label (label
);
15050 LABEL_NUSES (label
) = 1;
15052 if (align
<= 4 && desired_alignment
> 4)
15054 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
15055 srcmem
= change_address (srcmem
, SImode
, srcptr
);
15056 destmem
= change_address (destmem
, SImode
, destptr
);
15057 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15058 ix86_adjust_counter (count
, 4);
15059 emit_label (label
);
15060 LABEL_NUSES (label
) = 1;
15062 gcc_assert (desired_alignment
<= 8);
15065 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15066 DESIRED_ALIGNMENT. */
15068 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
15069 int align
, int desired_alignment
)
15071 if (align
<= 1 && desired_alignment
> 1)
15073 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
15074 destmem
= change_address (destmem
, QImode
, destptr
);
15075 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
15076 ix86_adjust_counter (count
, 1);
15077 emit_label (label
);
15078 LABEL_NUSES (label
) = 1;
15080 if (align
<= 2 && desired_alignment
> 2)
15082 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
15083 destmem
= change_address (destmem
, HImode
, destptr
);
15084 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
15085 ix86_adjust_counter (count
, 2);
15086 emit_label (label
);
15087 LABEL_NUSES (label
) = 1;
15089 if (align
<= 4 && desired_alignment
> 4)
15091 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
15092 destmem
= change_address (destmem
, SImode
, destptr
);
15093 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
15094 ix86_adjust_counter (count
, 4);
15095 emit_label (label
);
15096 LABEL_NUSES (label
) = 1;
15098 gcc_assert (desired_alignment
<= 8);
/* NOTE(review): garbled extract -- statements split across lines; loop
   bodies, braces and some declarations are missing.  Tokens preserved
   verbatim; only comments added.
   Purpose: pick the stringop algorithm (loop, unrolled loop, rep prefix,
   or libcall) from the cost tables, the -mstringop-strategy option,
   optimize_size, and the expected block size; may set *DYNAMIC_CHECK to a
   threshold for a runtime inline-vs-libcall test.  */
15101 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15102 static enum stringop_alg
15103 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
15104 int *dynamic_check
)
15106 const struct stringop_algs
* algs
;
15107 /* Algorithms using the rep prefix want at least edi and ecx;
15108 additionally, memset wants eax and memcpy wants esi. Don't
15109 consider such algorithms if the user has appropriated those
15110 registers for their own purposes. */
15111 bool rep_prefix_usable
= !(global_regs
[CX_REG
] || global_regs
[DI_REG
]
15112 || (memset
? global_regs
[AX_REG
] : global_regs
[SI_REG
]));
15114 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15115 || (alg != rep_prefix_1_byte \
15116 && alg != rep_prefix_4_byte \
15117 && alg != rep_prefix_8_byte))
15119 *dynamic_check
= -1;
/* Select the memset or memcpy cost table for this target width.  */
15121 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
15123 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
/* An explicit -mstringop-strategy wins if it is usable.  */
15124 if (stringop_alg
!= no_stringop
&& ALG_USABLE_P (stringop_alg
))
15125 return stringop_alg
;
15126 /* rep; movq or rep; movl is the smallest variant. */
15127 else if (optimize_size
)
15129 if (!count
|| (count
& 3))
15130 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
15132 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
15134 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15136 else if (expected_size
!= -1 && expected_size
< 4)
15137 return loop_1_byte
;
15138 else if (expected_size
!= -1)
15141 enum stringop_alg alg
= libcall
;
/* NOTE(review): "NAX_STRINGOP_ALGS" (here and below) looks like a garbled
   "MAX_STRINGOP_ALGS" -- confirm against the original source.  */
15142 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
15144 /* We get here if the algorithms that were not libcall-based
15145 were rep-prefix based and we are unable to use rep prefixes
15146 based on global register usage. Break out of the loop and
15147 use the heuristic below. */
15148 if (algs
->size
[i
].max
== 0)
15150 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
15152 enum stringop_alg candidate
= algs
->size
[i
].alg
;
15154 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
15156 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15157 last non-libcall inline algorithm. */
15158 if (TARGET_INLINE_ALL_STRINGOPS
)
15160 /* When the current size is best to be copied by a libcall,
15161 but we are still forced to inline, run the heuristic below
15162 that will pick code for medium sized blocks. */
15163 if (alg
!= libcall
)
15167 else if (ALG_USABLE_P (candidate
))
15171 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
15173 /* When asked to inline the call anyway, try to pick meaningful choice.
15174 We look for maximal size of block that is faster to copy by hand and
15175 take blocks of at most of that size guessing that average size will
15176 be roughly half of the block.
15178 If this turns out to be bad, we might simply specify the preferred
15179 choice in ix86_costs. */
15180 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15181 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
15184 enum stringop_alg alg
;
15186 bool any_alg_usable_p
= true;
15188 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
15190 enum stringop_alg candidate
= algs
->size
[i
].alg
;
15191 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
15193 if (candidate
!= libcall
&& candidate
15194 && ALG_USABLE_P (candidate
))
15195 max
= algs
->size
[i
].max
;
15197 /* If there aren't any usable algorithms, then recursing on
15198 smaller sizes isn't going to find anything. Just return the
15199 simple byte-at-a-time copy loop. */
15200 if (!any_alg_usable_p
)
15202 /* Pick something reasonable. */
15203 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15204 *dynamic_check
= 128;
15205 return loop_1_byte
;
/* Recurse with half the maximal hand-copied size as the expected size.  */
15209 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
15210 gcc_assert (*dynamic_check
== -1);
15211 gcc_assert (alg
!= libcall
);
15212 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15213 *dynamic_check
= max
;
15216 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
15217 #undef ALG_USABLE_P
/* NOTE(review): garbled extract -- the switch statement's skeleton (the
   `switch` keyword, several case labels, break statements and braces) is
   missing.  Tokens preserved verbatim; only comments added.
   Purpose: choose the destination alignment the prologue should establish,
   based on the chosen algorithm (visible cases: unrolled_loop and the rep
   prefix variants) and the expected block size.  */
15220 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15221 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15223 decide_alignment (int align
,
15224 enum stringop_alg alg
,
15227 int desired_align
= 0;
15231 gcc_unreachable ();
15233 case unrolled_loop
:
15234 desired_align
= GET_MODE_SIZE (Pmode
);
15236 case rep_prefix_8_byte
:
15239 case rep_prefix_4_byte
:
15240 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15241 copying whole cacheline at once. */
15242 if (TARGET_PENTIUMPRO
)
15247 case rep_prefix_1_byte
:
15248 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15249 copying whole cacheline at once. */
15250 if (TARGET_PENTIUMPRO
)
/* Never request less than the already-known alignment, and do not bother
   aligning very small expected blocks.  */
15264 if (desired_align
< align
)
15265 desired_align
= align
;
15266 if (expected_size
!= -1 && expected_size
< 4)
15267 desired_align
= align
;
15268 return desired_align
;
/* Return the smallest power of 2 strictly greater than VAL.
   E.g. 0 -> 1, 1 -> 2, 7 -> 8, 8 -> 16.  VAL is assumed to be small
   enough (a chunk/alignment size) that the shift cannot overflow.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
15281 /* Expand string move (memcpy) operation. Use i386 string operations when
15282 profitable. expand_clrmem contains similar code. The code depends upon
15283 architecture, block size and alignment, but always has the same
15286 1) Prologue guard: Conditional that jumps up to epilogues for small
15287 blocks that can be handled by epilogue alone. This is faster but
15288 also needed for correctness, since prologue assumes the block is larger
15289 than the desired alignment.
15291 Optional dynamic check for size and libcall for large
15292 blocks is emitted here too, with -minline-stringops-dynamically.
15294 2) Prologue: copy first few bytes in order to get destination aligned
15295 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15296 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15297 We emit either a jump tree on power of two sized blocks, or a byte loop.
15299 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15300 with specified algorithm.
15302 4) Epilogue: code copying tail of the block that is too small to be
15303 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): garbled extract -- statements split across lines; the
   return type, braces, several declarations, the switch skeletons of
   steps 0 and 3, and some statements are missing.  Tokens preserved
   verbatim; only comments added.
   Purpose: top-level memcpy expander.  Steps (per the header comment
   preceding this function): 0) pick algorithm and alignment, 1) prologue
   guard for small blocks and optional dynamic libcall check, 2) alignment
   prologue, 3) main copy loop / rep-mov, 4) epilogue for the tail.  */
15306 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
15307 rtx expected_align_exp
, rtx expected_size_exp
)
15313 rtx jump_around_label
= NULL
;
15314 HOST_WIDE_INT align
= 1;
15315 unsigned HOST_WIDE_INT count
= 0;
15316 HOST_WIDE_INT expected_size
= -1;
15317 int size_needed
= 0, epilogue_size_needed
;
15318 int desired_align
= 0;
15319 enum stringop_alg alg
;
/* Pull compile-time-known alignment and size out of the operand rtxes.  */
15322 if (CONST_INT_P (align_exp
))
15323 align
= INTVAL (align_exp
);
15324 /* i386 can do misaligned access on reasonably increased cost. */
15325 if (CONST_INT_P (expected_align_exp
)
15326 && INTVAL (expected_align_exp
) > align
)
15327 align
= INTVAL (expected_align_exp
);
15328 if (CONST_INT_P (count_exp
))
15329 count
= expected_size
= INTVAL (count_exp
);
15330 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15331 expected_size
= INTVAL (expected_size_exp
);
15333 /* Step 0: Decide on preferred algorithm, desired alignment and
15334 size of chunks to be copied by main loop. */
15336 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
15337 desired_align
= decide_alignment (align
, alg
, expected_size
);
15339 if (!TARGET_ALIGN_STRINGOPS
)
15340 align
= desired_align
;
/* libcall means: emit no inline code at all.  */
15342 if (alg
== libcall
)
15344 gcc_assert (alg
!= no_stringop
);
15346 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
15347 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15348 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
/* Per-algorithm chunk size (switch skeleton missing in this extract).  */
15353 gcc_unreachable ();
15355 size_needed
= GET_MODE_SIZE (Pmode
);
15357 case unrolled_loop
:
15358 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
15360 case rep_prefix_8_byte
:
15363 case rep_prefix_4_byte
:
15366 case rep_prefix_1_byte
:
15372 epilogue_size_needed
= size_needed
;
15374 /* Step 1: Prologue guard. */
15376 /* Alignment code needs count to be in register. */
15377 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15378 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
15379 gcc_assert (desired_align
>= 1 && align
>= 1);
15381 /* Ensure that alignment prologue won't copy past end of block. */
15382 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15384 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15385 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15386 Make sure it is power of 2. */
15387 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15389 if (CONST_INT_P (count_exp
))
15391 if (UINTVAL (count_exp
) < (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
/* Variable count: branch to the epilogue-only path for small blocks.  */
15396 label
= gen_label_rtx ();
15397 emit_cmp_and_jump_insns (count_exp
,
15398 GEN_INT (epilogue_size_needed
),
15399 LTU
, 0, counter_mode (count_exp
), 1, label
);
15400 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
15401 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15403 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15407 /* Emit code to decide on runtime whether library call or inline should be
15409 if (dynamic_check
!= -1)
15411 if (CONST_INT_P (count_exp
))
15413 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
15415 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
15416 count_exp
= const0_rtx
;
/* Runtime size test: small blocks take the inline ("hot") path, large
   blocks call the library and jump around the inline code.  */
15422 rtx hot_label
= gen_label_rtx ();
15423 jump_around_label
= gen_label_rtx ();
15424 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15425 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
15426 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15427 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
15428 emit_jump (jump_around_label
);
15429 emit_label (hot_label
);
15433 /* Step 2: Alignment prologue. */
15435 if (desired_align
> align
)
15437 /* Except for the first move in epilogue, we no longer know
15438 constant offset in aliasing info. It don't seems to worth
15439 the pain to maintain it for the first move, so throw away
15441 src
= change_address (src
, BLKmode
, srcreg
);
15442 dst
= change_address (dst
, BLKmode
, destreg
);
15443 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
15446 if (label
&& size_needed
== 1)
15448 emit_label (label
);
15449 LABEL_NUSES (label
) = 1;
15453 /* Step 3: Main loop. */
/* Dispatch on the chosen algorithm (switch skeleton missing here).  */
15459 gcc_unreachable ();
15461 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15462 count_exp
, QImode
, 1, expected_size
);
15465 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15466 count_exp
, Pmode
, 1, expected_size
);
15468 case unrolled_loop
:
15469 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15470 registers for 4 temporaries anyway. */
15471 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15472 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
15475 case rep_prefix_8_byte
:
15476 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15479 case rep_prefix_4_byte
:
15480 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15483 case rep_prefix_1_byte
:
15484 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15488 /* Adjust properly the offset of src and dest memory for aliasing. */
15489 if (CONST_INT_P (count_exp
))
15491 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
15492 (count
/ size_needed
) * size_needed
);
15493 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15494 (count
/ size_needed
) * size_needed
);
15498 src
= change_address (src
, BLKmode
, srcreg
);
15499 dst
= change_address (dst
, BLKmode
, destreg
);
15502 /* Step 4: Epilogue to copy the remaining bytes. */
15506 /* When the main loop is done, COUNT_EXP might hold original count,
15507 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15508 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15509 bytes. Compensate if needed. */
15511 if (size_needed
< epilogue_size_needed
)
15514 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15515 GEN_INT (size_needed
- 1), count_exp
, 1,
15517 if (tmp
!= count_exp
)
15518 emit_move_insn (count_exp
, tmp
);
15520 emit_label (label
);
15521 LABEL_NUSES (label
) = 1;
15524 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15525 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
15526 epilogue_size_needed
);
15527 if (jump_around_label
)
15528 emit_label (jump_around_label
);
/* NOTE(review): garbled extract -- statements split across lines; the
   return type, braces, declarations and several branch skeletons are
   missing.  Tokens preserved verbatim; only comments added.
   NOTE(review): despite the "memcpy" wording in the original comment,
   the visible caller chain is the setmem path
   (promote_duplicated_reg_to_size <- ix86_expand_setmem) -- confirm.
   Purpose: replicate a QImode byte value across all bytes of a wider
   SImode/DImode register, either by multiplying by 0x01010101-style
   constants or by shift/or steps, chosen by cost.  */
15532 /* Helper function for memcpy. For QImode value 0xXY produce
15533 0xXYXYXYXY of wide specified by MODE. This is essentially
15534 a * 0x10101010, but we can do slightly better than
15535 synth_mult by unwinding the sequence by hand on CPUs with
15538 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
15540 enum machine_mode valmode
= GET_MODE (val
);
15542 int nops
= mode
== DImode
? 3 : 2;
15544 gcc_assert (mode
== SImode
|| mode
== DImode
);
15545 if (val
== const0_rtx
)
15546 return copy_to_mode_reg (mode
, const0_rtx
);
/* Constant byte: build the replicated constant at compile time.  */
15547 if (CONST_INT_P (val
))
15549 HOST_WIDE_INT v
= INTVAL (val
) & 255;
15553 if (mode
== DImode
)
15554 v
|= (v
<< 16) << 16;
15555 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
15558 if (valmode
== VOIDmode
)
15560 if (valmode
!= QImode
)
15561 val
= gen_lowpart (QImode
, val
);
15562 if (mode
== QImode
)
15564 if (!TARGET_PARTIAL_REG_STALL
)
/* Cost comparison: multiply by the replicated-ones constant vs. the
   shift/or sequence below.  */
15566 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
15567 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
15568 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
15569 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
15571 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15572 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
15573 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
/* Shift/or fallback: duplicate byte -> 16 bits -> 32 bits -> 64 bits.  */
15578 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15580 if (!TARGET_PARTIAL_REG_STALL
)
15581 if (mode
== SImode
)
15582 emit_insn (gen_movsi_insv_1 (reg
, reg
));
15584 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
15587 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
15588 NULL
, 1, OPTAB_DIRECT
);
15590 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15592 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
15593 NULL
, 1, OPTAB_DIRECT
);
15594 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15595 if (mode
== SImode
)
15597 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
15598 NULL
, 1, OPTAB_DIRECT
);
15599 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15604 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15605 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15606 alignment from ALIGN to DESIRED_ALIGN. */
15608 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
15613 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
15614 promoted_val
= promote_duplicated_reg (DImode
, val
);
15615 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
15616 promoted_val
= promote_duplicated_reg (SImode
, val
);
15617 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
15618 promoted_val
= promote_duplicated_reg (HImode
, val
);
15620 promoted_val
= val
;
15622 return promoted_val
;
15625 /* Expand string clear operation (bzero). Use i386 string operations when
15626 profitable. See expand_movmem comment for explanation of individual
15627 steps performed. */
15629 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
15630 rtx expected_align_exp
, rtx expected_size_exp
)
15635 rtx jump_around_label
= NULL
;
15636 HOST_WIDE_INT align
= 1;
15637 unsigned HOST_WIDE_INT count
= 0;
15638 HOST_WIDE_INT expected_size
= -1;
15639 int size_needed
= 0, epilogue_size_needed
;
15640 int desired_align
= 0;
15641 enum stringop_alg alg
;
15642 rtx promoted_val
= NULL
;
15643 bool force_loopy_epilogue
= false;
15646 if (CONST_INT_P (align_exp
))
15647 align
= INTVAL (align_exp
);
15648 /* i386 can do misaligned access on reasonably increased cost. */
15649 if (CONST_INT_P (expected_align_exp
)
15650 && INTVAL (expected_align_exp
) > align
)
15651 align
= INTVAL (expected_align_exp
);
15652 if (CONST_INT_P (count_exp
))
15653 count
= expected_size
= INTVAL (count_exp
);
15654 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15655 expected_size
= INTVAL (expected_size_exp
);
15657 /* Step 0: Decide on preferred algorithm, desired alignment and
15658 size of chunks to be copied by main loop. */
15660 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
15661 desired_align
= decide_alignment (align
, alg
, expected_size
);
15663 if (!TARGET_ALIGN_STRINGOPS
)
15664 align
= desired_align
;
15666 if (alg
== libcall
)
15668 gcc_assert (alg
!= no_stringop
);
15670 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
15671 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15676 gcc_unreachable ();
15678 size_needed
= GET_MODE_SIZE (Pmode
);
15680 case unrolled_loop
:
15681 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
15683 case rep_prefix_8_byte
:
15686 case rep_prefix_4_byte
:
15689 case rep_prefix_1_byte
:
15694 epilogue_size_needed
= size_needed
;
15696 /* Step 1: Prologue guard. */
15698 /* Alignment code needs count to be in register. */
15699 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15701 enum machine_mode mode
= SImode
;
15702 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15704 count_exp
= force_reg (mode
, count_exp
);
15706 /* Do the cheap promotion to allow better CSE across the
15707 main loop and epilogue (ie one load of the big constant in the
15708 front of all code. */
15709 if (CONST_INT_P (val_exp
))
15710 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15711 desired_align
, align
);
15712 /* Ensure that alignment prologue won't copy past end of block. */
15713 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15715 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15716 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15717 Make sure it is power of 2. */
15718 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15720 /* To improve performance of small blocks, we jump around the VAL
15721 promoting mode. This mean that if the promoted VAL is not constant,
15722 we might not use it in the epilogue and have to use byte
15724 if (epilogue_size_needed
> 2 && !promoted_val
)
15725 force_loopy_epilogue
= true;
15726 label
= gen_label_rtx ();
15727 emit_cmp_and_jump_insns (count_exp
,
15728 GEN_INT (epilogue_size_needed
),
15729 LTU
, 0, counter_mode (count_exp
), 1, label
);
15730 if (GET_CODE (count_exp
) == CONST_INT
)
15732 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
15733 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15735 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15737 if (dynamic_check
!= -1)
15739 rtx hot_label
= gen_label_rtx ();
15740 jump_around_label
= gen_label_rtx ();
15741 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15742 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
15743 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15744 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
15745 emit_jump (jump_around_label
);
15746 emit_label (hot_label
);
15749 /* Step 2: Alignment prologue. */
15751 /* Do the expensive promotion once we branched off the small blocks. */
15753 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15754 desired_align
, align
);
15755 gcc_assert (desired_align
>= 1 && align
>= 1);
15757 if (desired_align
> align
)
15759 /* Except for the first move in epilogue, we no longer know
15760 constant offset in aliasing info. It don't seems to worth
15761 the pain to maintain it for the first move, so throw away
15763 dst
= change_address (dst
, BLKmode
, destreg
);
15764 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
15767 if (label
&& size_needed
== 1)
15769 emit_label (label
);
15770 LABEL_NUSES (label
) = 1;
15774 /* Step 3: Main loop. */
15780 gcc_unreachable ();
15782 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15783 count_exp
, QImode
, 1, expected_size
);
15786 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15787 count_exp
, Pmode
, 1, expected_size
);
15789 case unrolled_loop
:
15790 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15791 count_exp
, Pmode
, 4, expected_size
);
15793 case rep_prefix_8_byte
:
15794 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15797 case rep_prefix_4_byte
:
15798 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15801 case rep_prefix_1_byte
:
15802 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15806 /* Adjust properly the offset of src and dest memory for aliasing. */
15807 if (CONST_INT_P (count_exp
))
15808 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15809 (count
/ size_needed
) * size_needed
);
15811 dst
= change_address (dst
, BLKmode
, destreg
);
15813 /* Step 4: Epilogue to copy the remaining bytes. */
15817 /* When the main loop is done, COUNT_EXP might hold original count,
15818 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15819 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15820 bytes. Compensate if needed. */
15822 if (size_needed
< desired_align
- align
)
15825 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15826 GEN_INT (size_needed
- 1), count_exp
, 1,
15828 size_needed
= desired_align
- align
+ 1;
15829 if (tmp
!= count_exp
)
15830 emit_move_insn (count_exp
, tmp
);
15832 emit_label (label
);
15833 LABEL_NUSES (label
) = 1;
15835 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15837 if (force_loopy_epilogue
)
15838 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
15841 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
15844 if (jump_around_label
)
15845 emit_label (jump_around_label
);
15849 /* Expand the appropriate insns for doing strlen if not just doing
15852 out = result, initialized with the start address
15853 align_rtx = alignment of the address.
15854 scratch = scratch register, initialized with the startaddress when
15855 not aligned, otherwise undefined
15857 This is just the body. It needs the initializations mentioned above and
15858 some address computing at the end. These things are done in i386.md. */
15861 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
15865 rtx align_2_label
= NULL_RTX
;
15866 rtx align_3_label
= NULL_RTX
;
15867 rtx align_4_label
= gen_label_rtx ();
15868 rtx end_0_label
= gen_label_rtx ();
15870 rtx tmpreg
= gen_reg_rtx (SImode
);
15871 rtx scratch
= gen_reg_rtx (SImode
);
15875 if (CONST_INT_P (align_rtx
))
15876 align
= INTVAL (align_rtx
);
15878 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15880 /* Is there a known alignment and is it less than 4? */
15883 rtx scratch1
= gen_reg_rtx (Pmode
);
15884 emit_move_insn (scratch1
, out
);
15885 /* Is there a known alignment and is it not 2? */
15888 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
15889 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
15891 /* Leave just the 3 lower bits. */
15892 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
15893 NULL_RTX
, 0, OPTAB_WIDEN
);
15895 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15896 Pmode
, 1, align_4_label
);
15897 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
15898 Pmode
, 1, align_2_label
);
15899 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
15900 Pmode
, 1, align_3_label
);
15904 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15905 check if is aligned to 4 - byte. */
15907 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
15908 NULL_RTX
, 0, OPTAB_WIDEN
);
15910 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15911 Pmode
, 1, align_4_label
);
15914 mem
= change_address (src
, QImode
, out
);
15916 /* Now compare the bytes. */
15918 /* Compare the first n unaligned byte on a byte per byte basis. */
15919 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
15920 QImode
, 1, end_0_label
);
15922 /* Increment the address. */
15924 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15926 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15928 /* Not needed with an alignment of 2 */
15931 emit_label (align_2_label
);
15933 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15937 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15939 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15941 emit_label (align_3_label
);
15944 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15948 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15950 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15953 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15954 align this loop. It gives only huge programs, but does not help to
15956 emit_label (align_4_label
);
15958 mem
= change_address (src
, SImode
, out
);
15959 emit_move_insn (scratch
, mem
);
15961 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15963 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
15965 /* This formula yields a nonzero result iff one of the bytes is zero.
15966 This saves three branches inside loop and many cycles. */
15968 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15969 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15970 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15971 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15972 gen_int_mode (0x80808080, SImode
)));
15973 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15978 rtx reg
= gen_reg_rtx (SImode
);
15979 rtx reg2
= gen_reg_rtx (Pmode
);
15980 emit_move_insn (reg
, tmpreg
);
15981 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15983 /* If zero is not in the first two bytes, move two bytes forward. */
15984 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15985 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15986 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15987 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15988 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15991 /* Emit lea manually to avoid clobbering of flags. */
15992 emit_insn (gen_rtx_SET (SImode
, reg2
,
15993 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15995 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15996 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15997 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15998 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
16005 rtx end_2_label
= gen_label_rtx ();
16006 /* Is zero in the first two bytes? */
16008 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
16009 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16010 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
16011 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
16012 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
16014 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
16015 JUMP_LABEL (tmp
) = end_2_label
;
16017 /* Not in the first two. Move two bytes forward. */
16018 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
16020 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
16022 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
16024 emit_label (end_2_label
);
16028 /* Avoid branch in fixing the byte. */
16029 tmpreg
= gen_lowpart (QImode
, tmpreg
);
16030 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
16031 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, FLAGS_REG
), const0_rtx
);
16033 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
16035 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
16037 emit_label (end_0_label
);
16040 /* Expand strlen. */
16043 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
16045 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
16047 /* The generic case of strlen expander is long. Avoid it's
16048 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16050 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
16051 && !TARGET_INLINE_ALL_STRINGOPS
16053 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
16056 addr
= force_reg (Pmode
, XEXP (src
, 0));
16057 scratch1
= gen_reg_rtx (Pmode
);
16059 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
16062 /* Well it seems that some optimizer does not combine a call like
16063 foo(strlen(bar), strlen(bar));
16064 when the move and the subtraction is done here. It does calculate
16065 the length just once when these instructions are done inside of
16066 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16067 often used and I use one fewer register for the lifetime of
16068 output_strlen_unroll() this is better. */
16070 emit_move_insn (out
, addr
);
16072 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
16074 /* strlensi_unroll_1 returns the address of the zero at the end of
16075 the string, like memchr(), so compute the length by subtracting
16076 the start address. */
16078 emit_insn (gen_subdi3 (out
, out
, addr
));
16080 emit_insn (gen_subsi3 (out
, out
, addr
));
16086 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16087 if (global_regs
[AX_REG
] || global_regs
[CX_REG
] || global_regs
[DI_REG
])
16090 scratch2
= gen_reg_rtx (Pmode
);
16091 scratch3
= gen_reg_rtx (Pmode
);
16092 scratch4
= force_reg (Pmode
, constm1_rtx
);
16094 emit_move_insn (scratch3
, addr
);
16095 eoschar
= force_reg (QImode
, eoschar
);
16097 src
= replace_equiv_address_nv (src
, scratch3
);
16099 /* If .md starts supporting :P, this can be done in .md. */
16100 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
16101 scratch4
), UNSPEC_SCAS
);
16102 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
16105 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
16106 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
16110 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
16111 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
16117 /* For given symbol (function) construct code to compute address of it's PLT
16118 entry in large x86-64 PIC model. */
16120 construct_plt_address (rtx symbol
)
16122 rtx tmp
= gen_reg_rtx (Pmode
);
16123 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
16125 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
16126 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
16128 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
16129 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
16134 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
16135 rtx callarg2 ATTRIBUTE_UNUSED
,
16136 rtx pop
, int sibcall
)
16138 rtx use
= NULL
, call
;
16140 if (pop
== const0_rtx
)
16142 gcc_assert (!TARGET_64BIT
|| !pop
);
16144 if (TARGET_MACHO
&& !TARGET_64BIT
)
16147 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
16148 fnaddr
= machopic_indirect_call_target (fnaddr
);
16153 /* Static functions and indirect calls don't need the pic register. */
16154 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
16155 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
16156 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
16157 use_reg (&use
, pic_offset_table_rtx
);
16160 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
16162 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
16163 emit_move_insn (al
, callarg2
);
16164 use_reg (&use
, al
);
16167 if (ix86_cmodel
== CM_LARGE_PIC
16168 && GET_CODE (fnaddr
) == MEM
16169 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
16170 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
16171 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
16172 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
16174 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
16175 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
16177 if (sibcall
&& TARGET_64BIT
16178 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
16181 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
16182 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
16183 emit_move_insn (fnaddr
, addr
);
16184 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
16187 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
16189 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
16192 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
16193 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
16194 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
16197 call
= emit_call_insn (call
);
16199 CALL_INSN_FUNCTION_USAGE (call
) = use
;
16203 /* Clear stack slot assignments remembered from previous functions.
16204 This is called from INIT_EXPANDERS once before RTL is emitted for each
16207 static struct machine_function
*
16208 ix86_init_machine_status (void)
16210 struct machine_function
*f
;
16212 f
= GGC_CNEW (struct machine_function
);
16213 f
->use_fast_prologue_epilogue_nregs
= -1;
16214 f
->tls_descriptor_call_expanded_p
= 0;
16219 /* Return a MEM corresponding to a stack slot with mode MODE.
16220 Allocate a new slot if necessary.
16222 The RTL for a function can have several slots available: N is
16223 which slot to use. */
16226 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
16228 struct stack_local_entry
*s
;
16230 gcc_assert (n
< MAX_386_STACK_LOCALS
);
16232 /* Virtual slot is valid only before vregs are instantiated. */
16233 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
16235 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
16236 if (s
->mode
== mode
&& s
->n
== n
)
16237 return copy_rtx (s
->rtl
);
16239 s
= (struct stack_local_entry
*)
16240 ggc_alloc (sizeof (struct stack_local_entry
));
16243 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
16245 s
->next
= ix86_stack_locals
;
16246 ix86_stack_locals
= s
;
16250 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16252 static GTY(()) rtx ix86_tls_symbol
;
16254 ix86_tls_get_addr (void)
16257 if (!ix86_tls_symbol
)
16259 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
16260 (TARGET_ANY_GNU_TLS
16262 ? "___tls_get_addr"
16263 : "__tls_get_addr");
16266 return ix86_tls_symbol
;
16269 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16271 static GTY(()) rtx ix86_tls_module_base_symbol
;
16273 ix86_tls_module_base (void)
16276 if (!ix86_tls_module_base_symbol
)
16278 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
16279 "_TLS_MODULE_BASE_");
16280 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
16281 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
16284 return ix86_tls_module_base_symbol
;
16287 /* Calculate the length of the memory address in the instruction
16288 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16291 memory_address_length (rtx addr
)
16293 struct ix86_address parts
;
16294 rtx base
, index
, disp
;
16298 if (GET_CODE (addr
) == PRE_DEC
16299 || GET_CODE (addr
) == POST_INC
16300 || GET_CODE (addr
) == PRE_MODIFY
16301 || GET_CODE (addr
) == POST_MODIFY
)
16304 ok
= ix86_decompose_address (addr
, &parts
);
16307 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
16308 parts
.base
= SUBREG_REG (parts
.base
);
16309 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
16310 parts
.index
= SUBREG_REG (parts
.index
);
16313 index
= parts
.index
;
16318 - esp as the base always wants an index,
16319 - ebp as the base always wants a displacement. */
16321 /* Register Indirect. */
16322 if (base
&& !index
&& !disp
)
16324 /* esp (for its index) and ebp (for its displacement) need
16325 the two-byte modrm form. */
16326 if (addr
== stack_pointer_rtx
16327 || addr
== arg_pointer_rtx
16328 || addr
== frame_pointer_rtx
16329 || addr
== hard_frame_pointer_rtx
)
16333 /* Direct Addressing. */
16334 else if (disp
&& !base
&& !index
)
16339 /* Find the length of the displacement constant. */
16342 if (base
&& satisfies_constraint_K (disp
))
16347 /* ebp always wants a displacement. */
16348 else if (base
== hard_frame_pointer_rtx
)
16351 /* An index requires the two-byte modrm form.... */
16353 /* ...like esp, which always wants an index. */
16354 || base
== stack_pointer_rtx
16355 || base
== arg_pointer_rtx
16356 || base
== frame_pointer_rtx
)
16363 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16364 is set, expect that insn have 8bit immediate alternative. */
16366 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
16370 extract_insn_cached (insn
);
16371 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16372 if (CONSTANT_P (recog_data
.operand
[i
]))
16375 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
16379 switch (get_attr_mode (insn
))
16390 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16395 fatal_insn ("unknown insn mode", insn
);
16401 /* Compute default value for "length_address" attribute. */
16403 ix86_attr_length_address_default (rtx insn
)
16407 if (get_attr_type (insn
) == TYPE_LEA
)
16409 rtx set
= PATTERN (insn
);
16411 if (GET_CODE (set
) == PARALLEL
)
16412 set
= XVECEXP (set
, 0, 0);
16414 gcc_assert (GET_CODE (set
) == SET
);
16416 return memory_address_length (SET_SRC (set
));
16419 extract_insn_cached (insn
);
16420 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16421 if (MEM_P (recog_data
.operand
[i
]))
16423 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
16429 /* Return the maximum number of instructions a cpu can issue. */
16432 ix86_issue_rate (void)
16436 case PROCESSOR_PENTIUM
:
16440 case PROCESSOR_PENTIUMPRO
:
16441 case PROCESSOR_PENTIUM4
:
16442 case PROCESSOR_ATHLON
:
16444 case PROCESSOR_AMDFAM10
:
16445 case PROCESSOR_NOCONA
:
16446 case PROCESSOR_GENERIC32
:
16447 case PROCESSOR_GENERIC64
:
16450 case PROCESSOR_CORE2
:
16458 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16459 by DEP_INSN and nothing set by DEP_INSN. */
16462 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16466 /* Simplify the test for uninteresting insns. */
16467 if (insn_type
!= TYPE_SETCC
16468 && insn_type
!= TYPE_ICMOV
16469 && insn_type
!= TYPE_FCMOV
16470 && insn_type
!= TYPE_IBR
)
16473 if ((set
= single_set (dep_insn
)) != 0)
16475 set
= SET_DEST (set
);
16478 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
16479 && XVECLEN (PATTERN (dep_insn
), 0) == 2
16480 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
16481 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
16483 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16484 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16489 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
16492 /* This test is true if the dependent insn reads the flags but
16493 not any other potentially set register. */
16494 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
16497 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
16503 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16504 address with operands set by DEP_INSN. */
16507 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16511 if (insn_type
== TYPE_LEA
16514 addr
= PATTERN (insn
);
16516 if (GET_CODE (addr
) == PARALLEL
)
16517 addr
= XVECEXP (addr
, 0, 0);
16519 gcc_assert (GET_CODE (addr
) == SET
);
16521 addr
= SET_SRC (addr
);
16526 extract_insn_cached (insn
);
16527 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16528 if (MEM_P (recog_data
.operand
[i
]))
16530 addr
= XEXP (recog_data
.operand
[i
], 0);
16537 return modified_in_p (addr
, dep_insn
);
16541 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
16543 enum attr_type insn_type
, dep_insn_type
;
16544 enum attr_memory memory
;
16546 int dep_insn_code_number
;
16548 /* Anti and output dependencies have zero cost on all CPUs. */
16549 if (REG_NOTE_KIND (link
) != 0)
16552 dep_insn_code_number
= recog_memoized (dep_insn
);
16554 /* If we can't recognize the insns, we can't really do anything. */
16555 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
16558 insn_type
= get_attr_type (insn
);
16559 dep_insn_type
= get_attr_type (dep_insn
);
16563 case PROCESSOR_PENTIUM
:
16564 /* Address Generation Interlock adds a cycle of latency. */
16565 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16568 /* ??? Compares pair with jump/setcc. */
16569 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
16572 /* Floating point stores require value to be ready one cycle earlier. */
16573 if (insn_type
== TYPE_FMOV
16574 && get_attr_memory (insn
) == MEMORY_STORE
16575 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16579 case PROCESSOR_PENTIUMPRO
:
16580 memory
= get_attr_memory (insn
);
16582 /* INT->FP conversion is expensive. */
16583 if (get_attr_fp_int_src (dep_insn
))
16586 /* There is one cycle extra latency between an FP op and a store. */
16587 if (insn_type
== TYPE_FMOV
16588 && (set
= single_set (dep_insn
)) != NULL_RTX
16589 && (set2
= single_set (insn
)) != NULL_RTX
16590 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
16591 && MEM_P (SET_DEST (set2
)))
16594 /* Show ability of reorder buffer to hide latency of load by executing
16595 in parallel with previous instruction in case
16596 previous instruction is not needed to compute the address. */
16597 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16598 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16600 /* Claim moves to take one cycle, as core can issue one load
16601 at time and the next load can start cycle later. */
16602 if (dep_insn_type
== TYPE_IMOV
16603 || dep_insn_type
== TYPE_FMOV
)
16611 memory
= get_attr_memory (insn
);
16613 /* The esp dependency is resolved before the instruction is really
16615 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
16616 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
16619 /* INT->FP conversion is expensive. */
16620 if (get_attr_fp_int_src (dep_insn
))
16623 /* Show ability of reorder buffer to hide latency of load by executing
16624 in parallel with previous instruction in case
16625 previous instruction is not needed to compute the address. */
16626 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16627 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16629 /* Claim moves to take one cycle, as core can issue one load
16630 at time and the next load can start cycle later. */
16631 if (dep_insn_type
== TYPE_IMOV
16632 || dep_insn_type
== TYPE_FMOV
)
16641 case PROCESSOR_ATHLON
:
16643 case PROCESSOR_AMDFAM10
:
16644 case PROCESSOR_GENERIC32
:
16645 case PROCESSOR_GENERIC64
:
16646 memory
= get_attr_memory (insn
);
16648 /* Show ability of reorder buffer to hide latency of load by executing
16649 in parallel with previous instruction in case
16650 previous instruction is not needed to compute the address. */
16651 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16652 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16654 enum attr_unit unit
= get_attr_unit (insn
);
16657 /* Because of the difference between the length of integer and
16658 floating unit pipeline preparation stages, the memory operands
16659 for floating point are cheaper.
16661 ??? For Athlon it the difference is most probably 2. */
16662 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
16665 loadcost
= TARGET_ATHLON
? 2 : 0;
16667 if (cost
>= loadcost
)
16680 /* How many alternative schedules to try. This should be as wide as the
16681 scheduling freedom in the DFA, but no wider. Making this value too
16682 large results extra work for the scheduler. */
16685 ia32_multipass_dfa_lookahead (void)
16689 case PROCESSOR_PENTIUM
:
16692 case PROCESSOR_PENTIUMPRO
:
16702 /* Compute the alignment given to a constant that is being placed in memory.
16703 EXP is the constant and ALIGN is the alignment that the object would
16705 The value of this function is used instead of that alignment to align
16709 ix86_constant_alignment (tree exp
, int align
)
16711 if (TREE_CODE (exp
) == REAL_CST
)
16713 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
16715 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
16718 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16719 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16720 return BITS_PER_WORD
;
16725 /* Compute the alignment for a static variable.
16726 TYPE is the data type, and ALIGN is the alignment that
16727 the object would ordinarily have. The value of this function is used
16728 instead of that alignment to align the object. */
16731 ix86_data_alignment (tree type
, int align
)
16733 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
16735 if (AGGREGATE_TYPE_P (type
)
16736 && TYPE_SIZE (type
)
16737 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16738 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
16739 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
16740 && align
< max_align
)
16743 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16744 to 16byte boundary. */
16747 if (AGGREGATE_TYPE_P (type
)
16748 && TYPE_SIZE (type
)
16749 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16750 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
16751 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16755 if (TREE_CODE (type
) == ARRAY_TYPE
)
16757 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16759 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16762 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16765 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16767 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16770 else if ((TREE_CODE (type
) == RECORD_TYPE
16771 || TREE_CODE (type
) == UNION_TYPE
16772 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16773 && TYPE_FIELDS (type
))
16775 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16777 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16780 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16781 || TREE_CODE (type
) == INTEGER_TYPE
)
16783 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16785 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16792 /* Compute the alignment for a local variable.
16793 TYPE is the data type, and ALIGN is the alignment that
16794 the object would ordinarily have. The value of this macro is used
16795 instead of that alignment to align the object. */
16798 ix86_local_alignment (tree type
, int align
)
16800 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16801 to 16byte boundary. */
16804 if (AGGREGATE_TYPE_P (type
)
16805 && TYPE_SIZE (type
)
16806 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16807 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
16808 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16811 if (TREE_CODE (type
) == ARRAY_TYPE
)
16813 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16815 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16818 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16820 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16822 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16825 else if ((TREE_CODE (type
) == RECORD_TYPE
16826 || TREE_CODE (type
) == UNION_TYPE
16827 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16828 && TYPE_FIELDS (type
))
16830 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16832 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16835 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16836 || TREE_CODE (type
) == INTEGER_TYPE
)
16839 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16841 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16847 /* Emit RTL insns to initialize the variable parts of a trampoline.
16848 FNADDR is an RTX for the address of the function's pure code.
16849 CXT is an RTX for the static chain value for the function. */
16851 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
16855 /* Compute offset from the end of the jmp to the target function. */
16856 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16857 plus_constant (tramp
, 10),
16858 NULL_RTX
, 1, OPTAB_DIRECT
);
16859 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
16860 gen_int_mode (0xb9, QImode
));
16861 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
16862 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
16863 gen_int_mode (0xe9, QImode
));
16864 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
16869 /* Try to load address using shorter movl instead of movabs.
16870 We may want to support movq for kernel mode, but kernel does not use
16871 trampolines at the moment. */
16872 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16874 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
16875 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16876 gen_int_mode (0xbb41, HImode
));
16877 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
16878 gen_lowpart (SImode
, fnaddr
));
16883 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16884 gen_int_mode (0xbb49, HImode
));
16885 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16889 /* Load static chain using movabs to r10. */
16890 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16891 gen_int_mode (0xba49, HImode
));
16892 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16895 /* Jump to the r11 */
16896 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16897 gen_int_mode (0xff49, HImode
));
16898 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
16899 gen_int_mode (0xe3, QImode
));
16901 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
16904 #ifdef ENABLE_EXECUTE_STACK
16905 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16906 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
16910 /* Codes for all the SSE/MMX builtins. */
16913 IX86_BUILTIN_ADDPS
,
16914 IX86_BUILTIN_ADDSS
,
16915 IX86_BUILTIN_DIVPS
,
16916 IX86_BUILTIN_DIVSS
,
16917 IX86_BUILTIN_MULPS
,
16918 IX86_BUILTIN_MULSS
,
16919 IX86_BUILTIN_SUBPS
,
16920 IX86_BUILTIN_SUBSS
,
16922 IX86_BUILTIN_CMPEQPS
,
16923 IX86_BUILTIN_CMPLTPS
,
16924 IX86_BUILTIN_CMPLEPS
,
16925 IX86_BUILTIN_CMPGTPS
,
16926 IX86_BUILTIN_CMPGEPS
,
16927 IX86_BUILTIN_CMPNEQPS
,
16928 IX86_BUILTIN_CMPNLTPS
,
16929 IX86_BUILTIN_CMPNLEPS
,
16930 IX86_BUILTIN_CMPNGTPS
,
16931 IX86_BUILTIN_CMPNGEPS
,
16932 IX86_BUILTIN_CMPORDPS
,
16933 IX86_BUILTIN_CMPUNORDPS
,
16934 IX86_BUILTIN_CMPEQSS
,
16935 IX86_BUILTIN_CMPLTSS
,
16936 IX86_BUILTIN_CMPLESS
,
16937 IX86_BUILTIN_CMPNEQSS
,
16938 IX86_BUILTIN_CMPNLTSS
,
16939 IX86_BUILTIN_CMPNLESS
,
16940 IX86_BUILTIN_CMPNGTSS
,
16941 IX86_BUILTIN_CMPNGESS
,
16942 IX86_BUILTIN_CMPORDSS
,
16943 IX86_BUILTIN_CMPUNORDSS
,
16945 IX86_BUILTIN_COMIEQSS
,
16946 IX86_BUILTIN_COMILTSS
,
16947 IX86_BUILTIN_COMILESS
,
16948 IX86_BUILTIN_COMIGTSS
,
16949 IX86_BUILTIN_COMIGESS
,
16950 IX86_BUILTIN_COMINEQSS
,
16951 IX86_BUILTIN_UCOMIEQSS
,
16952 IX86_BUILTIN_UCOMILTSS
,
16953 IX86_BUILTIN_UCOMILESS
,
16954 IX86_BUILTIN_UCOMIGTSS
,
16955 IX86_BUILTIN_UCOMIGESS
,
16956 IX86_BUILTIN_UCOMINEQSS
,
16958 IX86_BUILTIN_CVTPI2PS
,
16959 IX86_BUILTIN_CVTPS2PI
,
16960 IX86_BUILTIN_CVTSI2SS
,
16961 IX86_BUILTIN_CVTSI642SS
,
16962 IX86_BUILTIN_CVTSS2SI
,
16963 IX86_BUILTIN_CVTSS2SI64
,
16964 IX86_BUILTIN_CVTTPS2PI
,
16965 IX86_BUILTIN_CVTTSS2SI
,
16966 IX86_BUILTIN_CVTTSS2SI64
,
16968 IX86_BUILTIN_MAXPS
,
16969 IX86_BUILTIN_MAXSS
,
16970 IX86_BUILTIN_MINPS
,
16971 IX86_BUILTIN_MINSS
,
16973 IX86_BUILTIN_LOADUPS
,
16974 IX86_BUILTIN_STOREUPS
,
16975 IX86_BUILTIN_MOVSS
,
16977 IX86_BUILTIN_MOVHLPS
,
16978 IX86_BUILTIN_MOVLHPS
,
16979 IX86_BUILTIN_LOADHPS
,
16980 IX86_BUILTIN_LOADLPS
,
16981 IX86_BUILTIN_STOREHPS
,
16982 IX86_BUILTIN_STORELPS
,
16984 IX86_BUILTIN_MASKMOVQ
,
16985 IX86_BUILTIN_MOVMSKPS
,
16986 IX86_BUILTIN_PMOVMSKB
,
16988 IX86_BUILTIN_MOVNTPS
,
16989 IX86_BUILTIN_MOVNTQ
,
16991 IX86_BUILTIN_LOADDQU
,
16992 IX86_BUILTIN_STOREDQU
,
16994 IX86_BUILTIN_PACKSSWB
,
16995 IX86_BUILTIN_PACKSSDW
,
16996 IX86_BUILTIN_PACKUSWB
,
16998 IX86_BUILTIN_PADDB
,
16999 IX86_BUILTIN_PADDW
,
17000 IX86_BUILTIN_PADDD
,
17001 IX86_BUILTIN_PADDQ
,
17002 IX86_BUILTIN_PADDSB
,
17003 IX86_BUILTIN_PADDSW
,
17004 IX86_BUILTIN_PADDUSB
,
17005 IX86_BUILTIN_PADDUSW
,
17006 IX86_BUILTIN_PSUBB
,
17007 IX86_BUILTIN_PSUBW
,
17008 IX86_BUILTIN_PSUBD
,
17009 IX86_BUILTIN_PSUBQ
,
17010 IX86_BUILTIN_PSUBSB
,
17011 IX86_BUILTIN_PSUBSW
,
17012 IX86_BUILTIN_PSUBUSB
,
17013 IX86_BUILTIN_PSUBUSW
,
17016 IX86_BUILTIN_PANDN
,
17020 IX86_BUILTIN_PAVGB
,
17021 IX86_BUILTIN_PAVGW
,
17023 IX86_BUILTIN_PCMPEQB
,
17024 IX86_BUILTIN_PCMPEQW
,
17025 IX86_BUILTIN_PCMPEQD
,
17026 IX86_BUILTIN_PCMPGTB
,
17027 IX86_BUILTIN_PCMPGTW
,
17028 IX86_BUILTIN_PCMPGTD
,
17030 IX86_BUILTIN_PMADDWD
,
17032 IX86_BUILTIN_PMAXSW
,
17033 IX86_BUILTIN_PMAXUB
,
17034 IX86_BUILTIN_PMINSW
,
17035 IX86_BUILTIN_PMINUB
,
17037 IX86_BUILTIN_PMULHUW
,
17038 IX86_BUILTIN_PMULHW
,
17039 IX86_BUILTIN_PMULLW
,
17041 IX86_BUILTIN_PSADBW
,
17042 IX86_BUILTIN_PSHUFW
,
17044 IX86_BUILTIN_PSLLW
,
17045 IX86_BUILTIN_PSLLD
,
17046 IX86_BUILTIN_PSLLQ
,
17047 IX86_BUILTIN_PSRAW
,
17048 IX86_BUILTIN_PSRAD
,
17049 IX86_BUILTIN_PSRLW
,
17050 IX86_BUILTIN_PSRLD
,
17051 IX86_BUILTIN_PSRLQ
,
17052 IX86_BUILTIN_PSLLWI
,
17053 IX86_BUILTIN_PSLLDI
,
17054 IX86_BUILTIN_PSLLQI
,
17055 IX86_BUILTIN_PSRAWI
,
17056 IX86_BUILTIN_PSRADI
,
17057 IX86_BUILTIN_PSRLWI
,
17058 IX86_BUILTIN_PSRLDI
,
17059 IX86_BUILTIN_PSRLQI
,
17061 IX86_BUILTIN_PUNPCKHBW
,
17062 IX86_BUILTIN_PUNPCKHWD
,
17063 IX86_BUILTIN_PUNPCKHDQ
,
17064 IX86_BUILTIN_PUNPCKLBW
,
17065 IX86_BUILTIN_PUNPCKLWD
,
17066 IX86_BUILTIN_PUNPCKLDQ
,
17068 IX86_BUILTIN_SHUFPS
,
17070 IX86_BUILTIN_RCPPS
,
17071 IX86_BUILTIN_RCPSS
,
17072 IX86_BUILTIN_RSQRTPS
,
17073 IX86_BUILTIN_RSQRTSS
,
17074 IX86_BUILTIN_RSQRTF
,
17075 IX86_BUILTIN_SQRTPS
,
17076 IX86_BUILTIN_SQRTSS
,
17078 IX86_BUILTIN_UNPCKHPS
,
17079 IX86_BUILTIN_UNPCKLPS
,
17081 IX86_BUILTIN_ANDPS
,
17082 IX86_BUILTIN_ANDNPS
,
17084 IX86_BUILTIN_XORPS
,
17087 IX86_BUILTIN_LDMXCSR
,
17088 IX86_BUILTIN_STMXCSR
,
17089 IX86_BUILTIN_SFENCE
,
17091 /* 3DNow! Original */
17092 IX86_BUILTIN_FEMMS
,
17093 IX86_BUILTIN_PAVGUSB
,
17094 IX86_BUILTIN_PF2ID
,
17095 IX86_BUILTIN_PFACC
,
17096 IX86_BUILTIN_PFADD
,
17097 IX86_BUILTIN_PFCMPEQ
,
17098 IX86_BUILTIN_PFCMPGE
,
17099 IX86_BUILTIN_PFCMPGT
,
17100 IX86_BUILTIN_PFMAX
,
17101 IX86_BUILTIN_PFMIN
,
17102 IX86_BUILTIN_PFMUL
,
17103 IX86_BUILTIN_PFRCP
,
17104 IX86_BUILTIN_PFRCPIT1
,
17105 IX86_BUILTIN_PFRCPIT2
,
17106 IX86_BUILTIN_PFRSQIT1
,
17107 IX86_BUILTIN_PFRSQRT
,
17108 IX86_BUILTIN_PFSUB
,
17109 IX86_BUILTIN_PFSUBR
,
17110 IX86_BUILTIN_PI2FD
,
17111 IX86_BUILTIN_PMULHRW
,
17113 /* 3DNow! Athlon Extensions */
17114 IX86_BUILTIN_PF2IW
,
17115 IX86_BUILTIN_PFNACC
,
17116 IX86_BUILTIN_PFPNACC
,
17117 IX86_BUILTIN_PI2FW
,
17118 IX86_BUILTIN_PSWAPDSI
,
17119 IX86_BUILTIN_PSWAPDSF
,
17122 IX86_BUILTIN_ADDPD
,
17123 IX86_BUILTIN_ADDSD
,
17124 IX86_BUILTIN_DIVPD
,
17125 IX86_BUILTIN_DIVSD
,
17126 IX86_BUILTIN_MULPD
,
17127 IX86_BUILTIN_MULSD
,
17128 IX86_BUILTIN_SUBPD
,
17129 IX86_BUILTIN_SUBSD
,
17131 IX86_BUILTIN_CMPEQPD
,
17132 IX86_BUILTIN_CMPLTPD
,
17133 IX86_BUILTIN_CMPLEPD
,
17134 IX86_BUILTIN_CMPGTPD
,
17135 IX86_BUILTIN_CMPGEPD
,
17136 IX86_BUILTIN_CMPNEQPD
,
17137 IX86_BUILTIN_CMPNLTPD
,
17138 IX86_BUILTIN_CMPNLEPD
,
17139 IX86_BUILTIN_CMPNGTPD
,
17140 IX86_BUILTIN_CMPNGEPD
,
17141 IX86_BUILTIN_CMPORDPD
,
17142 IX86_BUILTIN_CMPUNORDPD
,
17143 IX86_BUILTIN_CMPEQSD
,
17144 IX86_BUILTIN_CMPLTSD
,
17145 IX86_BUILTIN_CMPLESD
,
17146 IX86_BUILTIN_CMPNEQSD
,
17147 IX86_BUILTIN_CMPNLTSD
,
17148 IX86_BUILTIN_CMPNLESD
,
17149 IX86_BUILTIN_CMPORDSD
,
17150 IX86_BUILTIN_CMPUNORDSD
,
17152 IX86_BUILTIN_COMIEQSD
,
17153 IX86_BUILTIN_COMILTSD
,
17154 IX86_BUILTIN_COMILESD
,
17155 IX86_BUILTIN_COMIGTSD
,
17156 IX86_BUILTIN_COMIGESD
,
17157 IX86_BUILTIN_COMINEQSD
,
17158 IX86_BUILTIN_UCOMIEQSD
,
17159 IX86_BUILTIN_UCOMILTSD
,
17160 IX86_BUILTIN_UCOMILESD
,
17161 IX86_BUILTIN_UCOMIGTSD
,
17162 IX86_BUILTIN_UCOMIGESD
,
17163 IX86_BUILTIN_UCOMINEQSD
,
17165 IX86_BUILTIN_MAXPD
,
17166 IX86_BUILTIN_MAXSD
,
17167 IX86_BUILTIN_MINPD
,
17168 IX86_BUILTIN_MINSD
,
17170 IX86_BUILTIN_ANDPD
,
17171 IX86_BUILTIN_ANDNPD
,
17173 IX86_BUILTIN_XORPD
,
17175 IX86_BUILTIN_SQRTPD
,
17176 IX86_BUILTIN_SQRTSD
,
17178 IX86_BUILTIN_UNPCKHPD
,
17179 IX86_BUILTIN_UNPCKLPD
,
17181 IX86_BUILTIN_SHUFPD
,
17183 IX86_BUILTIN_LOADUPD
,
17184 IX86_BUILTIN_STOREUPD
,
17185 IX86_BUILTIN_MOVSD
,
17187 IX86_BUILTIN_LOADHPD
,
17188 IX86_BUILTIN_LOADLPD
,
17190 IX86_BUILTIN_CVTDQ2PD
,
17191 IX86_BUILTIN_CVTDQ2PS
,
17193 IX86_BUILTIN_CVTPD2DQ
,
17194 IX86_BUILTIN_CVTPD2PI
,
17195 IX86_BUILTIN_CVTPD2PS
,
17196 IX86_BUILTIN_CVTTPD2DQ
,
17197 IX86_BUILTIN_CVTTPD2PI
,
17199 IX86_BUILTIN_CVTPI2PD
,
17200 IX86_BUILTIN_CVTSI2SD
,
17201 IX86_BUILTIN_CVTSI642SD
,
17203 IX86_BUILTIN_CVTSD2SI
,
17204 IX86_BUILTIN_CVTSD2SI64
,
17205 IX86_BUILTIN_CVTSD2SS
,
17206 IX86_BUILTIN_CVTSS2SD
,
17207 IX86_BUILTIN_CVTTSD2SI
,
17208 IX86_BUILTIN_CVTTSD2SI64
,
17210 IX86_BUILTIN_CVTPS2DQ
,
17211 IX86_BUILTIN_CVTPS2PD
,
17212 IX86_BUILTIN_CVTTPS2DQ
,
17214 IX86_BUILTIN_MOVNTI
,
17215 IX86_BUILTIN_MOVNTPD
,
17216 IX86_BUILTIN_MOVNTDQ
,
17219 IX86_BUILTIN_MASKMOVDQU
,
17220 IX86_BUILTIN_MOVMSKPD
,
17221 IX86_BUILTIN_PMOVMSKB128
,
17223 IX86_BUILTIN_PACKSSWB128
,
17224 IX86_BUILTIN_PACKSSDW128
,
17225 IX86_BUILTIN_PACKUSWB128
,
17227 IX86_BUILTIN_PADDB128
,
17228 IX86_BUILTIN_PADDW128
,
17229 IX86_BUILTIN_PADDD128
,
17230 IX86_BUILTIN_PADDQ128
,
17231 IX86_BUILTIN_PADDSB128
,
17232 IX86_BUILTIN_PADDSW128
,
17233 IX86_BUILTIN_PADDUSB128
,
17234 IX86_BUILTIN_PADDUSW128
,
17235 IX86_BUILTIN_PSUBB128
,
17236 IX86_BUILTIN_PSUBW128
,
17237 IX86_BUILTIN_PSUBD128
,
17238 IX86_BUILTIN_PSUBQ128
,
17239 IX86_BUILTIN_PSUBSB128
,
17240 IX86_BUILTIN_PSUBSW128
,
17241 IX86_BUILTIN_PSUBUSB128
,
17242 IX86_BUILTIN_PSUBUSW128
,
17244 IX86_BUILTIN_PAND128
,
17245 IX86_BUILTIN_PANDN128
,
17246 IX86_BUILTIN_POR128
,
17247 IX86_BUILTIN_PXOR128
,
17249 IX86_BUILTIN_PAVGB128
,
17250 IX86_BUILTIN_PAVGW128
,
17252 IX86_BUILTIN_PCMPEQB128
,
17253 IX86_BUILTIN_PCMPEQW128
,
17254 IX86_BUILTIN_PCMPEQD128
,
17255 IX86_BUILTIN_PCMPGTB128
,
17256 IX86_BUILTIN_PCMPGTW128
,
17257 IX86_BUILTIN_PCMPGTD128
,
17259 IX86_BUILTIN_PMADDWD128
,
17261 IX86_BUILTIN_PMAXSW128
,
17262 IX86_BUILTIN_PMAXUB128
,
17263 IX86_BUILTIN_PMINSW128
,
17264 IX86_BUILTIN_PMINUB128
,
17266 IX86_BUILTIN_PMULUDQ
,
17267 IX86_BUILTIN_PMULUDQ128
,
17268 IX86_BUILTIN_PMULHUW128
,
17269 IX86_BUILTIN_PMULHW128
,
17270 IX86_BUILTIN_PMULLW128
,
17272 IX86_BUILTIN_PSADBW128
,
17273 IX86_BUILTIN_PSHUFHW
,
17274 IX86_BUILTIN_PSHUFLW
,
17275 IX86_BUILTIN_PSHUFD
,
17277 IX86_BUILTIN_PSLLDQI128
,
17278 IX86_BUILTIN_PSLLWI128
,
17279 IX86_BUILTIN_PSLLDI128
,
17280 IX86_BUILTIN_PSLLQI128
,
17281 IX86_BUILTIN_PSRAWI128
,
17282 IX86_BUILTIN_PSRADI128
,
17283 IX86_BUILTIN_PSRLDQI128
,
17284 IX86_BUILTIN_PSRLWI128
,
17285 IX86_BUILTIN_PSRLDI128
,
17286 IX86_BUILTIN_PSRLQI128
,
17288 IX86_BUILTIN_PSLLDQ128
,
17289 IX86_BUILTIN_PSLLW128
,
17290 IX86_BUILTIN_PSLLD128
,
17291 IX86_BUILTIN_PSLLQ128
,
17292 IX86_BUILTIN_PSRAW128
,
17293 IX86_BUILTIN_PSRAD128
,
17294 IX86_BUILTIN_PSRLW128
,
17295 IX86_BUILTIN_PSRLD128
,
17296 IX86_BUILTIN_PSRLQ128
,
17298 IX86_BUILTIN_PUNPCKHBW128
,
17299 IX86_BUILTIN_PUNPCKHWD128
,
17300 IX86_BUILTIN_PUNPCKHDQ128
,
17301 IX86_BUILTIN_PUNPCKHQDQ128
,
17302 IX86_BUILTIN_PUNPCKLBW128
,
17303 IX86_BUILTIN_PUNPCKLWD128
,
17304 IX86_BUILTIN_PUNPCKLDQ128
,
17305 IX86_BUILTIN_PUNPCKLQDQ128
,
17307 IX86_BUILTIN_CLFLUSH
,
17308 IX86_BUILTIN_MFENCE
,
17309 IX86_BUILTIN_LFENCE
,
17311 /* Prescott New Instructions. */
17312 IX86_BUILTIN_ADDSUBPS
,
17313 IX86_BUILTIN_HADDPS
,
17314 IX86_BUILTIN_HSUBPS
,
17315 IX86_BUILTIN_MOVSHDUP
,
17316 IX86_BUILTIN_MOVSLDUP
,
17317 IX86_BUILTIN_ADDSUBPD
,
17318 IX86_BUILTIN_HADDPD
,
17319 IX86_BUILTIN_HSUBPD
,
17320 IX86_BUILTIN_LDDQU
,
17322 IX86_BUILTIN_MONITOR
,
17323 IX86_BUILTIN_MWAIT
,
17326 IX86_BUILTIN_PHADDW
,
17327 IX86_BUILTIN_PHADDD
,
17328 IX86_BUILTIN_PHADDSW
,
17329 IX86_BUILTIN_PHSUBW
,
17330 IX86_BUILTIN_PHSUBD
,
17331 IX86_BUILTIN_PHSUBSW
,
17332 IX86_BUILTIN_PMADDUBSW
,
17333 IX86_BUILTIN_PMULHRSW
,
17334 IX86_BUILTIN_PSHUFB
,
17335 IX86_BUILTIN_PSIGNB
,
17336 IX86_BUILTIN_PSIGNW
,
17337 IX86_BUILTIN_PSIGND
,
17338 IX86_BUILTIN_PALIGNR
,
17339 IX86_BUILTIN_PABSB
,
17340 IX86_BUILTIN_PABSW
,
17341 IX86_BUILTIN_PABSD
,
17343 IX86_BUILTIN_PHADDW128
,
17344 IX86_BUILTIN_PHADDD128
,
17345 IX86_BUILTIN_PHADDSW128
,
17346 IX86_BUILTIN_PHSUBW128
,
17347 IX86_BUILTIN_PHSUBD128
,
17348 IX86_BUILTIN_PHSUBSW128
,
17349 IX86_BUILTIN_PMADDUBSW128
,
17350 IX86_BUILTIN_PMULHRSW128
,
17351 IX86_BUILTIN_PSHUFB128
,
17352 IX86_BUILTIN_PSIGNB128
,
17353 IX86_BUILTIN_PSIGNW128
,
17354 IX86_BUILTIN_PSIGND128
,
17355 IX86_BUILTIN_PALIGNR128
,
17356 IX86_BUILTIN_PABSB128
,
17357 IX86_BUILTIN_PABSW128
,
17358 IX86_BUILTIN_PABSD128
,
17360 /* AMDFAM10 - SSE4A New Instructions. */
17361 IX86_BUILTIN_MOVNTSD
,
17362 IX86_BUILTIN_MOVNTSS
,
17363 IX86_BUILTIN_EXTRQI
,
17364 IX86_BUILTIN_EXTRQ
,
17365 IX86_BUILTIN_INSERTQI
,
17366 IX86_BUILTIN_INSERTQ
,
17369 IX86_BUILTIN_BLENDPD
,
17370 IX86_BUILTIN_BLENDPS
,
17371 IX86_BUILTIN_BLENDVPD
,
17372 IX86_BUILTIN_BLENDVPS
,
17373 IX86_BUILTIN_PBLENDVB128
,
17374 IX86_BUILTIN_PBLENDW128
,
17379 IX86_BUILTIN_INSERTPS128
,
17381 IX86_BUILTIN_MOVNTDQA
,
17382 IX86_BUILTIN_MPSADBW128
,
17383 IX86_BUILTIN_PACKUSDW128
,
17384 IX86_BUILTIN_PCMPEQQ
,
17385 IX86_BUILTIN_PHMINPOSUW128
,
17387 IX86_BUILTIN_PMAXSB128
,
17388 IX86_BUILTIN_PMAXSD128
,
17389 IX86_BUILTIN_PMAXUD128
,
17390 IX86_BUILTIN_PMAXUW128
,
17392 IX86_BUILTIN_PMINSB128
,
17393 IX86_BUILTIN_PMINSD128
,
17394 IX86_BUILTIN_PMINUD128
,
17395 IX86_BUILTIN_PMINUW128
,
17397 IX86_BUILTIN_PMOVSXBW128
,
17398 IX86_BUILTIN_PMOVSXBD128
,
17399 IX86_BUILTIN_PMOVSXBQ128
,
17400 IX86_BUILTIN_PMOVSXWD128
,
17401 IX86_BUILTIN_PMOVSXWQ128
,
17402 IX86_BUILTIN_PMOVSXDQ128
,
17404 IX86_BUILTIN_PMOVZXBW128
,
17405 IX86_BUILTIN_PMOVZXBD128
,
17406 IX86_BUILTIN_PMOVZXBQ128
,
17407 IX86_BUILTIN_PMOVZXWD128
,
17408 IX86_BUILTIN_PMOVZXWQ128
,
17409 IX86_BUILTIN_PMOVZXDQ128
,
17411 IX86_BUILTIN_PMULDQ128
,
17412 IX86_BUILTIN_PMULLD128
,
17414 IX86_BUILTIN_ROUNDPD
,
17415 IX86_BUILTIN_ROUNDPS
,
17416 IX86_BUILTIN_ROUNDSD
,
17417 IX86_BUILTIN_ROUNDSS
,
17419 IX86_BUILTIN_PTESTZ
,
17420 IX86_BUILTIN_PTESTC
,
17421 IX86_BUILTIN_PTESTNZC
,
17423 IX86_BUILTIN_VEC_INIT_V2SI
,
17424 IX86_BUILTIN_VEC_INIT_V4HI
,
17425 IX86_BUILTIN_VEC_INIT_V8QI
,
17426 IX86_BUILTIN_VEC_EXT_V2DF
,
17427 IX86_BUILTIN_VEC_EXT_V2DI
,
17428 IX86_BUILTIN_VEC_EXT_V4SF
,
17429 IX86_BUILTIN_VEC_EXT_V4SI
,
17430 IX86_BUILTIN_VEC_EXT_V8HI
,
17431 IX86_BUILTIN_VEC_EXT_V2SI
,
17432 IX86_BUILTIN_VEC_EXT_V4HI
,
17433 IX86_BUILTIN_VEC_EXT_V16QI
,
17434 IX86_BUILTIN_VEC_SET_V2DI
,
17435 IX86_BUILTIN_VEC_SET_V4SF
,
17436 IX86_BUILTIN_VEC_SET_V4SI
,
17437 IX86_BUILTIN_VEC_SET_V8HI
,
17438 IX86_BUILTIN_VEC_SET_V4HI
,
17439 IX86_BUILTIN_VEC_SET_V16QI
,
17441 IX86_BUILTIN_VEC_PACK_SFIX
,
17444 IX86_BUILTIN_CRC32QI
,
17445 IX86_BUILTIN_CRC32HI
,
17446 IX86_BUILTIN_CRC32SI
,
17447 IX86_BUILTIN_CRC32DI
,
17449 IX86_BUILTIN_PCMPESTRI128
,
17450 IX86_BUILTIN_PCMPESTRM128
,
17451 IX86_BUILTIN_PCMPESTRA128
,
17452 IX86_BUILTIN_PCMPESTRC128
,
17453 IX86_BUILTIN_PCMPESTRO128
,
17454 IX86_BUILTIN_PCMPESTRS128
,
17455 IX86_BUILTIN_PCMPESTRZ128
,
17456 IX86_BUILTIN_PCMPISTRI128
,
17457 IX86_BUILTIN_PCMPISTRM128
,
17458 IX86_BUILTIN_PCMPISTRA128
,
17459 IX86_BUILTIN_PCMPISTRC128
,
17460 IX86_BUILTIN_PCMPISTRO128
,
17461 IX86_BUILTIN_PCMPISTRS128
,
17462 IX86_BUILTIN_PCMPISTRZ128
,
17464 IX86_BUILTIN_PCMPGTQ
,
17466 /* TFmode support builtins. */
17468 IX86_BUILTIN_FABSQ
,
17469 IX86_BUILTIN_COPYSIGNQ
,
17471 /* SSE5 instructions */
17472 IX86_BUILTIN_FMADDSS
,
17473 IX86_BUILTIN_FMADDSD
,
17474 IX86_BUILTIN_FMADDPS
,
17475 IX86_BUILTIN_FMADDPD
,
17476 IX86_BUILTIN_FMSUBSS
,
17477 IX86_BUILTIN_FMSUBSD
,
17478 IX86_BUILTIN_FMSUBPS
,
17479 IX86_BUILTIN_FMSUBPD
,
17480 IX86_BUILTIN_FNMADDSS
,
17481 IX86_BUILTIN_FNMADDSD
,
17482 IX86_BUILTIN_FNMADDPS
,
17483 IX86_BUILTIN_FNMADDPD
,
17484 IX86_BUILTIN_FNMSUBSS
,
17485 IX86_BUILTIN_FNMSUBSD
,
17486 IX86_BUILTIN_FNMSUBPS
,
17487 IX86_BUILTIN_FNMSUBPD
,
17488 IX86_BUILTIN_PCMOV_V2DI
,
17489 IX86_BUILTIN_PCMOV_V4SI
,
17490 IX86_BUILTIN_PCMOV_V8HI
,
17491 IX86_BUILTIN_PCMOV_V16QI
,
17492 IX86_BUILTIN_PCMOV_V4SF
,
17493 IX86_BUILTIN_PCMOV_V2DF
,
17494 IX86_BUILTIN_PPERM
,
17495 IX86_BUILTIN_PERMPS
,
17496 IX86_BUILTIN_PERMPD
,
17497 IX86_BUILTIN_PMACSSWW
,
17498 IX86_BUILTIN_PMACSWW
,
17499 IX86_BUILTIN_PMACSSWD
,
17500 IX86_BUILTIN_PMACSWD
,
17501 IX86_BUILTIN_PMACSSDD
,
17502 IX86_BUILTIN_PMACSDD
,
17503 IX86_BUILTIN_PMACSSDQL
,
17504 IX86_BUILTIN_PMACSSDQH
,
17505 IX86_BUILTIN_PMACSDQL
,
17506 IX86_BUILTIN_PMACSDQH
,
17507 IX86_BUILTIN_PMADCSSWD
,
17508 IX86_BUILTIN_PMADCSWD
,
17509 IX86_BUILTIN_PHADDBW
,
17510 IX86_BUILTIN_PHADDBD
,
17511 IX86_BUILTIN_PHADDBQ
,
17512 IX86_BUILTIN_PHADDWD
,
17513 IX86_BUILTIN_PHADDWQ
,
17514 IX86_BUILTIN_PHADDDQ
,
17515 IX86_BUILTIN_PHADDUBW
,
17516 IX86_BUILTIN_PHADDUBD
,
17517 IX86_BUILTIN_PHADDUBQ
,
17518 IX86_BUILTIN_PHADDUWD
,
17519 IX86_BUILTIN_PHADDUWQ
,
17520 IX86_BUILTIN_PHADDUDQ
,
17521 IX86_BUILTIN_PHSUBBW
,
17522 IX86_BUILTIN_PHSUBWD
,
17523 IX86_BUILTIN_PHSUBDQ
,
17524 IX86_BUILTIN_PROTB
,
17525 IX86_BUILTIN_PROTW
,
17526 IX86_BUILTIN_PROTD
,
17527 IX86_BUILTIN_PROTQ
,
17528 IX86_BUILTIN_PROTB_IMM
,
17529 IX86_BUILTIN_PROTW_IMM
,
17530 IX86_BUILTIN_PROTD_IMM
,
17531 IX86_BUILTIN_PROTQ_IMM
,
17532 IX86_BUILTIN_PSHLB
,
17533 IX86_BUILTIN_PSHLW
,
17534 IX86_BUILTIN_PSHLD
,
17535 IX86_BUILTIN_PSHLQ
,
17536 IX86_BUILTIN_PSHAB
,
17537 IX86_BUILTIN_PSHAW
,
17538 IX86_BUILTIN_PSHAD
,
17539 IX86_BUILTIN_PSHAQ
,
17540 IX86_BUILTIN_FRCZSS
,
17541 IX86_BUILTIN_FRCZSD
,
17542 IX86_BUILTIN_FRCZPS
,
17543 IX86_BUILTIN_FRCZPD
,
17544 IX86_BUILTIN_CVTPH2PS
,
17545 IX86_BUILTIN_CVTPS2PH
,
17547 IX86_BUILTIN_COMEQSS
,
17548 IX86_BUILTIN_COMNESS
,
17549 IX86_BUILTIN_COMLTSS
,
17550 IX86_BUILTIN_COMLESS
,
17551 IX86_BUILTIN_COMGTSS
,
17552 IX86_BUILTIN_COMGESS
,
17553 IX86_BUILTIN_COMUEQSS
,
17554 IX86_BUILTIN_COMUNESS
,
17555 IX86_BUILTIN_COMULTSS
,
17556 IX86_BUILTIN_COMULESS
,
17557 IX86_BUILTIN_COMUGTSS
,
17558 IX86_BUILTIN_COMUGESS
,
17559 IX86_BUILTIN_COMORDSS
,
17560 IX86_BUILTIN_COMUNORDSS
,
17561 IX86_BUILTIN_COMFALSESS
,
17562 IX86_BUILTIN_COMTRUESS
,
17564 IX86_BUILTIN_COMEQSD
,
17565 IX86_BUILTIN_COMNESD
,
17566 IX86_BUILTIN_COMLTSD
,
17567 IX86_BUILTIN_COMLESD
,
17568 IX86_BUILTIN_COMGTSD
,
17569 IX86_BUILTIN_COMGESD
,
17570 IX86_BUILTIN_COMUEQSD
,
17571 IX86_BUILTIN_COMUNESD
,
17572 IX86_BUILTIN_COMULTSD
,
17573 IX86_BUILTIN_COMULESD
,
17574 IX86_BUILTIN_COMUGTSD
,
17575 IX86_BUILTIN_COMUGESD
,
17576 IX86_BUILTIN_COMORDSD
,
17577 IX86_BUILTIN_COMUNORDSD
,
17578 IX86_BUILTIN_COMFALSESD
,
17579 IX86_BUILTIN_COMTRUESD
,
17581 IX86_BUILTIN_COMEQPS
,
17582 IX86_BUILTIN_COMNEPS
,
17583 IX86_BUILTIN_COMLTPS
,
17584 IX86_BUILTIN_COMLEPS
,
17585 IX86_BUILTIN_COMGTPS
,
17586 IX86_BUILTIN_COMGEPS
,
17587 IX86_BUILTIN_COMUEQPS
,
17588 IX86_BUILTIN_COMUNEPS
,
17589 IX86_BUILTIN_COMULTPS
,
17590 IX86_BUILTIN_COMULEPS
,
17591 IX86_BUILTIN_COMUGTPS
,
17592 IX86_BUILTIN_COMUGEPS
,
17593 IX86_BUILTIN_COMORDPS
,
17594 IX86_BUILTIN_COMUNORDPS
,
17595 IX86_BUILTIN_COMFALSEPS
,
17596 IX86_BUILTIN_COMTRUEPS
,
17598 IX86_BUILTIN_COMEQPD
,
17599 IX86_BUILTIN_COMNEPD
,
17600 IX86_BUILTIN_COMLTPD
,
17601 IX86_BUILTIN_COMLEPD
,
17602 IX86_BUILTIN_COMGTPD
,
17603 IX86_BUILTIN_COMGEPD
,
17604 IX86_BUILTIN_COMUEQPD
,
17605 IX86_BUILTIN_COMUNEPD
,
17606 IX86_BUILTIN_COMULTPD
,
17607 IX86_BUILTIN_COMULEPD
,
17608 IX86_BUILTIN_COMUGTPD
,
17609 IX86_BUILTIN_COMUGEPD
,
17610 IX86_BUILTIN_COMORDPD
,
17611 IX86_BUILTIN_COMUNORDPD
,
17612 IX86_BUILTIN_COMFALSEPD
,
17613 IX86_BUILTIN_COMTRUEPD
,
17615 IX86_BUILTIN_PCOMEQUB
,
17616 IX86_BUILTIN_PCOMNEUB
,
17617 IX86_BUILTIN_PCOMLTUB
,
17618 IX86_BUILTIN_PCOMLEUB
,
17619 IX86_BUILTIN_PCOMGTUB
,
17620 IX86_BUILTIN_PCOMGEUB
,
17621 IX86_BUILTIN_PCOMFALSEUB
,
17622 IX86_BUILTIN_PCOMTRUEUB
,
17623 IX86_BUILTIN_PCOMEQUW
,
17624 IX86_BUILTIN_PCOMNEUW
,
17625 IX86_BUILTIN_PCOMLTUW
,
17626 IX86_BUILTIN_PCOMLEUW
,
17627 IX86_BUILTIN_PCOMGTUW
,
17628 IX86_BUILTIN_PCOMGEUW
,
17629 IX86_BUILTIN_PCOMFALSEUW
,
17630 IX86_BUILTIN_PCOMTRUEUW
,
17631 IX86_BUILTIN_PCOMEQUD
,
17632 IX86_BUILTIN_PCOMNEUD
,
17633 IX86_BUILTIN_PCOMLTUD
,
17634 IX86_BUILTIN_PCOMLEUD
,
17635 IX86_BUILTIN_PCOMGTUD
,
17636 IX86_BUILTIN_PCOMGEUD
,
17637 IX86_BUILTIN_PCOMFALSEUD
,
17638 IX86_BUILTIN_PCOMTRUEUD
,
17639 IX86_BUILTIN_PCOMEQUQ
,
17640 IX86_BUILTIN_PCOMNEUQ
,
17641 IX86_BUILTIN_PCOMLTUQ
,
17642 IX86_BUILTIN_PCOMLEUQ
,
17643 IX86_BUILTIN_PCOMGTUQ
,
17644 IX86_BUILTIN_PCOMGEUQ
,
17645 IX86_BUILTIN_PCOMFALSEUQ
,
17646 IX86_BUILTIN_PCOMTRUEUQ
,
17648 IX86_BUILTIN_PCOMEQB
,
17649 IX86_BUILTIN_PCOMNEB
,
17650 IX86_BUILTIN_PCOMLTB
,
17651 IX86_BUILTIN_PCOMLEB
,
17652 IX86_BUILTIN_PCOMGTB
,
17653 IX86_BUILTIN_PCOMGEB
,
17654 IX86_BUILTIN_PCOMFALSEB
,
17655 IX86_BUILTIN_PCOMTRUEB
,
17656 IX86_BUILTIN_PCOMEQW
,
17657 IX86_BUILTIN_PCOMNEW
,
17658 IX86_BUILTIN_PCOMLTW
,
17659 IX86_BUILTIN_PCOMLEW
,
17660 IX86_BUILTIN_PCOMGTW
,
17661 IX86_BUILTIN_PCOMGEW
,
17662 IX86_BUILTIN_PCOMFALSEW
,
17663 IX86_BUILTIN_PCOMTRUEW
,
17664 IX86_BUILTIN_PCOMEQD
,
17665 IX86_BUILTIN_PCOMNED
,
17666 IX86_BUILTIN_PCOMLTD
,
17667 IX86_BUILTIN_PCOMLED
,
17668 IX86_BUILTIN_PCOMGTD
,
17669 IX86_BUILTIN_PCOMGED
,
17670 IX86_BUILTIN_PCOMFALSED
,
17671 IX86_BUILTIN_PCOMTRUED
,
17672 IX86_BUILTIN_PCOMEQQ
,
17673 IX86_BUILTIN_PCOMNEQ
,
17674 IX86_BUILTIN_PCOMLTQ
,
17675 IX86_BUILTIN_PCOMLEQ
,
17676 IX86_BUILTIN_PCOMGTQ
,
17677 IX86_BUILTIN_PCOMGEQ
,
17678 IX86_BUILTIN_PCOMFALSEQ
,
17679 IX86_BUILTIN_PCOMTRUEQ
,
17684 /* Table for the ix86 builtin decls. */
17685 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
17687 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17688 * if the target_flags include one of MASK. Stores the function decl
17689 * in the ix86_builtins array.
17690 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17693 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
17695 tree decl
= NULL_TREE
;
17697 if (mask
& ix86_isa_flags
17698 && (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
))
17700 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
17702 ix86_builtins
[(int) code
] = decl
;
17708 /* Like def_builtin, but also marks the function decl "const". */
17711 def_builtin_const (int mask
, const char *name
, tree type
,
17712 enum ix86_builtins code
)
17714 tree decl
= def_builtin (mask
, name
, type
, code
);
17716 TREE_READONLY (decl
) = 1;
17720 /* Bits for builtin_description.flag. */
17722 /* Set when we don't support the comparison natively, and should
17723 swap_comparison in order to support it. */
17724 #define BUILTIN_DESC_SWAP_OPERANDS 1
17726 struct builtin_description
17728 const unsigned int mask
;
17729 const enum insn_code icode
;
17730 const char *const name
;
17731 const enum ix86_builtins code
;
17732 const enum rtx_code comparison
;
17736 static const struct builtin_description bdesc_comi
[] =
17738 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
17739 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
17740 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
17741 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
17742 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
17743 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
17744 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
17745 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
17746 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
17747 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
17748 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
17749 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
17750 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
17751 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
17752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
17753 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
17754 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
17755 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
17756 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
17757 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
17758 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
17759 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
17760 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
17761 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
17764 static const struct builtin_description bdesc_ptest
[] =
17767 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, 0 },
17768 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, 0 },
17769 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, 0 },
17772 static const struct builtin_description bdesc_pcmpestr
[] =
17775 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
17776 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
17777 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
17778 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
17779 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
17780 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
17781 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
17784 static const struct builtin_description bdesc_pcmpistr
[] =
17787 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
17788 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
17789 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
17790 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
17791 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
17792 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
17793 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
17796 static const struct builtin_description bdesc_crc32
[] =
17799 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32qi
, 0, IX86_BUILTIN_CRC32QI
, UNKNOWN
, 0 },
17800 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32hi
, 0, IX86_BUILTIN_CRC32HI
, UNKNOWN
, 0 },
17801 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32si
, 0, IX86_BUILTIN_CRC32SI
, UNKNOWN
, 0 },
17802 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32di
, 0, IX86_BUILTIN_CRC32DI
, UNKNOWN
, 0 },
17805 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17806 static const struct builtin_description bdesc_sse_3arg
[] =
17809 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, 0 },
17810 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, 0 },
17811 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, 0 },
17812 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, 0 },
17813 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, 0 },
17814 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, 0 },
17815 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, 0 },
17816 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, 0 },
17817 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, 0 },
17818 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, 0 },
17819 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, 0, IX86_BUILTIN_ROUNDSD
, UNKNOWN
, 0 },
17820 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, 0, IX86_BUILTIN_ROUNDSS
, UNKNOWN
, 0 },
17823 static const struct builtin_description bdesc_2arg
[] =
17826 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, 0 },
17827 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, 0 },
17828 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, 0 },
17829 { OPTION_MASK_ISA_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, 0 },
17830 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, 0 },
17831 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, 0 },
17832 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, 0 },
17833 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, 0 },
17835 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
17836 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
17837 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
17838 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17839 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17840 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
17841 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
17842 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
17843 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
17844 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17845 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17846 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
17847 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
17848 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
17849 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
17850 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
17851 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
17852 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
17853 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
17854 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17855 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17856 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, 0 },
17858 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, 0 },
17859 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, 0 },
17860 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, 0 },
17861 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, 0 },
17863 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, 0 },
17864 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, 0 },
17865 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, 0 },
17866 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, 0 },
17868 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, 0 },
17869 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, 0 },
17870 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, 0 },
17871 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, 0 },
17872 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, 0 },
17875 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, 0 },
17876 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, 0 },
17877 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, 0 },
17878 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, 0 },
17879 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, 0 },
17880 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, 0 },
17881 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, 0 },
17882 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, 0 },
17884 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, 0 },
17885 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, 0 },
17886 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, 0 },
17887 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, 0 },
17888 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, 0 },
17889 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, 0 },
17890 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, 0 },
17891 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, 0 },
17893 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, 0 },
17894 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, 0 },
17895 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, 0 },
17897 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, 0 },
17898 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, 0 },
17899 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, 0 },
17900 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, 0 },
17902 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, 0 },
17903 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, 0 },
17905 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, 0 },
17906 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, 0 },
17907 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, 0 },
17908 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, 0 },
17909 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, 0 },
17910 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, 0 },
17912 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, 0 },
17913 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, 0 },
17914 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, 0 },
17915 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, 0 },
17917 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, 0 },
17918 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, 0 },
17919 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, 0 },
17920 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, 0 },
17921 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, 0 },
17922 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, 0 },
17925 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, UNKNOWN
, 0 },
17926 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, UNKNOWN
, 0 },
17927 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, UNKNOWN
, 0 },
17929 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, 0 },
17930 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, 0 },
17931 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, 0 },
17933 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, UNKNOWN
, 0 },
17934 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, UNKNOWN
, 0 },
17935 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, UNKNOWN
, 0 },
17936 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, UNKNOWN
, 0 },
17937 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, UNKNOWN
, 0 },
17938 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, UNKNOWN
, 0 },
17940 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, UNKNOWN
, 0 },
17941 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, UNKNOWN
, 0 },
17942 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, UNKNOWN
, 0 },
17943 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, UNKNOWN
, 0 },
17944 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, UNKNOWN
, 0 },
17945 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, UNKNOWN
, 0 },
17947 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, UNKNOWN
, 0 },
17948 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, UNKNOWN
, 0 },
17949 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, UNKNOWN
, 0 },
17950 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, UNKNOWN
, 0 },
17952 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, UNKNOWN
, 0 },
17953 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, UNKNOWN
, 0 },
17956 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, 0 },
17957 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, 0 },
17958 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, 0 },
17959 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, 0 },
17960 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, 0 },
17961 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, 0 },
17962 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, 0 },
17963 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, 0 },
17965 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
17966 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
17967 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
17968 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17969 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17970 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
17971 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
17972 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
17973 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
17974 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17975 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17976 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
17977 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
17978 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
17979 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
17980 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
17981 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
17982 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
17983 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
17984 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
17986 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, 0 },
17987 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, 0 },
17988 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, 0 },
17989 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, 0 },
17991 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, 0 },
17992 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, 0 },
17993 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, 0 },
17994 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, 0 },
17996 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, 0 },
17997 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, 0 },
17998 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, 0 },
18000 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, 0 },
18003 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, 0 },
18004 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, 0 },
18005 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, 0 },
18006 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, 0 },
18007 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, 0 },
18008 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, 0 },
18009 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, 0 },
18010 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, 0 },
18012 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, 0 },
18013 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, 0 },
18014 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, 0 },
18015 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, 0 },
18016 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, 0 },
18017 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, 0 },
18018 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, 0 },
18019 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, 0 },
18021 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, 0 },
18022 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, 0 },
18024 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, 0 },
18025 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, 0 },
18026 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, 0 },
18027 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, 0 },
18029 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, 0 },
18030 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, 0 },
18032 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, 0 },
18033 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, 0 },
18034 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, 0 },
18035 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, 0 },
18036 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, 0 },
18037 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, 0 },
18039 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, 0 },
18040 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, 0 },
18041 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, 0 },
18042 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, 0 },
18044 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, 0 },
18045 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, 0 },
18046 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, 0 },
18047 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, 0 },
18048 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, 0 },
18049 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, 0 },
18050 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, 0 },
18051 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, 0 },
18053 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, 0 },
18054 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, 0 },
18055 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, 0 },
18057 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, 0 },
18058 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, UNKNOWN
, 0 },
18060 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, UNKNOWN
, 0 },
18061 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, 0 },
18063 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, UNKNOWN
, 0 },
18064 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, UNKNOWN
, 0 },
18065 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, UNKNOWN
, 0 },
18067 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, UNKNOWN
, 0 },
18068 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, UNKNOWN
, 0 },
18069 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, UNKNOWN
, 0 },
18071 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, UNKNOWN
, 0 },
18072 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, UNKNOWN
, 0 },
18074 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, UNKNOWN
, 0 },
18076 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, 0 },
18077 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, 0 },
18078 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, 0 },
18079 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, 0 },
18082 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, 0 },
18083 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, 0 },
18084 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, 0 },
18085 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, 0 },
18086 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, 0 },
18087 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, 0 },
18090 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, 0 },
18091 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, 0 },
18092 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, 0 },
18093 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, 0 },
18094 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, 0 },
18095 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, 0 },
18096 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, 0 },
18097 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, 0 },
18098 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, 0 },
18099 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, 0 },
18100 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, 0 },
18101 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, 0 },
18102 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, 0 },
18103 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, 0 },
18104 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, 0 },
18105 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, 0 },
18106 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, 0 },
18107 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, 0 },
18108 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, 0 },
18109 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, 0 },
18110 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, 0 },
18111 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, 0 },
18112 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, 0 },
18113 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, 0 },
18116 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, 0 },
18117 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, 0 },
18118 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, 0 },
18119 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, 0 },
18120 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, 0 },
18121 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, 0 },
18122 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, 0 },
18123 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, 0 },
18124 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, 0 },
18125 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, 0 },
18126 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, 0, IX86_BUILTIN_PMULDQ128
, UNKNOWN
, 0 },
18127 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, 0 },
18130 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, 0 },
18133 static const struct builtin_description bdesc_1arg
[] =
18135 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, 0 },
18136 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, 0 },
18138 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, UNKNOWN
, 0 },
18139 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, UNKNOWN
, 0 },
18140 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, UNKNOWN
, 0 },
18142 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, 0 },
18143 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, 0 },
18144 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, 0 },
18145 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, 0 },
18146 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, 0 },
18147 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, 0 },
18149 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, 0 },
18150 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, 0 },
18152 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, UNKNOWN
, 0 },
18154 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, 0 },
18155 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, 0 },
18157 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, 0 },
18158 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, 0 },
18159 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, 0 },
18160 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, 0 },
18161 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, 0 },
18163 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, 0 },
18165 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, 0 },
18166 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, 0 },
18167 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, 0 },
18168 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, 0 },
18170 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, 0 },
18171 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, 0 },
18172 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, 0 },
18175 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, 0 },
18176 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, 0 },
18179 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, 0 },
18180 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, 0 },
18181 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, 0 },
18182 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, 0 },
18183 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, 0 },
18184 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, 0 },
18187 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, 0 },
18188 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, 0 },
18189 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, 0 },
18190 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, 0 },
18191 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, 0 },
18192 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, 0 },
18193 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, 0 },
18194 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, 0 },
18195 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, 0 },
18196 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, 0 },
18197 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, 0 },
18198 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, 0 },
18199 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, 0 },
18201 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18202 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundpd
, 0, IX86_BUILTIN_ROUNDPD
, UNKNOWN
, 0 },
18203 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundps
, 0, IX86_BUILTIN_ROUNDPS
, UNKNOWN
, 0 },
18207 enum multi_arg_type
{
18217 MULTI_ARG_3_PERMPS
,
18218 MULTI_ARG_3_PERMPD
,
18225 MULTI_ARG_2_DI_IMM
,
18226 MULTI_ARG_2_SI_IMM
,
18227 MULTI_ARG_2_HI_IMM
,
18228 MULTI_ARG_2_QI_IMM
,
18229 MULTI_ARG_2_SF_CMP
,
18230 MULTI_ARG_2_DF_CMP
,
18231 MULTI_ARG_2_DI_CMP
,
18232 MULTI_ARG_2_SI_CMP
,
18233 MULTI_ARG_2_HI_CMP
,
18234 MULTI_ARG_2_QI_CMP
,
18257 static const struct builtin_description bdesc_multi_arg
[] =
18259 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv4sf4
, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS
, 0, (int)MULTI_ARG_3_SF
},
18260 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv2df4
, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD
, 0, (int)MULTI_ARG_3_DF
},
18261 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv4sf4
, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS
, 0, (int)MULTI_ARG_3_SF
},
18262 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv2df4
, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD
, 0, (int)MULTI_ARG_3_DF
},
18263 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv4sf4
, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS
, 0, (int)MULTI_ARG_3_SF
},
18264 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv2df4
, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD
, 0, (int)MULTI_ARG_3_DF
},
18265 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv4sf4
, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS
, 0, (int)MULTI_ARG_3_SF
},
18266 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv2df4
, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD
, 0, (int)MULTI_ARG_3_DF
},
18267 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv4sf4
, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS
, 0, (int)MULTI_ARG_3_SF
},
18268 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv2df4
, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD
, 0, (int)MULTI_ARG_3_DF
},
18269 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv4sf4
, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS
, 0, (int)MULTI_ARG_3_SF
},
18270 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv2df4
, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD
, 0, (int)MULTI_ARG_3_DF
},
18271 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv4sf4
, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS
, 0, (int)MULTI_ARG_3_SF
},
18272 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv2df4
, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD
, 0, (int)MULTI_ARG_3_DF
},
18273 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv4sf4
, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS
, 0, (int)MULTI_ARG_3_SF
},
18274 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv2df4
, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD
, 0, (int)MULTI_ARG_3_DF
},
18275 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI
, 0, (int)MULTI_ARG_3_DI
},
18276 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI
, 0, (int)MULTI_ARG_3_DI
},
18277 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4si
, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI
, 0, (int)MULTI_ARG_3_SI
},
18278 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v8hi
, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI
, 0, (int)MULTI_ARG_3_HI
},
18279 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v16qi
, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI
,0, (int)MULTI_ARG_3_QI
},
18280 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2df
, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF
, 0, (int)MULTI_ARG_3_DF
},
18281 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4sf
, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF
, 0, (int)MULTI_ARG_3_SF
},
18282 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pperm
, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM
, 0, (int)MULTI_ARG_3_QI
},
18283 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv4sf
, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS
, 0, (int)MULTI_ARG_3_PERMPS
},
18284 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv2df
, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD
, 0, (int)MULTI_ARG_3_PERMPD
},
18285 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssww
, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW
, 0, (int)MULTI_ARG_3_HI
},
18286 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsww
, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW
, 0, (int)MULTI_ARG_3_HI
},
18287 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsswd
, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18288 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacswd
, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18289 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdd
, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD
, 0, (int)MULTI_ARG_3_SI
},
18290 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdd
, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD
, 0, (int)MULTI_ARG_3_SI
},
18291 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdql
, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL
, 0, (int)MULTI_ARG_3_SI_DI
},
18292 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdqh
, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH
, 0, (int)MULTI_ARG_3_SI_DI
},
18293 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdql
, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL
, 0, (int)MULTI_ARG_3_SI_DI
},
18294 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdqh
, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH
, 0, (int)MULTI_ARG_3_SI_DI
},
18295 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmadcsswd
, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18296 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmadcswd
, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18297 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv2di3
, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ
, 0, (int)MULTI_ARG_2_DI
},
18298 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv4si3
, "__builtin_ia32_protd", IX86_BUILTIN_PROTD
, 0, (int)MULTI_ARG_2_SI
},
18299 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv8hi3
, "__builtin_ia32_protw", IX86_BUILTIN_PROTW
, 0, (int)MULTI_ARG_2_HI
},
18300 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv16qi3
, "__builtin_ia32_protb", IX86_BUILTIN_PROTB
, 0, (int)MULTI_ARG_2_QI
},
18301 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv2di3
, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM
, 0, (int)MULTI_ARG_2_DI_IMM
},
18302 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv4si3
, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM
, 0, (int)MULTI_ARG_2_SI_IMM
},
18303 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv8hi3
, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM
, 0, (int)MULTI_ARG_2_HI_IMM
},
18304 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv16qi3
, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM
, 0, (int)MULTI_ARG_2_QI_IMM
},
18305 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv2di3
, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ
, 0, (int)MULTI_ARG_2_DI
},
18306 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv4si3
, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD
, 0, (int)MULTI_ARG_2_SI
},
18307 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv8hi3
, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW
, 0, (int)MULTI_ARG_2_HI
},
18308 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv16qi3
, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB
, 0, (int)MULTI_ARG_2_QI
},
18309 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv2di3
, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ
, 0, (int)MULTI_ARG_2_DI
},
18310 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv4si3
, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD
, 0, (int)MULTI_ARG_2_SI
},
18311 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv8hi3
, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW
, 0, (int)MULTI_ARG_2_HI
},
18312 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv16qi3
, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB
, 0, (int)MULTI_ARG_2_QI
},
18313 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmfrczv4sf2
, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS
, 0, (int)MULTI_ARG_2_SF
},
18314 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmfrczv2df2
, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD
, 0, (int)MULTI_ARG_2_DF
},
18315 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_frczv4sf2
, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS
, 0, (int)MULTI_ARG_1_SF
},
18316 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_frczv2df2
, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD
, 0, (int)MULTI_ARG_1_DF
},
18317 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_cvtph2ps
, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS
, 0, (int)MULTI_ARG_1_PH2PS
},
18318 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_cvtps2ph
, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH
, 0, (int)MULTI_ARG_1_PS2PH
},
18319 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbw
, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW
, 0, (int)MULTI_ARG_1_QI_HI
},
18320 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbd
, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD
, 0, (int)MULTI_ARG_1_QI_SI
},
18321 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbq
, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ
, 0, (int)MULTI_ARG_1_QI_DI
},
18322 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddwd
, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD
, 0, (int)MULTI_ARG_1_HI_SI
},
18323 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddwq
, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ
, 0, (int)MULTI_ARG_1_HI_DI
},
18324 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadddq
, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ
, 0, (int)MULTI_ARG_1_SI_DI
},
18325 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubw
, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW
, 0, (int)MULTI_ARG_1_QI_HI
},
18326 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubd
, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD
, 0, (int)MULTI_ARG_1_QI_SI
},
18327 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubq
, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ
, 0, (int)MULTI_ARG_1_QI_DI
},
18328 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadduwd
, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD
, 0, (int)MULTI_ARG_1_HI_SI
},
18329 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadduwq
, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ
, 0, (int)MULTI_ARG_1_HI_DI
},
18330 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddudq
, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ
, 0, (int)MULTI_ARG_1_SI_DI
},
18331 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubbw
, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW
, 0, (int)MULTI_ARG_1_QI_HI
},
18332 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubwd
, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD
, 0, (int)MULTI_ARG_1_HI_SI
},
18333 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubdq
, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ
, 0, (int)MULTI_ARG_1_SI_DI
},
18335 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS
, EQ
, (int)MULTI_ARG_2_SF_CMP
},
18336 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18337 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18338 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS
, LT
, (int)MULTI_ARG_2_SF_CMP
},
18339 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS
, LE
, (int)MULTI_ARG_2_SF_CMP
},
18340 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS
, GT
, (int)MULTI_ARG_2_SF_CMP
},
18341 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS
, GE
, (int)MULTI_ARG_2_SF_CMP
},
18342 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS
, UNEQ
, (int)MULTI_ARG_2_SF_CMP
},
18343 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18344 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18345 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS
, UNLT
, (int)MULTI_ARG_2_SF_CMP
},
18346 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS
, UNLE
, (int)MULTI_ARG_2_SF_CMP
},
18347 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS
, UNGT
, (int)MULTI_ARG_2_SF_CMP
},
18348 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS
, UNGE
, (int)MULTI_ARG_2_SF_CMP
},
18349 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS
, ORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18350 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS
, UNORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18352 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD
, EQ
, (int)MULTI_ARG_2_DF_CMP
},
18353 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18354 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18355 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD
, LT
, (int)MULTI_ARG_2_DF_CMP
},
18356 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD
, LE
, (int)MULTI_ARG_2_DF_CMP
},
18357 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD
, GT
, (int)MULTI_ARG_2_DF_CMP
},
18358 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD
, GE
, (int)MULTI_ARG_2_DF_CMP
},
18359 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD
, UNEQ
, (int)MULTI_ARG_2_DF_CMP
},
18360 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18361 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18362 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD
, UNLT
, (int)MULTI_ARG_2_DF_CMP
},
18363 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD
, UNLE
, (int)MULTI_ARG_2_DF_CMP
},
18364 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD
, UNGT
, (int)MULTI_ARG_2_DF_CMP
},
18365 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD
, UNGE
, (int)MULTI_ARG_2_DF_CMP
},
18366 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD
, ORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18367 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD
, UNORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18369 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS
, EQ
, (int)MULTI_ARG_2_SF_CMP
},
18370 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18371 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18372 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS
, LT
, (int)MULTI_ARG_2_SF_CMP
},
18373 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS
, LE
, (int)MULTI_ARG_2_SF_CMP
},
18374 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS
, GT
, (int)MULTI_ARG_2_SF_CMP
},
18375 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS
, GE
, (int)MULTI_ARG_2_SF_CMP
},
18376 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS
, UNEQ
, (int)MULTI_ARG_2_SF_CMP
},
18377 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18378 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18379 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS
, UNLT
, (int)MULTI_ARG_2_SF_CMP
},
18380 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS
, UNLE
, (int)MULTI_ARG_2_SF_CMP
},
18381 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS
, UNGT
, (int)MULTI_ARG_2_SF_CMP
},
18382 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS
, UNGE
, (int)MULTI_ARG_2_SF_CMP
},
18383 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS
, ORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18384 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS
, UNORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18386 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD
, EQ
, (int)MULTI_ARG_2_DF_CMP
},
18387 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18388 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18389 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD
, LT
, (int)MULTI_ARG_2_DF_CMP
},
18390 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD
, LE
, (int)MULTI_ARG_2_DF_CMP
},
18391 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD
, GT
, (int)MULTI_ARG_2_DF_CMP
},
18392 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD
, GE
, (int)MULTI_ARG_2_DF_CMP
},
18393 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD
, UNEQ
, (int)MULTI_ARG_2_DF_CMP
},
18394 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18395 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18396 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD
, UNLT
, (int)MULTI_ARG_2_DF_CMP
},
18397 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD
, UNLE
, (int)MULTI_ARG_2_DF_CMP
},
18398 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD
, UNGT
, (int)MULTI_ARG_2_DF_CMP
},
18399 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD
, UNGE
, (int)MULTI_ARG_2_DF_CMP
},
18400 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD
, ORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18401 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD
, UNORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18403 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
18404 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18405 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18406 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
18407 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
18408 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
18409 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
18411 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
18412 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18413 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18414 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
18415 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
18416 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
18417 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
18419 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
18420 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18421 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18422 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
18423 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
18424 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
18425 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
18427 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
18428 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18429 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18430 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
18431 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
18432 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
18433 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
18435 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
18436 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18437 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18438 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
18439 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
18440 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
18441 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
18443 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
18444 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18445 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18446 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
18447 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
18448 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
18449 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
18451 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
18452 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18453 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18454 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
18455 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
18456 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
18457 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
18459 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
18460 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18461 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18462 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
18463 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
18464 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
18465 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
18467 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS
, COM_FALSE_S
, (int)MULTI_ARG_2_SF_TF
},
18468 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS
, COM_TRUE_S
, (int)MULTI_ARG_2_SF_TF
},
18469 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS
, COM_FALSE_P
, (int)MULTI_ARG_2_SF_TF
},
18470 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS
, COM_TRUE_P
, (int)MULTI_ARG_2_SF_TF
},
18471 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD
, COM_FALSE_S
, (int)MULTI_ARG_2_DF_TF
},
18472 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD
, COM_TRUE_S
, (int)MULTI_ARG_2_DF_TF
},
18473 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD
, COM_FALSE_P
, (int)MULTI_ARG_2_DF_TF
},
18474 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD
, COM_TRUE_P
, (int)MULTI_ARG_2_DF_TF
},
18476 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB
, PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
18477 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW
, PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
18478 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED
, PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
18479 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ
, PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
18480 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB
,PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
18481 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW
,PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
18482 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD
,PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
18483 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ
,PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
18485 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB
, PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
18486 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW
, PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
18487 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED
, PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
18488 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ
, PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
18489 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB
, PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
18490 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW
, PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
18491 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD
, PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
18492 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ
, PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
18495 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18496 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18499 ix86_init_mmx_sse_builtins (void)
18501 const struct builtin_description
* d
;
18504 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
18505 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
18506 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
18507 tree V2DI_type_node
18508 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
18509 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
18510 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
18511 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
18512 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
18513 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
18514 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
18516 tree pchar_type_node
= build_pointer_type (char_type_node
);
18517 tree pcchar_type_node
= build_pointer_type (
18518 build_type_variant (char_type_node
, 1, 0));
18519 tree pfloat_type_node
= build_pointer_type (float_type_node
);
18520 tree pcfloat_type_node
= build_pointer_type (
18521 build_type_variant (float_type_node
, 1, 0));
18522 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
18523 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
18524 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
18527 tree int_ftype_v4sf_v4sf
18528 = build_function_type_list (integer_type_node
,
18529 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18530 tree v4si_ftype_v4sf_v4sf
18531 = build_function_type_list (V4SI_type_node
,
18532 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18533 /* MMX/SSE/integer conversions. */
18534 tree int_ftype_v4sf
18535 = build_function_type_list (integer_type_node
,
18536 V4SF_type_node
, NULL_TREE
);
18537 tree int64_ftype_v4sf
18538 = build_function_type_list (long_long_integer_type_node
,
18539 V4SF_type_node
, NULL_TREE
);
18540 tree int_ftype_v8qi
18541 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
18542 tree v4sf_ftype_v4sf_int
18543 = build_function_type_list (V4SF_type_node
,
18544 V4SF_type_node
, integer_type_node
, NULL_TREE
);
18545 tree v4sf_ftype_v4sf_int64
18546 = build_function_type_list (V4SF_type_node
,
18547 V4SF_type_node
, long_long_integer_type_node
,
18549 tree v4sf_ftype_v4sf_v2si
18550 = build_function_type_list (V4SF_type_node
,
18551 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
18553 /* Miscellaneous. */
18554 tree v8qi_ftype_v4hi_v4hi
18555 = build_function_type_list (V8QI_type_node
,
18556 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18557 tree v4hi_ftype_v2si_v2si
18558 = build_function_type_list (V4HI_type_node
,
18559 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18560 tree v4sf_ftype_v4sf_v4sf_int
18561 = build_function_type_list (V4SF_type_node
,
18562 V4SF_type_node
, V4SF_type_node
,
18563 integer_type_node
, NULL_TREE
);
18564 tree v2si_ftype_v4hi_v4hi
18565 = build_function_type_list (V2SI_type_node
,
18566 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18567 tree v4hi_ftype_v4hi_int
18568 = build_function_type_list (V4HI_type_node
,
18569 V4HI_type_node
, integer_type_node
, NULL_TREE
);
18570 tree v4hi_ftype_v4hi_di
18571 = build_function_type_list (V4HI_type_node
,
18572 V4HI_type_node
, long_long_unsigned_type_node
,
18574 tree v2si_ftype_v2si_di
18575 = build_function_type_list (V2SI_type_node
,
18576 V2SI_type_node
, long_long_unsigned_type_node
,
18578 tree void_ftype_void
18579 = build_function_type (void_type_node
, void_list_node
);
18580 tree void_ftype_unsigned
18581 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
18582 tree void_ftype_unsigned_unsigned
18583 = build_function_type_list (void_type_node
, unsigned_type_node
,
18584 unsigned_type_node
, NULL_TREE
);
18585 tree void_ftype_pcvoid_unsigned_unsigned
18586 = build_function_type_list (void_type_node
, const_ptr_type_node
,
18587 unsigned_type_node
, unsigned_type_node
,
18589 tree unsigned_ftype_void
18590 = build_function_type (unsigned_type_node
, void_list_node
);
18591 tree v2si_ftype_v4sf
18592 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
18593 /* Loads/stores. */
18594 tree void_ftype_v8qi_v8qi_pchar
18595 = build_function_type_list (void_type_node
,
18596 V8QI_type_node
, V8QI_type_node
,
18597 pchar_type_node
, NULL_TREE
);
18598 tree v4sf_ftype_pcfloat
18599 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
18600 /* @@@ the type is bogus */
18601 tree v4sf_ftype_v4sf_pv2si
18602 = build_function_type_list (V4SF_type_node
,
18603 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
18604 tree void_ftype_pv2si_v4sf
18605 = build_function_type_list (void_type_node
,
18606 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
18607 tree void_ftype_pfloat_v4sf
18608 = build_function_type_list (void_type_node
,
18609 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
18610 tree void_ftype_pdi_di
18611 = build_function_type_list (void_type_node
,
18612 pdi_type_node
, long_long_unsigned_type_node
,
18614 tree void_ftype_pv2di_v2di
18615 = build_function_type_list (void_type_node
,
18616 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
18617 /* Normal vector unops. */
18618 tree v4sf_ftype_v4sf
18619 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18620 tree v16qi_ftype_v16qi
18621 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18622 tree v8hi_ftype_v8hi
18623 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18624 tree v4si_ftype_v4si
18625 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18626 tree v8qi_ftype_v8qi
18627 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18628 tree v4hi_ftype_v4hi
18629 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18631 /* Normal vector binops. */
18632 tree v4sf_ftype_v4sf_v4sf
18633 = build_function_type_list (V4SF_type_node
,
18634 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18635 tree v8qi_ftype_v8qi_v8qi
18636 = build_function_type_list (V8QI_type_node
,
18637 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18638 tree v4hi_ftype_v4hi_v4hi
18639 = build_function_type_list (V4HI_type_node
,
18640 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18641 tree v2si_ftype_v2si_v2si
18642 = build_function_type_list (V2SI_type_node
,
18643 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18644 tree di_ftype_di_di
18645 = build_function_type_list (long_long_unsigned_type_node
,
18646 long_long_unsigned_type_node
,
18647 long_long_unsigned_type_node
, NULL_TREE
);
18649 tree di_ftype_di_di_int
18650 = build_function_type_list (long_long_unsigned_type_node
,
18651 long_long_unsigned_type_node
,
18652 long_long_unsigned_type_node
,
18653 integer_type_node
, NULL_TREE
);
18655 tree v2si_ftype_v2sf
18656 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
18657 tree v2sf_ftype_v2si
18658 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
18659 tree v2si_ftype_v2si
18660 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18661 tree v2sf_ftype_v2sf
18662 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
18663 tree v2sf_ftype_v2sf_v2sf
18664 = build_function_type_list (V2SF_type_node
,
18665 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
18666 tree v2si_ftype_v2sf_v2sf
18667 = build_function_type_list (V2SI_type_node
,
18668 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
18669 tree pint_type_node
= build_pointer_type (integer_type_node
);
18670 tree pdouble_type_node
= build_pointer_type (double_type_node
);
18671 tree pcdouble_type_node
= build_pointer_type (
18672 build_type_variant (double_type_node
, 1, 0));
18673 tree int_ftype_v2df_v2df
18674 = build_function_type_list (integer_type_node
,
18675 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18677 tree void_ftype_pcvoid
18678 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
18679 tree v4sf_ftype_v4si
18680 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
18681 tree v4si_ftype_v4sf
18682 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
18683 tree v2df_ftype_v4si
18684 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
18685 tree v4si_ftype_v2df
18686 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
18687 tree v4si_ftype_v2df_v2df
18688 = build_function_type_list (V4SI_type_node
,
18689 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18690 tree v2si_ftype_v2df
18691 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
18692 tree v4sf_ftype_v2df
18693 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
18694 tree v2df_ftype_v2si
18695 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
18696 tree v2df_ftype_v4sf
18697 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
18698 tree int_ftype_v2df
18699 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
18700 tree int64_ftype_v2df
18701 = build_function_type_list (long_long_integer_type_node
,
18702 V2DF_type_node
, NULL_TREE
);
18703 tree v2df_ftype_v2df_int
18704 = build_function_type_list (V2DF_type_node
,
18705 V2DF_type_node
, integer_type_node
, NULL_TREE
);
18706 tree v2df_ftype_v2df_int64
18707 = build_function_type_list (V2DF_type_node
,
18708 V2DF_type_node
, long_long_integer_type_node
,
18710 tree v4sf_ftype_v4sf_v2df
18711 = build_function_type_list (V4SF_type_node
,
18712 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
18713 tree v2df_ftype_v2df_v4sf
18714 = build_function_type_list (V2DF_type_node
,
18715 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
18716 tree v2df_ftype_v2df_v2df_int
18717 = build_function_type_list (V2DF_type_node
,
18718 V2DF_type_node
, V2DF_type_node
,
18721 tree v2df_ftype_v2df_pcdouble
18722 = build_function_type_list (V2DF_type_node
,
18723 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
18724 tree void_ftype_pdouble_v2df
18725 = build_function_type_list (void_type_node
,
18726 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
18727 tree void_ftype_pint_int
18728 = build_function_type_list (void_type_node
,
18729 pint_type_node
, integer_type_node
, NULL_TREE
);
18730 tree void_ftype_v16qi_v16qi_pchar
18731 = build_function_type_list (void_type_node
,
18732 V16QI_type_node
, V16QI_type_node
,
18733 pchar_type_node
, NULL_TREE
);
18734 tree v2df_ftype_pcdouble
18735 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
18736 tree v2df_ftype_v2df_v2df
18737 = build_function_type_list (V2DF_type_node
,
18738 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18739 tree v16qi_ftype_v16qi_v16qi
18740 = build_function_type_list (V16QI_type_node
,
18741 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18742 tree v8hi_ftype_v8hi_v8hi
18743 = build_function_type_list (V8HI_type_node
,
18744 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18745 tree v4si_ftype_v4si_v4si
18746 = build_function_type_list (V4SI_type_node
,
18747 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18748 tree v2di_ftype_v2di_v2di
18749 = build_function_type_list (V2DI_type_node
,
18750 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
18751 tree v2di_ftype_v2df_v2df
18752 = build_function_type_list (V2DI_type_node
,
18753 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18754 tree v2df_ftype_v2df
18755 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18756 tree v2di_ftype_v2di_int
18757 = build_function_type_list (V2DI_type_node
,
18758 V2DI_type_node
, integer_type_node
, NULL_TREE
);
18759 tree v2di_ftype_v2di_v2di_int
18760 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18761 V2DI_type_node
, integer_type_node
, NULL_TREE
);
18762 tree v4si_ftype_v4si_int
18763 = build_function_type_list (V4SI_type_node
,
18764 V4SI_type_node
, integer_type_node
, NULL_TREE
);
18765 tree v8hi_ftype_v8hi_int
18766 = build_function_type_list (V8HI_type_node
,
18767 V8HI_type_node
, integer_type_node
, NULL_TREE
);
18768 tree v4si_ftype_v8hi_v8hi
18769 = build_function_type_list (V4SI_type_node
,
18770 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18771 tree di_ftype_v8qi_v8qi
18772 = build_function_type_list (long_long_unsigned_type_node
,
18773 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18774 tree di_ftype_v2si_v2si
18775 = build_function_type_list (long_long_unsigned_type_node
,
18776 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18777 tree v2di_ftype_v16qi_v16qi
18778 = build_function_type_list (V2DI_type_node
,
18779 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18780 tree v2di_ftype_v4si_v4si
18781 = build_function_type_list (V2DI_type_node
,
18782 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18783 tree int_ftype_v16qi
18784 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
18785 tree v16qi_ftype_pcchar
18786 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
18787 tree void_ftype_pchar_v16qi
18788 = build_function_type_list (void_type_node
,
18789 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
18791 tree v2di_ftype_v2di_unsigned_unsigned
18792 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18793 unsigned_type_node
, unsigned_type_node
,
18795 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18796 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
18797 unsigned_type_node
, unsigned_type_node
,
18799 tree v2di_ftype_v2di_v16qi
18800 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
18802 tree v2df_ftype_v2df_v2df_v2df
18803 = build_function_type_list (V2DF_type_node
,
18804 V2DF_type_node
, V2DF_type_node
,
18805 V2DF_type_node
, NULL_TREE
);
18806 tree v4sf_ftype_v4sf_v4sf_v4sf
18807 = build_function_type_list (V4SF_type_node
,
18808 V4SF_type_node
, V4SF_type_node
,
18809 V4SF_type_node
, NULL_TREE
);
18810 tree v8hi_ftype_v16qi
18811 = build_function_type_list (V8HI_type_node
, V16QI_type_node
,
18813 tree v4si_ftype_v16qi
18814 = build_function_type_list (V4SI_type_node
, V16QI_type_node
,
18816 tree v2di_ftype_v16qi
18817 = build_function_type_list (V2DI_type_node
, V16QI_type_node
,
18819 tree v4si_ftype_v8hi
18820 = build_function_type_list (V4SI_type_node
, V8HI_type_node
,
18822 tree v2di_ftype_v8hi
18823 = build_function_type_list (V2DI_type_node
, V8HI_type_node
,
18825 tree v2di_ftype_v4si
18826 = build_function_type_list (V2DI_type_node
, V4SI_type_node
,
18828 tree v2di_ftype_pv2di
18829 = build_function_type_list (V2DI_type_node
, pv2di_type_node
,
18831 tree v16qi_ftype_v16qi_v16qi_int
18832 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18833 V16QI_type_node
, integer_type_node
,
18835 tree v16qi_ftype_v16qi_v16qi_v16qi
18836 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18837 V16QI_type_node
, V16QI_type_node
,
18839 tree v8hi_ftype_v8hi_v8hi_int
18840 = build_function_type_list (V8HI_type_node
, V8HI_type_node
,
18841 V8HI_type_node
, integer_type_node
,
18843 tree v4si_ftype_v4si_v4si_int
18844 = build_function_type_list (V4SI_type_node
, V4SI_type_node
,
18845 V4SI_type_node
, integer_type_node
,
18847 tree int_ftype_v2di_v2di
18848 = build_function_type_list (integer_type_node
,
18849 V2DI_type_node
, V2DI_type_node
,
18851 tree int_ftype_v16qi_int_v16qi_int_int
18852 = build_function_type_list (integer_type_node
,
18859 tree v16qi_ftype_v16qi_int_v16qi_int_int
18860 = build_function_type_list (V16QI_type_node
,
18867 tree int_ftype_v16qi_v16qi_int
18868 = build_function_type_list (integer_type_node
,
18874 /* SSE5 instructions */
18875 tree v2di_ftype_v2di_v2di_v2di
18876 = build_function_type_list (V2DI_type_node
,
18882 tree v4si_ftype_v4si_v4si_v4si
18883 = build_function_type_list (V4SI_type_node
,
18889 tree v4si_ftype_v4si_v4si_v2di
18890 = build_function_type_list (V4SI_type_node
,
18896 tree v8hi_ftype_v8hi_v8hi_v8hi
18897 = build_function_type_list (V8HI_type_node
,
18903 tree v8hi_ftype_v8hi_v8hi_v4si
18904 = build_function_type_list (V8HI_type_node
,
18910 tree v2df_ftype_v2df_v2df_v16qi
18911 = build_function_type_list (V2DF_type_node
,
18917 tree v4sf_ftype_v4sf_v4sf_v16qi
18918 = build_function_type_list (V4SF_type_node
,
18924 tree v2di_ftype_v2di_si
18925 = build_function_type_list (V2DI_type_node
,
18930 tree v4si_ftype_v4si_si
18931 = build_function_type_list (V4SI_type_node
,
18936 tree v8hi_ftype_v8hi_si
18937 = build_function_type_list (V8HI_type_node
,
18942 tree v16qi_ftype_v16qi_si
18943 = build_function_type_list (V16QI_type_node
,
18947 tree v4sf_ftype_v4hi
18948 = build_function_type_list (V4SF_type_node
,
18952 tree v4hi_ftype_v4sf
18953 = build_function_type_list (V4HI_type_node
,
18957 tree v2di_ftype_v2di
18958 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
18962 /* The __float80 type. */
18963 if (TYPE_MODE (long_double_type_node
) == XFmode
)
18964 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
18968 /* The __float80 type. */
18969 tree float80_type_node
= make_node (REAL_TYPE
);
18971 TYPE_PRECISION (float80_type_node
) = 80;
18972 layout_type (float80_type_node
);
18973 (*lang_hooks
.types
.register_builtin_type
) (float80_type_node
,
18979 tree float128_type_node
= make_node (REAL_TYPE
);
18981 TYPE_PRECISION (float128_type_node
) = 128;
18982 layout_type (float128_type_node
);
18983 (*lang_hooks
.types
.register_builtin_type
) (float128_type_node
,
18986 /* TFmode support builtins. */
18987 ftype
= build_function_type (float128_type_node
,
18989 def_builtin (OPTION_MASK_ISA_64BIT
, "__builtin_infq", ftype
, IX86_BUILTIN_INFQ
);
18991 ftype
= build_function_type_list (float128_type_node
,
18992 float128_type_node
,
18994 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_fabsq", ftype
, IX86_BUILTIN_FABSQ
);
18996 ftype
= build_function_type_list (float128_type_node
,
18997 float128_type_node
,
18998 float128_type_node
,
19000 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_copysignq", ftype
, IX86_BUILTIN_COPYSIGNQ
);
19003 /* Add all SSE builtins that are more or less simple operations on
19005 for (i
= 0, d
= bdesc_sse_3arg
;
19006 i
< ARRAY_SIZE (bdesc_sse_3arg
);
19009 /* Use one of the operands; the target can have a different mode for
19010 mask-generating compares. */
19011 enum machine_mode mode
;
19016 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19021 type
= v16qi_ftype_v16qi_v16qi_int
;
19024 type
= v8hi_ftype_v8hi_v8hi_int
;
19027 type
= v4si_ftype_v4si_v4si_int
;
19030 type
= v2di_ftype_v2di_v2di_int
;
19033 type
= v2df_ftype_v2df_v2df_int
;
19036 type
= v4sf_ftype_v4sf_v4sf_int
;
19039 gcc_unreachable ();
19042 /* Override for variable blends. */
19045 case CODE_FOR_sse4_1_blendvpd
:
19046 type
= v2df_ftype_v2df_v2df_v2df
;
19048 case CODE_FOR_sse4_1_blendvps
:
19049 type
= v4sf_ftype_v4sf_v4sf_v4sf
;
19051 case CODE_FOR_sse4_1_pblendvb
:
19052 type
= v16qi_ftype_v16qi_v16qi_v16qi
;
19058 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
19061 /* Add all builtins that are more or less simple operations on two
19063 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19065 /* Use one of the operands; the target can have a different mode for
19066 mask-generating compares. */
19067 enum machine_mode mode
;
19072 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19077 type
= v16qi_ftype_v16qi_v16qi
;
19080 type
= v8hi_ftype_v8hi_v8hi
;
19083 type
= v4si_ftype_v4si_v4si
;
19086 type
= v2di_ftype_v2di_v2di
;
19089 type
= v2df_ftype_v2df_v2df
;
19092 type
= v4sf_ftype_v4sf_v4sf
;
19095 type
= v8qi_ftype_v8qi_v8qi
;
19098 type
= v4hi_ftype_v4hi_v4hi
;
19101 type
= v2si_ftype_v2si_v2si
;
19104 type
= di_ftype_di_di
;
19108 gcc_unreachable ();
19111 /* Override for comparisons. */
19112 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
19113 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
19114 type
= v4si_ftype_v4sf_v4sf
;
19116 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
19117 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
19118 type
= v2di_ftype_v2df_v2df
;
19120 if (d
->icode
== CODE_FOR_vec_pack_sfix_v2df
)
19121 type
= v4si_ftype_v2df_v2df
;
19123 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
19126 /* Add all builtins that are more or less simple operations on 1 operand. */
19127 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19129 enum machine_mode mode
;
19134 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19139 type
= v16qi_ftype_v16qi
;
19142 type
= v8hi_ftype_v8hi
;
19145 type
= v4si_ftype_v4si
;
19148 type
= v2df_ftype_v2df
;
19151 type
= v4sf_ftype_v4sf
;
19154 type
= v8qi_ftype_v8qi
;
19157 type
= v4hi_ftype_v4hi
;
19160 type
= v2si_ftype_v2si
;
19167 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
19170 /* pcmpestr[im] insns. */
19171 for (i
= 0, d
= bdesc_pcmpestr
;
19172 i
< ARRAY_SIZE (bdesc_pcmpestr
);
19175 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
19176 ftype
= v16qi_ftype_v16qi_int_v16qi_int_int
;
19178 ftype
= int_ftype_v16qi_int_v16qi_int_int
;
19179 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
19182 /* pcmpistr[im] insns. */
19183 for (i
= 0, d
= bdesc_pcmpistr
;
19184 i
< ARRAY_SIZE (bdesc_pcmpistr
);
19187 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
19188 ftype
= v16qi_ftype_v16qi_v16qi_int
;
19190 ftype
= int_ftype_v16qi_v16qi_int
;
19191 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
19194 /* Add the remaining MMX insns with somewhat more complicated types. */
19195 def_builtin (OPTION_MASK_ISA_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
19196 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
19197 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
19198 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
19200 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
19201 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
19202 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
19204 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
19205 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
19207 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
19208 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
19210 /* comi/ucomi insns. */
19211 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
19212 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
19213 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
19215 def_builtin_const (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
19218 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
19219 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2di_v2di
, d
->code
);
19221 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
19222 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
19223 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
19225 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
19226 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
19227 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
19228 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
19229 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
19230 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
19231 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
19232 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
19233 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
19234 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
19235 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
19237 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
19239 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
19240 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
19242 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
19243 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
19244 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
19245 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
19247 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
19248 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
19249 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
19250 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
19252 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
19254 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
19256 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
19257 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
19258 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
19259 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
19260 ftype
= build_function_type_list (float_type_node
,
19263 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtf", ftype
, IX86_BUILTIN_RSQRTF
);
19264 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
19265 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
19267 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
19269 /* Original 3DNow! */
19270 def_builtin (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
19271 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
19272 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
19273 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
19274 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
19275 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
19276 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
19277 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
19278 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
19279 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
19280 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
19281 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
19282 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
19283 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
19284 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
19285 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
19286 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
19287 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
19288 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
19289 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
19291 /* 3DNow! extension as used in the Athlon CPU. */
19292 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
19293 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
19294 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
19295 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
19296 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
19297 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
19300 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
19302 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
19303 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
19305 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
19306 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
19308 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
19309 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
19310 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
19311 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
19312 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
19314 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
19315 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
19316 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
19317 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
19319 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
19320 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
19322 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
19324 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
19325 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
19327 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
19328 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
19329 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
19330 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
19331 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
19333 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
19335 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
19336 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
19337 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
19338 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
19340 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
19341 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
19342 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
19344 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
19345 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
19346 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
19347 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
19349 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
19350 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
19351 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
19353 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
19354 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
19356 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
19357 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
19359 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
19360 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
19361 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
19362 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
19363 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
19364 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
19365 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
19367 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
19368 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
19369 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
19370 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
19371 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
19372 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
19373 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
19375 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
19376 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
19377 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
19378 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
19380 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
19382 /* Prescott New Instructions. */
19383 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned
, IX86_BUILTIN_MONITOR
);
19384 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned
, IX86_BUILTIN_MWAIT
);
19385 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_lddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
19388 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
19389 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
, IX86_BUILTIN_PALIGNR
);
19392 def_builtin (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_movntdqa", v2di_ftype_pv2di
, IX86_BUILTIN_MOVNTDQA
);
19393 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVSXBW128
);
19394 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVSXBD128
);
19395 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVSXBQ128
);
19396 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVSXWD128
);
19397 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVSXWQ128
);
19398 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVSXDQ128
);
19399 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVZXBW128
);
19400 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVZXBD128
);
19401 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVZXBQ128
);
19402 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVZXWD128
);
19403 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVZXWQ128
);
19404 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVZXDQ128
);
19405 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULDQ128
);
19407 /* SSE4.1 and SSE5 */
19408 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundpd", v2df_ftype_v2df_int
, IX86_BUILTIN_ROUNDPD
);
19409 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int
, IX86_BUILTIN_ROUNDPS
);
19410 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_ROUNDSD
);
19411 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_ROUNDSS
);
19414 ftype
= build_function_type_list (unsigned_type_node
,
19415 unsigned_type_node
,
19416 unsigned_char_type_node
,
19418 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32qi", ftype
, IX86_BUILTIN_CRC32QI
);
19419 ftype
= build_function_type_list (unsigned_type_node
,
19420 unsigned_type_node
,
19421 short_unsigned_type_node
,
19423 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32hi", ftype
, IX86_BUILTIN_CRC32HI
);
19424 ftype
= build_function_type_list (unsigned_type_node
,
19425 unsigned_type_node
,
19426 unsigned_type_node
,
19428 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32si", ftype
, IX86_BUILTIN_CRC32SI
);
19429 ftype
= build_function_type_list (long_long_unsigned_type_node
,
19430 long_long_unsigned_type_node
,
19431 long_long_unsigned_type_node
,
19433 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32di", ftype
, IX86_BUILTIN_CRC32DI
);
19435 /* AMDFAM10 SSE4A New built-ins */
19436 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
19437 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
19438 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
19439 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
19440 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
19441 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
19443 /* Access to the vec_init patterns. */
19444 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
19445 integer_type_node
, NULL_TREE
);
19446 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si", ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
19448 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
19449 short_integer_type_node
,
19450 short_integer_type_node
,
19451 short_integer_type_node
, NULL_TREE
);
19452 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi", ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
19454 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
19455 char_type_node
, char_type_node
,
19456 char_type_node
, char_type_node
,
19457 char_type_node
, char_type_node
,
19458 char_type_node
, NULL_TREE
);
19459 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi", ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
19461 /* Access to the vec_extract patterns. */
19462 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
19463 integer_type_node
, NULL_TREE
);
19464 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df", ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
19466 ftype
= build_function_type_list (long_long_integer_type_node
,
19467 V2DI_type_node
, integer_type_node
,
19469 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di", ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
19471 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
19472 integer_type_node
, NULL_TREE
);
19473 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf", ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
19475 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
19476 integer_type_node
, NULL_TREE
);
19477 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si", ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
19479 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
19480 integer_type_node
, NULL_TREE
);
19481 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi", ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
19483 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
19484 integer_type_node
, NULL_TREE
);
19485 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi", ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
19487 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
19488 integer_type_node
, NULL_TREE
);
19489 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si", ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
19491 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
19492 integer_type_node
, NULL_TREE
);
19493 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi", ftype
, IX86_BUILTIN_VEC_EXT_V16QI
);
19495 /* Access to the vec_set patterns. */
19496 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
19498 integer_type_node
, NULL_TREE
);
19499 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_vec_set_v2di", ftype
, IX86_BUILTIN_VEC_SET_V2DI
);
19501 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
19503 integer_type_node
, NULL_TREE
);
19504 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf", ftype
, IX86_BUILTIN_VEC_SET_V4SF
);
19506 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
19508 integer_type_node
, NULL_TREE
);
19509 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si", ftype
, IX86_BUILTIN_VEC_SET_V4SI
);
19511 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
19513 integer_type_node
, NULL_TREE
);
19514 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi", ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
19516 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
19518 integer_type_node
, NULL_TREE
);
19519 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_set_v4hi", ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
19521 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
19523 integer_type_node
, NULL_TREE
);
19524 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi", ftype
, IX86_BUILTIN_VEC_SET_V16QI
);
19526 /* Add SSE5 multi-arg argument instructions */
19527 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
19529 tree mtype
= NULL_TREE
;
19534 switch ((enum multi_arg_type
)d
->flag
)
19536 case MULTI_ARG_3_SF
: mtype
= v4sf_ftype_v4sf_v4sf_v4sf
; break;
19537 case MULTI_ARG_3_DF
: mtype
= v2df_ftype_v2df_v2df_v2df
; break;
19538 case MULTI_ARG_3_DI
: mtype
= v2di_ftype_v2di_v2di_v2di
; break;
19539 case MULTI_ARG_3_SI
: mtype
= v4si_ftype_v4si_v4si_v4si
; break;
19540 case MULTI_ARG_3_SI_DI
: mtype
= v4si_ftype_v4si_v4si_v2di
; break;
19541 case MULTI_ARG_3_HI
: mtype
= v8hi_ftype_v8hi_v8hi_v8hi
; break;
19542 case MULTI_ARG_3_HI_SI
: mtype
= v8hi_ftype_v8hi_v8hi_v4si
; break;
19543 case MULTI_ARG_3_QI
: mtype
= v16qi_ftype_v16qi_v16qi_v16qi
; break;
19544 case MULTI_ARG_3_PERMPS
: mtype
= v4sf_ftype_v4sf_v4sf_v16qi
; break;
19545 case MULTI_ARG_3_PERMPD
: mtype
= v2df_ftype_v2df_v2df_v16qi
; break;
19546 case MULTI_ARG_2_SF
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
19547 case MULTI_ARG_2_DF
: mtype
= v2df_ftype_v2df_v2df
; break;
19548 case MULTI_ARG_2_DI
: mtype
= v2di_ftype_v2di_v2di
; break;
19549 case MULTI_ARG_2_SI
: mtype
= v4si_ftype_v4si_v4si
; break;
19550 case MULTI_ARG_2_HI
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
19551 case MULTI_ARG_2_QI
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
19552 case MULTI_ARG_2_DI_IMM
: mtype
= v2di_ftype_v2di_si
; break;
19553 case MULTI_ARG_2_SI_IMM
: mtype
= v4si_ftype_v4si_si
; break;
19554 case MULTI_ARG_2_HI_IMM
: mtype
= v8hi_ftype_v8hi_si
; break;
19555 case MULTI_ARG_2_QI_IMM
: mtype
= v16qi_ftype_v16qi_si
; break;
19556 case MULTI_ARG_2_SF_CMP
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
19557 case MULTI_ARG_2_DF_CMP
: mtype
= v2df_ftype_v2df_v2df
; break;
19558 case MULTI_ARG_2_DI_CMP
: mtype
= v2di_ftype_v2di_v2di
; break;
19559 case MULTI_ARG_2_SI_CMP
: mtype
= v4si_ftype_v4si_v4si
; break;
19560 case MULTI_ARG_2_HI_CMP
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
19561 case MULTI_ARG_2_QI_CMP
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
19562 case MULTI_ARG_2_SF_TF
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
19563 case MULTI_ARG_2_DF_TF
: mtype
= v2df_ftype_v2df_v2df
; break;
19564 case MULTI_ARG_2_DI_TF
: mtype
= v2di_ftype_v2di_v2di
; break;
19565 case MULTI_ARG_2_SI_TF
: mtype
= v4si_ftype_v4si_v4si
; break;
19566 case MULTI_ARG_2_HI_TF
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
19567 case MULTI_ARG_2_QI_TF
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
19568 case MULTI_ARG_1_SF
: mtype
= v4sf_ftype_v4sf
; break;
19569 case MULTI_ARG_1_DF
: mtype
= v2df_ftype_v2df
; break;
19570 case MULTI_ARG_1_DI
: mtype
= v2di_ftype_v2di
; break;
19571 case MULTI_ARG_1_SI
: mtype
= v4si_ftype_v4si
; break;
19572 case MULTI_ARG_1_HI
: mtype
= v8hi_ftype_v8hi
; break;
19573 case MULTI_ARG_1_QI
: mtype
= v16qi_ftype_v16qi
; break;
19574 case MULTI_ARG_1_SI_DI
: mtype
= v2di_ftype_v4si
; break;
19575 case MULTI_ARG_1_HI_DI
: mtype
= v2di_ftype_v8hi
; break;
19576 case MULTI_ARG_1_HI_SI
: mtype
= v4si_ftype_v8hi
; break;
19577 case MULTI_ARG_1_QI_DI
: mtype
= v2di_ftype_v16qi
; break;
19578 case MULTI_ARG_1_QI_SI
: mtype
= v4si_ftype_v16qi
; break;
19579 case MULTI_ARG_1_QI_HI
: mtype
= v8hi_ftype_v16qi
; break;
19580 case MULTI_ARG_1_PH2PS
: mtype
= v4sf_ftype_v4hi
; break;
19581 case MULTI_ARG_1_PS2PH
: mtype
= v4hi_ftype_v4sf
; break;
19582 case MULTI_ARG_UNKNOWN
:
19584 gcc_unreachable ();
19588 def_builtin_const (d
->mask
, d
->name
, mtype
, d
->code
);
19593 ix86_init_builtins (void)
19596 ix86_init_mmx_sse_builtins ();
19599 /* Errors in the source file can cause expand_expr to return const0_rtx
19600 where we expect a vector. To avoid crashing, use one of the vector
19601 clear instructions. */
19603 safe_vector_operand (rtx x
, enum machine_mode mode
)
19605 if (x
== const0_rtx
)
19606 x
= CONST0_RTX (mode
);
19610 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19611 4 operands. The third argument must be a constant smaller than 8
19615 ix86_expand_sse_4_operands_builtin (enum insn_code icode
, tree exp
,
19619 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19620 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19621 tree arg2
= CALL_EXPR_ARG (exp
, 2);
19622 rtx op0
= expand_normal (arg0
);
19623 rtx op1
= expand_normal (arg1
);
19624 rtx op2
= expand_normal (arg2
);
19625 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19626 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
19627 enum machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
19628 enum machine_mode mode3
= insn_data
[icode
].operand
[3].mode
;
19630 if (VECTOR_MODE_P (mode1
))
19631 op0
= safe_vector_operand (op0
, mode1
);
19632 if (VECTOR_MODE_P (mode2
))
19633 op1
= safe_vector_operand (op1
, mode2
);
19634 if (VECTOR_MODE_P (mode3
))
19635 op2
= safe_vector_operand (op2
, mode3
);
19639 || GET_MODE (target
) != tmode
19640 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19641 target
= gen_reg_rtx (tmode
);
19643 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19644 op0
= copy_to_mode_reg (mode1
, op0
);
19645 if ((optimize
&& !register_operand (op1
, mode2
))
19646 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19647 op1
= copy_to_mode_reg (mode2
, op1
);
19649 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19652 case CODE_FOR_sse4_1_blendvpd
:
19653 case CODE_FOR_sse4_1_blendvps
:
19654 case CODE_FOR_sse4_1_pblendvb
:
19655 op2
= copy_to_mode_reg (mode3
, op2
);
19658 case CODE_FOR_sse4_1_roundsd
:
19659 case CODE_FOR_sse4_1_roundss
:
19660 error ("the third argument must be a 4-bit immediate");
19664 error ("the third argument must be an 8-bit immediate");
19668 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19675 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19678 ix86_expand_crc32 (enum insn_code icode
, tree exp
, rtx target
)
19681 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19682 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19683 rtx op0
= expand_normal (arg0
);
19684 rtx op1
= expand_normal (arg1
);
19685 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19686 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19687 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19691 || GET_MODE (target
) != tmode
19692 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19693 target
= gen_reg_rtx (tmode
);
19695 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19696 op0
= copy_to_mode_reg (mode0
, op0
);
19697 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19699 op1
= copy_to_reg (op1
);
19700 op1
= simplify_gen_subreg (mode1
, op1
, GET_MODE (op1
), 0);
19703 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19710 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19713 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
19716 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19717 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19718 rtx op0
= expand_normal (arg0
);
19719 rtx op1
= expand_normal (arg1
);
19720 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19721 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19722 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19724 if (VECTOR_MODE_P (mode0
))
19725 op0
= safe_vector_operand (op0
, mode0
);
19726 if (VECTOR_MODE_P (mode1
))
19727 op1
= safe_vector_operand (op1
, mode1
);
19729 if (optimize
|| !target
19730 || GET_MODE (target
) != tmode
19731 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19732 target
= gen_reg_rtx (tmode
);
19734 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
19736 rtx x
= gen_reg_rtx (V4SImode
);
19737 emit_insn (gen_sse2_loadd (x
, op1
));
19738 op1
= gen_lowpart (TImode
, x
);
19741 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19742 op0
= copy_to_mode_reg (mode0
, op0
);
19743 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19744 op1
= copy_to_mode_reg (mode1
, op1
);
19746 /* ??? Using ix86_fixup_binary_operands is problematic when
19747 we've got mismatched modes. Fake it. */
19753 if (tmode
== mode0
&& tmode
== mode1
)
19755 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
19759 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
19761 op0
= force_reg (mode0
, op0
);
19762 op1
= force_reg (mode1
, op1
);
19763 target
= gen_reg_rtx (tmode
);
19766 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19773 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19776 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
19777 enum multi_arg_type m_type
,
19778 enum insn_code sub_code
)
19783 bool comparison_p
= false;
19785 bool last_arg_constant
= false;
19786 int num_memory
= 0;
19789 enum machine_mode mode
;
19792 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19796 case MULTI_ARG_3_SF
:
19797 case MULTI_ARG_3_DF
:
19798 case MULTI_ARG_3_DI
:
19799 case MULTI_ARG_3_SI
:
19800 case MULTI_ARG_3_SI_DI
:
19801 case MULTI_ARG_3_HI
:
19802 case MULTI_ARG_3_HI_SI
:
19803 case MULTI_ARG_3_QI
:
19804 case MULTI_ARG_3_PERMPS
:
19805 case MULTI_ARG_3_PERMPD
:
19809 case MULTI_ARG_2_SF
:
19810 case MULTI_ARG_2_DF
:
19811 case MULTI_ARG_2_DI
:
19812 case MULTI_ARG_2_SI
:
19813 case MULTI_ARG_2_HI
:
19814 case MULTI_ARG_2_QI
:
19818 case MULTI_ARG_2_DI_IMM
:
19819 case MULTI_ARG_2_SI_IMM
:
19820 case MULTI_ARG_2_HI_IMM
:
19821 case MULTI_ARG_2_QI_IMM
:
19823 last_arg_constant
= true;
19826 case MULTI_ARG_1_SF
:
19827 case MULTI_ARG_1_DF
:
19828 case MULTI_ARG_1_DI
:
19829 case MULTI_ARG_1_SI
:
19830 case MULTI_ARG_1_HI
:
19831 case MULTI_ARG_1_QI
:
19832 case MULTI_ARG_1_SI_DI
:
19833 case MULTI_ARG_1_HI_DI
:
19834 case MULTI_ARG_1_HI_SI
:
19835 case MULTI_ARG_1_QI_DI
:
19836 case MULTI_ARG_1_QI_SI
:
19837 case MULTI_ARG_1_QI_HI
:
19838 case MULTI_ARG_1_PH2PS
:
19839 case MULTI_ARG_1_PS2PH
:
19843 case MULTI_ARG_2_SF_CMP
:
19844 case MULTI_ARG_2_DF_CMP
:
19845 case MULTI_ARG_2_DI_CMP
:
19846 case MULTI_ARG_2_SI_CMP
:
19847 case MULTI_ARG_2_HI_CMP
:
19848 case MULTI_ARG_2_QI_CMP
:
19850 comparison_p
= true;
19853 case MULTI_ARG_2_SF_TF
:
19854 case MULTI_ARG_2_DF_TF
:
19855 case MULTI_ARG_2_DI_TF
:
19856 case MULTI_ARG_2_SI_TF
:
19857 case MULTI_ARG_2_HI_TF
:
19858 case MULTI_ARG_2_QI_TF
:
19863 case MULTI_ARG_UNKNOWN
:
19865 gcc_unreachable ();
19868 if (optimize
|| !target
19869 || GET_MODE (target
) != tmode
19870 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19871 target
= gen_reg_rtx (tmode
);
19873 gcc_assert (nargs
<= 4);
19875 for (i
= 0; i
< nargs
; i
++)
19877 tree arg
= CALL_EXPR_ARG (exp
, i
);
19878 rtx op
= expand_normal (arg
);
19879 int adjust
= (comparison_p
) ? 1 : 0;
19880 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
19882 if (last_arg_constant
&& i
== nargs
-1)
19884 if (GET_CODE (op
) != CONST_INT
)
19886 error ("last argument must be an immediate");
19887 return gen_reg_rtx (tmode
);
19892 if (VECTOR_MODE_P (mode
))
19893 op
= safe_vector_operand (op
, mode
);
19895 /* If we aren't optimizing, only allow one memory operand to be
19897 if (memory_operand (op
, mode
))
19900 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
19903 || ! (*insn_data
[icode
].operand
[i
+adjust
+1].predicate
) (op
, mode
)
19905 op
= force_reg (mode
, op
);
19909 args
[i
].mode
= mode
;
19915 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
19920 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
19921 GEN_INT ((int)sub_code
));
19922 else if (! comparison_p
)
19923 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
19926 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
19930 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
19935 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
19939 gcc_unreachable ();
19949 /* Subroutine of ix86_expand_builtin to take care of stores. */
19952 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
19955 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19956 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19957 rtx op0
= expand_normal (arg0
);
19958 rtx op1
= expand_normal (arg1
);
19959 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
19960 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
19962 if (VECTOR_MODE_P (mode1
))
19963 op1
= safe_vector_operand (op1
, mode1
);
19965 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19966 op1
= copy_to_mode_reg (mode1
, op1
);
19968 pat
= GEN_FCN (icode
) (op0
, op1
);
19974 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
19977 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
19978 rtx target
, int do_load
)
19981 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19982 rtx op0
= expand_normal (arg0
);
19983 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19984 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19986 if (optimize
|| !target
19987 || GET_MODE (target
) != tmode
19988 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19989 target
= gen_reg_rtx (tmode
);
19991 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19994 if (VECTOR_MODE_P (mode0
))
19995 op0
= safe_vector_operand (op0
, mode0
);
19997 if ((optimize
&& !register_operand (op0
, mode0
))
19998 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19999 op0
= copy_to_mode_reg (mode0
, op0
);
20004 case CODE_FOR_sse4_1_roundpd
:
20005 case CODE_FOR_sse4_1_roundps
:
20007 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20008 rtx op1
= expand_normal (arg1
);
20009 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
20011 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20013 error ("the second argument must be a 4-bit immediate");
20016 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20020 pat
= GEN_FCN (icode
) (target
, op0
);
20030 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20031 sqrtss, rsqrtss, rcpss. */
20034 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
20037 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20038 rtx op1
, op0
= expand_normal (arg0
);
20039 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20040 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
20042 if (optimize
|| !target
20043 || GET_MODE (target
) != tmode
20044 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20045 target
= gen_reg_rtx (tmode
);
20047 if (VECTOR_MODE_P (mode0
))
20048 op0
= safe_vector_operand (op0
, mode0
);
20050 if ((optimize
&& !register_operand (op0
, mode0
))
20051 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20052 op0
= copy_to_mode_reg (mode0
, op0
);
20055 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
20056 op1
= copy_to_mode_reg (mode0
, op1
);
20058 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20065 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20068 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
20072 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20073 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20074 rtx op0
= expand_normal (arg0
);
20075 rtx op1
= expand_normal (arg1
);
20077 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
20078 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
20079 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
20080 enum rtx_code comparison
= d
->comparison
;
20082 if (VECTOR_MODE_P (mode0
))
20083 op0
= safe_vector_operand (op0
, mode0
);
20084 if (VECTOR_MODE_P (mode1
))
20085 op1
= safe_vector_operand (op1
, mode1
);
20087 /* Swap operands if we have a comparison that isn't available in
20089 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
20091 rtx tmp
= gen_reg_rtx (mode1
);
20092 emit_move_insn (tmp
, op1
);
20097 if (optimize
|| !target
20098 || GET_MODE (target
) != tmode
20099 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
20100 target
= gen_reg_rtx (tmode
);
20102 if ((optimize
&& !register_operand (op0
, mode0
))
20103 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
20104 op0
= copy_to_mode_reg (mode0
, op0
);
20105 if ((optimize
&& !register_operand (op1
, mode1
))
20106 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
20107 op1
= copy_to_mode_reg (mode1
, op1
);
20109 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
20110 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
20117 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20120 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
20124 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20125 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20126 rtx op0
= expand_normal (arg0
);
20127 rtx op1
= expand_normal (arg1
);
20128 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
20129 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
20130 enum rtx_code comparison
= d
->comparison
;
20132 if (VECTOR_MODE_P (mode0
))
20133 op0
= safe_vector_operand (op0
, mode0
);
20134 if (VECTOR_MODE_P (mode1
))
20135 op1
= safe_vector_operand (op1
, mode1
);
20137 /* Swap operands if we have a comparison that isn't available in
20139 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
20146 target
= gen_reg_rtx (SImode
);
20147 emit_move_insn (target
, const0_rtx
);
20148 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20150 if ((optimize
&& !register_operand (op0
, mode0
))
20151 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
20152 op0
= copy_to_mode_reg (mode0
, op0
);
20153 if ((optimize
&& !register_operand (op1
, mode1
))
20154 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
20155 op1
= copy_to_mode_reg (mode1
, op1
);
20157 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
20161 emit_insn (gen_rtx_SET (VOIDmode
,
20162 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20163 gen_rtx_fmt_ee (comparison
, QImode
,
20167 return SUBREG_REG (target
);
20170 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20173 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
20177 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20178 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20179 rtx op0
= expand_normal (arg0
);
20180 rtx op1
= expand_normal (arg1
);
20181 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
20182 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
20183 enum rtx_code comparison
= d
->comparison
;
20185 if (VECTOR_MODE_P (mode0
))
20186 op0
= safe_vector_operand (op0
, mode0
);
20187 if (VECTOR_MODE_P (mode1
))
20188 op1
= safe_vector_operand (op1
, mode1
);
20190 target
= gen_reg_rtx (SImode
);
20191 emit_move_insn (target
, const0_rtx
);
20192 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20194 if ((optimize
&& !register_operand (op0
, mode0
))
20195 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
20196 op0
= copy_to_mode_reg (mode0
, op0
);
20197 if ((optimize
&& !register_operand (op1
, mode1
))
20198 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
20199 op1
= copy_to_mode_reg (mode1
, op1
);
20201 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
20205 emit_insn (gen_rtx_SET (VOIDmode
,
20206 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20207 gen_rtx_fmt_ee (comparison
, QImode
,
20211 return SUBREG_REG (target
);
20214 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20217 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
20218 tree exp
, rtx target
)
20221 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20222 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20223 tree arg2
= CALL_EXPR_ARG (exp
, 2);
20224 tree arg3
= CALL_EXPR_ARG (exp
, 3);
20225 tree arg4
= CALL_EXPR_ARG (exp
, 4);
20226 rtx scratch0
, scratch1
;
20227 rtx op0
= expand_normal (arg0
);
20228 rtx op1
= expand_normal (arg1
);
20229 rtx op2
= expand_normal (arg2
);
20230 rtx op3
= expand_normal (arg3
);
20231 rtx op4
= expand_normal (arg4
);
20232 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
20234 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
20235 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
20236 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
20237 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
20238 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
20239 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
20240 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
20242 if (VECTOR_MODE_P (modev2
))
20243 op0
= safe_vector_operand (op0
, modev2
);
20244 if (VECTOR_MODE_P (modev4
))
20245 op2
= safe_vector_operand (op2
, modev4
);
20247 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
20248 op0
= copy_to_mode_reg (modev2
, op0
);
20249 if (! (*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modei3
))
20250 op1
= copy_to_mode_reg (modei3
, op1
);
20251 if ((optimize
&& !register_operand (op2
, modev4
))
20252 || !(*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modev4
))
20253 op2
= copy_to_mode_reg (modev4
, op2
);
20254 if (! (*insn_data
[d
->icode
].operand
[5].predicate
) (op3
, modei5
))
20255 op3
= copy_to_mode_reg (modei5
, op3
);
20257 if (! (*insn_data
[d
->icode
].operand
[6].predicate
) (op4
, modeimm
))
20259 error ("the fifth argument must be a 8-bit immediate");
20263 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
20265 if (optimize
|| !target
20266 || GET_MODE (target
) != tmode0
20267 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
20268 target
= gen_reg_rtx (tmode0
);
20270 scratch1
= gen_reg_rtx (tmode1
);
20272 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
20274 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
20276 if (optimize
|| !target
20277 || GET_MODE (target
) != tmode1
20278 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
20279 target
= gen_reg_rtx (tmode1
);
20281 scratch0
= gen_reg_rtx (tmode0
);
20283 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
20287 gcc_assert (d
->flag
);
20289 scratch0
= gen_reg_rtx (tmode0
);
20290 scratch1
= gen_reg_rtx (tmode1
);
20292 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
20302 target
= gen_reg_rtx (SImode
);
20303 emit_move_insn (target
, const0_rtx
);
20304 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20307 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20308 gen_rtx_fmt_ee (EQ
, QImode
,
20309 gen_rtx_REG ((enum machine_mode
) d
->flag
,
20312 return SUBREG_REG (target
);
20319 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20322 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
20323 tree exp
, rtx target
)
20326 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20327 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20328 tree arg2
= CALL_EXPR_ARG (exp
, 2);
20329 rtx scratch0
, scratch1
;
20330 rtx op0
= expand_normal (arg0
);
20331 rtx op1
= expand_normal (arg1
);
20332 rtx op2
= expand_normal (arg2
);
20333 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
20335 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
20336 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
20337 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
20338 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
20339 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
20341 if (VECTOR_MODE_P (modev2
))
20342 op0
= safe_vector_operand (op0
, modev2
);
20343 if (VECTOR_MODE_P (modev3
))
20344 op1
= safe_vector_operand (op1
, modev3
);
20346 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
20347 op0
= copy_to_mode_reg (modev2
, op0
);
20348 if ((optimize
&& !register_operand (op1
, modev3
))
20349 || !(*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modev3
))
20350 op1
= copy_to_mode_reg (modev3
, op1
);
20352 if (! (*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modeimm
))
20354 error ("the third argument must be a 8-bit immediate");
20358 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
20360 if (optimize
|| !target
20361 || GET_MODE (target
) != tmode0
20362 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
20363 target
= gen_reg_rtx (tmode0
);
20365 scratch1
= gen_reg_rtx (tmode1
);
20367 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
20369 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
20371 if (optimize
|| !target
20372 || GET_MODE (target
) != tmode1
20373 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
20374 target
= gen_reg_rtx (tmode1
);
20376 scratch0
= gen_reg_rtx (tmode0
);
20378 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
20382 gcc_assert (d
->flag
);
20384 scratch0
= gen_reg_rtx (tmode0
);
20385 scratch1
= gen_reg_rtx (tmode1
);
20387 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
20397 target
= gen_reg_rtx (SImode
);
20398 emit_move_insn (target
, const0_rtx
);
20399 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20402 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20403 gen_rtx_fmt_ee (EQ
, QImode
,
20404 gen_rtx_REG ((enum machine_mode
) d
->flag
,
20407 return SUBREG_REG (target
);
20413 /* Return the integer constant in ARG. Constrain it to be in the range
20414 of the subparts of VEC_TYPE; issue an error if not. */
20417 get_element_number (tree vec_type
, tree arg
)
20419 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
20421 if (!host_integerp (arg
, 1)
20422 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
20424 error ("selector must be an integer constant in the range 0..%wi", max
);
20431 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20432 ix86_expand_vector_init. We DO have language-level syntax for this, in
20433 the form of (type){ init-list }. Except that since we can't place emms
20434 instructions from inside the compiler, we can't allow the use of MMX
20435 registers unless the user explicitly asks for it. So we do *not* define
20436 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20437 we have builtins invoked by mmintrin.h that gives us license to emit
20438 these sorts of instructions. */
20441 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
20443 enum machine_mode tmode
= TYPE_MODE (type
);
20444 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
20445 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
20446 rtvec v
= rtvec_alloc (n_elt
);
20448 gcc_assert (VECTOR_MODE_P (tmode
));
20449 gcc_assert (call_expr_nargs (exp
) == n_elt
);
20451 for (i
= 0; i
< n_elt
; ++i
)
20453 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
20454 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
20457 if (!target
|| !register_operand (target
, tmode
))
20458 target
= gen_reg_rtx (tmode
);
20460 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
20464 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20465 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20466 had a language-level syntax for referencing vector elements. */
20469 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
20471 enum machine_mode tmode
, mode0
;
20476 arg0
= CALL_EXPR_ARG (exp
, 0);
20477 arg1
= CALL_EXPR_ARG (exp
, 1);
20479 op0
= expand_normal (arg0
);
20480 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
20482 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
20483 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
20484 gcc_assert (VECTOR_MODE_P (mode0
));
20486 op0
= force_reg (mode0
, op0
);
20488 if (optimize
|| !target
|| !register_operand (target
, tmode
))
20489 target
= gen_reg_rtx (tmode
);
20491 ix86_expand_vector_extract (true, target
, op0
, elt
);
20496 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20497 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20498 a language-level syntax for referencing vector elements. */
20501 ix86_expand_vec_set_builtin (tree exp
)
20503 enum machine_mode tmode
, mode1
;
20504 tree arg0
, arg1
, arg2
;
20506 rtx op0
, op1
, target
;
20508 arg0
= CALL_EXPR_ARG (exp
, 0);
20509 arg1
= CALL_EXPR_ARG (exp
, 1);
20510 arg2
= CALL_EXPR_ARG (exp
, 2);
20512 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
20513 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
20514 gcc_assert (VECTOR_MODE_P (tmode
));
20516 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
20517 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
20518 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
20520 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
20521 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
20523 op0
= force_reg (tmode
, op0
);
20524 op1
= force_reg (mode1
, op1
);
20526 /* OP0 is the source of these builtin functions and shouldn't be
20527 modified. Create a copy, use it and return it as target. */
20528 target
= gen_reg_rtx (tmode
);
20529 emit_move_insn (target
, op0
);
20530 ix86_expand_vector_set (true, target
, op1
, elt
);
20535 /* Expand an expression EXP that calls a built-in function,
20536 with result going to TARGET if that's convenient
20537 (and in mode MODE if that's convenient).
20538 SUBTARGET may be used as the target for computing one of EXP's operands.
20539 IGNORE is nonzero if the value is to be ignored. */
20542 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
20543 enum machine_mode mode ATTRIBUTE_UNUSED
,
20544 int ignore ATTRIBUTE_UNUSED
)
20546 const struct builtin_description
*d
;
20548 enum insn_code icode
;
20549 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
20550 tree arg0
, arg1
, arg2
, arg3
;
20551 rtx op0
, op1
, op2
, op3
, pat
;
20552 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
20553 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
20557 case IX86_BUILTIN_EMMS
:
20558 emit_insn (gen_mmx_emms ());
20561 case IX86_BUILTIN_SFENCE
:
20562 emit_insn (gen_sse_sfence ());
20565 case IX86_BUILTIN_MASKMOVQ
:
20566 case IX86_BUILTIN_MASKMOVDQU
:
20567 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
20568 ? CODE_FOR_mmx_maskmovq
20569 : CODE_FOR_sse2_maskmovdqu
);
20570 /* Note the arg order is different from the operand order. */
20571 arg1
= CALL_EXPR_ARG (exp
, 0);
20572 arg2
= CALL_EXPR_ARG (exp
, 1);
20573 arg0
= CALL_EXPR_ARG (exp
, 2);
20574 op0
= expand_normal (arg0
);
20575 op1
= expand_normal (arg1
);
20576 op2
= expand_normal (arg2
);
20577 mode0
= insn_data
[icode
].operand
[0].mode
;
20578 mode1
= insn_data
[icode
].operand
[1].mode
;
20579 mode2
= insn_data
[icode
].operand
[2].mode
;
20581 op0
= force_reg (Pmode
, op0
);
20582 op0
= gen_rtx_MEM (mode1
, op0
);
20584 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
20585 op0
= copy_to_mode_reg (mode0
, op0
);
20586 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
20587 op1
= copy_to_mode_reg (mode1
, op1
);
20588 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
20589 op2
= copy_to_mode_reg (mode2
, op2
);
20590 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
20596 case IX86_BUILTIN_RSQRTF
:
20597 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
20599 case IX86_BUILTIN_SQRTSS
:
20600 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
20601 case IX86_BUILTIN_RSQRTSS
:
20602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
20603 case IX86_BUILTIN_RCPSS
:
20604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
20606 case IX86_BUILTIN_LOADUPS
:
20607 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
20609 case IX86_BUILTIN_STOREUPS
:
20610 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
20612 case IX86_BUILTIN_LOADHPS
:
20613 case IX86_BUILTIN_LOADLPS
:
20614 case IX86_BUILTIN_LOADHPD
:
20615 case IX86_BUILTIN_LOADLPD
:
20616 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
20617 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
20618 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
20619 : CODE_FOR_sse2_loadlpd
);
20620 arg0
= CALL_EXPR_ARG (exp
, 0);
20621 arg1
= CALL_EXPR_ARG (exp
, 1);
20622 op0
= expand_normal (arg0
);
20623 op1
= expand_normal (arg1
);
20624 tmode
= insn_data
[icode
].operand
[0].mode
;
20625 mode0
= insn_data
[icode
].operand
[1].mode
;
20626 mode1
= insn_data
[icode
].operand
[2].mode
;
20628 op0
= force_reg (mode0
, op0
);
20629 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
20630 if (optimize
|| target
== 0
20631 || GET_MODE (target
) != tmode
20632 || !register_operand (target
, tmode
))
20633 target
= gen_reg_rtx (tmode
);
20634 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20640 case IX86_BUILTIN_STOREHPS
:
20641 case IX86_BUILTIN_STORELPS
:
20642 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
20643 : CODE_FOR_sse_storelps
);
20644 arg0
= CALL_EXPR_ARG (exp
, 0);
20645 arg1
= CALL_EXPR_ARG (exp
, 1);
20646 op0
= expand_normal (arg0
);
20647 op1
= expand_normal (arg1
);
20648 mode0
= insn_data
[icode
].operand
[0].mode
;
20649 mode1
= insn_data
[icode
].operand
[1].mode
;
20651 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
20652 op1
= force_reg (mode1
, op1
);
20654 pat
= GEN_FCN (icode
) (op0
, op1
);
20660 case IX86_BUILTIN_MOVNTPS
:
20661 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
20662 case IX86_BUILTIN_MOVNTQ
:
20663 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
20665 case IX86_BUILTIN_LDMXCSR
:
20666 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
20667 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
20668 emit_move_insn (target
, op0
);
20669 emit_insn (gen_sse_ldmxcsr (target
));
20672 case IX86_BUILTIN_STMXCSR
:
20673 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
20674 emit_insn (gen_sse_stmxcsr (target
));
20675 return copy_to_mode_reg (SImode
, target
);
20677 case IX86_BUILTIN_SHUFPS
:
20678 case IX86_BUILTIN_SHUFPD
:
20679 icode
= (fcode
== IX86_BUILTIN_SHUFPS
20680 ? CODE_FOR_sse_shufps
20681 : CODE_FOR_sse2_shufpd
);
20682 arg0
= CALL_EXPR_ARG (exp
, 0);
20683 arg1
= CALL_EXPR_ARG (exp
, 1);
20684 arg2
= CALL_EXPR_ARG (exp
, 2);
20685 op0
= expand_normal (arg0
);
20686 op1
= expand_normal (arg1
);
20687 op2
= expand_normal (arg2
);
20688 tmode
= insn_data
[icode
].operand
[0].mode
;
20689 mode0
= insn_data
[icode
].operand
[1].mode
;
20690 mode1
= insn_data
[icode
].operand
[2].mode
;
20691 mode2
= insn_data
[icode
].operand
[3].mode
;
20693 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20694 op0
= copy_to_mode_reg (mode0
, op0
);
20695 if ((optimize
&& !register_operand (op1
, mode1
))
20696 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20697 op1
= copy_to_mode_reg (mode1
, op1
);
20698 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
20700 /* @@@ better error message */
20701 error ("mask must be an immediate");
20702 return gen_reg_rtx (tmode
);
20704 if (optimize
|| target
== 0
20705 || GET_MODE (target
) != tmode
20706 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20707 target
= gen_reg_rtx (tmode
);
20708 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
20714 case IX86_BUILTIN_PSHUFW
:
20715 case IX86_BUILTIN_PSHUFD
:
20716 case IX86_BUILTIN_PSHUFHW
:
20717 case IX86_BUILTIN_PSHUFLW
:
20718 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
20719 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
20720 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
20721 : CODE_FOR_mmx_pshufw
);
20722 arg0
= CALL_EXPR_ARG (exp
, 0);
20723 arg1
= CALL_EXPR_ARG (exp
, 1);
20724 op0
= expand_normal (arg0
);
20725 op1
= expand_normal (arg1
);
20726 tmode
= insn_data
[icode
].operand
[0].mode
;
20727 mode1
= insn_data
[icode
].operand
[1].mode
;
20728 mode2
= insn_data
[icode
].operand
[2].mode
;
20730 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20731 op0
= copy_to_mode_reg (mode1
, op0
);
20732 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
20734 /* @@@ better error message */
20735 error ("mask must be an immediate");
20739 || GET_MODE (target
) != tmode
20740 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20741 target
= gen_reg_rtx (tmode
);
20742 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20748 case IX86_BUILTIN_PSLLW128
:
20749 case IX86_BUILTIN_PSLLWI128
:
20750 icode
= CODE_FOR_ashlv8hi3
;
20752 case IX86_BUILTIN_PSLLD128
:
20753 case IX86_BUILTIN_PSLLDI128
:
20754 icode
= CODE_FOR_ashlv4si3
;
20756 case IX86_BUILTIN_PSLLQ128
:
20757 case IX86_BUILTIN_PSLLQI128
:
20758 icode
= CODE_FOR_ashlv2di3
;
20760 case IX86_BUILTIN_PSRAW128
:
20761 case IX86_BUILTIN_PSRAWI128
:
20762 icode
= CODE_FOR_ashrv8hi3
;
20764 case IX86_BUILTIN_PSRAD128
:
20765 case IX86_BUILTIN_PSRADI128
:
20766 icode
= CODE_FOR_ashrv4si3
;
20768 case IX86_BUILTIN_PSRLW128
:
20769 case IX86_BUILTIN_PSRLWI128
:
20770 icode
= CODE_FOR_lshrv8hi3
;
20772 case IX86_BUILTIN_PSRLD128
:
20773 case IX86_BUILTIN_PSRLDI128
:
20774 icode
= CODE_FOR_lshrv4si3
;
20776 case IX86_BUILTIN_PSRLQ128
:
20777 case IX86_BUILTIN_PSRLQI128
:
20778 icode
= CODE_FOR_lshrv2di3
;
20781 arg0
= CALL_EXPR_ARG (exp
, 0);
20782 arg1
= CALL_EXPR_ARG (exp
, 1);
20783 op0
= expand_normal (arg0
);
20784 op1
= expand_normal (arg1
);
20786 tmode
= insn_data
[icode
].operand
[0].mode
;
20787 mode1
= insn_data
[icode
].operand
[1].mode
;
20789 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20790 op0
= copy_to_reg (op0
);
20792 if (!CONST_INT_P (op1
))
20793 op1
= simplify_gen_subreg (SImode
, op1
, GET_MODE (op1
), 0);
20795 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, SImode
))
20796 op1
= copy_to_reg (op1
);
20798 target
= gen_reg_rtx (tmode
);
20799 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20805 case IX86_BUILTIN_PSLLDQI128
:
20806 case IX86_BUILTIN_PSRLDQI128
:
20807 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
20808 : CODE_FOR_sse2_lshrti3
);
20809 arg0
= CALL_EXPR_ARG (exp
, 0);
20810 arg1
= CALL_EXPR_ARG (exp
, 1);
20811 op0
= expand_normal (arg0
);
20812 op1
= expand_normal (arg1
);
20813 tmode
= insn_data
[icode
].operand
[0].mode
;
20814 mode1
= insn_data
[icode
].operand
[1].mode
;
20815 mode2
= insn_data
[icode
].operand
[2].mode
;
20817 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20819 op0
= copy_to_reg (op0
);
20820 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
20822 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
20824 error ("shift must be an immediate");
20827 target
= gen_reg_rtx (V2DImode
);
20828 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
20835 case IX86_BUILTIN_FEMMS
:
20836 emit_insn (gen_mmx_femms ());
20839 case IX86_BUILTIN_PAVGUSB
:
20840 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
20842 case IX86_BUILTIN_PF2ID
:
20843 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
20845 case IX86_BUILTIN_PFACC
:
20846 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
20848 case IX86_BUILTIN_PFADD
:
20849 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
20851 case IX86_BUILTIN_PFCMPEQ
:
20852 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
20854 case IX86_BUILTIN_PFCMPGE
:
20855 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
20857 case IX86_BUILTIN_PFCMPGT
:
20858 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
20860 case IX86_BUILTIN_PFMAX
:
20861 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
20863 case IX86_BUILTIN_PFMIN
:
20864 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
20866 case IX86_BUILTIN_PFMUL
:
20867 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
20869 case IX86_BUILTIN_PFRCP
:
20870 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
20872 case IX86_BUILTIN_PFRCPIT1
:
20873 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
20875 case IX86_BUILTIN_PFRCPIT2
:
20876 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
20878 case IX86_BUILTIN_PFRSQIT1
:
20879 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
20881 case IX86_BUILTIN_PFRSQRT
:
20882 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
20884 case IX86_BUILTIN_PFSUB
:
20885 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
20887 case IX86_BUILTIN_PFSUBR
:
20888 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
20890 case IX86_BUILTIN_PI2FD
:
20891 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
20893 case IX86_BUILTIN_PMULHRW
:
20894 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
20896 case IX86_BUILTIN_PF2IW
:
20897 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
20899 case IX86_BUILTIN_PFNACC
:
20900 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
20902 case IX86_BUILTIN_PFPNACC
:
20903 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
20905 case IX86_BUILTIN_PI2FW
:
20906 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
20908 case IX86_BUILTIN_PSWAPDSI
:
20909 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
20911 case IX86_BUILTIN_PSWAPDSF
:
20912 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
20914 case IX86_BUILTIN_SQRTSD
:
20915 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
20916 case IX86_BUILTIN_LOADUPD
:
20917 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
20918 case IX86_BUILTIN_STOREUPD
:
20919 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
20921 case IX86_BUILTIN_MFENCE
:
20922 emit_insn (gen_sse2_mfence ());
20924 case IX86_BUILTIN_LFENCE
:
20925 emit_insn (gen_sse2_lfence ());
20928 case IX86_BUILTIN_CLFLUSH
:
20929 arg0
= CALL_EXPR_ARG (exp
, 0);
20930 op0
= expand_normal (arg0
);
20931 icode
= CODE_FOR_sse2_clflush
;
20932 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
20933 op0
= copy_to_mode_reg (Pmode
, op0
);
20935 emit_insn (gen_sse2_clflush (op0
));
20938 case IX86_BUILTIN_MOVNTPD
:
20939 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
20940 case IX86_BUILTIN_MOVNTDQ
:
20941 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
20942 case IX86_BUILTIN_MOVNTI
:
20943 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
20945 case IX86_BUILTIN_LOADDQU
:
20946 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
20947 case IX86_BUILTIN_STOREDQU
:
20948 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
20950 case IX86_BUILTIN_MONITOR
:
20951 arg0
= CALL_EXPR_ARG (exp
, 0);
20952 arg1
= CALL_EXPR_ARG (exp
, 1);
20953 arg2
= CALL_EXPR_ARG (exp
, 2);
20954 op0
= expand_normal (arg0
);
20955 op1
= expand_normal (arg1
);
20956 op2
= expand_normal (arg2
);
20958 op0
= copy_to_mode_reg (Pmode
, op0
);
20960 op1
= copy_to_mode_reg (SImode
, op1
);
20962 op2
= copy_to_mode_reg (SImode
, op2
);
20964 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
20966 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
20969 case IX86_BUILTIN_MWAIT
:
20970 arg0
= CALL_EXPR_ARG (exp
, 0);
20971 arg1
= CALL_EXPR_ARG (exp
, 1);
20972 op0
= expand_normal (arg0
);
20973 op1
= expand_normal (arg1
);
20975 op0
= copy_to_mode_reg (SImode
, op0
);
20977 op1
= copy_to_mode_reg (SImode
, op1
);
20978 emit_insn (gen_sse3_mwait (op0
, op1
));
20981 case IX86_BUILTIN_LDDQU
:
20982 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
20985 case IX86_BUILTIN_PALIGNR
:
20986 case IX86_BUILTIN_PALIGNR128
:
20987 if (fcode
== IX86_BUILTIN_PALIGNR
)
20989 icode
= CODE_FOR_ssse3_palignrdi
;
20994 icode
= CODE_FOR_ssse3_palignrti
;
20997 arg0
= CALL_EXPR_ARG (exp
, 0);
20998 arg1
= CALL_EXPR_ARG (exp
, 1);
20999 arg2
= CALL_EXPR_ARG (exp
, 2);
21000 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
21001 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
21002 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
21003 tmode
= insn_data
[icode
].operand
[0].mode
;
21004 mode1
= insn_data
[icode
].operand
[1].mode
;
21005 mode2
= insn_data
[icode
].operand
[2].mode
;
21006 mode3
= insn_data
[icode
].operand
[3].mode
;
21008 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21010 op0
= copy_to_reg (op0
);
21011 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
21013 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21015 op1
= copy_to_reg (op1
);
21016 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
21018 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
21020 error ("shift must be an immediate");
21023 target
= gen_reg_rtx (mode
);
21024 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
21031 case IX86_BUILTIN_MOVNTDQA
:
21032 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa
, exp
,
21035 case IX86_BUILTIN_MOVNTSD
:
21036 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
21038 case IX86_BUILTIN_MOVNTSS
:
21039 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
21041 case IX86_BUILTIN_INSERTQ
:
21042 case IX86_BUILTIN_EXTRQ
:
21043 icode
= (fcode
== IX86_BUILTIN_EXTRQ
21044 ? CODE_FOR_sse4a_extrq
21045 : CODE_FOR_sse4a_insertq
);
21046 arg0
= CALL_EXPR_ARG (exp
, 0);
21047 arg1
= CALL_EXPR_ARG (exp
, 1);
21048 op0
= expand_normal (arg0
);
21049 op1
= expand_normal (arg1
);
21050 tmode
= insn_data
[icode
].operand
[0].mode
;
21051 mode1
= insn_data
[icode
].operand
[1].mode
;
21052 mode2
= insn_data
[icode
].operand
[2].mode
;
21053 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21054 op0
= copy_to_mode_reg (mode1
, op0
);
21055 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21056 op1
= copy_to_mode_reg (mode2
, op1
);
21057 if (optimize
|| target
== 0
21058 || GET_MODE (target
) != tmode
21059 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21060 target
= gen_reg_rtx (tmode
);
21061 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
21067 case IX86_BUILTIN_EXTRQI
:
21068 icode
= CODE_FOR_sse4a_extrqi
;
21069 arg0
= CALL_EXPR_ARG (exp
, 0);
21070 arg1
= CALL_EXPR_ARG (exp
, 1);
21071 arg2
= CALL_EXPR_ARG (exp
, 2);
21072 op0
= expand_normal (arg0
);
21073 op1
= expand_normal (arg1
);
21074 op2
= expand_normal (arg2
);
21075 tmode
= insn_data
[icode
].operand
[0].mode
;
21076 mode1
= insn_data
[icode
].operand
[1].mode
;
21077 mode2
= insn_data
[icode
].operand
[2].mode
;
21078 mode3
= insn_data
[icode
].operand
[3].mode
;
21079 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21080 op0
= copy_to_mode_reg (mode1
, op0
);
21081 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21083 error ("index mask must be an immediate");
21084 return gen_reg_rtx (tmode
);
21086 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
21088 error ("length mask must be an immediate");
21089 return gen_reg_rtx (tmode
);
21091 if (optimize
|| target
== 0
21092 || GET_MODE (target
) != tmode
21093 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21094 target
= gen_reg_rtx (tmode
);
21095 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21101 case IX86_BUILTIN_INSERTQI
:
21102 icode
= CODE_FOR_sse4a_insertqi
;
21103 arg0
= CALL_EXPR_ARG (exp
, 0);
21104 arg1
= CALL_EXPR_ARG (exp
, 1);
21105 arg2
= CALL_EXPR_ARG (exp
, 2);
21106 arg3
= CALL_EXPR_ARG (exp
, 3);
21107 op0
= expand_normal (arg0
);
21108 op1
= expand_normal (arg1
);
21109 op2
= expand_normal (arg2
);
21110 op3
= expand_normal (arg3
);
21111 tmode
= insn_data
[icode
].operand
[0].mode
;
21112 mode1
= insn_data
[icode
].operand
[1].mode
;
21113 mode2
= insn_data
[icode
].operand
[2].mode
;
21114 mode3
= insn_data
[icode
].operand
[3].mode
;
21115 mode4
= insn_data
[icode
].operand
[4].mode
;
21117 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21118 op0
= copy_to_mode_reg (mode1
, op0
);
21120 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21121 op1
= copy_to_mode_reg (mode2
, op1
);
21123 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
21125 error ("index mask must be an immediate");
21126 return gen_reg_rtx (tmode
);
21128 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
21130 error ("length mask must be an immediate");
21131 return gen_reg_rtx (tmode
);
21133 if (optimize
|| target
== 0
21134 || GET_MODE (target
) != tmode
21135 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21136 target
= gen_reg_rtx (tmode
);
21137 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
21143 case IX86_BUILTIN_VEC_INIT_V2SI
:
21144 case IX86_BUILTIN_VEC_INIT_V4HI
:
21145 case IX86_BUILTIN_VEC_INIT_V8QI
:
21146 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
21148 case IX86_BUILTIN_VEC_EXT_V2DF
:
21149 case IX86_BUILTIN_VEC_EXT_V2DI
:
21150 case IX86_BUILTIN_VEC_EXT_V4SF
:
21151 case IX86_BUILTIN_VEC_EXT_V4SI
:
21152 case IX86_BUILTIN_VEC_EXT_V8HI
:
21153 case IX86_BUILTIN_VEC_EXT_V2SI
:
21154 case IX86_BUILTIN_VEC_EXT_V4HI
:
21155 case IX86_BUILTIN_VEC_EXT_V16QI
:
21156 return ix86_expand_vec_ext_builtin (exp
, target
);
21158 case IX86_BUILTIN_VEC_SET_V2DI
:
21159 case IX86_BUILTIN_VEC_SET_V4SF
:
21160 case IX86_BUILTIN_VEC_SET_V4SI
:
21161 case IX86_BUILTIN_VEC_SET_V8HI
:
21162 case IX86_BUILTIN_VEC_SET_V4HI
:
21163 case IX86_BUILTIN_VEC_SET_V16QI
:
21164 return ix86_expand_vec_set_builtin (exp
);
21166 case IX86_BUILTIN_INFQ
:
21168 REAL_VALUE_TYPE inf
;
21172 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
21174 tmp
= validize_mem (force_const_mem (mode
, tmp
));
21177 target
= gen_reg_rtx (mode
);
21179 emit_move_insn (target
, tmp
);
21183 case IX86_BUILTIN_FABSQ
:
21184 return ix86_expand_unop_builtin (CODE_FOR_abstf2
, exp
, target
, 0);
21186 case IX86_BUILTIN_COPYSIGNQ
:
21187 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3
, exp
, target
);
21193 for (i
= 0, d
= bdesc_sse_3arg
;
21194 i
< ARRAY_SIZE (bdesc_sse_3arg
);
21196 if (d
->code
== fcode
)
21197 return ix86_expand_sse_4_operands_builtin (d
->icode
, exp
,
21200 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
21201 if (d
->code
== fcode
)
21203 /* Compares are treated specially. */
21204 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
21205 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
21206 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
21207 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
21208 return ix86_expand_sse_compare (d
, exp
, target
);
21210 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
21213 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
21214 if (d
->code
== fcode
)
21215 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
21217 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
21218 if (d
->code
== fcode
)
21219 return ix86_expand_sse_comi (d
, exp
, target
);
21221 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
21222 if (d
->code
== fcode
)
21223 return ix86_expand_sse_ptest (d
, exp
, target
);
21225 for (i
= 0, d
= bdesc_crc32
; i
< ARRAY_SIZE (bdesc_crc32
); i
++, d
++)
21226 if (d
->code
== fcode
)
21227 return ix86_expand_crc32 (d
->icode
, exp
, target
);
21229 for (i
= 0, d
= bdesc_pcmpestr
;
21230 i
< ARRAY_SIZE (bdesc_pcmpestr
);
21232 if (d
->code
== fcode
)
21233 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
21235 for (i
= 0, d
= bdesc_pcmpistr
;
21236 i
< ARRAY_SIZE (bdesc_pcmpistr
);
21238 if (d
->code
== fcode
)
21239 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
21241 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
21242 if (d
->code
== fcode
)
21243 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
21244 (enum multi_arg_type
)d
->flag
,
21247 gcc_unreachable ();
21250 /* Returns a function decl for a vectorized version of the builtin function
21251 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21252 if it is not available. */
21255 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
21258 enum machine_mode in_mode
, out_mode
;
21261 if (TREE_CODE (type_out
) != VECTOR_TYPE
21262 || TREE_CODE (type_in
) != VECTOR_TYPE
)
21265 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
21266 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
21267 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
21268 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
21272 case BUILT_IN_SQRT
:
21273 if (out_mode
== DFmode
&& out_n
== 2
21274 && in_mode
== DFmode
&& in_n
== 2)
21275 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
21278 case BUILT_IN_SQRTF
:
21279 if (out_mode
== SFmode
&& out_n
== 4
21280 && in_mode
== SFmode
&& in_n
== 4)
21281 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
21284 case BUILT_IN_LRINT
:
21285 if (out_mode
== SImode
&& out_n
== 4
21286 && in_mode
== DFmode
&& in_n
== 2)
21287 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
21290 case BUILT_IN_LRINTF
:
21291 if (out_mode
== SImode
&& out_n
== 4
21292 && in_mode
== SFmode
&& in_n
== 4)
21293 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
21300 /* Dispatch to a handler for a vectorization library. */
21301 if (ix86_veclib_handler
)
21302 return (*ix86_veclib_handler
)(fn
, type_out
, type_in
);
21307 /* Handler for an ACML-style interface to a library with vectorized
21311 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
21313 char name
[20] = "__vr.._";
21314 tree fntype
, new_fndecl
, args
;
21317 enum machine_mode el_mode
, in_mode
;
21320 /* The ACML is 64bits only and suitable for unsafe math only as
21321 it does not correctly support parts of IEEE with the required
21322 precision such as denormals. */
21324 || !flag_unsafe_math_optimizations
)
21327 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
21328 n
= TYPE_VECTOR_SUBPARTS (type_out
);
21329 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
21330 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
21331 if (el_mode
!= in_mode
21341 case BUILT_IN_LOG2
:
21342 case BUILT_IN_LOG10
:
21345 if (el_mode
!= DFmode
21350 case BUILT_IN_SINF
:
21351 case BUILT_IN_COSF
:
21352 case BUILT_IN_EXPF
:
21353 case BUILT_IN_POWF
:
21354 case BUILT_IN_LOGF
:
21355 case BUILT_IN_LOG2F
:
21356 case BUILT_IN_LOG10F
:
21359 if (el_mode
!= SFmode
21368 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
21369 sprintf (name
+ 7, "%s", bname
+10);
21372 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
21373 args
= TREE_CHAIN (args
))
21377 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
21379 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
21381 /* Build a function declaration for the vectorized function. */
21382 new_fndecl
= build_decl (FUNCTION_DECL
, get_identifier (name
), fntype
);
21383 TREE_PUBLIC (new_fndecl
) = 1;
21384 DECL_EXTERNAL (new_fndecl
) = 1;
21385 DECL_IS_NOVOPS (new_fndecl
) = 1;
21386 TREE_READONLY (new_fndecl
) = 1;
21392 /* Returns a decl of a function that implements conversion of the
21393 input vector of type TYPE, or NULL_TREE if it is not available. */
21396 ix86_vectorize_builtin_conversion (unsigned int code
, tree type
)
21398 if (TREE_CODE (type
) != VECTOR_TYPE
)
21404 switch (TYPE_MODE (type
))
21407 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
21412 case FIX_TRUNC_EXPR
:
21413 switch (TYPE_MODE (type
))
21416 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
21426 /* Returns a code for a target-specific builtin that implements
21427 reciprocal of the function, or NULL_TREE if not available. */
21430 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
21431 bool sqrt ATTRIBUTE_UNUSED
)
21433 if (! (TARGET_SSE_MATH
&& TARGET_RECIP
&& !optimize_size
21434 && flag_finite_math_only
&& !flag_trapping_math
21435 && flag_unsafe_math_optimizations
))
21439 /* Machine dependent builtins. */
21442 /* Vectorized version of sqrt to rsqrt conversion. */
21443 case IX86_BUILTIN_SQRTPS
:
21444 return ix86_builtins
[IX86_BUILTIN_RSQRTPS
];
21450 /* Normal builtins. */
21453 /* Sqrt to rsqrt conversion. */
21454 case BUILT_IN_SQRTF
:
21455 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
21462 /* Store OPERAND to the memory after reload is completed. This means
21463 that we can't easily use assign_stack_local. */
21465 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
21469 gcc_assert (reload_completed
);
21470 if (TARGET_RED_ZONE
)
21472 result
= gen_rtx_MEM (mode
,
21473 gen_rtx_PLUS (Pmode
,
21475 GEN_INT (-RED_ZONE_SIZE
)));
21476 emit_move_insn (result
, operand
);
21478 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
21484 operand
= gen_lowpart (DImode
, operand
);
21488 gen_rtx_SET (VOIDmode
,
21489 gen_rtx_MEM (DImode
,
21490 gen_rtx_PRE_DEC (DImode
,
21491 stack_pointer_rtx
)),
21495 gcc_unreachable ();
21497 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
21506 split_di (&operand
, 1, operands
, operands
+ 1);
21508 gen_rtx_SET (VOIDmode
,
21509 gen_rtx_MEM (SImode
,
21510 gen_rtx_PRE_DEC (Pmode
,
21511 stack_pointer_rtx
)),
21514 gen_rtx_SET (VOIDmode
,
21515 gen_rtx_MEM (SImode
,
21516 gen_rtx_PRE_DEC (Pmode
,
21517 stack_pointer_rtx
)),
21522 /* Store HImodes as SImodes. */
21523 operand
= gen_lowpart (SImode
, operand
);
21527 gen_rtx_SET (VOIDmode
,
21528 gen_rtx_MEM (GET_MODE (operand
),
21529 gen_rtx_PRE_DEC (SImode
,
21530 stack_pointer_rtx
)),
21534 gcc_unreachable ();
21536 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
21541 /* Free operand from the memory. */
21543 ix86_free_from_memory (enum machine_mode mode
)
21545 if (!TARGET_RED_ZONE
)
21549 if (mode
== DImode
|| TARGET_64BIT
)
21553 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21554 to pop or add instruction if registers are available. */
21555 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21556 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
21561 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21562 QImode must go into class Q_REGS.
21563 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21564 movdf to do mem-to-mem moves through integer regs. */
21566 ix86_preferred_reload_class (rtx x
, enum reg_class regclass
)
21568 enum machine_mode mode
= GET_MODE (x
);
21570 /* We're only allowed to return a subclass of CLASS. Many of the
21571 following checks fail for NO_REGS, so eliminate that early. */
21572 if (regclass
== NO_REGS
)
21575 /* All classes can load zeros. */
21576 if (x
== CONST0_RTX (mode
))
21579 /* Force constants into memory if we are loading a (nonzero) constant into
21580 an MMX or SSE register. This is because there are no MMX/SSE instructions
21581 to load from a constant. */
21583 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
21586 /* Prefer SSE regs only, if we can use them for math. */
21587 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
21588 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
21590 /* Floating-point constants need more complex checks. */
21591 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
21593 /* General regs can load everything. */
21594 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
21597 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21598 zero above. We only want to wind up preferring 80387 registers if
21599 we plan on doing computation with them. */
21601 && standard_80387_constant_p (x
))
21603 /* Limit class to non-sse. */
21604 if (regclass
== FLOAT_SSE_REGS
)
21606 if (regclass
== FP_TOP_SSE_REGS
)
21608 if (regclass
== FP_SECOND_SSE_REGS
)
21609 return FP_SECOND_REG
;
21610 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
21617 /* Generally when we see PLUS here, it's the function invariant
21618 (plus soft-fp const_int). Which can only be computed into general
21620 if (GET_CODE (x
) == PLUS
)
21621 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
21623 /* QImode constants are easy to load, but non-constant QImode data
21624 must go into Q_REGS. */
21625 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
21627 if (reg_class_subset_p (regclass
, Q_REGS
))
21629 if (reg_class_subset_p (Q_REGS
, regclass
))
21637 /* Discourage putting floating-point values in SSE registers unless
21638 SSE math is being used, and likewise for the 387 registers. */
21640 ix86_preferred_output_reload_class (rtx x
, enum reg_class regclass
)
21642 enum machine_mode mode
= GET_MODE (x
);
21644 /* Restrict the output reload class to the register bank that we are doing
21645 math on. If we would like not to return a subset of CLASS, reject this
21646 alternative: if reload cannot do this, it will still use its choice. */
21647 mode
= GET_MODE (x
);
21648 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
21649 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
21651 if (X87_FLOAT_MODE_P (mode
))
21653 if (regclass
== FP_TOP_SSE_REGS
)
21655 else if (regclass
== FP_SECOND_SSE_REGS
)
21656 return FP_SECOND_REG
;
21658 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
21664 /* If we are copying between general and FP registers, we need a memory
21665 location. The same is true for SSE and MMX registers.
21667 To optimize register_move_cost performance, allow inline variant.
21669 The macro can't work reliably when one of the CLASSES is class containing
21670 registers from multiple units (SSE, MMX, integer). We avoid this by never
21671 combining those units in single alternative in the machine description.
21672 Ensure that this constraint holds to avoid unexpected surprises.
21674 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21675 enforce these sanity checks. */
21678 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
21679 enum machine_mode mode
, int strict
)
21681 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
21682 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
21683 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
21684 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
21685 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
21686 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
21688 gcc_assert (!strict
);
21692 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
21695 /* ??? This is a lie. We do have moves between mmx/general, and for
21696 mmx/sse2. But by saying we need secondary memory we discourage the
21697 register allocator from using the mmx registers unless needed. */
21698 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
21701 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
21703 /* SSE1 doesn't have any direct moves from other classes. */
21707 /* If the target says that inter-unit moves are more expensive
21708 than moving through memory, then don't generate them. */
21709 if (!TARGET_INTER_UNIT_MOVES
)
21712 /* Between SSE and general, we have moves no larger than word size. */
21713 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21721 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
21722 enum machine_mode mode
, int strict
)
21724 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
21727 /* Return true if the registers in CLASS cannot represent the change from
21728 modes FROM to TO. */
21731 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
21732 enum reg_class regclass
)
21737 /* x87 registers can't do subreg at all, as all values are reformatted
21738 to extended precision. */
21739 if (MAYBE_FLOAT_CLASS_P (regclass
))
21742 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
21744 /* Vector registers do not support QI or HImode loads. If we don't
21745 disallow a change to these modes, reload will assume it's ok to
21746 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21747 the vec_dupv4hi pattern. */
21748 if (GET_MODE_SIZE (from
) < 4)
21751 /* Vector registers do not support subreg with nonzero offsets, which
21752 are otherwise valid for integer registers. Since we can't see
21753 whether we have a nonzero offset from here, prohibit all
21754 nonparadoxical subregs changing size. */
21755 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
21762 /* Return the cost of moving data of mode M between a
21763 register and memory. A value of 2 is the default; this cost is
21764 relative to those in `REGISTER_MOVE_COST'.
21766 This function is used extensively by register_move_cost that is used to
21767 build tables at startup. Make it inline in this case.
21768 When IN is 2, return maximum of in and out move cost.
21770 If moving between registers and memory is more expensive than
21771 between two registers, you should define this macro to express the
21774 Model also increased moving costs of QImode registers in non
21778 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
21782 if (FLOAT_CLASS_P (regclass
))
21800 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
21801 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
21803 if (SSE_CLASS_P (regclass
))
21806 switch (GET_MODE_SIZE (mode
))
21821 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
21822 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
21824 if (MMX_CLASS_P (regclass
))
21827 switch (GET_MODE_SIZE (mode
))
21839 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
21840 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
21842 switch (GET_MODE_SIZE (mode
))
21845 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
21848 return ix86_cost
->int_store
[0];
21849 if (TARGET_PARTIAL_REG_DEPENDENCY
&& !optimize_size
)
21850 cost
= ix86_cost
->movzbl_load
;
21852 cost
= ix86_cost
->int_load
[0];
21854 return MAX (cost
, ix86_cost
->int_store
[0]);
21860 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
21862 return ix86_cost
->movzbl_load
;
21864 return ix86_cost
->int_store
[0] + 4;
21869 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
21870 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
21872 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21873 if (mode
== TFmode
)
21876 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
21878 cost
= ix86_cost
->int_load
[2];
21880 cost
= ix86_cost
->int_store
[2];
21881 return (cost
* (((int) GET_MODE_SIZE (mode
)
21882 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
21887 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
, int in
)
21889 return inline_memory_move_cost (mode
, regclass
, in
);
21893 /* Return the cost of moving data from a register in class CLASS1 to
21894 one in class CLASS2.
21896 It is not required that the cost always equal 2 when FROM is the same as TO;
21897 on some machines it is expensive to move between registers if they are not
21898 general registers. */
21901 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
21902 enum reg_class class2
)
21904 /* In case we require secondary memory, compute cost of the store followed
21905 by load. In order to avoid bad register allocation choices, we need
21906 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21908 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
21912 cost
+= inline_memory_move_cost (mode
, class1
, 2);
21913 cost
+= inline_memory_move_cost (mode
, class2
, 2);
21915 /* In case of copying from general_purpose_register we may emit multiple
21916 stores followed by single load causing memory size mismatch stall.
21917 Count this as arbitrarily high cost of 20. */
21918 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
21921 /* In the case of FP/MMX moves, the registers actually overlap, and we
21922 have to switch modes in order to treat them differently. */
21923 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
21924 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
21930 /* Moves between SSE/MMX and integer unit are expensive. */
21931 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
21932 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
21934 /* ??? By keeping returned value relatively high, we limit the number
21935 of moves between integer and MMX/SSE registers for all targets.
21936 Additionally, high value prevents problem with x86_modes_tieable_p(),
21937 where integer modes in MMX/SSE registers are not tieable
21938 because of missing QImode and HImode moves to, from or between
21939 MMX/SSE registers. */
21940 return MAX (ix86_cost
->mmxsse_to_integer
, 8);
21942 if (MAYBE_FLOAT_CLASS_P (class1
))
21943 return ix86_cost
->fp_move
;
21944 if (MAYBE_SSE_CLASS_P (class1
))
21945 return ix86_cost
->sse_move
;
21946 if (MAYBE_MMX_CLASS_P (class1
))
21947 return ix86_cost
->mmx_move
;
21951 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21954 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
21956 /* Flags and only flags can only hold CCmode values. */
21957 if (CC_REGNO_P (regno
))
21958 return GET_MODE_CLASS (mode
) == MODE_CC
;
21959 if (GET_MODE_CLASS (mode
) == MODE_CC
21960 || GET_MODE_CLASS (mode
) == MODE_RANDOM
21961 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
21963 if (FP_REGNO_P (regno
))
21964 return VALID_FP_MODE_P (mode
);
21965 if (SSE_REGNO_P (regno
))
21967 /* We implement the move patterns for all vector modes into and
21968 out of SSE registers, even when no operation instructions
21970 return (VALID_SSE_REG_MODE (mode
)
21971 || VALID_SSE2_REG_MODE (mode
)
21972 || VALID_MMX_REG_MODE (mode
)
21973 || VALID_MMX_REG_MODE_3DNOW (mode
));
21975 if (MMX_REGNO_P (regno
))
21977 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21978 so if the register is available at all, then we can move data of
21979 the given mode into or out of it. */
21980 return (VALID_MMX_REG_MODE (mode
)
21981 || VALID_MMX_REG_MODE_3DNOW (mode
));
21984 if (mode
== QImode
)
21986 /* Take care for QImode values - they can be in non-QI regs,
21987 but then they do cause partial register stalls. */
21988 if (regno
< 4 || TARGET_64BIT
)
21990 if (!TARGET_PARTIAL_REG_STALL
)
21992 return reload_in_progress
|| reload_completed
;
21994 /* We handle both integer and floats in the general purpose registers. */
21995 else if (VALID_INT_MODE_P (mode
))
21997 else if (VALID_FP_MODE_P (mode
))
21999 else if (VALID_DFP_MODE_P (mode
))
22001 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22002 on to use that value in smaller contexts, this can easily force a
22003 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22004 supporting DImode, allow it. */
22005 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
22011 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22012 tieable integer mode. */
22015 ix86_tieable_integer_mode_p (enum machine_mode mode
)
22024 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
22027 return TARGET_64BIT
;
22034 /* Return true if MODE1 is accessible in a register that can hold MODE2
22035 without copying. That is, all register classes that can hold MODE2
22036 can also hold MODE1. */
22039 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22041 if (mode1
== mode2
)
22044 if (ix86_tieable_integer_mode_p (mode1
)
22045 && ix86_tieable_integer_mode_p (mode2
))
22048 /* MODE2 being XFmode implies fp stack or general regs, which means we
22049 can tie any smaller floating point modes to it. Note that we do not
22050 tie this with TFmode. */
22051 if (mode2
== XFmode
)
22052 return mode1
== SFmode
|| mode1
== DFmode
;
22054 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22055 that we can tie it with SFmode. */
22056 if (mode2
== DFmode
)
22057 return mode1
== SFmode
;
22059 /* If MODE2 is only appropriate for an SSE register, then tie with
22060 any other mode acceptable to SSE registers. */
22061 if (GET_MODE_SIZE (mode2
) == 16
22062 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
22063 return (GET_MODE_SIZE (mode1
) == 16
22064 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
22066 /* If MODE2 is appropriate for an MMX register, then tie
22067 with any other mode acceptable to MMX registers. */
22068 if (GET_MODE_SIZE (mode2
) == 8
22069 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
22070 return (GET_MODE_SIZE (mode1
) == 8
22071 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
22076 /* Compute a (partial) cost for rtx X. Return true if the complete
22077 cost has been computed, and false if subexpressions should be
22078 scanned. In either case, *TOTAL contains the cost result. */
22081 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
)
22083 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
22084 enum machine_mode mode
= GET_MODE (x
);
22092 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
22094 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
22096 else if (flag_pic
&& SYMBOLIC_CONST (x
)
22098 || (!GET_CODE (x
) != LABEL_REF
22099 && (GET_CODE (x
) != SYMBOL_REF
22100 || !SYMBOL_REF_LOCAL_P (x
)))))
22107 if (mode
== VOIDmode
)
22110 switch (standard_80387_constant_p (x
))
22115 default: /* Other constants */
22120 /* Start with (MEM (SYMBOL_REF)), since that's where
22121 it'll probably end up. Add a penalty for size. */
22122 *total
= (COSTS_N_INSNS (1)
22123 + (flag_pic
!= 0 && !TARGET_64BIT
)
22124 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
22130 /* The zero extensions is often completely free on x86_64, so make
22131 it as cheap as possible. */
22132 if (TARGET_64BIT
&& mode
== DImode
22133 && GET_MODE (XEXP (x
, 0)) == SImode
)
22135 else if (TARGET_ZERO_EXTEND_WITH_AND
)
22136 *total
= ix86_cost
->add
;
22138 *total
= ix86_cost
->movzx
;
22142 *total
= ix86_cost
->movsx
;
22146 if (CONST_INT_P (XEXP (x
, 1))
22147 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
22149 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
22152 *total
= ix86_cost
->add
;
22155 if ((value
== 2 || value
== 3)
22156 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
22158 *total
= ix86_cost
->lea
;
22168 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
22170 if (CONST_INT_P (XEXP (x
, 1)))
22172 if (INTVAL (XEXP (x
, 1)) > 32)
22173 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
22175 *total
= ix86_cost
->shift_const
* 2;
22179 if (GET_CODE (XEXP (x
, 1)) == AND
)
22180 *total
= ix86_cost
->shift_var
* 2;
22182 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
22187 if (CONST_INT_P (XEXP (x
, 1)))
22188 *total
= ix86_cost
->shift_const
;
22190 *total
= ix86_cost
->shift_var
;
22195 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22197 /* ??? SSE scalar cost should be used here. */
22198 *total
= ix86_cost
->fmul
;
22201 else if (X87_FLOAT_MODE_P (mode
))
22203 *total
= ix86_cost
->fmul
;
22206 else if (FLOAT_MODE_P (mode
))
22208 /* ??? SSE vector cost should be used here. */
22209 *total
= ix86_cost
->fmul
;
22214 rtx op0
= XEXP (x
, 0);
22215 rtx op1
= XEXP (x
, 1);
22217 if (CONST_INT_P (XEXP (x
, 1)))
22219 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
22220 for (nbits
= 0; value
!= 0; value
&= value
- 1)
22224 /* This is arbitrary. */
22227 /* Compute costs correctly for widening multiplication. */
22228 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
22229 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
22230 == GET_MODE_SIZE (mode
))
22232 int is_mulwiden
= 0;
22233 enum machine_mode inner_mode
= GET_MODE (op0
);
22235 if (GET_CODE (op0
) == GET_CODE (op1
))
22236 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
22237 else if (CONST_INT_P (op1
))
22239 if (GET_CODE (op0
) == SIGN_EXTEND
)
22240 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
22243 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
22247 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
22250 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
22251 + nbits
* ix86_cost
->mult_bit
22252 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
22261 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22262 /* ??? SSE cost should be used here. */
22263 *total
= ix86_cost
->fdiv
;
22264 else if (X87_FLOAT_MODE_P (mode
))
22265 *total
= ix86_cost
->fdiv
;
22266 else if (FLOAT_MODE_P (mode
))
22267 /* ??? SSE vector cost should be used here. */
22268 *total
= ix86_cost
->fdiv
;
22270 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
22274 if (GET_MODE_CLASS (mode
) == MODE_INT
22275 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
22277 if (GET_CODE (XEXP (x
, 0)) == PLUS
22278 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
22279 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
22280 && CONSTANT_P (XEXP (x
, 1)))
22282 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
22283 if (val
== 2 || val
== 4 || val
== 8)
22285 *total
= ix86_cost
->lea
;
22286 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
22287 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
22289 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22293 else if (GET_CODE (XEXP (x
, 0)) == MULT
22294 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
22296 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
22297 if (val
== 2 || val
== 4 || val
== 8)
22299 *total
= ix86_cost
->lea
;
22300 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
22301 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22305 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
22307 *total
= ix86_cost
->lea
;
22308 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
22309 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
22310 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22317 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22319 /* ??? SSE cost should be used here. */
22320 *total
= ix86_cost
->fadd
;
22323 else if (X87_FLOAT_MODE_P (mode
))
22325 *total
= ix86_cost
->fadd
;
22328 else if (FLOAT_MODE_P (mode
))
22330 /* ??? SSE vector cost should be used here. */
22331 *total
= ix86_cost
->fadd
;
22339 if (!TARGET_64BIT
&& mode
== DImode
)
22341 *total
= (ix86_cost
->add
* 2
22342 + (rtx_cost (XEXP (x
, 0), outer_code
)
22343 << (GET_MODE (XEXP (x
, 0)) != DImode
))
22344 + (rtx_cost (XEXP (x
, 1), outer_code
)
22345 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
22351 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22353 /* ??? SSE cost should be used here. */
22354 *total
= ix86_cost
->fchs
;
22357 else if (X87_FLOAT_MODE_P (mode
))
22359 *total
= ix86_cost
->fchs
;
22362 else if (FLOAT_MODE_P (mode
))
22364 /* ??? SSE vector cost should be used here. */
22365 *total
= ix86_cost
->fchs
;
22371 if (!TARGET_64BIT
&& mode
== DImode
)
22372 *total
= ix86_cost
->add
* 2;
22374 *total
= ix86_cost
->add
;
22378 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
22379 && XEXP (XEXP (x
, 0), 1) == const1_rtx
22380 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
22381 && XEXP (x
, 1) == const0_rtx
)
22383 /* This kind of construct is implemented using test[bwl].
22384 Treat it as if we had an AND. */
22385 *total
= (ix86_cost
->add
22386 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
22387 + rtx_cost (const1_rtx
, outer_code
));
22393 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
22398 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22399 /* ??? SSE cost should be used here. */
22400 *total
= ix86_cost
->fabs
;
22401 else if (X87_FLOAT_MODE_P (mode
))
22402 *total
= ix86_cost
->fabs
;
22403 else if (FLOAT_MODE_P (mode
))
22404 /* ??? SSE vector cost should be used here. */
22405 *total
= ix86_cost
->fabs
;
22409 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22410 /* ??? SSE cost should be used here. */
22411 *total
= ix86_cost
->fsqrt
;
22412 else if (X87_FLOAT_MODE_P (mode
))
22413 *total
= ix86_cost
->fsqrt
;
22414 else if (FLOAT_MODE_P (mode
))
22415 /* ??? SSE vector cost should be used here. */
22416 *total
= ix86_cost
->fsqrt
;
22420 if (XINT (x
, 1) == UNSPEC_TP
)
22431 static int current_machopic_label_num
;
22433 /* Given a symbol name and its associated stub, write out the
22434 definition of the stub. */
22437 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
22439 unsigned int length
;
22440 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
22441 int label
= ++current_machopic_label_num
;
22443 /* For 64-bit we shouldn't get here. */
22444 gcc_assert (!TARGET_64BIT
);
22446 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22447 symb
= (*targetm
.strip_name_encoding
) (symb
);
22449 length
= strlen (stub
);
22450 binder_name
= alloca (length
+ 32);
22451 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
22453 length
= strlen (symb
);
22454 symbol_name
= alloca (length
+ 32);
22455 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
22457 sprintf (lazy_ptr_name
, "L%d$lz", label
);
22460 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
22462 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
22464 fprintf (file
, "%s:\n", stub
);
22465 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
22469 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
22470 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
22471 fprintf (file
, "\tjmp\t*%%edx\n");
22474 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
22476 fprintf (file
, "%s:\n", binder_name
);
22480 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
22481 fprintf (file
, "\tpushl\t%%eax\n");
22484 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
22486 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
22488 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
22489 fprintf (file
, "%s:\n", lazy_ptr_name
);
22490 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
22491 fprintf (file
, "\t.long %s\n", binder_name
);
22495 darwin_x86_file_end (void)
22497 darwin_file_end ();
22500 #endif /* TARGET_MACHO */
22502 /* Order the registers for register allocator. */
22505 x86_order_regs_for_local_alloc (void)
22510 /* First allocate the local general purpose registers. */
22511 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
22512 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
22513 reg_alloc_order
[pos
++] = i
;
22515 /* Global general purpose registers. */
22516 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
22517 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
22518 reg_alloc_order
[pos
++] = i
;
22520 /* x87 registers come first in case we are doing FP math
22522 if (!TARGET_SSE_MATH
)
22523 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
22524 reg_alloc_order
[pos
++] = i
;
22526 /* SSE registers. */
22527 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
22528 reg_alloc_order
[pos
++] = i
;
22529 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
22530 reg_alloc_order
[pos
++] = i
;
22532 /* x87 registers. */
22533 if (TARGET_SSE_MATH
)
22534 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
22535 reg_alloc_order
[pos
++] = i
;
22537 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
22538 reg_alloc_order
[pos
++] = i
;
22540 /* Initialize the rest of array as we do not allocate some registers
22542 while (pos
< FIRST_PSEUDO_REGISTER
)
22543 reg_alloc_order
[pos
++] = 0;
22546 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22547 struct attribute_spec.handler. */
22549 ix86_handle_struct_attribute (tree
*node
, tree name
,
22550 tree args ATTRIBUTE_UNUSED
,
22551 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
22554 if (DECL_P (*node
))
22556 if (TREE_CODE (*node
) == TYPE_DECL
)
22557 type
= &TREE_TYPE (*node
);
22562 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
22563 || TREE_CODE (*type
) == UNION_TYPE
)))
22565 warning (OPT_Wattributes
, "%qs attribute ignored",
22566 IDENTIFIER_POINTER (name
));
22567 *no_add_attrs
= true;
22570 else if ((is_attribute_p ("ms_struct", name
)
22571 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
22572 || ((is_attribute_p ("gcc_struct", name
)
22573 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
22575 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
22576 IDENTIFIER_POINTER (name
));
22577 *no_add_attrs
= true;
22584 ix86_ms_bitfield_layout_p (const_tree record_type
)
22586 return (TARGET_MS_BITFIELD_LAYOUT
&&
22587 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
22588 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
22591 /* Returns an expression indicating where the this parameter is
22592 located on entry to the FUNCTION. */
22595 x86_this_parameter (tree function
)
22597 tree type
= TREE_TYPE (function
);
22598 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
22602 const int *parm_regs
;
22604 if (TARGET_64BIT_MS_ABI
)
22605 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
22607 parm_regs
= x86_64_int_parameter_registers
;
22608 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
22611 if (ix86_function_regparm (type
, function
) > 0 && !stdarg_p (type
))
22613 int regno
= AX_REG
;
22614 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
22616 return gen_rtx_REG (SImode
, regno
);
22619 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
22622 /* Determine whether x86_output_mi_thunk can succeed. */
22625 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
22626 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
22627 HOST_WIDE_INT vcall_offset
, const_tree function
)
22629 /* 64-bit can handle anything. */
22633 /* For 32-bit, everything's fine if we have one free register. */
22634 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
22637 /* Need a free register for vcall_offset. */
22641 /* Need a free register for GOT references. */
22642 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
22645 /* Otherwise ok. */
22649 /* Output the assembler code for a thunk function. THUNK_DECL is the
22650 declaration for the thunk function itself, FUNCTION is the decl for
22651 the target function. DELTA is an immediate constant offset to be
22652 added to THIS. If VCALL_OFFSET is nonzero, the word at
22653 *(*this + vcall_offset) should be added to THIS. */
22656 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
22657 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
22658 HOST_WIDE_INT vcall_offset
, tree function
)
22661 rtx this_param
= x86_this_parameter (function
);
22664 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22665 pull it in now and let DELTA benefit. */
22666 if (REG_P (this_param
))
22667 this_reg
= this_param
;
22668 else if (vcall_offset
)
22670 /* Put the this parameter into %eax. */
22671 xops
[0] = this_param
;
22672 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
22673 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22676 this_reg
= NULL_RTX
;
22678 /* Adjust the this parameter by a fixed constant. */
22681 xops
[0] = GEN_INT (delta
);
22682 xops
[1] = this_reg
? this_reg
: this_param
;
22685 if (!x86_64_general_operand (xops
[0], DImode
))
22687 tmp
= gen_rtx_REG (DImode
, R10_REG
);
22689 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
22691 xops
[1] = this_param
;
22693 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
22696 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
22699 /* Adjust the this parameter by a value stored in the vtable. */
22703 tmp
= gen_rtx_REG (DImode
, R10_REG
);
22706 int tmp_regno
= CX_REG
;
22707 if (lookup_attribute ("fastcall",
22708 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
22709 tmp_regno
= AX_REG
;
22710 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
22713 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
22716 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
22718 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22720 /* Adjust the this parameter. */
22721 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
22722 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
22724 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
22725 xops
[0] = GEN_INT (vcall_offset
);
22727 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
22728 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
22730 xops
[1] = this_reg
;
22732 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
22734 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
22737 /* If necessary, drop THIS back to its stack slot. */
22738 if (this_reg
&& this_reg
!= this_param
)
22740 xops
[0] = this_reg
;
22741 xops
[1] = this_param
;
22742 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22745 xops
[0] = XEXP (DECL_RTL (function
), 0);
22748 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
22749 output_asm_insn ("jmp\t%P0", xops
);
22750 /* All thunks should be in the same object as their target,
22751 and thus binds_local_p should be true. */
22752 else if (TARGET_64BIT_MS_ABI
)
22753 gcc_unreachable ();
22756 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
22757 tmp
= gen_rtx_CONST (Pmode
, tmp
);
22758 tmp
= gen_rtx_MEM (QImode
, tmp
);
22760 output_asm_insn ("jmp\t%A0", xops
);
22765 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
22766 output_asm_insn ("jmp\t%P0", xops
);
22771 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
22772 tmp
= (gen_rtx_SYMBOL_REF
22774 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
22775 tmp
= gen_rtx_MEM (QImode
, tmp
);
22777 output_asm_insn ("jmp\t%0", xops
);
22780 #endif /* TARGET_MACHO */
22782 tmp
= gen_rtx_REG (SImode
, CX_REG
);
22783 output_set_got (tmp
, NULL_RTX
);
22786 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
22787 output_asm_insn ("jmp\t{*}%1", xops
);
22793 x86_file_start (void)
22795 default_file_start ();
22797 darwin_file_start ();
22799 if (X86_FILE_START_VERSION_DIRECTIVE
)
22800 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
22801 if (X86_FILE_START_FLTUSED
)
22802 fputs ("\t.global\t__fltused\n", asm_out_file
);
22803 if (ix86_asm_dialect
== ASM_INTEL
)
22804 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
22808 x86_field_alignment (tree field
, int computed
)
22810 enum machine_mode mode
;
22811 tree type
= TREE_TYPE (field
);
22813 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
22815 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
22816 ? get_inner_array_type (type
) : type
);
22817 if (mode
== DFmode
|| mode
== DCmode
22818 || GET_MODE_CLASS (mode
) == MODE_INT
22819 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
22820 return MIN (32, computed
);
22824 /* Output assembler code to FILE to increment profiler label # LABELNO
22825 for profiling a function entry. */
22827 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
22831 #ifndef NO_PROFILE_COUNTERS
22832 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
22835 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
22836 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
22838 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
22842 #ifndef NO_PROFILE_COUNTERS
22843 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22844 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
22846 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
22850 #ifndef NO_PROFILE_COUNTERS
22851 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
22852 PROFILE_COUNT_REGISTER
);
22854 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
22858 /* We don't have exact information about the insn sizes, but we may assume
22859 quite safely that we are informed about all 1 byte insns and memory
22860 address sizes. This is enough to eliminate unnecessary padding in
22864 min_insn_size (rtx insn
)
22868 if (!INSN_P (insn
) || !active_insn_p (insn
))
22871 /* Discard alignments we've emit and jump instructions. */
22872 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
22873 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
22876 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
22877 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
22880 /* Important case - calls are always 5 bytes.
22881 It is common to have many calls in the row. */
22883 && symbolic_reference_mentioned_p (PATTERN (insn
))
22884 && !SIBLING_CALL_P (insn
))
22886 if (get_attr_length (insn
) <= 1)
22889 /* For normal instructions we may rely on the sizes of addresses
22890 and the presence of symbol to require 4 bytes of encoding.
22891 This is not the case for jumps where references are PC relative. */
22892 if (!JUMP_P (insn
))
22894 l
= get_attr_length_address (insn
);
22895 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
22904 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22908 ix86_avoid_jump_misspredicts (void)
22910 rtx insn
, start
= get_insns ();
22911 int nbytes
= 0, njumps
= 0;
22914 /* Look for all minimal intervals of instructions containing 4 jumps.
22915 The intervals are bounded by START and INSN. NBYTES is the total
22916 size of instructions in the interval including INSN and not including
22917 START. When the NBYTES is smaller than 16 bytes, it is possible
22918 that the end of START and INSN ends up in the same 16byte page.
22920 The smallest offset in the page INSN can start is the case where START
22921 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22922 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22924 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22927 nbytes
+= min_insn_size (insn
);
22929 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
22930 INSN_UID (insn
), min_insn_size (insn
));
22932 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
22933 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
22941 start
= NEXT_INSN (start
);
22942 if ((JUMP_P (start
)
22943 && GET_CODE (PATTERN (start
)) != ADDR_VEC
22944 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
22946 njumps
--, isjump
= 1;
22949 nbytes
-= min_insn_size (start
);
22951 gcc_assert (njumps
>= 0);
22953 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
22954 INSN_UID (start
), INSN_UID (insn
), nbytes
);
22956 if (njumps
== 3 && isjump
&& nbytes
< 16)
22958 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
22961 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
22962 INSN_UID (insn
), padsize
);
22963 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
22968 /* AMD Athlon works faster
22969 when RET is not destination of conditional jump or directly preceded
22970 by other jump instruction. We avoid the penalty by inserting NOP just
22971 before the RET instructions in such cases. */
22973 ix86_pad_returns (void)
22978 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
22980 basic_block bb
= e
->src
;
22981 rtx ret
= BB_END (bb
);
22983 bool replace
= false;
22985 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
22986 || !maybe_hot_bb_p (bb
))
22988 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
22989 if (active_insn_p (prev
) || LABEL_P (prev
))
22991 if (prev
&& LABEL_P (prev
))
22996 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
22997 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
22998 && !(e
->flags
& EDGE_FALLTHRU
))
23003 prev
= prev_active_insn (ret
);
23005 && ((JUMP_P (prev
) && any_condjump_p (prev
))
23008 /* Empty functions get branch mispredict even when the jump destination
23009 is not visible to us. */
23010 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
23015 emit_insn_before (gen_return_internal_long (), ret
);
23021 /* Implement machine specific optimizations. We implement padding of returns
23022 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23026 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
23027 ix86_pad_returns ();
23028 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
23029 ix86_avoid_jump_misspredicts ();
23032 /* Return nonzero when QImode register that must be represented via REX prefix
23035 x86_extended_QIreg_mentioned_p (rtx insn
)
23038 extract_insn_cached (insn
);
23039 for (i
= 0; i
< recog_data
.n_operands
; i
++)
23040 if (REG_P (recog_data
.operand
[i
])
23041 && REGNO (recog_data
.operand
[i
]) >= 4)
23046 /* Return nonzero when P points to register encoded via REX prefix.
23047 Called via for_each_rtx. */
23049 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
23051 unsigned int regno
;
23054 regno
= REGNO (*p
);
23055 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
23058 /* Return true when INSN mentions register that must be encoded using REX
23061 x86_extended_reg_mentioned_p (rtx insn
)
23063 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
23066 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23067 optabs would emit if we didn't have TFmode patterns. */
23070 x86_emit_floatuns (rtx operands
[2])
23072 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
23073 enum machine_mode mode
, inmode
;
23075 inmode
= GET_MODE (operands
[1]);
23076 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
23079 in
= force_reg (inmode
, operands
[1]);
23080 mode
= GET_MODE (out
);
23081 neglab
= gen_label_rtx ();
23082 donelab
= gen_label_rtx ();
23083 f0
= gen_reg_rtx (mode
);
23085 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
23087 expand_float (out
, in
, 0);
23089 emit_jump_insn (gen_jump (donelab
));
23092 emit_label (neglab
);
23094 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
23096 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
23098 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
23100 expand_float (f0
, i0
, 0);
23102 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
23104 emit_label (donelab
);
23107 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23108 with all elements equal to VAR. Return true if successful. */
23111 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
23112 rtx target
, rtx val
)
23114 enum machine_mode smode
, wsmode
, wvmode
;
23129 val
= force_reg (GET_MODE_INNER (mode
), val
);
23130 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
23131 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
23137 if (TARGET_SSE
|| TARGET_3DNOW_A
)
23139 val
= gen_lowpart (SImode
, val
);
23140 x
= gen_rtx_TRUNCATE (HImode
, val
);
23141 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
23142 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
23164 /* Extend HImode to SImode using a paradoxical SUBREG. */
23165 tmp1
= gen_reg_rtx (SImode
);
23166 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
23167 /* Insert the SImode value as low element of V4SImode vector. */
23168 tmp2
= gen_reg_rtx (V4SImode
);
23169 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
23170 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
23171 CONST0_RTX (V4SImode
),
23173 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
23174 /* Cast the V4SImode vector back to a V8HImode vector. */
23175 tmp1
= gen_reg_rtx (V8HImode
);
23176 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
23177 /* Duplicate the low short through the whole low SImode word. */
23178 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
23179 /* Cast the V8HImode vector back to a V4SImode vector. */
23180 tmp2
= gen_reg_rtx (V4SImode
);
23181 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
23182 /* Replicate the low element of the V4SImode vector. */
23183 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
23184 /* Cast the V2SImode back to V8HImode, and store in target. */
23185 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
23196 /* Extend QImode to SImode using a paradoxical SUBREG. */
23197 tmp1
= gen_reg_rtx (SImode
);
23198 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
23199 /* Insert the SImode value as low element of V4SImode vector. */
23200 tmp2
= gen_reg_rtx (V4SImode
);
23201 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
23202 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
23203 CONST0_RTX (V4SImode
),
23205 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
23206 /* Cast the V4SImode vector back to a V16QImode vector. */
23207 tmp1
= gen_reg_rtx (V16QImode
);
23208 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
23209 /* Duplicate the low byte through the whole low SImode word. */
23210 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
23211 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
23212 /* Cast the V16QImode vector back to a V4SImode vector. */
23213 tmp2
= gen_reg_rtx (V4SImode
);
23214 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
23215 /* Replicate the low element of the V4SImode vector. */
23216 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
23217 /* Cast the V2SImode back to V16QImode, and store in target. */
23218 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
23226 /* Replicate the value once into the next wider mode and recurse. */
23227 val
= convert_modes (wsmode
, smode
, val
, true);
23228 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
23229 GEN_INT (GET_MODE_BITSIZE (smode
)),
23230 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23231 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
23233 x
= gen_reg_rtx (wvmode
);
23234 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
23235 gcc_unreachable ();
23236 emit_move_insn (target
, gen_lowpart (mode
, x
));
23244 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23245 whose ONE_VAR element is VAR, and other elements are zero. Return true
23249 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
23250 rtx target
, rtx var
, int one_var
)
23252 enum machine_mode vsimode
;
23268 var
= force_reg (GET_MODE_INNER (mode
), var
);
23269 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
23270 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
23275 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
23276 new_target
= gen_reg_rtx (mode
);
23278 new_target
= target
;
23279 var
= force_reg (GET_MODE_INNER (mode
), var
);
23280 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
23281 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
23282 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
23285 /* We need to shuffle the value to the correct position, so
23286 create a new pseudo to store the intermediate result. */
23288 /* With SSE2, we can use the integer shuffle insns. */
23289 if (mode
!= V4SFmode
&& TARGET_SSE2
)
23291 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
23293 GEN_INT (one_var
== 1 ? 0 : 1),
23294 GEN_INT (one_var
== 2 ? 0 : 1),
23295 GEN_INT (one_var
== 3 ? 0 : 1)));
23296 if (target
!= new_target
)
23297 emit_move_insn (target
, new_target
);
23301 /* Otherwise convert the intermediate result to V4SFmode and
23302 use the SSE1 shuffle instructions. */
23303 if (mode
!= V4SFmode
)
23305 tmp
= gen_reg_rtx (V4SFmode
);
23306 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
23311 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
23313 GEN_INT (one_var
== 1 ? 0 : 1),
23314 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
23315 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
23317 if (mode
!= V4SFmode
)
23318 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
23319 else if (tmp
!= target
)
23320 emit_move_insn (target
, tmp
);
23322 else if (target
!= new_target
)
23323 emit_move_insn (target
, new_target
);
23328 vsimode
= V4SImode
;
23334 vsimode
= V2SImode
;
23340 /* Zero extend the variable element to SImode and recurse. */
23341 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
23343 x
= gen_reg_rtx (vsimode
);
23344 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
23346 gcc_unreachable ();
23348 emit_move_insn (target
, gen_lowpart (mode
, x
));
23356 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23357 consisting of the values in VALS. It is known that all elements
23358 except ONE_VAR are constants. Return true if successful. */
23361 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
23362 rtx target
, rtx vals
, int one_var
)
23364 rtx var
= XVECEXP (vals
, 0, one_var
);
23365 enum machine_mode wmode
;
23368 const_vec
= copy_rtx (vals
);
23369 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
23370 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
23378 /* For the two element vectors, it's just as easy to use
23379 the general case. */
23395 /* There's no way to set one QImode entry easily. Combine
23396 the variable value with its adjacent constant value, and
23397 promote to an HImode set. */
23398 x
= XVECEXP (vals
, 0, one_var
^ 1);
23401 var
= convert_modes (HImode
, QImode
, var
, true);
23402 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
23403 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23404 x
= GEN_INT (INTVAL (x
) & 0xff);
23408 var
= convert_modes (HImode
, QImode
, var
, true);
23409 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
23411 if (x
!= const0_rtx
)
23412 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
23413 1, OPTAB_LIB_WIDEN
);
23415 x
= gen_reg_rtx (wmode
);
23416 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
23417 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
23419 emit_move_insn (target
, gen_lowpart (mode
, x
));
23426 emit_move_insn (target
, const_vec
);
23427 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
23431 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23432 all values variable, and none identical. */
23435 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
23436 rtx target
, rtx vals
)
23438 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
23439 rtx op0
= NULL
, op1
= NULL
;
23440 bool use_vec_concat
= false;
23446 if (!mmx_ok
&& !TARGET_SSE
)
23452 /* For the two element vectors, we always implement VEC_CONCAT. */
23453 op0
= XVECEXP (vals
, 0, 0);
23454 op1
= XVECEXP (vals
, 0, 1);
23455 use_vec_concat
= true;
23459 half_mode
= V2SFmode
;
23462 half_mode
= V2SImode
;
23468 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23469 Recurse to load the two halves. */
23471 op0
= gen_reg_rtx (half_mode
);
23472 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
23473 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
23475 op1
= gen_reg_rtx (half_mode
);
23476 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
23477 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
23479 use_vec_concat
= true;
23490 gcc_unreachable ();
23493 if (use_vec_concat
)
23495 if (!register_operand (op0
, half_mode
))
23496 op0
= force_reg (half_mode
, op0
);
23497 if (!register_operand (op1
, half_mode
))
23498 op1
= force_reg (half_mode
, op1
);
23500 emit_insn (gen_rtx_SET (VOIDmode
, target
,
23501 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
23505 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
23506 enum machine_mode inner_mode
;
23507 rtx words
[4], shift
;
23509 inner_mode
= GET_MODE_INNER (mode
);
23510 n_elts
= GET_MODE_NUNITS (mode
);
23511 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
23512 n_elt_per_word
= n_elts
/ n_words
;
23513 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
23515 for (i
= 0; i
< n_words
; ++i
)
23517 rtx word
= NULL_RTX
;
23519 for (j
= 0; j
< n_elt_per_word
; ++j
)
23521 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
23522 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
23528 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
23529 word
, 1, OPTAB_LIB_WIDEN
);
23530 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
23531 word
, 1, OPTAB_LIB_WIDEN
);
23539 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
23540 else if (n_words
== 2)
23542 rtx tmp
= gen_reg_rtx (mode
);
23543 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
23544 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
23545 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
23546 emit_move_insn (target
, tmp
);
23548 else if (n_words
== 4)
23550 rtx tmp
= gen_reg_rtx (V4SImode
);
23551 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
23552 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
23553 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
23556 gcc_unreachable ();
23560 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23561 instructions unless MMX_OK is true. */
23564 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
23566 enum machine_mode mode
= GET_MODE (target
);
23567 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23568 int n_elts
= GET_MODE_NUNITS (mode
);
23569 int n_var
= 0, one_var
= -1;
23570 bool all_same
= true, all_const_zero
= true;
23574 for (i
= 0; i
< n_elts
; ++i
)
23576 x
= XVECEXP (vals
, 0, i
);
23577 if (!CONSTANT_P (x
))
23578 n_var
++, one_var
= i
;
23579 else if (x
!= CONST0_RTX (inner_mode
))
23580 all_const_zero
= false;
23581 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
23585 /* Constants are best loaded from the constant pool. */
23588 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
23592 /* If all values are identical, broadcast the value. */
23594 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
23595 XVECEXP (vals
, 0, 0)))
23598 /* Values where only one field is non-constant are best loaded from
23599 the pool and overwritten via move later. */
23603 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
23604 XVECEXP (vals
, 0, one_var
),
23608 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
23612 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
23616 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
23618 enum machine_mode mode
= GET_MODE (target
);
23619 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23620 bool use_vec_merge
= false;
23629 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
23630 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
23632 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
23634 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
23635 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23641 use_vec_merge
= TARGET_SSE4_1
;
23649 /* For the two element vectors, we implement a VEC_CONCAT with
23650 the extraction of the other element. */
23652 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
23653 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
23656 op0
= val
, op1
= tmp
;
23658 op0
= tmp
, op1
= val
;
23660 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
23661 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23666 use_vec_merge
= TARGET_SSE4_1
;
23673 use_vec_merge
= true;
23677 /* tmp = target = A B C D */
23678 tmp
= copy_to_reg (target
);
23679 /* target = A A B B */
23680 emit_insn (gen_sse_unpcklps (target
, target
, target
));
23681 /* target = X A B B */
23682 ix86_expand_vector_set (false, target
, val
, 0);
23683 /* target = A X C D */
23684 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23685 GEN_INT (1), GEN_INT (0),
23686 GEN_INT (2+4), GEN_INT (3+4)));
23690 /* tmp = target = A B C D */
23691 tmp
= copy_to_reg (target
);
23692 /* tmp = X B C D */
23693 ix86_expand_vector_set (false, tmp
, val
, 0);
23694 /* target = A B X D */
23695 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23696 GEN_INT (0), GEN_INT (1),
23697 GEN_INT (0+4), GEN_INT (3+4)));
23701 /* tmp = target = A B C D */
23702 tmp
= copy_to_reg (target
);
23703 /* tmp = X B C D */
23704 ix86_expand_vector_set (false, tmp
, val
, 0);
23705 /* target = A B X D */
23706 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23707 GEN_INT (0), GEN_INT (1),
23708 GEN_INT (2+4), GEN_INT (0+4)));
23712 gcc_unreachable ();
23717 use_vec_merge
= TARGET_SSE4_1
;
23721 /* Element 0 handled by vec_merge below. */
23724 use_vec_merge
= true;
23730 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23731 store into element 0, then shuffle them back. */
23735 order
[0] = GEN_INT (elt
);
23736 order
[1] = const1_rtx
;
23737 order
[2] = const2_rtx
;
23738 order
[3] = GEN_INT (3);
23739 order
[elt
] = const0_rtx
;
23741 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
23742 order
[1], order
[2], order
[3]));
23744 ix86_expand_vector_set (false, target
, val
, 0);
23746 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
23747 order
[1], order
[2], order
[3]));
23751 /* For SSE1, we have to reuse the V4SF code. */
23752 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
23753 gen_lowpart (SFmode
, val
), elt
);
23758 use_vec_merge
= TARGET_SSE2
;
23761 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
23765 use_vec_merge
= TARGET_SSE4_1
;
23775 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
23776 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
23777 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23781 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
23783 emit_move_insn (mem
, target
);
23785 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
23786 emit_move_insn (tmp
, val
);
23788 emit_move_insn (target
, mem
);
23793 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
23795 enum machine_mode mode
= GET_MODE (vec
);
23796 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23797 bool use_vec_extr
= false;
23810 use_vec_extr
= true;
23814 use_vec_extr
= TARGET_SSE4_1
;
23826 tmp
= gen_reg_rtx (mode
);
23827 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
23828 GEN_INT (elt
), GEN_INT (elt
),
23829 GEN_INT (elt
+4), GEN_INT (elt
+4)));
23833 tmp
= gen_reg_rtx (mode
);
23834 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
23838 gcc_unreachable ();
23841 use_vec_extr
= true;
23846 use_vec_extr
= TARGET_SSE4_1
;
23860 tmp
= gen_reg_rtx (mode
);
23861 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
23862 GEN_INT (elt
), GEN_INT (elt
),
23863 GEN_INT (elt
), GEN_INT (elt
)));
23867 tmp
= gen_reg_rtx (mode
);
23868 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
23872 gcc_unreachable ();
23875 use_vec_extr
= true;
23880 /* For SSE1, we have to reuse the V4SF code. */
23881 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
23882 gen_lowpart (V4SFmode
, vec
), elt
);
23888 use_vec_extr
= TARGET_SSE2
;
23891 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
23895 use_vec_extr
= TARGET_SSE4_1
;
23899 /* ??? Could extract the appropriate HImode element and shift. */
23906 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
23907 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
23909 /* Let the rtl optimizers know about the zero extension performed. */
23910 if (inner_mode
== QImode
|| inner_mode
== HImode
)
23912 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
23913 target
= gen_lowpart (SImode
, target
);
23916 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23920 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
23922 emit_move_insn (mem
, vec
);
23924 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
23925 emit_move_insn (target
, tmp
);
23929 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23930 pattern to reduce; DEST is the destination; IN is the input vector. */
23933 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
23935 rtx tmp1
, tmp2
, tmp3
;
23937 tmp1
= gen_reg_rtx (V4SFmode
);
23938 tmp2
= gen_reg_rtx (V4SFmode
);
23939 tmp3
= gen_reg_rtx (V4SFmode
);
23941 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
23942 emit_insn (fn (tmp2
, tmp1
, in
));
23944 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
23945 GEN_INT (1), GEN_INT (1),
23946 GEN_INT (1+4), GEN_INT (1+4)));
23947 emit_insn (fn (dest
, tmp2
, tmp3
));
23950 /* Target hook for scalar_mode_supported_p. */
23952 ix86_scalar_mode_supported_p (enum machine_mode mode
)
23954 if (DECIMAL_FLOAT_MODE_P (mode
))
23956 else if (mode
== TFmode
)
23957 return TARGET_64BIT
;
23959 return default_scalar_mode_supported_p (mode
);
23962 /* Implements target hook vector_mode_supported_p. */
23964 ix86_vector_mode_supported_p (enum machine_mode mode
)
23966 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
23968 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
23970 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
23972 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
23977 /* Target hook for c_mode_for_suffix. */
23978 static enum machine_mode
23979 ix86_c_mode_for_suffix (char suffix
)
23981 if (TARGET_64BIT
&& suffix
== 'q')
23983 if (TARGET_MMX
&& suffix
== 'w')
23989 /* Worker function for TARGET_MD_ASM_CLOBBERS.
23991 We do this in the new i386 backend to maintain source compatibility
23992 with the old cc0-based compiler. */
23995 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
23996 tree inputs ATTRIBUTE_UNUSED
,
23999 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
24001 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
24006 /* Implements target vector targetm.asm.encode_section_info. This
24007 is not used by netware. */
24009 static void ATTRIBUTE_UNUSED
24010 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
24012 default_encode_section_info (decl
, rtl
, first
);
24014 if (TREE_CODE (decl
) == VAR_DECL
24015 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
24016 && ix86_in_large_data_p (decl
))
24017 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
24020 /* Worker function for REVERSE_CONDITION. */
24023 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
24025 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
24026 ? reverse_condition (code
)
24027 : reverse_condition_maybe_unordered (code
));
24030 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24034 output_387_reg_move (rtx insn
, rtx
*operands
)
24036 if (REG_P (operands
[0]))
24038 if (REG_P (operands
[1])
24039 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
24041 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
24042 return output_387_ffreep (operands
, 0);
24043 return "fstp\t%y0";
24045 if (STACK_TOP_P (operands
[0]))
24046 return "fld%z1\t%y1";
24049 else if (MEM_P (operands
[0]))
24051 gcc_assert (REG_P (operands
[1]));
24052 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
24053 return "fstp%z0\t%y0";
24056 /* There is no non-popping store to memory for XFmode.
24057 So if we need one, follow the store with a load. */
24058 if (GET_MODE (operands
[0]) == XFmode
)
24059 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24061 return "fst%z0\t%y0";
24068 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24069 FP status register is set. */
24072 ix86_emit_fp_unordered_jump (rtx label
)
24074 rtx reg
= gen_reg_rtx (HImode
);
24077 emit_insn (gen_x86_fnstsw_1 (reg
));
24079 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
24081 emit_insn (gen_x86_sahf_1 (reg
));
24083 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
24084 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
24088 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
24090 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24091 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
24094 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
24095 gen_rtx_LABEL_REF (VOIDmode
, label
),
24097 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
24099 emit_jump_insn (temp
);
24100 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
24103 /* Output code to perform a log1p XFmode calculation. */
24105 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
24107 rtx label1
= gen_label_rtx ();
24108 rtx label2
= gen_label_rtx ();
24110 rtx tmp
= gen_reg_rtx (XFmode
);
24111 rtx tmp2
= gen_reg_rtx (XFmode
);
24113 emit_insn (gen_absxf2 (tmp
, op1
));
24114 emit_insn (gen_cmpxf (tmp
,
24115 CONST_DOUBLE_FROM_REAL_VALUE (
24116 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
24118 emit_jump_insn (gen_bge (label1
));
24120 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
24121 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
24122 emit_jump (label2
);
24124 emit_label (label1
);
24125 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
24126 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
24127 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
24128 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
24130 emit_label (label2
);
24133 /* Output code to perform a Newton-Rhapson approximation of a single precision
24134 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24136 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
24138 rtx x0
, x1
, e0
, e1
, two
;
24140 x0
= gen_reg_rtx (mode
);
24141 e0
= gen_reg_rtx (mode
);
24142 e1
= gen_reg_rtx (mode
);
24143 x1
= gen_reg_rtx (mode
);
24145 two
= CONST_DOUBLE_FROM_REAL_VALUE (dconst2
, SFmode
);
24147 if (VECTOR_MODE_P (mode
))
24148 two
= ix86_build_const_vector (SFmode
, true, two
);
24150 two
= force_reg (mode
, two
);
24152 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24154 /* x0 = 1./b estimate */
24155 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
24156 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
24159 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
24160 gen_rtx_MULT (mode
, x0
, b
)));
24162 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
24163 gen_rtx_MINUS (mode
, two
, e0
)));
24165 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
24166 gen_rtx_MULT (mode
, x0
, e1
)));
24168 emit_insn (gen_rtx_SET (VOIDmode
, res
,
24169 gen_rtx_MULT (mode
, a
, x1
)));
24172 /* Output code to perform a Newton-Rhapson approximation of a
24173 single precision floating point [reciprocal] square root. */
24175 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
24178 rtx x0
, e0
, e1
, e2
, e3
, three
, half
, zero
, mask
;
24180 x0
= gen_reg_rtx (mode
);
24181 e0
= gen_reg_rtx (mode
);
24182 e1
= gen_reg_rtx (mode
);
24183 e2
= gen_reg_rtx (mode
);
24184 e3
= gen_reg_rtx (mode
);
24186 three
= CONST_DOUBLE_FROM_REAL_VALUE (dconst3
, SFmode
);
24187 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, SFmode
);
24189 mask
= gen_reg_rtx (mode
);
24191 if (VECTOR_MODE_P (mode
))
24193 three
= ix86_build_const_vector (SFmode
, true, three
);
24194 half
= ix86_build_const_vector (SFmode
, true, half
);
24197 three
= force_reg (mode
, three
);
24198 half
= force_reg (mode
, half
);
24200 zero
= force_reg (mode
, CONST0_RTX(mode
));
24202 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24203 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
24205 /* Compare a to zero. */
24206 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
24207 gen_rtx_NE (mode
, a
, zero
)));
24209 /* x0 = 1./sqrt(a) estimate */
24210 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
24211 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
24213 /* Filter out infinity. */
24214 if (VECTOR_MODE_P (mode
))
24215 emit_insn (gen_rtx_SET (VOIDmode
, gen_lowpart (V4SFmode
, x0
),
24217 gen_lowpart (V4SFmode
, x0
),
24218 gen_lowpart (V4SFmode
, mask
))));
24220 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
24221 gen_rtx_AND (mode
, x0
, mask
)));
24224 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
24225 gen_rtx_MULT (mode
, x0
, a
)));
24227 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
24228 gen_rtx_MULT (mode
, e0
, x0
)));
24230 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
24231 gen_rtx_MINUS (mode
, three
, e1
)));
24234 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
24235 gen_rtx_MULT (mode
, half
, x0
)));
24238 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
24239 gen_rtx_MULT (mode
, half
, e0
)));
24240 /* ret = e2 * e3 */
24241 emit_insn (gen_rtx_SET (VOIDmode
, res
,
24242 gen_rtx_MULT (mode
, e2
, e3
)));
24245 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24247 static void ATTRIBUTE_UNUSED
24248 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
24251 /* With Binutils 2.15, the "@unwind" marker must be specified on
24252 every occurrence of the ".eh_frame" section, not just the first
24255 && strcmp (name
, ".eh_frame") == 0)
24257 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
24258 flags
& SECTION_WRITE
? "aw" : "a");
24261 default_elf_asm_named_section (name
, flags
, decl
);
24264 /* Return the mangling of TYPE if it is an extended fundamental type. */
24266 static const char *
24267 ix86_mangle_type (const_tree type
)
24269 type
= TYPE_MAIN_VARIANT (type
);
24271 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
24272 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
24275 switch (TYPE_MODE (type
))
24278 /* __float128 is "g". */
24281 /* "long double" or __float80 is "e". */
24288 /* For 32-bit code we can save PIC register setup by using
24289 __stack_chk_fail_local hidden function instead of calling
24290 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24291 register, so it is better to call __stack_chk_fail directly. */
24294 ix86_stack_protect_fail (void)
24296 return TARGET_64BIT
24297 ? default_external_stack_protect_fail ()
24298 : default_hidden_stack_protect_fail ();
24301 /* Select a format to encode pointers in exception handling data. CODE
24302 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24303 true if the symbol may be affected by dynamic relocations.
24305 ??? All x86 object file formats are capable of representing this.
24306 After all, the relocation needed is the same as for the call insn.
24307 Whether or not a particular assembler allows us to enter such, I
24308 guess we'll have to see. */
24310 asm_preferred_eh_data_format (int code
, int global
)
24314 int type
= DW_EH_PE_sdata8
;
24316 || ix86_cmodel
== CM_SMALL_PIC
24317 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
24318 type
= DW_EH_PE_sdata4
;
24319 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
24321 if (ix86_cmodel
== CM_SMALL
24322 || (ix86_cmodel
== CM_MEDIUM
&& code
))
24323 return DW_EH_PE_udata4
;
24324 return DW_EH_PE_absptr
;
24327 /* Expand copysign from SIGN to the positive value ABS_VALUE
24328 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24331 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
24333 enum machine_mode mode
= GET_MODE (sign
);
24334 rtx sgn
= gen_reg_rtx (mode
);
24335 if (mask
== NULL_RTX
)
24337 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
24338 if (!VECTOR_MODE_P (mode
))
24340 /* We need to generate a scalar mode mask in this case. */
24341 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
24342 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
24343 mask
= gen_reg_rtx (mode
);
24344 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
24348 mask
= gen_rtx_NOT (mode
, mask
);
24349 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
24350 gen_rtx_AND (mode
, mask
, sign
)));
24351 emit_insn (gen_rtx_SET (VOIDmode
, result
,
24352 gen_rtx_IOR (mode
, abs_value
, sgn
)));
24355 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24356 mask for masking out the sign-bit is stored in *SMASK, if that is
24359 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
24361 enum machine_mode mode
= GET_MODE (op0
);
24364 xa
= gen_reg_rtx (mode
);
24365 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
24366 if (!VECTOR_MODE_P (mode
))
24368 /* We need to generate a scalar mode mask in this case. */
24369 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
24370 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
24371 mask
= gen_reg_rtx (mode
);
24372 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
24374 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
24375 gen_rtx_AND (mode
, op0
, mask
)));
24383 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24384 swapping the operands if SWAP_OPERANDS is true. The expanded
24385 code is a forward jump to a newly created label in case the
24386 comparison is true. The generated label rtx is returned. */
24388 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
24389 bool swap_operands
)
24400 label
= gen_label_rtx ();
24401 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
24402 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24403 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
24404 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
24405 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
24406 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
24407 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
24408 JUMP_LABEL (tmp
) = label
;
24413 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24414 using comparison code CODE. Operands are swapped for the comparison if
24415 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24417 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
24418 bool swap_operands
)
24420 enum machine_mode mode
= GET_MODE (op0
);
24421 rtx mask
= gen_reg_rtx (mode
);
24430 if (mode
== DFmode
)
24431 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
24432 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
24434 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
24435 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
24440 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24441 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24443 ix86_gen_TWO52 (enum machine_mode mode
)
24445 REAL_VALUE_TYPE TWO52r
;
24448 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
24449 TWO52
= const_double_from_real_value (TWO52r
, mode
);
24450 TWO52
= force_reg (mode
, TWO52
);
24455 /* Expand SSE sequence for computing lround from OP1 storing
24458 ix86_expand_lround (rtx op0
, rtx op1
)
24460 /* C code for the stuff we're doing below:
24461 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24464 enum machine_mode mode
= GET_MODE (op1
);
24465 const struct real_format
*fmt
;
24466 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
24469 /* load nextafter (0.5, 0.0) */
24470 fmt
= REAL_MODE_FORMAT (mode
);
24471 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
24472 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
24474 /* adj = copysign (0.5, op1) */
24475 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
24476 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
24478 /* adj = op1 + adj */
24479 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
24481 /* op0 = (imode)adj */
24482 expand_fix (op0
, adj
, 0);
24485 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24488 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
24490 /* C code for the stuff we're doing below (for do_floor):
24492 xi -= (double)xi > op1 ? 1 : 0;
24495 enum machine_mode fmode
= GET_MODE (op1
);
24496 enum machine_mode imode
= GET_MODE (op0
);
24497 rtx ireg
, freg
, label
, tmp
;
24499 /* reg = (long)op1 */
24500 ireg
= gen_reg_rtx (imode
);
24501 expand_fix (ireg
, op1
, 0);
24503 /* freg = (double)reg */
24504 freg
= gen_reg_rtx (fmode
);
24505 expand_float (freg
, ireg
, 0);
24507 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24508 label
= ix86_expand_sse_compare_and_jump (UNLE
,
24509 freg
, op1
, !do_floor
);
24510 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
24511 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
24512 emit_move_insn (ireg
, tmp
);
24514 emit_label (label
);
24515 LABEL_NUSES (label
) = 1;
24517 emit_move_insn (op0
, ireg
);
24520 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24521 result in OPERAND0. */
24523 ix86_expand_rint (rtx operand0
, rtx operand1
)
24525 /* C code for the stuff we're doing below:
24526 xa = fabs (operand1);
24527 if (!isless (xa, 2**52))
24529 xa = xa + 2**52 - 2**52;
24530 return copysign (xa, operand1);
24532 enum machine_mode mode
= GET_MODE (operand0
);
24533 rtx res
, xa
, label
, TWO52
, mask
;
24535 res
= gen_reg_rtx (mode
);
24536 emit_move_insn (res
, operand1
);
24538 /* xa = abs (operand1) */
24539 xa
= ix86_expand_sse_fabs (res
, &mask
);
24541 /* if (!isless (xa, TWO52)) goto label; */
24542 TWO52
= ix86_gen_TWO52 (mode
);
24543 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24545 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24546 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
24548 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
24550 emit_label (label
);
24551 LABEL_NUSES (label
) = 1;
24553 emit_move_insn (operand0
, res
);
24556 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24559 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
24561 /* C code for the stuff we expand below.
24562 double xa = fabs (x), x2;
24563 if (!isless (xa, TWO52))
24565 xa = xa + TWO52 - TWO52;
24566 x2 = copysign (xa, x);
24575 enum machine_mode mode
= GET_MODE (operand0
);
24576 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
24578 TWO52
= ix86_gen_TWO52 (mode
);
24580 /* Temporary for holding the result, initialized to the input
24581 operand to ease control flow. */
24582 res
= gen_reg_rtx (mode
);
24583 emit_move_insn (res
, operand1
);
24585 /* xa = abs (operand1) */
24586 xa
= ix86_expand_sse_fabs (res
, &mask
);
24588 /* if (!isless (xa, TWO52)) goto label; */
24589 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24591 /* xa = xa + TWO52 - TWO52; */
24592 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24593 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
24595 /* xa = copysign (xa, operand1) */
24596 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
24598 /* generate 1.0 or -1.0 */
24599 one
= force_reg (mode
,
24600 const_double_from_real_value (do_floor
24601 ? dconst1
: dconstm1
, mode
));
24603 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24604 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
24605 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24606 gen_rtx_AND (mode
, one
, tmp
)));
24607 /* We always need to subtract here to preserve signed zero. */
24608 tmp
= expand_simple_binop (mode
, MINUS
,
24609 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24610 emit_move_insn (res
, tmp
);
24612 emit_label (label
);
24613 LABEL_NUSES (label
) = 1;
24615 emit_move_insn (operand0
, res
);
24618 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24621 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
24623 /* C code for the stuff we expand below.
24624 double xa = fabs (x), x2;
24625 if (!isless (xa, TWO52))
24627 x2 = (double)(long)x;
24634 if (HONOR_SIGNED_ZEROS (mode))
24635 return copysign (x2, x);
24638 enum machine_mode mode
= GET_MODE (operand0
);
24639 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
24641 TWO52
= ix86_gen_TWO52 (mode
);
24643 /* Temporary for holding the result, initialized to the input
24644 operand to ease control flow. */
24645 res
= gen_reg_rtx (mode
);
24646 emit_move_insn (res
, operand1
);
24648 /* xa = abs (operand1) */
24649 xa
= ix86_expand_sse_fabs (res
, &mask
);
24651 /* if (!isless (xa, TWO52)) goto label; */
24652 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24654 /* xa = (double)(long)x */
24655 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24656 expand_fix (xi
, res
, 0);
24657 expand_float (xa
, xi
, 0);
24660 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
24662 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24663 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
24664 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24665 gen_rtx_AND (mode
, one
, tmp
)));
24666 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
24667 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24668 emit_move_insn (res
, tmp
);
24670 if (HONOR_SIGNED_ZEROS (mode
))
24671 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
24673 emit_label (label
);
24674 LABEL_NUSES (label
) = 1;
24676 emit_move_insn (operand0
, res
);
24679 /* Expand SSE sequence for computing round from OPERAND1 storing
24680 into OPERAND0. Sequence that works without relying on DImode truncation
24681 via cvttsd2siq that is only available on 64bit targets. */
24683 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
24685 /* C code for the stuff we expand below.
24686 double xa = fabs (x), xa2, x2;
24687 if (!isless (xa, TWO52))
24689 Using the absolute value and copying back sign makes
24690 -0.0 -> -0.0 correct.
24691 xa2 = xa + TWO52 - TWO52;
24696 else if (dxa > 0.5)
24698 x2 = copysign (xa2, x);
24701 enum machine_mode mode
= GET_MODE (operand0
);
24702 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
24704 TWO52
= ix86_gen_TWO52 (mode
);
24706 /* Temporary for holding the result, initialized to the input
24707 operand to ease control flow. */
24708 res
= gen_reg_rtx (mode
);
24709 emit_move_insn (res
, operand1
);
24711 /* xa = abs (operand1) */
24712 xa
= ix86_expand_sse_fabs (res
, &mask
);
24714 /* if (!isless (xa, TWO52)) goto label; */
24715 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24717 /* xa2 = xa + TWO52 - TWO52; */
24718 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24719 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
24721 /* dxa = xa2 - xa; */
24722 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
24724 /* generate 0.5, 1.0 and -0.5 */
24725 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
24726 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
24727 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
24731 tmp
= gen_reg_rtx (mode
);
24732 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24733 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
24734 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24735 gen_rtx_AND (mode
, one
, tmp
)));
24736 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24737 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24738 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
24739 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24740 gen_rtx_AND (mode
, one
, tmp
)));
24741 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24743 /* res = copysign (xa2, operand1) */
24744 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
24746 emit_label (label
);
24747 LABEL_NUSES (label
) = 1;
24749 emit_move_insn (operand0
, res
);
24752 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24755 ix86_expand_trunc (rtx operand0
, rtx operand1
)
24757 /* C code for SSE variant we expand below.
24758 double xa = fabs (x), x2;
24759 if (!isless (xa, TWO52))
24761 x2 = (double)(long)x;
24762 if (HONOR_SIGNED_ZEROS (mode))
24763 return copysign (x2, x);
24766 enum machine_mode mode
= GET_MODE (operand0
);
24767 rtx xa
, xi
, TWO52
, label
, res
, mask
;
24769 TWO52
= ix86_gen_TWO52 (mode
);
24771 /* Temporary for holding the result, initialized to the input
24772 operand to ease control flow. */
24773 res
= gen_reg_rtx (mode
);
24774 emit_move_insn (res
, operand1
);
24776 /* xa = abs (operand1) */
24777 xa
= ix86_expand_sse_fabs (res
, &mask
);
24779 /* if (!isless (xa, TWO52)) goto label; */
24780 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24782 /* x = (double)(long)x */
24783 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24784 expand_fix (xi
, res
, 0);
24785 expand_float (res
, xi
, 0);
24787 if (HONOR_SIGNED_ZEROS (mode
))
24788 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
24790 emit_label (label
);
24791 LABEL_NUSES (label
) = 1;
24793 emit_move_insn (operand0
, res
);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  This variant stays entirely inside SSE registers
   (no integer conversion), using the classic "add and subtract 2**52"
   trick plus an explicit compensation step.

   C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
	Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
 */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* 2**52: any |x| >= TWO52 is already an integer, so such inputs
     bypass the rounding sequence via the branch below.  */
  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1); smask receives the sign-bit mask for the
     final copysign step.  */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label;  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52;  Adding and subtracting 2**52 forces
     the fraction bits to be dropped (round-to-nearest), leaving an
     integer-valued double.  */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0).  The add/sub above
     rounds to nearest, so when it rounded *up* we must subtract one
     to get truncation semantics.  The compare mask is all-ones when
     res > xa; AND-ing it with 1.0 yields the 0.0/1.0 adjustment.  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) -- restore the original sign that
     was stripped by the fabs above.  */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round (nearest, halfway cases away
   from zero) from OPERAND1 storing into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* |x| >= 2**52 is already an integer; skip the rounding sequence.  */
  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0): pred_half = 0.5 - 2**(-p-1), i.e. the
     largest representable value strictly below 0.5 (p = precision of
     MODE).  Adding this instead of exactly 0.5 avoids values just
     below an integer boundary being bumped up by double rounding.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa -- truncate via an integer round trip
     (DImode for DFmode input, SImode for SFmode).  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) -- restore the sign stripped by
     the fabs above.  */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Validate whether a SSE5 instruction is valid or not.
   OPERANDS is the array of operands.
   NUM is the number of operands.
   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
   NUM_MEMORY is the maximum number of memory operands to accept.

   Returns true if the operand combination is encodable.  NOTE(review):
   reconstructed from a garbled listing -- several dropped lines
   (declarations, returns, uses_oc0 guards) were restored from the
   surviving comments and conditions; confirm against upstream i386.c.  */
bool
ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0,
		      int num_memory)
{
  int mem_mask;		/* bit i set <=> operands[i] is a memory operand */
  int mem_count;	/* total number of memory operands */
  int i;

  /* Count the number of memory arguments */
  mem_mask = 0;
  mem_count = 0;
  for (i = 0; i < num; i++)
    {
      enum machine_mode mode = GET_MODE (operands[i]);
      if (register_operand (operands[i], mode))
	;

      else if (memory_operand (operands[i], mode))
	{
	  mem_mask |= (1 << i);
	  mem_count++;
	}

      else
	{
	  rtx pattern = PATTERN (insn);

	  /* allow 0 for pcmov: a constant-zero source operand is only
	     acceptable in the IF_THEN_ELSE (conditional move) pattern,
	     and only past the first two operands.  */
	  if (GET_CODE (pattern) != SET
	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
	      || i < 2
	      || operands[i] != CONST0_RTX (mode))
	    return false;
	}
    }

  /* If there were no memory operations, allow the insn */
  if (mem_mask == 0)
    return true;

  /* Do not allow the destination register to be a memory operand.  */
  else if (mem_mask & (1 << 0))
    return false;

  /* If there are too many memory operations, disallow the instruction.  While
     the hardware only allows 1 memory reference, before register allocation
     for some insns, we allow two memory operations sometimes in order to allow
     code like the following to be optimized:

	float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }

    or similar cases that are vectorized into using the fmaddss
    instruction.  */
  else if (mem_count > num_memory)
    return false;

  /* Don't allow more than one memory operation if not optimizing.  */
  else if (mem_count > 1 && !optimize)
    return false;

  else if (num == 4 && mem_count == 1)
    {
      /* formats (destination is the first argument), example fmaddss:
	 xmm1, xmm1, xmm2, xmm3/mem
	 xmm1, xmm1, xmm2/mem, xmm3
	 xmm1, xmm2, xmm3/mem, xmm1
	 xmm1, xmm2/mem, xmm3, xmm1 */
      if (uses_oc0)
	return ((mem_mask == (1 << 1))
		|| (mem_mask == (1 << 2))
		|| (mem_mask == (1 << 3)));

      /* format, example pmacsdd:
	 xmm1, xmm2, xmm3/mem, xmm1 */
      else
	return (mem_mask == (1 << 2));
    }

  else if (num == 4 && num_memory == 2)
    {
      /* If there are two memory operations, we can load one of the memory ops
	 into the destination register.  This is for optimizing the
	 multiply/add ops, which the combiner has optimized both the multiply
	 and the add insns to have a memory operation.  We have to be careful
	 that the destination doesn't overlap with the inputs.  */
      rtx op0 = operands[0];

      if (reg_mentioned_p (op0, operands[1])
	  || reg_mentioned_p (op0, operands[2])
	  || reg_mentioned_p (op0, operands[3]))
	return false;

      /* formats (destination is the first argument), example fmaddss:
	 xmm1, xmm1, xmm2, xmm3/mem
	 xmm1, xmm1, xmm2/mem, xmm3
	 xmm1, xmm2, xmm3/mem, xmm1
	 xmm1, xmm2/mem, xmm3, xmm1

	 For the oc0 case, we will load either operands[1] or operands[3] into
	 operands[0], so any combination of 2 memory operands is ok.  */
      if (uses_oc0)
	return true;

      /* format, example pmacsdd:
	 xmm1, xmm2, xmm3/mem, xmm1

	 For the integer multiply/add instructions be more restrictive and
	 require operands[2] and operands[3] to be the memory operands.  */
      else
	return (mem_mask == ((1 << 2) | (1 << 3)));
    }

  else if (num == 3 && num_memory == 1)
    {
      /* formats, example protb:
	 xmm1, xmm2, xmm3/mem
	 xmm1, xmm2/mem, xmm3 */
      if (uses_oc0)
	return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));

      /* format, example comeq:
	 xmm1, xmm2, xmm3/mem */
      else
	return (mem_mask == (1 << 2));
    }

  else
    gcc_unreachable ();

  return false;
}
/* Fixup an SSE5 instruction that has 2 memory input references into a form the
   hardware will allow by using the destination register to load one of the
   memory operations.  Presently this is used by the multiply/add routines to
   allow 2 memory references.

   Mutates OPERANDS in place: whichever of operands[1]/operands[3] is a
   memory operand is loaded into operands[0] and replaced by it.
   NOTE(review): the guard condition below was reconstructed from a
   garbled listing; confirm against upstream i386.c.  */
void
ix86_expand_sse5_multiple_memory (rtx operands[],
				  int num,
				  enum machine_mode mode)
{
  rtx op0 = operands[0];

  /* Only the 4-operand multiply/add forms are handled, and the
     destination must be a register that does not overlap any input
     (ix86_sse5_valid_op_p already checked this for valid insns).  */
  if (num != 4
      || memory_operand (op0, mode)
      || reg_mentioned_p (op0, operands[1])
      || reg_mentioned_p (op0, operands[2])
      || reg_mentioned_p (op0, operands[3]))
    gcc_unreachable ();

  /* For 2 memory operands, pick either operands[1] or operands[3] to move into
     the destination register.  */
  if (memory_operand (operands[1], mode))
    {
      emit_move_insn (op0, operands[1]);
      operands[1] = op0;
    }
  else if (memory_operand (operands[3], mode))
    {
      emit_move_insn (op0, operands[3]);
      operands[3] = op0;
    }
  else
    gcc_unreachable ();

  return;
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* DLL import/export and shared-section attributes (Windows targets).  */
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* Struct-layout attributes selecting MS vs. GCC bitfield rules.  */
  { "ms_struct",  0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* Sentinel marking the end of the table.  */
  { NULL,        0, 0, false, false, false, NULL }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.
   RUNTIME_TEST is true when the vectorizer asks for the cost of the
   runtime guard it emits before a vectorized loop.  */
static int
x86_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in individual target tables have to be tuned or new
     fields may be needed.  For eg. on K8, the default branch path is the
     not-taken path.  If the taken path is predicted correctly, the minimum
     penalty of going down the taken-path is 1 cycle.  If the taken-path is
     not predicted correctly, then the minimum penalty is 10 cycles.  */

  if (runtime_test)
    return (-(ix86_cost->cond_taken_branch_cost));
  else
    return 0;
}
/* Initialize the GCC target structure.  Each TARGET_* macro below
   overrides a default hook; the values are collected by
   TARGET_INITIALIZER into the targetm vector at the end of the file.
   NOTE(review): the #else/#endif lines and conditional guards were
   reconstructed from a garbled listing to balance the visible
   conditionals -- confirm against upstream i386.c.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
/* NOTE(review): ASM_QUAD is only defined on targets with a 64-bit
   assembler directive; presumably this was guarded by #ifdef ASM_QUAD
   upstream -- confirm.  */
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

/* x86 has no alignment requirement on data, so the unaligned emitters
   are the same as the aligned ones.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

/* NOTE(review): TLS support presumably depends on assembler support,
   i.e. #ifdef HAVE_AS_TLS upstream -- confirm.  */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

/* NOTE(review): darwin_binds_local_p is Darwin-only; presumably this
   was guarded by #if TARGET_MACHO upstream -- confirm.  */
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* NOTE(review): DWARF DTPREL output is TLS-related; presumably guarded
   by #ifdef HAVE_AS_TLS upstream -- confirm.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* The target hook vector consulted by the middle end; assembled from
   all the TARGET_* macro overrides defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Garbage-collector roots generated by gengtype for this file.  */
#include "gt-i386.h"