1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Forward declaration of the vectorizer cost hook; being `static`, it must be
   defined later in this translation unit.  */
55 static int x86_builtin_vectorization_cost (bool);
57 #ifndef CHECK_STACK_LIMIT
/* NOTE(review): -1 appears to mean "no stack-limit checking" unless a target
   header overrides it — confirm against i386.h.  The matching #endif is not
   visible in this extraction.  */
58 #define CHECK_STACK_LIMIT (-1)
61 /* Return index of given mode in mult and division cost tables. */
/* NOTE(review): the final arm of this conditional chain (the "other" index)
   is missing from this listing — verify against the original file.  */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy: unconditionally fall back to the library call.  */
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size (-Os): entries are COSTS_N_BYTES,
   i.e. approximate encoded instruction sizes in bytes, not latencies.
   NOTE(review): this listing has gaps in the embedded line numbering (some
   fields and the closing brace were dropped by extraction) — verify against
   the original i386.c.  */
76 struct processor_costs size_cost
= { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: rep movsb/stosb minimizes code size.  */
128 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
129 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
130 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
131 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
145 /* Processor costs (relative to an add) */
/* Cost table tuned for the original Intel 386 (all costs in COSTS_N_INSNS,
   i.e. relative to an add).  */
147 struct processor_costs i386_cost
= { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: rep movsb/stosb for all sizes; no 64-bit variant.  */
199 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
200 DUMMY_STRINGOP_ALGS
},
201 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
202 DUMMY_STRINGOP_ALGS
},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the Intel 486.  */
217 struct processor_costs i486_cost
= { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: rep movsl/stosl (4-byte) for all sizes.  */
271 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
272 DUMMY_STRINGOP_ALGS
},
273 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
274 DUMMY_STRINGOP_ALGS
},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the Intel Pentium (P5).  */
289 struct processor_costs pentium_cost
= {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then a library call.  */
341 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
342 DUMMY_STRINGOP_ALGS
},
/* memset: rep stosl for all sizes.  */
343 {{libcall
, {{-1, rep_prefix_4_byte
}}},
344 DUMMY_STRINGOP_ALGS
},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the Intel PentiumPro / P6 family.  */
359 struct processor_costs pentiumpro_cost
= {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks inline loop is still a noticeable win, for bigger
413 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
414 more expensive startup time in CPU, but after 4K the difference is down in the noise.
416 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
417 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
418 DUMMY_STRINGOP_ALGS
},
419 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
420 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
421 DUMMY_STRINGOP_ALGS
},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the AMD Geode.  */
436 struct processor_costs geode_cost
= {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl/stosl up to 256 bytes, library call beyond.  */
489 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
490 DUMMY_STRINGOP_ALGS
},
491 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
492 DUMMY_STRINGOP_ALGS
},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the AMD K6.  */
507 struct processor_costs k6_cost
= {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl/stosl up to 256 bytes, library call beyond.  */
562 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
563 DUMMY_STRINGOP_ALGS
},
564 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
565 DUMMY_STRINGOP_ALGS
},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the AMD Athlon.  */
580 struct processor_costs athlon_cost
= {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with REP prefix (relative to loops)
633 compared to K8. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
636 DUMMY_STRINGOP_ALGS
},
637 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
638 DUMMY_STRINGOP_ALGS
},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the AMD K8; includes 64-bit (rep_prefix_8_byte)
   stringop strategies, unlike the 32-bit-only tables above.  */
653 struct processor_costs k8_cost
= {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set number of simultaneous prefetches
699 to a large constant to reflect this (it probably is not a good idea not
700 to limit number of prefetches at all, as their execution also takes some
702 100, /* number of parallel prefetches */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has optimized REP instruction for medium sized blocks, but for very small
711 blocks it is better to use loop. For large blocks, libcall can do
712 nontemporary accesses and beat inline considerably. */
713 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
714 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
715 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
716 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
717 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for AMD Family 10h.
   NOTE(review): the "MOVD reg..., xmmreg ..." lines below were originally part
   of a comment table whose delimiters were lost in this extraction; they are
   left byte-identical here — restore the comment markers from the original
   i386.c before compiling.  */
731 struct processor_costs amdfam10_cost
= {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
776 MOVD reg64, xmmreg Double FADD 3
778 MOVD reg32, xmmreg Double FADD 3
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set number of simultaneous prefetches
785 to a large constant to reflect this (it probably is not a good idea not
786 to limit number of prefetches at all, as their execution also takes some
788 100, /* number of parallel prefetches */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
798 very small blocks it is better to use loop. For large blocks, libcall can
799 do nontemporary accesses and beat inline considerably. */
800 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
801 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
802 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
803 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
804 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for the Intel Pentium 4.
   NOTE(review): the memset strategy initializer below is missing its final
   entries (gap in the embedded numbering around original line 874) — verify
   against the original i386.c.  */
819 struct processor_costs pentium4_cost
= {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
872 DUMMY_STRINGOP_ALGS
},
873 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
875 DUMMY_STRINGOP_ALGS
},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
890 struct processor_costs nocona_cost
= {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
943 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
944 {100000, unrolled_loop
}, {-1, libcall
}}}},
945 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
947 {libcall
, {{24, loop
}, {64, unrolled_loop
},
948 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
963 struct processor_costs core2_cost
= {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of loading integer registers */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1015 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1016 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1017 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1018 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1019 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1020 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1036 struct processor_costs generic64_cost
= {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration lea is 2 cycles and more. With
1039 this cost however our current implementation of synth_mult results in
1040 use of unnecessary temporary registers causing regression on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1086 is increased to perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS
,
1095 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1096 {DUMMY_STRINGOP_ALGS
,
1097 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1113 struct processor_costs generic32_cost
= {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1166 DUMMY_STRINGOP_ALGS
},
1167 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1168 DUMMY_STRINGOP_ALGS
},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1182 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  Each m_* constant is a
   one-bit mask keyed by the PROCESSOR_* enumeration; the combined
   masks group CPUs that share tuning characteristics.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling for Generic64 seems like good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro base chips. */
1215 m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_CORE2
| m_GENERIC64
,
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
1219 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1224 /* X86_TUNE_USE_BIT_TEST */
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486
| m_PENT
| m_PPRO
| m_AMD_MULTIPLE
| m_K6
| m_CORE2
| m_GENERIC
,
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
| m_GENERIC
,
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1234 on simulation result. But after P4 was made, no performance benefit
1235 was observed with branch hints. It also increases the code size.
1236 As a result, icc never generates branch hints. */
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1244 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE
| m_PPRO
| m_PENT4
| m_NOCONA
1249 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on Generic32 compilation setting as well. However
1253 in current implementation the partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and is in conflict with partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2
| m_GENERIC
,
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386
| m_486
| m_K6_GEODE
,
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO
| m_AMD_MULTIPLE
| m_PENT
| m_CORE2
| m_GENERIC
),
1270 /* X86_TUNE_USE_MOV0 */
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1285 /* X86_TUNE_READ_MODIFY */
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_AMD_MULTIPLE
| m_CORE2
1290 | m_GENERIC
/* | m_PENT4 ? */,
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT
| m_486
| m_386
),
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386
| m_PENT4
| m_NOCONA
,
1298 /* X86_TUNE_QIMODE_MATH */
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls was more effective. */
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE
| m_PPRO
| m_K6_GEODE
| m_386
1318 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE
| m_PPRO
| m_386
| m_486
1325 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1330 | m_GENERIC
| m_GEODE
),
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here in between PPro/Pentium4 based chips that thread 128bit
1337 SSE registers as single units versus K8 based chips that divide SSE
1338 registers to two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 shows that disabling this option on P4 brings over 20% SPECfp regression,
1342 while enabling it on K8 brings roughly 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just lower part of scalar values in proper format leaving the
1352 upper part undefined. */
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO
| m_PENT4
| m_NOCONA
,
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1370 /* X86_TUNE_SHIFT1 */
1373 /* X86_TUNE_USE_FFREEP */
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO
| m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO
| m_AMD_MULTIPLE
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1389 /* X86_TUNE_USE_BT */
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4
| m_NOCONA
| m_GENERIC
),
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE
| m_CORE2
| m_GENERIC
,
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1408 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1417 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1433 from integer to FP. */
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386
| m_486
| m_PENT
| m_K6
),
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT
| m_PPRO
| m_AMD_MULTIPLE
| m_PENT4
1460 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1462 static enum stringop_alg stringop_alg
= no_stringop
;
1464 /* In case the average insn count for single function invocation is
1465 lower than this constant, emit fast (but longer) prologue and
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1469 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1471 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1472 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1477 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1479 /* ax, dx, cx, bx */
1480 AREG
, DREG
, CREG
, BREG
,
1481 /* si, di, bp, sp */
1482 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1484 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1485 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1491 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1494 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1497 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1498 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1499 /* SSE REX registers */
1500 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1504 /* The "default" register map used in 32bit mode. */
1506 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1517 static int const x86_64_int_parameter_registers
[6] =
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1523 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1529 static int const x86_64_int_return_registers
[4] =
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1600 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1614 rtx ix86_compare_op0
= NULL_RTX
;
1615 rtx ix86_compare_op1
= NULL_RTX
;
1616 rtx ix86_compare_emitted
= NULL_RTX
;
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1621 /* Define the structure for the machine field in struct function. */
1623 struct stack_local_entry
GTY(())
1625 unsigned short mode
;
1628 struct stack_local_entry
*next
;
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1655 HOST_WIDE_INT frame
;
1657 int outgoing_arguments_size
;
1660 HOST_WIDE_INT to_allocate
;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset
;
1663 HOST_WIDE_INT hard_frame_pointer_offset
;
1664 HOST_WIDE_INT stack_pointer_offset
;
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov
;
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel
;
1674 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1676 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath
;
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune
;
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch
;
1687 /* true if sse prefetch instruction is not NOOP. */
1688 int x86_prefetch_sse
;
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm
;
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer
;
1695 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary
;
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost
;
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1706 int ix86_section_threshold
= 65536;
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix
[16];
1710 int internal_label_prefix_len
;
1712 /* Fence to use after loop using movnt. */
1715 /* Register class used for passing given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1718 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1725 X86_64_INTEGER_CLASS
,
1726 X86_64_INTEGERSI_CLASS
,
1733 X86_64_COMPLEX_X87_CLASS
,
1736 static const char * const x86_64_reg_class_name
[] =
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1742 #define MAX_CLASSES 4
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1746 static bool ext_80387_constants_init
= 0;
1749 static struct machine_function
* ix86_init_machine_status (void);
1750 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
1751 static int ix86_function_regparm (const_tree
, const_tree
);
1752 static void ix86_compute_frame_layout (struct ix86_frame
*);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags
= TARGET_64BIT_DEFAULT
| TARGET_SUBTARGET_ISA_DEFAULT
;
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit
;
/* Define a set of ISAs which aren't available for a given ISA.  MMX
   and SSE ISAs are handled separately.  Each *_UNSET mask chains to the
   next higher extension so that disabling a level disables everything
   built on top of it.  */

#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A

#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.1 -msse4.2.  -mno-sse4 should the same as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4 \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4

#define OPTION_MASK_ISA_SSE5_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler
)(enum built_in_function
, tree
, tree
) = NULL
;
1802 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
1804 /* Implement TARGET_HANDLE_OPTION. */
1807 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1812 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX
;
1815 ix86_isa_flags
&= ~OPTION_MASK_ISA_MMX_UNSET
;
1816 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_UNSET
;
1821 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW
;
1824 ix86_isa_flags
&= ~OPTION_MASK_ISA_3DNOW_UNSET
;
1825 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_UNSET
;
1833 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE
;
1836 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE_UNSET
;
1837 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_UNSET
;
1842 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2
;
1845 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE2_UNSET
;
1846 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_UNSET
;
1851 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3
;
1854 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE3_UNSET
;
1855 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_UNSET
;
1860 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3
;
1863 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSSE3_UNSET
;
1864 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_UNSET
;
1869 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1
;
1872 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_1_UNSET
;
1873 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_UNSET
;
1878 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2
;
1881 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_2_UNSET
;
1882 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_UNSET
;
1887 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4
;
1888 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4
;
1892 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_UNSET
;
1893 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_UNSET
;
1897 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A
;
1900 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4A_UNSET
;
1901 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_UNSET
;
1906 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE5
;
1909 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE5_UNSET
;
1910 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE5_UNSET
;
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1929 override_options (void)
1932 int ix86_tune_defaulted
= 0;
1933 int ix86_arch_specified
= 0;
1934 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1941 const struct processor_costs
*cost
; /* Processor costs */
1942 const int align_loop
; /* Default alignments. */
1943 const int align_loop_max_skip
;
1944 const int align_jump
;
1945 const int align_jump_max_skip
;
1946 const int align_func
;
1948 const processor_target_table
[PROCESSOR_max
] =
1950 {&i386_cost
, 4, 3, 4, 3, 4},
1951 {&i486_cost
, 16, 15, 16, 15, 16},
1952 {&pentium_cost
, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
1954 {&geode_cost
, 0, 0, 0, 0, 0},
1955 {&k6_cost
, 32, 7, 32, 7, 32},
1956 {&athlon_cost
, 16, 7, 16, 7, 16},
1957 {&pentium4_cost
, 0, 0, 0, 0, 0},
1958 {&k8_cost
, 16, 7, 16, 7, 16},
1959 {&nocona_cost
, 0, 0, 0, 0, 0},
1960 {&core2_cost
, 16, 10, 16, 10, 16},
1961 {&generic32_cost
, 16, 7, 16, 7, 16},
1962 {&generic64_cost
, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost
, 32, 24, 32, 7, 32}
1966 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
1997 PTA_PREFETCH_SSE
= 1 << 4,
1999 PTA_3DNOW_A
= 1 << 6,
2003 PTA_POPCNT
= 1 << 10,
2005 PTA_SSE4A
= 1 << 12,
2006 PTA_NO_SAHF
= 1 << 13,
2007 PTA_SSE4_1
= 1 << 14,
2008 PTA_SSE4_2
= 1 << 15,
2014 const char *const name
; /* processor name or nickname. */
2015 const enum processor_type processor
;
2016 const unsigned /*enum pta_flags*/ flags
;
2018 const processor_alias_table
[] =
2020 {"i386", PROCESSOR_I386
, 0},
2021 {"i486", PROCESSOR_I486
, 0},
2022 {"i586", PROCESSOR_PENTIUM
, 0},
2023 {"pentium", PROCESSOR_PENTIUM
, 0},
2024 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
2025 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
2026 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
2027 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
2028 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2029 {"i686", PROCESSOR_PENTIUMPRO
, 0},
2030 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
2031 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
2032 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2033 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2034 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2035 {"pentium4", PROCESSOR_PENTIUM4
, PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2036 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2037 {"prescott", PROCESSOR_NOCONA
, PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2038 {"nocona", PROCESSOR_NOCONA
, (PTA_64BIT
2039 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2040 | PTA_CX16
| PTA_NO_SAHF
)},
2041 {"core2", PROCESSOR_CORE2
, (PTA_64BIT
2042 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2045 {"geode", PROCESSOR_GEODE
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2046 |PTA_PREFETCH_SSE
)},
2047 {"k6", PROCESSOR_K6
, PTA_MMX
},
2048 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2049 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2050 {"athlon", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2051 | PTA_PREFETCH_SSE
)},
2052 {"athlon-tbird", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2053 | PTA_PREFETCH_SSE
)},
2054 {"athlon-4", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2056 {"athlon-xp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2058 {"athlon-mp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2060 {"x86-64", PROCESSOR_K8
, (PTA_64BIT
2061 | PTA_MMX
| PTA_SSE
| PTA_SSE2
2063 {"k8", PROCESSOR_K8
, (PTA_64BIT
2064 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2065 | PTA_SSE
| PTA_SSE2
2067 {"k8-sse3", PROCESSOR_K8
, (PTA_64BIT
2068 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2069 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2071 {"opteron", PROCESSOR_K8
, (PTA_64BIT
2072 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2073 | PTA_SSE
| PTA_SSE2
2075 {"opteron-sse3", PROCESSOR_K8
, (PTA_64BIT
2076 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2077 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2079 {"athlon64", PROCESSOR_K8
, (PTA_64BIT
2080 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2081 | PTA_SSE
| PTA_SSE2
2083 {"athlon64-sse3", PROCESSOR_K8
, (PTA_64BIT
2084 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2085 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2087 {"athlon-fx", PROCESSOR_K8
, (PTA_64BIT
2088 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2089 | PTA_SSE
| PTA_SSE2
2091 {"amdfam10", PROCESSOR_AMDFAM10
, (PTA_64BIT
2092 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2093 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2095 | PTA_CX16
| PTA_ABM
)},
2096 {"barcelona", PROCESSOR_AMDFAM10
, (PTA_64BIT
2097 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2098 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2100 | PTA_CX16
| PTA_ABM
)},
2101 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
2102 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
2105 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
2107 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2108 SUBTARGET_OVERRIDE_OPTIONS
;
2111 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2112 SUBSUBTARGET_OVERRIDE_OPTIONS
;
2115 /* -fPIC is the default for x86_64. */
2116 if (TARGET_MACHO
&& TARGET_64BIT
)
2119 /* Set the default values for switches whose default depends on TARGET_64BIT
2120 in case they weren't overwritten by command line options. */
2123 /* Mach-O doesn't support omitting the frame pointer for now. */
2124 if (flag_omit_frame_pointer
== 2)
2125 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
2126 if (flag_asynchronous_unwind_tables
== 2)
2127 flag_asynchronous_unwind_tables
= 1;
2128 if (flag_pcc_struct_return
== 2)
2129 flag_pcc_struct_return
= 0;
2133 if (flag_omit_frame_pointer
== 2)
2134 flag_omit_frame_pointer
= 0;
2135 if (flag_asynchronous_unwind_tables
== 2)
2136 flag_asynchronous_unwind_tables
= 0;
2137 if (flag_pcc_struct_return
== 2)
2138 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2141 /* Need to check -mtune=generic first. */
2142 if (ix86_tune_string
)
2144 if (!strcmp (ix86_tune_string
, "generic")
2145 || !strcmp (ix86_tune_string
, "i686")
2146 /* As special support for cross compilers we read -mtune=native
2147 as -mtune=generic. With native compilers we won't see the
2148 -mtune=native, as it was changed by the driver. */
2149 || !strcmp (ix86_tune_string
, "native"))
2152 ix86_tune_string
= "generic64";
2154 ix86_tune_string
= "generic32";
2156 else if (!strncmp (ix86_tune_string
, "generic", 7))
2157 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2161 if (ix86_arch_string
)
2162 ix86_tune_string
= ix86_arch_string
;
2163 if (!ix86_tune_string
)
2165 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2166 ix86_tune_defaulted
= 1;
2169 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2170 need to use a sensible tune option. */
2171 if (!strcmp (ix86_tune_string
, "generic")
2172 || !strcmp (ix86_tune_string
, "x86-64")
2173 || !strcmp (ix86_tune_string
, "i686"))
2176 ix86_tune_string
= "generic64";
2178 ix86_tune_string
= "generic32";
2181 if (ix86_stringop_string
)
2183 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2184 stringop_alg
= rep_prefix_1_byte
;
2185 else if (!strcmp (ix86_stringop_string
, "libcall"))
2186 stringop_alg
= libcall
;
2187 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2188 stringop_alg
= rep_prefix_4_byte
;
2189 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2190 stringop_alg
= rep_prefix_8_byte
;
2191 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2192 stringop_alg
= loop_1_byte
;
2193 else if (!strcmp (ix86_stringop_string
, "loop"))
2194 stringop_alg
= loop
;
2195 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2196 stringop_alg
= unrolled_loop
;
2198 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2200 if (!strcmp (ix86_tune_string
, "x86-64"))
2201 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2202 "-mtune=generic instead as appropriate.");
2204 if (!ix86_arch_string
)
2205 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2207 ix86_arch_specified
= 1;
2209 if (!strcmp (ix86_arch_string
, "generic"))
2210 error ("generic CPU can be used only for -mtune= switch");
2211 if (!strncmp (ix86_arch_string
, "generic", 7))
2212 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2214 if (ix86_cmodel_string
!= 0)
2216 if (!strcmp (ix86_cmodel_string
, "small"))
2217 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2218 else if (!strcmp (ix86_cmodel_string
, "medium"))
2219 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2220 else if (!strcmp (ix86_cmodel_string
, "large"))
2221 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2223 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2224 else if (!strcmp (ix86_cmodel_string
, "32"))
2225 ix86_cmodel
= CM_32
;
2226 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2227 ix86_cmodel
= CM_KERNEL
;
2229 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2233 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2234 use of rip-relative addressing. This eliminates fixups that
2235 would otherwise be needed if this object is to be placed in a
2236 DLL, and is essentially just as efficient as direct addressing. */
2237 if (TARGET_64BIT_MS_ABI
)
2238 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
2239 else if (TARGET_64BIT
)
2240 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2242 ix86_cmodel
= CM_32
;
2244 if (ix86_asm_string
!= 0)
2247 && !strcmp (ix86_asm_string
, "intel"))
2248 ix86_asm_dialect
= ASM_INTEL
;
2249 else if (!strcmp (ix86_asm_string
, "att"))
2250 ix86_asm_dialect
= ASM_ATT
;
2252 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2254 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2255 error ("code model %qs not supported in the %s bit mode",
2256 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2257 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
2258 sorry ("%i-bit mode not compiled in",
2259 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
2261 for (i
= 0; i
< pta_size
; i
++)
2262 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2264 ix86_arch
= processor_alias_table
[i
].processor
;
2265 /* Default cpu tuning to the architecture. */
2266 ix86_tune
= ix86_arch
;
2268 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2269 error ("CPU you selected does not support x86-64 "
2272 if (processor_alias_table
[i
].flags
& PTA_MMX
2273 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
2274 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2275 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2276 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
2277 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
2278 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2279 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
2280 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
2281 if (processor_alias_table
[i
].flags
& PTA_SSE
2282 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
2283 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2284 if (processor_alias_table
[i
].flags
& PTA_SSE2
2285 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
2286 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2287 if (processor_alias_table
[i
].flags
& PTA_SSE3
2288 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
2289 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2290 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2291 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
2292 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2293 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
2294 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
2295 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2296 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
2297 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
2298 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
2299 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2300 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
2301 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2302 if (processor_alias_table
[i
].flags
& PTA_SSE5
2303 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE5
))
2304 ix86_isa_flags
|= OPTION_MASK_ISA_SSE5
;
2306 if (processor_alias_table
[i
].flags
& PTA_ABM
)
2308 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2309 x86_cmpxchg16b
= true;
2310 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
))
2312 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
2313 x86_prefetch_sse
= true;
2314 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2321 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2323 ix86_arch_mask
= 1u << ix86_arch
;
2324 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2325 ix86_arch_features
[i
] &= ix86_arch_mask
;
2327 for (i
= 0; i
< pta_size
; i
++)
2328 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2330 ix86_tune
= processor_alias_table
[i
].processor
;
2331 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2333 if (ix86_tune_defaulted
)
2335 ix86_tune_string
= "x86-64";
2336 for (i
= 0; i
< pta_size
; i
++)
2337 if (! strcmp (ix86_tune_string
,
2338 processor_alias_table
[i
].name
))
2340 ix86_tune
= processor_alias_table
[i
].processor
;
2343 error ("CPU you selected does not support x86-64 "
2346 /* Intel CPUs have always interpreted SSE prefetch instructions as
2347 NOPs; so, we can enable SSE prefetch instructions even when
2348 -mtune (rather than -march) points us to a processor that has them.
2349 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2350 higher processors. */
2352 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
2353 x86_prefetch_sse
= true;
2357 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2359 ix86_tune_mask
= 1u << ix86_tune
;
2360 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2361 ix86_tune_features
[i
] &= ix86_tune_mask
;
2364 ix86_cost
= &size_cost
;
2366 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2368 /* Arrange to set up i386_stack_locals for all functions. */
2369 init_machine_status
= ix86_init_machine_status
;
2371 /* Validate -mregparm= value. */
2372 if (ix86_regparm_string
)
2375 warning (0, "-mregparm is ignored in 64-bit mode");
2376 i
= atoi (ix86_regparm_string
);
2377 if (i
< 0 || i
> REGPARM_MAX
)
2378 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2383 ix86_regparm
= REGPARM_MAX
;
2385 /* If the user has provided any of the -malign-* options,
2386 warn and use that value only if -falign-* is not set.
2387 Remove this code in GCC 3.2 or later. */
2388 if (ix86_align_loops_string
)
2390 warning (0, "-malign-loops is obsolete, use -falign-loops");
2391 if (align_loops
== 0)
2393 i
= atoi (ix86_align_loops_string
);
2394 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2395 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2397 align_loops
= 1 << i
;
2401 if (ix86_align_jumps_string
)
2403 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2404 if (align_jumps
== 0)
2406 i
= atoi (ix86_align_jumps_string
);
2407 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2408 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2410 align_jumps
= 1 << i
;
2414 if (ix86_align_funcs_string
)
2416 warning (0, "-malign-functions is obsolete, use -falign-functions");
2417 if (align_functions
== 0)
2419 i
= atoi (ix86_align_funcs_string
);
2420 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2421 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2423 align_functions
= 1 << i
;
2427 /* Default align_* from the processor table. */
2428 if (align_loops
== 0)
2430 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2431 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2433 if (align_jumps
== 0)
2435 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2436 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2438 if (align_functions
== 0)
2440 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2443 /* Validate -mbranch-cost= value, or provide default. */
2444 ix86_branch_cost
= ix86_cost
->branch_cost
;
2445 if (ix86_branch_cost_string
)
2447 i
= atoi (ix86_branch_cost_string
);
2449 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2451 ix86_branch_cost
= i
;
2453 if (ix86_section_threshold_string
)
2455 i
= atoi (ix86_section_threshold_string
);
2457 error ("-mlarge-data-threshold=%d is negative", i
);
2459 ix86_section_threshold
= i
;
2462 if (ix86_tls_dialect_string
)
2464 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2465 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2466 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2467 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2468 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2469 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2471 error ("bad value (%s) for -mtls-dialect= switch",
2472 ix86_tls_dialect_string
);
2475 if (ix87_precision_string
)
2477 i
= atoi (ix87_precision_string
);
2478 if (i
!= 32 && i
!= 64 && i
!= 80)
2479 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2484 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
2486 /* Enable by default the SSE and MMX builtins. Do allow the user to
2487 explicitly disable any of these. In particular, disabling SSE and
2488 MMX for kernel code is extremely useful. */
2489 if (!ix86_arch_specified
)
2491 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
2492 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
2495 warning (0, "-mrtd is ignored in 64bit mode");
2499 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
2501 if (!ix86_arch_specified
)
2503 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
2505 /* i386 ABI does not specify red zone. It still makes sense to use it
2506 when programmer takes care to stack from being destroyed. */
2507 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2508 target_flags
|= MASK_NO_RED_ZONE
;
2511 /* Keep nonleaf frame pointers. */
2512 if (flag_omit_frame_pointer
)
2513 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2514 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2515 flag_omit_frame_pointer
= 1;
2517 /* If we're doing fast math, we don't care about comparison order
2518 wrt NaNs. This lets us use a shorter comparison sequence. */
2519 if (flag_finite_math_only
)
2520 target_flags
&= ~MASK_IEEE_FP
;
2522 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2523 since the insns won't need emulation. */
2524 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2525 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2527 /* Likewise, if the target doesn't have a 387, or we've specified
2528 software floating point, don't use 387 inline intrinsics. */
2530 target_flags
|= MASK_NO_FANCY_MATH_387
;
2532 /* Turn on SSE4A bultins for -msse5. */
2534 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2536 /* Turn on SSE4.1 builtins for -msse4.2. */
2538 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2540 /* Turn on SSSE3 builtins for -msse4.1. */
2542 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2544 /* Turn on SSE3 builtins for -mssse3. */
2546 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2548 /* Turn on SSE3 builtins for -msse4a. */
2550 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2552 /* Turn on SSE2 builtins for -msse3. */
2554 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2556 /* Turn on SSE builtins for -msse2. */
2558 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2560 /* Turn on MMX builtins for -msse. */
2563 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
2564 x86_prefetch_sse
= true;
2567 /* Turn on MMX builtins for 3Dnow. */
2569 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2572 if (TARGET_SSE4_2
|| TARGET_ABM
)
2575 /* Validate -mpreferred-stack-boundary= value, or provide default.
2576 The default of 128 bits is for Pentium III's SSE __m128. We can't
2577 change it because of optimize_size. Otherwise, we can't mix object
2578 files compiled with -Os and -On. */
2579 ix86_preferred_stack_boundary
= 128;
2580 if (ix86_preferred_stack_boundary_string
)
2582 i
= atoi (ix86_preferred_stack_boundary_string
);
2583 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2584 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2585 TARGET_64BIT
? 4 : 2);
2587 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2590 /* Accept -msseregparm only if at least SSE support is enabled. */
2591 if (TARGET_SSEREGPARM
2593 error ("-msseregparm used without SSE enabled");
2595 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2596 if (ix86_fpmath_string
!= 0)
2598 if (! strcmp (ix86_fpmath_string
, "387"))
2599 ix86_fpmath
= FPMATH_387
;
2600 else if (! strcmp (ix86_fpmath_string
, "sse"))
2604 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2605 ix86_fpmath
= FPMATH_387
;
2608 ix86_fpmath
= FPMATH_SSE
;
2610 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2611 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2615 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2616 ix86_fpmath
= FPMATH_387
;
2618 else if (!TARGET_80387
)
2620 warning (0, "387 instruction set disabled, using SSE arithmetics");
2621 ix86_fpmath
= FPMATH_SSE
;
2624 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
2627 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2630 /* If the i387 is disabled, then do not return values in it. */
2632 target_flags
&= ~MASK_FLOAT_RETURNS
;
2634 /* Use external vectorized library in vectorizing intrinsics. */
2635 if (ix86_veclibabi_string
)
2637 if (strcmp (ix86_veclibabi_string
, "acml") == 0)
2638 ix86_veclib_handler
= ix86_veclibabi_acml
;
2640 error ("unknown vectorization library ABI type (%s) for "
2641 "-mveclibabi= switch", ix86_veclibabi_string
);
2644 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2645 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2647 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2649 /* ??? Unwind info is not correct around the CFG unless either a frame
2650 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2651 unwind info generation to be aware of the CFG and propagating states
2653 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2654 || flag_exceptions
|| flag_non_call_exceptions
)
2655 && flag_omit_frame_pointer
2656 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2658 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2659 warning (0, "unwind tables currently require either a frame pointer "
2660 "or -maccumulate-outgoing-args for correctness");
2661 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2664 /* For sane SSE instruction set generation we need fcomi instruction.
2665 It is safe to enable all CMOVE instructions. */
2669 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2672 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2673 p
= strchr (internal_label_prefix
, 'X');
2674 internal_label_prefix_len
= p
- internal_label_prefix
;
2678 /* When scheduling description is not available, disable scheduler pass
2679 so it won't slow down the compilation and make x87 code slower. */
2680 if (!TARGET_SCHEDULE
)
2681 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2683 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2684 set_param_value ("simultaneous-prefetches",
2685 ix86_cost
->simultaneous_prefetches
);
2686 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2687 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2688 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE
))
2689 set_param_value ("l1-cache-size", ix86_cost
->l1_cache_size
);
2690 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE
))
2691 set_param_value ("l2-cache-size", ix86_cost
->l2_cache_size
);
2693 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2694 can be optimized to ap = __builtin_next_arg (0). */
2695 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
2696 targetm
.expand_builtin_va_start
= NULL
;
2699 /* Return true if this goes in large data/bss. */
2702 ix86_in_large_data_p (tree exp
)
2704 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2707 /* Functions are never large data. */
2708 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2711 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2713 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2714 if (strcmp (section
, ".ldata") == 0
2715 || strcmp (section
, ".lbss") == 0)
2721 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2723 /* If this is an incomplete type with size 0, then we can't put it
2724 in data because it might be too big when completed. */
2725 if (!size
|| size
> ix86_section_threshold
)
2732 /* Switch to the appropriate section for output of DECL.
2733 DECL is either a `VAR_DECL' node or a constant of some sort.
2734 RELOC indicates whether forming the initial value of DECL requires
2735 link-time relocations. */
2737 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2741 x86_64_elf_select_section (tree decl
, int reloc
,
2742 unsigned HOST_WIDE_INT align
)
2744 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2745 && ix86_in_large_data_p (decl
))
2747 const char *sname
= NULL
;
2748 unsigned int flags
= SECTION_WRITE
;
2749 switch (categorize_decl_for_section (decl
, reloc
))
2754 case SECCAT_DATA_REL
:
2755 sname
= ".ldata.rel";
2757 case SECCAT_DATA_REL_LOCAL
:
2758 sname
= ".ldata.rel.local";
2760 case SECCAT_DATA_REL_RO
:
2761 sname
= ".ldata.rel.ro";
2763 case SECCAT_DATA_REL_RO_LOCAL
:
2764 sname
= ".ldata.rel.ro.local";
2768 flags
|= SECTION_BSS
;
2771 case SECCAT_RODATA_MERGE_STR
:
2772 case SECCAT_RODATA_MERGE_STR_INIT
:
2773 case SECCAT_RODATA_MERGE_CONST
:
2777 case SECCAT_SRODATA
:
2784 /* We don't split these for medium model. Place them into
2785 default sections and hope for best. */
2790 /* We might get called with string constants, but get_named_section
2791 doesn't like them as they are not DECLs. Also, we need to set
2792 flags in that case. */
2794 return get_section (sname
, flags
, NULL
);
2795 return get_named_section (decl
, sname
, reloc
);
2798 return default_elf_select_section (decl
, reloc
, align
);
2801 /* Build up a unique section name, expressed as a
2802 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2803 RELOC indicates whether the initial value of EXP requires
2804 link-time relocations. */
2806 static void ATTRIBUTE_UNUSED
2807 x86_64_elf_unique_section (tree decl
, int reloc
)
2809 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2810 && ix86_in_large_data_p (decl
))
2812 const char *prefix
= NULL
;
2813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2814 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2816 switch (categorize_decl_for_section (decl
, reloc
))
2819 case SECCAT_DATA_REL
:
2820 case SECCAT_DATA_REL_LOCAL
:
2821 case SECCAT_DATA_REL_RO
:
2822 case SECCAT_DATA_REL_RO_LOCAL
:
2823 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2826 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2829 case SECCAT_RODATA_MERGE_STR
:
2830 case SECCAT_RODATA_MERGE_STR_INIT
:
2831 case SECCAT_RODATA_MERGE_CONST
:
2832 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2834 case SECCAT_SRODATA
:
2841 /* We don't split these for medium model. Place them into
2842 default sections and hope for best. */
2850 plen
= strlen (prefix
);
2852 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2853 name
= targetm
.strip_name_encoding (name
);
2854 nlen
= strlen (name
);
2856 string
= (char *) alloca (nlen
+ plen
+ 1);
2857 memcpy (string
, prefix
, plen
);
2858 memcpy (string
+ plen
, name
, nlen
+ 1);
2860 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2864 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
/* NOTE(review): reconstructed from a garbled extraction; the elided
   `else` branch and closing #endif were restored — verify against
   upstream i386.c.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Large-model medium-code objects over the section threshold go to
     .largecomm so they can live outside the small data area.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2889 /* Utility function for targets to use in implementing
2890 ASM_OUTPUT_ALIGNED_BSS. */
2893 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2894 const char *name
, unsigned HOST_WIDE_INT size
,
2897 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2898 && size
> (unsigned int)ix86_section_threshold
)
2899 switch_to_section (get_named_section (decl
, ".lbss", 0));
2901 switch_to_section (bss_section
);
2902 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2903 #ifdef ASM_DECLARE_OBJECT_NAME
2904 last_assemble_variable_decl
= decl
;
2905 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2907 /* Standard thing is just output label for the object. */
2908 ASM_OUTPUT_LABEL (file
, name
);
2909 #endif /* ASM_DECLARE_OBJECT_NAME */
2910 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2914 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2916 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2917 make the problem with not enough registers even worse. */
2918 #ifdef INSN_SCHEDULING
2920 flag_schedule_insns
= 0;
2924 /* The Darwin libraries never set errno, so we might as well
2925 avoid calling them when that's the only reason we would. */
2926 flag_errno_math
= 0;
2928 /* The default values of these switches depend on the TARGET_64BIT
2929 that is not known at this moment. Mark these values with 2 and
2930 let user the to override these. In case there is no command line option
2931 specifying them, we will set the defaults in override_options. */
2933 flag_omit_frame_pointer
= 2;
2934 flag_pcc_struct_return
= 2;
2935 flag_asynchronous_unwind_tables
= 2;
2936 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2937 SUBTARGET_OPTIMIZATION_OPTIONS
;
2941 /* Decide whether we can make a sibling call to a function. DECL is the
2942 declaration of the function being targeted by the call and EXP is the
2943 CALL_EXPR representing the call. */
2946 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2951 /* If we are generating position-independent code, we cannot sibcall
2952 optimize any indirect call, or a direct call to a global function,
2953 as the PLT requires %ebx be live. */
2954 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2961 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2962 if (POINTER_TYPE_P (func
))
2963 func
= TREE_TYPE (func
);
2966 /* Check that the return value locations are the same. Like
2967 if we are returning floats on the 80387 register stack, we cannot
2968 make a sibcall from a function that doesn't return a float to a
2969 function that does or, conversely, from a function that does return
2970 a float to a function that doesn't; the necessary stack adjustment
2971 would not be executed. This is also the place we notice
2972 differences in the return value ABI. Note that it is ok for one
2973 of the functions to have void return type as long as the return
2974 value of the other is passed in a register. */
2975 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2976 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2978 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2980 if (!rtx_equal_p (a
, b
))
2983 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2985 else if (!rtx_equal_p (a
, b
))
2988 /* If this call is indirect, we'll need to be able to use a call-clobbered
2989 register for the address of the target function. Make sure that all
2990 such registers are not used for passing parameters. */
2991 if (!decl
&& !TARGET_64BIT
)
2995 /* We're looking at the CALL_EXPR, we need the type of the function. */
2996 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2997 type
= TREE_TYPE (type
); /* pointer type */
2998 type
= TREE_TYPE (type
); /* function type */
3000 if (ix86_function_regparm (type
, NULL
) >= 3)
3002 /* ??? Need to count the actual number of registers to be used,
3003 not the possible number of registers. Fix later. */
3008 /* Dllimport'd functions are also called indirectly. */
3009 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3010 && decl
&& DECL_DLLIMPORT_P (decl
)
3011 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
3014 /* If we forced aligned the stack, then sibcalling would unalign the
3015 stack, which may break the called function. */
3016 if (cfun
->machine
->force_align_arg_pointer
)
3019 /* Otherwise okay. That also includes certain types of indirect calls. */
3023 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3024 calling convention attributes;
3025 arguments as in struct attribute_spec.handler. */
3028 ix86_handle_cconv_attribute (tree
*node
, tree name
,
3030 int flags ATTRIBUTE_UNUSED
,
3033 if (TREE_CODE (*node
) != FUNCTION_TYPE
3034 && TREE_CODE (*node
) != METHOD_TYPE
3035 && TREE_CODE (*node
) != FIELD_DECL
3036 && TREE_CODE (*node
) != TYPE_DECL
)
3038 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
3039 IDENTIFIER_POINTER (name
));
3040 *no_add_attrs
= true;
3044 /* Can combine regparm with all attributes but fastcall. */
3045 if (is_attribute_p ("regparm", name
))
3049 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3051 error ("fastcall and regparm attributes are not compatible");
3054 cst
= TREE_VALUE (args
);
3055 if (TREE_CODE (cst
) != INTEGER_CST
)
3057 warning (OPT_Wattributes
,
3058 "%qs attribute requires an integer constant argument",
3059 IDENTIFIER_POINTER (name
));
3060 *no_add_attrs
= true;
3062 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
3064 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
3065 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
3066 *no_add_attrs
= true;
3070 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3071 TYPE_ATTRIBUTES (*node
))
3072 && compare_tree_int (cst
, REGPARM_MAX
-1))
3074 error ("%s functions limited to %d register parameters",
3075 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
3083 /* Do not warn when emulating the MS ABI. */
3084 if (!TARGET_64BIT_MS_ABI
)
3085 warning (OPT_Wattributes
, "%qs attribute ignored",
3086 IDENTIFIER_POINTER (name
));
3087 *no_add_attrs
= true;
3091 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3092 if (is_attribute_p ("fastcall", name
))
3094 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3096 error ("fastcall and cdecl attributes are not compatible");
3098 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3100 error ("fastcall and stdcall attributes are not compatible");
3102 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
3104 error ("fastcall and regparm attributes are not compatible");
3108 /* Can combine stdcall with fastcall (redundant), regparm and
3110 else if (is_attribute_p ("stdcall", name
))
3112 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3114 error ("stdcall and cdecl attributes are not compatible");
3116 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3118 error ("stdcall and fastcall attributes are not compatible");
3122 /* Can combine cdecl with regparm and sseregparm. */
3123 else if (is_attribute_p ("cdecl", name
))
3125 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3127 error ("stdcall and cdecl attributes are not compatible");
3129 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3131 error ("fastcall and cdecl attributes are not compatible");
3135 /* Can combine sseregparm with all attributes. */
3140 /* Return 0 if the attributes for two types are incompatible, 1 if they
3141 are compatible, and 2 if they are nearly compatible (which causes a
3142 warning to be generated). */
3145 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
3147 /* Check for mismatch of non-default calling convention. */
3148 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
3150 if (TREE_CODE (type1
) != FUNCTION_TYPE
3151 && TREE_CODE (type1
) != METHOD_TYPE
)
3154 /* Check for mismatched fastcall/regparm types. */
3155 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
3156 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
3157 || (ix86_function_regparm (type1
, NULL
)
3158 != ix86_function_regparm (type2
, NULL
)))
3161 /* Check for mismatched sseregparm types. */
3162 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
3163 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
3166 /* Check for mismatched return types (cdecl vs stdcall). */
3167 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
3168 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
3174 /* Return the regparm value for a function with the indicated TYPE and DECL.
3175 DECL may be NULL when calling function indirectly
3176 or considering a libcall. */
3179 ix86_function_regparm (const_tree type
, const_tree decl
)
3182 int regparm
= ix86_regparm
;
3187 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
3189 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
3191 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
3194 /* Use register calling convention for local functions when possible. */
3195 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
3196 && flag_unit_at_a_time
&& !profile_flag
)
3198 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3199 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3202 int local_regparm
, globals
= 0, regno
;
3205 /* Make sure no regparm register is taken by a
3206 fixed register variable. */
3207 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
3208 if (fixed_regs
[local_regparm
])
3211 /* We can't use regparm(3) for nested functions as these use
3212 static chain pointer in third argument. */
3213 if (local_regparm
== 3
3214 && (decl_function_context (decl
)
3215 || ix86_force_align_arg_pointer
)
3216 && !DECL_NO_STATIC_CHAIN (decl
))
3219 /* If the function realigns its stackpointer, the prologue will
3220 clobber %ecx. If we've already generated code for the callee,
3221 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3222 scanning the attributes for the self-realigning property. */
3223 f
= DECL_STRUCT_FUNCTION (decl
);
3224 if (local_regparm
== 3
3225 && (f
? !!f
->machine
->force_align_arg_pointer
3226 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
3227 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3230 /* Each fixed register usage increases register pressure,
3231 so less registers should be used for argument passing.
3232 This functionality can be overriden by an explicit
3234 for (regno
= 0; regno
<= DI_REG
; regno
++)
3235 if (fixed_regs
[regno
])
3239 = globals
< local_regparm
? local_regparm
- globals
: 0;
3241 if (local_regparm
> regparm
)
3242 regparm
= local_regparm
;
3249 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3250 DFmode (2) arguments in SSE registers for a function with the
3251 indicated TYPE and DECL. DECL may be NULL when calling function
3252 indirectly or considering a libcall. Otherwise return 0. */
3255 ix86_function_sseregparm (const_tree type
, const_tree decl
)
3257 gcc_assert (!TARGET_64BIT
);
3259 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3260 by the sseregparm attribute. */
3261 if (TARGET_SSEREGPARM
3262 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3267 error ("Calling %qD with attribute sseregparm without "
3268 "SSE/SSE2 enabled", decl
);
3270 error ("Calling %qT with attribute sseregparm without "
3271 "SSE/SSE2 enabled", type
);
3278 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3279 (and DFmode for SSE2) arguments in SSE registers. */
3280 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3282 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3283 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3285 return TARGET_SSE2
? 2 : 1;
3291 /* Return true if EAX is live at the start of the function. Used by
3292 ix86_expand_prologue to determine if we need special help before
3293 calling allocate_stack_worker. */
3296 ix86_eax_live_at_start_p (void)
3298 /* Cheat. Don't bother working forward from ix86_function_regparm
3299 to the function type to whether an actual argument is located in
3300 eax. Instead just look at cfg info, which is still close enough
3301 to correct at this point. This gives false positives for broken
3302 functions that might use uninitialized data that happens to be
3303 allocated in eax, but who cares? */
3304 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
3307 /* Value is the number of bytes of arguments automatically
3308 popped when returning from a subroutine call.
3309 FUNDECL is the declaration node of the function (as a tree),
3310 FUNTYPE is the data type of the function (as a tree),
3311 or for a library call it is an identifier node for the subroutine name.
3312 SIZE is the number of bytes of arguments passed on the stack.
3314 On the 80386, the RTD insn may be used to pop them if the number
3315 of args is fixed, but if the number is variable then the caller
3316 must pop them all. RTD can't be used for library calls now
3317 because the library is compiled with the Unix compiler.
3318 Use of RTD is a selectable option, since it is incompatible with
3319 standard Unix calling sequences. If the option is not selected,
3320 the caller must always pop the args.
3322 The attribute stdcall is equivalent to RTD on a per module basis. */
3325 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3329 /* None of the 64-bit ABIs pop arguments. */
3333 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3335 /* Cdecl functions override -mrtd, and never pop the stack. */
3336 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
3338 /* Stdcall and fastcall functions will pop the stack if not
3340 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3341 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3344 if (rtd
&& ! stdarg_p (funtype
))
3348 /* Lose any fake structure return argument if it is passed on the stack. */
3349 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3350 && !KEEP_AGGREGATE_RETURN_POINTER
)
3352 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3354 return GET_MODE_SIZE (Pmode
);
3360 /* Argument support functions. */
3362 /* Return true when register may be used to pass function parameters. */
3364 ix86_function_arg_regno_p (int regno
)
3367 const int *parm_regs
;
3372 return (regno
< REGPARM_MAX
3373 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3375 return (regno
< REGPARM_MAX
3376 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3377 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3378 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3379 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3384 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3389 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3390 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3394 /* RAX is used as hidden argument to va_arg functions. */
3395 if (!TARGET_64BIT_MS_ABI
&& regno
== AX_REG
)
3398 if (TARGET_64BIT_MS_ABI
)
3399 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3401 parm_regs
= x86_64_int_parameter_registers
;
3402 for (i
= 0; i
< REGPARM_MAX
; i
++)
3403 if (regno
== parm_regs
[i
])
3408 /* Return if we do not know how to pass TYPE solely in registers. */
3411 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
3413 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3416 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3417 The layout_type routine is crafty and tries to trick us into passing
3418 currently unsupported vector types on the stack by using TImode. */
3419 return (!TARGET_64BIT
&& mode
== TImode
3420 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3423 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3424 for a call to a function whose data type is FNTYPE.
3425 For a library call, FNTYPE is 0. */
3428 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3429 tree fntype
, /* tree ptr for function decl */
3430 rtx libname
, /* SYMBOL_REF of library name or 0 */
3433 memset (cum
, 0, sizeof (*cum
));
3435 /* Set up the number of registers to use for passing arguments. */
3436 cum
->nregs
= ix86_regparm
;
3438 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3440 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3441 cum
->warn_sse
= true;
3442 cum
->warn_mmx
= true;
3443 cum
->maybe_vaarg
= (fntype
3444 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
3449 /* If there are variable arguments, then we won't pass anything
3450 in registers in 32-bit mode. */
3451 if (cum
->maybe_vaarg
)
3461 /* Use ecx and edx registers if function has fastcall attribute,
3462 else look for regparm information. */
3465 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3471 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3474 /* Set up the number of SSE registers used for passing SFmode
3475 and DFmode arguments. Warn for mismatching ABI. */
3476 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3480 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3481 But in the case of vector types, it is some vector mode.
3483 When we have only some of our vector isa extensions enabled, then there
3484 are some modes for which vector_mode_supported_p is false. For these
3485 modes, the generic vector support in gcc will choose some non-vector mode
3486 in order to implement the type. By computing the natural mode, we'll
3487 select the proper ABI location for the operand and not depend on whatever
3488 the middle-end decides to do with these vector types. */
3490 static enum machine_mode
3491 type_natural_mode (const_tree type
)
3493 enum machine_mode mode
= TYPE_MODE (type
);
3495 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3497 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3498 if ((size
== 8 || size
== 16)
3499 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3500 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3502 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3504 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3505 mode
= MIN_MODE_VECTOR_FLOAT
;
3507 mode
= MIN_MODE_VECTOR_INT
;
3509 /* Get the mode which has this inner mode and number of units. */
3510 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3511 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3512 && GET_MODE_INNER (mode
) == innermode
)
3522 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3523 this may not agree with the mode that the type system has chosen for the
3524 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3525 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3528 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3533 if (orig_mode
!= BLKmode
)
3534 tmp
= gen_rtx_REG (orig_mode
, regno
);
3537 tmp
= gen_rtx_REG (mode
, regno
);
3538 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3539 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3545 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3546 of this code is to classify each 8bytes of incoming argument by the register
3547 class and assign registers accordingly. */
3549 /* Return the union class of CLASS1 and CLASS2.
3550 See the x86-64 PS ABI for details. */
3552 static enum x86_64_reg_class
3553 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3555 /* Rule #1: If both classes are equal, this is the resulting class. */
3556 if (class1
== class2
)
3559 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3561 if (class1
== X86_64_NO_CLASS
)
3563 if (class2
== X86_64_NO_CLASS
)
3566 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3567 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3568 return X86_64_MEMORY_CLASS
;
3570 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3571 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3572 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3573 return X86_64_INTEGERSI_CLASS
;
3574 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3575 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3576 return X86_64_INTEGER_CLASS
;
3578 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3580 if (class1
== X86_64_X87_CLASS
3581 || class1
== X86_64_X87UP_CLASS
3582 || class1
== X86_64_COMPLEX_X87_CLASS
3583 || class2
== X86_64_X87_CLASS
3584 || class2
== X86_64_X87UP_CLASS
3585 || class2
== X86_64_COMPLEX_X87_CLASS
)
3586 return X86_64_MEMORY_CLASS
;
3588 /* Rule #6: Otherwise class SSE is used. */
3589 return X86_64_SSE_CLASS
;
3592 /* Classify the argument of type TYPE and mode MODE.
3593 CLASSES will be filled by the register class used to pass each word
3594 of the operand. The number of words is returned. In case the parameter
3595 should be passed in memory, 0 is returned. As a special case for zero
3596 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3598 BIT_OFFSET is used internally for handling records and specifies offset
3599 of the offset in bits modulo 256 to avoid overflow cases.
3601 See the x86-64 PS ABI for details.
3605 classify_argument (enum machine_mode mode
, const_tree type
,
3606 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3608 HOST_WIDE_INT bytes
=
3609 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3610 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3612 /* Variable sized entities are always passed/returned in memory. */
3616 if (mode
!= VOIDmode
3617 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3620 if (type
&& AGGREGATE_TYPE_P (type
))
3624 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3626 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3630 for (i
= 0; i
< words
; i
++)
3631 classes
[i
] = X86_64_NO_CLASS
;
3633 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3634 signal the memory class, so handle it as a special case. */
3637 classes
[0] = X86_64_NO_CLASS
;
3641 /* Classify each field of record and merge classes. */
3642 switch (TREE_CODE (type
))
3645 /* And now merge the fields of structure. */
3646 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3648 if (TREE_CODE (field
) == FIELD_DECL
)
3652 if (TREE_TYPE (field
) == error_mark_node
)
3655 /* Bitfields are always classified as integer. Handle them
3656 early, since later code would consider them to be
3657 misaligned integers. */
3658 if (DECL_BIT_FIELD (field
))
3660 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3661 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3662 + tree_low_cst (DECL_SIZE (field
), 0)
3665 merge_classes (X86_64_INTEGER_CLASS
,
3670 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3671 TREE_TYPE (field
), subclasses
,
3672 (int_bit_position (field
)
3673 + bit_offset
) % 256);
3676 for (i
= 0; i
< num
; i
++)
3679 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3681 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3689 /* Arrays are handled as small records. */
3692 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3693 TREE_TYPE (type
), subclasses
, bit_offset
);
3697 /* The partial classes are now full classes. */
3698 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3699 subclasses
[0] = X86_64_SSE_CLASS
;
3700 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3701 subclasses
[0] = X86_64_INTEGER_CLASS
;
3703 for (i
= 0; i
< words
; i
++)
3704 classes
[i
] = subclasses
[i
% num
];
3709 case QUAL_UNION_TYPE
:
3710 /* Unions are similar to RECORD_TYPE but offset is always 0.
3712 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3714 if (TREE_CODE (field
) == FIELD_DECL
)
3718 if (TREE_TYPE (field
) == error_mark_node
)
3721 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3722 TREE_TYPE (field
), subclasses
,
3726 for (i
= 0; i
< num
; i
++)
3727 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3736 /* Final merger cleanup. */
3737 for (i
= 0; i
< words
; i
++)
3739 /* If one class is MEMORY, everything should be passed in
3741 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3744 /* The X86_64_SSEUP_CLASS should be always preceded by
3745 X86_64_SSE_CLASS. */
3746 if (classes
[i
] == X86_64_SSEUP_CLASS
3747 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3748 classes
[i
] = X86_64_SSE_CLASS
;
3750 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3751 if (classes
[i
] == X86_64_X87UP_CLASS
3752 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3753 classes
[i
] = X86_64_SSE_CLASS
;
3758 /* Compute alignment needed. We align all types to natural boundaries with
3759 exception of XFmode that is aligned to 64bits. */
3760 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3762 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3765 mode_alignment
= 128;
3766 else if (mode
== XCmode
)
3767 mode_alignment
= 256;
3768 if (COMPLEX_MODE_P (mode
))
3769 mode_alignment
/= 2;
3770 /* Misaligned fields are always returned in memory. */
3771 if (bit_offset
% mode_alignment
)
3775 /* for V1xx modes, just use the base mode */
3776 if (VECTOR_MODE_P (mode
)
3777 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3778 mode
= GET_MODE_INNER (mode
);
3780 /* Classification of atomic types. */
3785 classes
[0] = X86_64_SSE_CLASS
;
3788 classes
[0] = X86_64_SSE_CLASS
;
3789 classes
[1] = X86_64_SSEUP_CLASS
;
3798 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3799 classes
[0] = X86_64_INTEGERSI_CLASS
;
3801 classes
[0] = X86_64_INTEGER_CLASS
;
3805 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3810 if (!(bit_offset
% 64))
3811 classes
[0] = X86_64_SSESF_CLASS
;
3813 classes
[0] = X86_64_SSE_CLASS
;
3816 classes
[0] = X86_64_SSEDF_CLASS
;
3819 classes
[0] = X86_64_X87_CLASS
;
3820 classes
[1] = X86_64_X87UP_CLASS
;
3823 classes
[0] = X86_64_SSE_CLASS
;
3824 classes
[1] = X86_64_SSEUP_CLASS
;
3827 classes
[0] = X86_64_SSE_CLASS
;
3830 classes
[0] = X86_64_SSEDF_CLASS
;
3831 classes
[1] = X86_64_SSEDF_CLASS
;
3834 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3837 /* This mode is larger than 16 bytes. */
3845 classes
[0] = X86_64_SSE_CLASS
;
3846 classes
[1] = X86_64_SSEUP_CLASS
;
3852 classes
[0] = X86_64_SSE_CLASS
;
3858 gcc_assert (VECTOR_MODE_P (mode
));
3863 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3865 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3866 classes
[0] = X86_64_INTEGERSI_CLASS
;
3868 classes
[0] = X86_64_INTEGER_CLASS
;
3869 classes
[1] = X86_64_INTEGER_CLASS
;
3870 return 1 + (bytes
> 8);
3874 /* Examine the argument and return set number of register required in each
3875 class. Return 0 iff parameter should be passed in memory. */
3877 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
3878 int *int_nregs
, int *sse_nregs
)
3880 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3881 int n
= classify_argument (mode
, type
, regclass
, 0);
3887 for (n
--; n
>= 0; n
--)
3888 switch (regclass
[n
])
3890 case X86_64_INTEGER_CLASS
:
3891 case X86_64_INTEGERSI_CLASS
:
3894 case X86_64_SSE_CLASS
:
3895 case X86_64_SSESF_CLASS
:
3896 case X86_64_SSEDF_CLASS
:
3899 case X86_64_NO_CLASS
:
3900 case X86_64_SSEUP_CLASS
:
3902 case X86_64_X87_CLASS
:
3903 case X86_64_X87UP_CLASS
:
3907 case X86_64_COMPLEX_X87_CLASS
:
3908 return in_return
? 2 : 0;
3909 case X86_64_MEMORY_CLASS
:
3915 /* Construct container for the argument used by GCC interface. See
3916 FUNCTION_ARG for the detailed description. */
3919 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3920 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
3921 const int *intreg
, int sse_regno
)
3923 /* The following variables hold the static issued_error state. */
3924 static bool issued_sse_arg_error
;
3925 static bool issued_sse_ret_error
;
3926 static bool issued_x87_ret_error
;
3928 enum machine_mode tmpmode
;
3930 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3931 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3935 int needed_sseregs
, needed_intregs
;
3936 rtx exp
[MAX_CLASSES
];
3939 n
= classify_argument (mode
, type
, regclass
, 0);
3942 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3945 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3948 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3949 some less clueful developer tries to use floating-point anyway. */
3950 if (needed_sseregs
&& !TARGET_SSE
)
3954 if (!issued_sse_ret_error
)
3956 error ("SSE register return with SSE disabled");
3957 issued_sse_ret_error
= true;
3960 else if (!issued_sse_arg_error
)
3962 error ("SSE register argument with SSE disabled");
3963 issued_sse_arg_error
= true;
3968 /* Likewise, error if the ABI requires us to return values in the
3969 x87 registers and the user specified -mno-80387. */
3970 if (!TARGET_80387
&& in_return
)
3971 for (i
= 0; i
< n
; i
++)
3972 if (regclass
[i
] == X86_64_X87_CLASS
3973 || regclass
[i
] == X86_64_X87UP_CLASS
3974 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
3976 if (!issued_x87_ret_error
)
3978 error ("x87 register return with x87 disabled");
3979 issued_x87_ret_error
= true;
3984 /* First construct simple cases. Avoid SCmode, since we want to use
3985 single register to pass this type. */
3986 if (n
== 1 && mode
!= SCmode
)
3987 switch (regclass
[0])
3989 case X86_64_INTEGER_CLASS
:
3990 case X86_64_INTEGERSI_CLASS
:
3991 return gen_rtx_REG (mode
, intreg
[0]);
3992 case X86_64_SSE_CLASS
:
3993 case X86_64_SSESF_CLASS
:
3994 case X86_64_SSEDF_CLASS
:
3995 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3996 case X86_64_X87_CLASS
:
3997 case X86_64_COMPLEX_X87_CLASS
:
3998 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3999 case X86_64_NO_CLASS
:
4000 /* Zero sized array, struct or class. */
4005 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
4006 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
4007 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
4010 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
4011 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
4012 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
4013 && regclass
[1] == X86_64_INTEGER_CLASS
4014 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
4015 && intreg
[0] + 1 == intreg
[1])
4016 return gen_rtx_REG (mode
, intreg
[0]);
4018 /* Otherwise figure out the entries of the PARALLEL. */
4019 for (i
= 0; i
< n
; i
++)
4021 switch (regclass
[i
])
4023 case X86_64_NO_CLASS
:
4025 case X86_64_INTEGER_CLASS
:
4026 case X86_64_INTEGERSI_CLASS
:
4027 /* Merge TImodes on aligned occasions here too. */
4028 if (i
* 8 + 8 > bytes
)
4029 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
4030 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
4034 /* We've requested 24 bytes which we don't have a mode for. Use DImode. */
4035 if (tmpmode
== BLKmode
)
4037 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4038 gen_rtx_REG (tmpmode
, *intreg
),
4042 case X86_64_SSESF_CLASS
:
4043 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4044 gen_rtx_REG (SFmode
,
4045 SSE_REGNO (sse_regno
)),
4049 case X86_64_SSEDF_CLASS
:
4050 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4051 gen_rtx_REG (DFmode
,
4052 SSE_REGNO (sse_regno
)),
4056 case X86_64_SSE_CLASS
:
4057 if (i
< n
- 1 && regclass
[i
+ 1] == X86_64_SSEUP_CLASS
)
4061 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4062 gen_rtx_REG (tmpmode
,
4063 SSE_REGNO (sse_regno
)),
4065 if (tmpmode
== TImode
)
4074 /* Empty aligned struct, union or class. */
4078 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
4079 for (i
= 0; i
< nexps
; i
++)
4080 XVECEXP (ret
, 0, i
) = exp
[i
];
4084 /* Update the data in CUM to advance over an argument of mode MODE
4085 and data type TYPE. (TYPE is null for libcalls where that information
4086 may not be available.) */
4089 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4090 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
4106 cum
->words
+= words
;
4107 cum
->nregs
-= words
;
4108 cum
->regno
+= words
;
4110 if (cum
->nregs
<= 0)
4118 if (cum
->float_in_sse
< 2)
4121 if (cum
->float_in_sse
< 1)
4132 if (!type
|| !AGGREGATE_TYPE_P (type
))
4134 cum
->sse_words
+= words
;
4135 cum
->sse_nregs
-= 1;
4136 cum
->sse_regno
+= 1;
4137 if (cum
->sse_nregs
<= 0)
4149 if (!type
|| !AGGREGATE_TYPE_P (type
))
4151 cum
->mmx_words
+= words
;
4152 cum
->mmx_nregs
-= 1;
4153 cum
->mmx_regno
+= 1;
4154 if (cum
->mmx_nregs
<= 0)
4165 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4166 tree type
, HOST_WIDE_INT words
)
4168 int int_nregs
, sse_nregs
;
4170 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
4171 cum
->words
+= words
;
4172 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
4174 cum
->nregs
-= int_nregs
;
4175 cum
->sse_nregs
-= sse_nregs
;
4176 cum
->regno
+= int_nregs
;
4177 cum
->sse_regno
+= sse_nregs
;
4180 cum
->words
+= words
;
4184 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
4185 HOST_WIDE_INT words
)
4187 /* Otherwise, this should be passed indirect. */
4188 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
4190 cum
->words
+= words
;
4199 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4200 tree type
, int named ATTRIBUTE_UNUSED
)
4202 HOST_WIDE_INT bytes
, words
;
4204 if (mode
== BLKmode
)
4205 bytes
= int_size_in_bytes (type
);
4207 bytes
= GET_MODE_SIZE (mode
);
4208 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4211 mode
= type_natural_mode (type
);
4213 if (TARGET_64BIT_MS_ABI
)
4214 function_arg_advance_ms_64 (cum
, bytes
, words
);
4215 else if (TARGET_64BIT
)
4216 function_arg_advance_64 (cum
, mode
, type
, words
);
4218 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
4221 /* Define where to put the arguments to a function.
4222 Value is zero to push the argument on the stack,
4223 or a hard register in which to store the argument.
4225 MODE is the argument's machine mode.
4226 TYPE is the data type of the argument (as a tree).
4227 This is null for libcalls where that information may
4229 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4230 the preceding args and about the function being called.
4231 NAMED is nonzero if this argument is a named parameter
4232 (otherwise it is an extra parameter matching an ellipsis). */
4235 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4236 enum machine_mode orig_mode
, tree type
,
4237 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
4239 static bool warnedsse
, warnedmmx
;
4241 /* Avoid the AL settings for the Unix64 ABI. */
4242 if (mode
== VOIDmode
)
4258 if (words
<= cum
->nregs
)
4260 int regno
= cum
->regno
;
4262 /* Fastcall allocates the first two DWORD (SImode) or
4263 smaller arguments to ECX and EDX if it isn't an
4269 || (type
&& AGGREGATE_TYPE_P (type
)))
4272 /* ECX not EAX is the first allocated register. */
4273 if (regno
== AX_REG
)
4276 return gen_rtx_REG (mode
, regno
);
4281 if (cum
->float_in_sse
< 2)
4284 if (cum
->float_in_sse
< 1)
4294 if (!type
|| !AGGREGATE_TYPE_P (type
))
4296 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4299 warning (0, "SSE vector argument without SSE enabled "
4303 return gen_reg_or_parallel (mode
, orig_mode
,
4304 cum
->sse_regno
+ FIRST_SSE_REG
);
4312 if (!type
|| !AGGREGATE_TYPE_P (type
))
4314 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4317 warning (0, "MMX vector argument without MMX enabled "
4321 return gen_reg_or_parallel (mode
, orig_mode
,
4322 cum
->mmx_regno
+ FIRST_MMX_REG
);
4331 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4332 enum machine_mode orig_mode
, tree type
)
4334 /* Handle a hidden AL argument containing number of registers
4335 for varargs x86-64 functions. */
4336 if (mode
== VOIDmode
)
4337 return GEN_INT (cum
->maybe_vaarg
4338 ? (cum
->sse_nregs
< 0
4343 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4345 &x86_64_int_parameter_registers
[cum
->regno
],
4350 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4351 enum machine_mode orig_mode
, int named
)
4355 /* Avoid the AL settings for the Unix64 ABI. */
4356 if (mode
== VOIDmode
)
4359 /* If we've run out of registers, it goes on the stack. */
4360 if (cum
->nregs
== 0)
4363 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
4365 /* Only floating point modes are passed in anything but integer regs. */
4366 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
4369 regno
= cum
->regno
+ FIRST_SSE_REG
;
4374 /* Unnamed floating parameters are passed in both the
4375 SSE and integer registers. */
4376 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
4377 t2
= gen_rtx_REG (mode
, regno
);
4378 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
4379 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
4380 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
4384 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
4388 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
4389 tree type
, int named
)
4391 enum machine_mode mode
= omode
;
4392 HOST_WIDE_INT bytes
, words
;
4394 if (mode
== BLKmode
)
4395 bytes
= int_size_in_bytes (type
);
4397 bytes
= GET_MODE_SIZE (mode
);
4398 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4400 /* To simplify the code below, represent vector types with a vector mode
4401 even if MMX/SSE are not active. */
4402 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4403 mode
= type_natural_mode (type
);
4405 if (TARGET_64BIT_MS_ABI
)
4406 return function_arg_ms_64 (cum
, mode
, omode
, named
);
4407 else if (TARGET_64BIT
)
4408 return function_arg_64 (cum
, mode
, omode
, type
);
4410 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
4413 /* A C expression that indicates when an argument must be passed by
4414 reference. If nonzero for an argument, a copy of that argument is
4415 made in memory and a pointer to the argument is passed instead of
4416 the argument itself. The pointer is passed in whatever way is
4417 appropriate for passing a pointer to that type. */
4420 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4421 enum machine_mode mode ATTRIBUTE_UNUSED
,
4422 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4424 if (TARGET_64BIT_MS_ABI
)
4428 /* Arrays are passed by reference. */
4429 if (TREE_CODE (type
) == ARRAY_TYPE
)
4432 if (AGGREGATE_TYPE_P (type
))
4434 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4435 are passed by reference. */
4436 int el2
= exact_log2 (int_size_in_bytes (type
));
4437 return !(el2
>= 0 && el2
<= 3);
4441 /* __m128 is passed by reference. */
4442 /* ??? How to handle complex? For now treat them as structs,
4443 and pass them by reference if they're too large. */
4444 if (GET_MODE_SIZE (mode
) > 8)
4447 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
4453 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4454 ABI. Only called if TARGET_SSE. */
4456 contains_128bit_aligned_vector_p (tree type
)
4458 enum machine_mode mode
= TYPE_MODE (type
);
4459 if (SSE_REG_MODE_P (mode
)
4460 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4462 if (TYPE_ALIGN (type
) < 128)
4465 if (AGGREGATE_TYPE_P (type
))
4467 /* Walk the aggregates recursively. */
4468 switch (TREE_CODE (type
))
4472 case QUAL_UNION_TYPE
:
4476 /* Walk all the structure fields. */
4477 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4479 if (TREE_CODE (field
) == FIELD_DECL
4480 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4487 /* Just for use if some languages pass arrays by value. */
4488 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4499 /* Gives the alignment boundary, in bits, of an argument with the
4500 specified mode and type. */
4503 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4507 align
= TYPE_ALIGN (type
);
4509 align
= GET_MODE_ALIGNMENT (mode
);
4510 if (align
< PARM_BOUNDARY
)
4511 align
= PARM_BOUNDARY
;
4514 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4515 make an exception for SSE modes since these require 128bit
4518 The handling here differs from field_alignment. ICC aligns MMX
4519 arguments to 4 byte boundaries, while structure fields are aligned
4520 to 8 byte boundaries. */
4522 align
= PARM_BOUNDARY
;
4525 if (!SSE_REG_MODE_P (mode
))
4526 align
= PARM_BOUNDARY
;
4530 if (!contains_128bit_aligned_vector_p (type
))
4531 align
= PARM_BOUNDARY
;
4539 /* Return true if N is a possible register number of function value. */
4542 ix86_function_value_regno_p (int regno
)
4549 case FIRST_FLOAT_REG
:
4550 if (TARGET_64BIT_MS_ABI
)
4552 return TARGET_FLOAT_RETURNS_IN_80387
;
4558 if (TARGET_MACHO
|| TARGET_64BIT
)
4566 /* Define how to find the value returned by a function.
4567 VALTYPE is the data type of the value (as a tree).
4568 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4569 otherwise, FUNC is 0. */
4572 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4573 const_tree fntype
, const_tree fn
)
4577 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4578 we normally prevent this case when mmx is not available. However
4579 some ABIs may require the result to be returned like DImode. */
4580 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4581 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4583 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4584 we prevent this case when sse is not available. However some ABIs
4585 may require the result to be returned like integer TImode. */
4586 else if (mode
== TImode
4587 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4588 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4590 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4591 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4592 regno
= FIRST_FLOAT_REG
;
4594 /* Most things go in %eax. */
4597 /* Override FP return register with %xmm0 for local functions when
4598 SSE math is enabled or for functions with sseregparm attribute. */
4599 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4601 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4602 if ((sse_level
>= 1 && mode
== SFmode
)
4603 || (sse_level
== 2 && mode
== DFmode
))
4604 regno
= FIRST_SSE_REG
;
4607 return gen_rtx_REG (orig_mode
, regno
);
4611 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4616 /* Handle libcalls, which don't provide a type node. */
4617 if (valtype
== NULL
)
4629 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4632 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4636 return gen_rtx_REG (mode
, AX_REG
);
4640 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4641 REGPARM_MAX
, SSE_REGPARM_MAX
,
4642 x86_64_int_return_registers
, 0);
4644 /* For zero sized structures, construct_container returns NULL, but we
4645 need to keep rest of compiler happy by returning meaningful value. */
4647 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
4653 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
4655 unsigned int regno
= AX_REG
;
4659 if (mode
== SFmode
|| mode
== DFmode
)
4660 regno
= FIRST_SSE_REG
;
4661 else if (VECTOR_MODE_P (mode
) || GET_MODE_SIZE (mode
) == 16)
4662 regno
= FIRST_SSE_REG
;
4665 return gen_rtx_REG (orig_mode
, regno
);
4669 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
4670 enum machine_mode orig_mode
, enum machine_mode mode
)
4672 const_tree fn
, fntype
;
4675 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4676 fn
= fntype_or_decl
;
4677 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4679 if (TARGET_64BIT_MS_ABI
)
4680 return function_value_ms_64 (orig_mode
, mode
);
4681 else if (TARGET_64BIT
)
4682 return function_value_64 (orig_mode
, mode
, valtype
);
4684 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
4688 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
4689 bool outgoing ATTRIBUTE_UNUSED
)
4691 enum machine_mode mode
, orig_mode
;
4693 orig_mode
= TYPE_MODE (valtype
);
4694 mode
= type_natural_mode (valtype
);
4695 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
4699 ix86_libcall_value (enum machine_mode mode
)
4701 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
4704 /* Return true iff type is returned in memory. */
4707 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
4711 if (mode
== BLKmode
)
4714 size
= int_size_in_bytes (type
);
4716 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4719 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4721 /* User-created vectors small enough to fit in EAX. */
4725 /* MMX/3dNow values are returned in MM0,
4726 except when it doesn't exist. */
4728 return (TARGET_MMX
? 0 : 1);
4730 /* SSE values are returned in XMM0, except when it doesn't exist. */
4732 return (TARGET_SSE
? 0 : 1);
4747 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
4749 int needed_intregs
, needed_sseregs
;
4750 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4754 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
4756 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4758 /* __m128 and friends are returned in xmm0. */
4759 if (!COMPLEX_MODE_P (mode
) && size
== 16 && VECTOR_MODE_P (mode
))
4762 /* Otherwise, the size must be exactly in [1248]. But not for complex. */
4763 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8)
4764 || COMPLEX_MODE_P (mode
);
4768 ix86_return_in_memory (const_tree type
)
4770 const enum machine_mode mode
= type_natural_mode (type
);
4772 if (TARGET_64BIT_MS_ABI
)
4773 return return_in_memory_ms_64 (type
, mode
);
4774 else if (TARGET_64BIT
)
4775 return return_in_memory_64 (type
, mode
);
4777 return return_in_memory_32 (type
, mode
);
4780 /* Return false iff TYPE is returned in memory. This version is used
4781 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4782 but differs notably in that when MMX is available, 8-byte vectors
4783 are returned in memory, rather than in MMX registers. */
4786 ix86_sol10_return_in_memory (const_tree type
)
4789 enum machine_mode mode
= type_natural_mode (type
);
4792 return return_in_memory_64 (type
, mode
);
4794 if (mode
== BLKmode
)
4797 size
= int_size_in_bytes (type
);
4799 if (VECTOR_MODE_P (mode
))
4801 /* Return in memory only if MMX registers *are* available. This
4802 seems backwards, but it is consistent with the existing
4809 else if (mode
== TImode
)
4811 else if (mode
== XFmode
)
4817 /* When returning SSE vector types, we have a choice of either
4818 (1) being abi incompatible with a -march switch, or
4819 (2) generating an error.
4820 Given no good solution, I think the safest thing is one warning.
4821 The user won't be able to use -Werror, but....
4823 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4824 called in response to actually generating a caller or callee that
4825 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4826 via aggregate_value_p for general type probing from tree-ssa. */
4829 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4831 static bool warnedsse
, warnedmmx
;
4833 if (!TARGET_64BIT
&& type
)
4835 /* Look at the return type of the function, not the function type. */
4836 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4838 if (!TARGET_SSE
&& !warnedsse
)
4841 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4844 warning (0, "SSE vector return without SSE enabled "
4849 if (!TARGET_MMX
&& !warnedmmx
)
4851 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4854 warning (0, "MMX vector return without MMX enabled "
4864 /* Create the va_list data type. */
4867 ix86_build_builtin_va_list (void)
4869 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4871 /* For i386 we use plain pointer to argument area. */
4872 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4873 return build_pointer_type (char_type_node
);
4875 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4876 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4878 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4879 unsigned_type_node
);
4880 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4881 unsigned_type_node
);
4882 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4884 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4887 va_list_gpr_counter_field
= f_gpr
;
4888 va_list_fpr_counter_field
= f_fpr
;
4890 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4891 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4892 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4893 DECL_FIELD_CONTEXT (f_sav
) = record
;
4895 TREE_CHAIN (record
) = type_decl
;
4896 TYPE_NAME (record
) = type_decl
;
4897 TYPE_FIELDS (record
) = f_gpr
;
4898 TREE_CHAIN (f_gpr
) = f_fpr
;
4899 TREE_CHAIN (f_fpr
) = f_ovf
;
4900 TREE_CHAIN (f_ovf
) = f_sav
;
4902 layout_type (record
);
4904 /* The correct type is an array type of one element. */
4905 return build_array_type (record
, build_index_type (size_zero_node
));
4908 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4911 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4921 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4924 /* Indicate to allocate space on the stack for varargs save area. */
4925 ix86_save_varrargs_registers
= 1;
4926 /* We need 16-byte stack alignment to save SSE registers. If user
4927 asked for a lower preferred_stack_boundary, let's just hope that he knows
4928 what he is doing and won't pass SSE values through varargs.
4930 We also may end up assuming that only 64bit values are stored in SSE
4931 register let some floating point program work. */
4932 if (ix86_preferred_stack_boundary
>= 128)
4933 cfun
->stack_alignment_needed
= 128;
4935 save_area
= frame_pointer_rtx
;
4936 set
= get_varargs_alias_set ();
4938 for (i
= cum
->regno
;
4940 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4943 mem
= gen_rtx_MEM (Pmode
,
4944 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4945 MEM_NOTRAP_P (mem
) = 1;
4946 set_mem_alias_set (mem
, set
);
4947 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4948 x86_64_int_parameter_registers
[i
]));
4951 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4953 /* Now emit code to save SSE registers. The AX parameter contains number
4954 of SSE parameter registers used to call this function. We use
4955 sse_prologue_save insn template that produces computed jump across
4956 SSE saves. We need some preparation work to get this working. */
4958 label
= gen_label_rtx ();
4959 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4961 /* Compute address to jump to :
4962 label - 5*eax + nnamed_sse_arguments*5 */
4963 tmp_reg
= gen_reg_rtx (Pmode
);
4964 nsse_reg
= gen_reg_rtx (Pmode
);
4965 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, AX_REG
)));
4966 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4967 gen_rtx_MULT (Pmode
, nsse_reg
,
4972 gen_rtx_CONST (DImode
,
4973 gen_rtx_PLUS (DImode
,
4975 GEN_INT (cum
->sse_regno
* 4))));
4977 emit_move_insn (nsse_reg
, label_ref
);
4978 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4980 /* Compute address of memory block we save into. We always use pointer
4981 pointing 127 bytes after first byte to store - this is needed to keep
4982 instruction size limited by 4 bytes. */
4983 tmp_reg
= gen_reg_rtx (Pmode
);
4984 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4985 plus_constant (save_area
,
4986 8 * REGPARM_MAX
+ 127)));
4987 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4988 MEM_NOTRAP_P (mem
) = 1;
4989 set_mem_alias_set (mem
, set
);
4990 set_mem_align (mem
, BITS_PER_WORD
);
4992 /* And finally do the dirty job! */
4993 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4994 GEN_INT (cum
->sse_regno
), label
));
4999 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
5001 alias_set_type set
= get_varargs_alias_set ();
5004 for (i
= cum
->regno
; i
< REGPARM_MAX
; i
++)
5008 mem
= gen_rtx_MEM (Pmode
,
5009 plus_constant (virtual_incoming_args_rtx
,
5010 i
* UNITS_PER_WORD
));
5011 MEM_NOTRAP_P (mem
) = 1;
5012 set_mem_alias_set (mem
, set
);
5014 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
5015 emit_move_insn (mem
, reg
);
5020 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
5021 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
5024 CUMULATIVE_ARGS next_cum
;
5027 /* This argument doesn't appear to be used anymore. Which is good,
5028 because the old code here didn't suppress rtl generation. */
5029 gcc_assert (!no_rtl
);
5034 fntype
= TREE_TYPE (current_function_decl
);
5036 /* For varargs, we do not want to skip the dummy va_dcl argument.
5037 For stdargs, we do want to skip the last named argument. */
5039 if (stdarg_p (fntype
))
5040 function_arg_advance (&next_cum
, mode
, type
, 1);
5042 if (TARGET_64BIT_MS_ABI
)
5043 setup_incoming_varargs_ms_64 (&next_cum
);
5045 setup_incoming_varargs_64 (&next_cum
);
5048 /* Implement va_start. */
5051 ix86_va_start (tree valist
, rtx nextarg
)
5053 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
5054 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
5055 tree gpr
, fpr
, ovf
, sav
, t
;
5058 /* Only 64bit target needs something special. */
5059 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
5061 std_expand_builtin_va_start (valist
, nextarg
);
5065 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
5066 f_fpr
= TREE_CHAIN (f_gpr
);
5067 f_ovf
= TREE_CHAIN (f_fpr
);
5068 f_sav
= TREE_CHAIN (f_ovf
);
5070 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
5071 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
5072 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
5073 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
5074 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
5076 /* Count number of gp and fp argument registers used. */
5077 words
= current_function_args_info
.words
;
5078 n_gpr
= current_function_args_info
.regno
;
5079 n_fpr
= current_function_args_info
.sse_regno
;
5081 if (cfun
->va_list_gpr_size
)
5083 type
= TREE_TYPE (gpr
);
5084 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
5085 build_int_cst (type
, n_gpr
* 8));
5086 TREE_SIDE_EFFECTS (t
) = 1;
5087 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5090 if (cfun
->va_list_fpr_size
)
5092 type
= TREE_TYPE (fpr
);
5093 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
5094 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
5095 TREE_SIDE_EFFECTS (t
) = 1;
5096 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5099 /* Find the overflow area. */
5100 type
= TREE_TYPE (ovf
);
5101 t
= make_tree (type
, virtual_incoming_args_rtx
);
5103 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
5104 size_int (words
* UNITS_PER_WORD
));
5105 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
5106 TREE_SIDE_EFFECTS (t
) = 1;
5107 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5109 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
5111 /* Find the register save area.
5112 Prologue of the function save it right above stack frame. */
5113 type
= TREE_TYPE (sav
);
5114 t
= make_tree (type
, frame_pointer_rtx
);
5115 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
5116 TREE_SIDE_EFFECTS (t
) = 1;
5117 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5121 /* Implement va_arg. */
5124 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
5126 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
5127 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
5128 tree gpr
, fpr
, ovf
, sav
, t
;
5130 tree lab_false
, lab_over
= NULL_TREE
;
5135 enum machine_mode nat_mode
;
5137 /* Only 64bit target needs something special. */
5138 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
5139 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
5141 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
5142 f_fpr
= TREE_CHAIN (f_gpr
);
5143 f_ovf
= TREE_CHAIN (f_fpr
);
5144 f_sav
= TREE_CHAIN (f_ovf
);
5146 valist
= build_va_arg_indirect_ref (valist
);
5147 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
5148 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
5149 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
5150 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
5152 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
5154 type
= build_pointer_type (type
);
5155 size
= int_size_in_bytes (type
);
5156 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5158 nat_mode
= type_natural_mode (type
);
5159 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
5160 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
5162 /* Pull the value out of the saved registers. */
5164 addr
= create_tmp_var (ptr_type_node
, "addr");
5165 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
5169 int needed_intregs
, needed_sseregs
;
5171 tree int_addr
, sse_addr
;
5173 lab_false
= create_artificial_label ();
5174 lab_over
= create_artificial_label ();
5176 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
5178 need_temp
= (!REG_P (container
)
5179 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
5180 || TYPE_ALIGN (type
) > 128));
5182 /* In case we are passing structure, verify that it is consecutive block
5183 on the register save area. If not we need to do moves. */
5184 if (!need_temp
&& !REG_P (container
))
5186 /* Verify that all registers are strictly consecutive */
5187 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
5191 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
5193 rtx slot
= XVECEXP (container
, 0, i
);
5194 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
5195 || INTVAL (XEXP (slot
, 1)) != i
* 16)
5203 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
5205 rtx slot
= XVECEXP (container
, 0, i
);
5206 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
5207 || INTVAL (XEXP (slot
, 1)) != i
* 8)
5219 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
5220 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
5221 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
5222 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
5225 /* First ensure that we fit completely in registers. */
5228 t
= build_int_cst (TREE_TYPE (gpr
),
5229 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
5230 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
5231 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5232 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5233 gimplify_and_add (t
, pre_p
);
5237 t
= build_int_cst (TREE_TYPE (fpr
),
5238 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
5240 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
5241 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5242 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5243 gimplify_and_add (t
, pre_p
);
5246 /* Compute index to start of area used for integer regs. */
5249 /* int_addr = gpr + sav; */
5250 t
= fold_convert (sizetype
, gpr
);
5251 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5252 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
5253 gimplify_and_add (t
, pre_p
);
5257 /* sse_addr = fpr + sav; */
5258 t
= fold_convert (sizetype
, fpr
);
5259 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5260 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
5261 gimplify_and_add (t
, pre_p
);
5266 tree temp
= create_tmp_var (type
, "va_arg_tmp");
5269 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
5270 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5271 gimplify_and_add (t
, pre_p
);
5273 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
5275 rtx slot
= XVECEXP (container
, 0, i
);
5276 rtx reg
= XEXP (slot
, 0);
5277 enum machine_mode mode
= GET_MODE (reg
);
5278 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
5279 tree addr_type
= build_pointer_type (piece_type
);
5282 tree dest_addr
, dest
;
5284 if (SSE_REGNO_P (REGNO (reg
)))
5286 src_addr
= sse_addr
;
5287 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
5291 src_addr
= int_addr
;
5292 src_offset
= REGNO (reg
) * 8;
5294 src_addr
= fold_convert (addr_type
, src_addr
);
5295 src_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, src_addr
,
5296 size_int (src_offset
));
5297 src
= build_va_arg_indirect_ref (src_addr
);
5299 dest_addr
= fold_convert (addr_type
, addr
);
5300 dest_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, dest_addr
,
5301 size_int (INTVAL (XEXP (slot
, 1))));
5302 dest
= build_va_arg_indirect_ref (dest_addr
);
5304 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
5305 gimplify_and_add (t
, pre_p
);
5311 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
5312 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
5313 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
5314 gimplify_and_add (t
, pre_p
);
5318 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
5319 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
5320 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
5321 gimplify_and_add (t
, pre_p
);
5324 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
5325 gimplify_and_add (t
, pre_p
);
5327 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
5328 append_to_statement_list (t
, pre_p
);
5331 /* ... otherwise out of the overflow area. */
5333 /* Care for on-stack alignment if needed. */
5334 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
5335 || integer_zerop (TYPE_SIZE (type
)))
5339 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
5340 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
5341 size_int (align
- 1));
5342 t
= fold_convert (sizetype
, t
);
5343 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5345 t
= fold_convert (TREE_TYPE (ovf
), t
);
5347 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
5349 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5350 gimplify_and_add (t2
, pre_p
);
5352 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
,
5353 size_int (rsize
* UNITS_PER_WORD
));
5354 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
5355 gimplify_and_add (t
, pre_p
);
5359 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
5360 append_to_statement_list (t
, pre_p
);
5363 ptrtype
= build_pointer_type (type
);
5364 addr
= fold_convert (ptrtype
, addr
);
5367 addr
= build_va_arg_indirect_ref (addr
);
5368 return build_va_arg_indirect_ref (addr
);
5371 /* Return nonzero if OPNUM's MEM should be matched
5372 in movabs* patterns. */
5375 ix86_check_movabs (rtx insn
, int opnum
)
5379 set
= PATTERN (insn
);
5380 if (GET_CODE (set
) == PARALLEL
)
5381 set
= XVECEXP (set
, 0, 0);
5382 gcc_assert (GET_CODE (set
) == SET
);
5383 mem
= XEXP (set
, opnum
);
5384 while (GET_CODE (mem
) == SUBREG
)
5385 mem
= SUBREG_REG (mem
);
5386 gcc_assert (MEM_P (mem
));
5387 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5390 /* Initialize the table of extra 80387 mathematical constants. */
5393 init_ext_80387_constants (void)
5395 static const char * cst
[5] =
5397 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5398 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5399 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5400 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5401 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5405 for (i
= 0; i
< 5; i
++)
5407 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5408 /* Ensure each constant is rounded to XFmode precision. */
5409 real_convert (&ext_80387_constants_table
[i
],
5410 XFmode
, &ext_80387_constants_table
[i
]);
5413 ext_80387_constants_init
= 1;
5416 /* Return true if the constant is something that can be loaded with
5417 a special instruction. */
5420 standard_80387_constant_p (rtx x
)
5422 enum machine_mode mode
= GET_MODE (x
);
5426 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
5429 if (x
== CONST0_RTX (mode
))
5431 if (x
== CONST1_RTX (mode
))
5434 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5436 /* For XFmode constants, try to find a special 80387 instruction when
5437 optimizing for size or on those CPUs that benefit from them. */
5439 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5443 if (! ext_80387_constants_init
)
5444 init_ext_80387_constants ();
5446 for (i
= 0; i
< 5; i
++)
5447 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5451 /* Load of the constant -0.0 or -1.0 will be split as
5452 fldz;fchs or fld1;fchs sequence. */
5453 if (real_isnegzero (&r
))
5455 if (real_identical (&r
, &dconstm1
))
5461 /* Return the opcode of the special instruction to be used to load
5465 standard_80387_constant_opcode (rtx x
)
5467 switch (standard_80387_constant_p (x
))
5491 /* Return the CONST_DOUBLE representing the 80387 constant that is
5492 loaded by the specified special instruction. The argument IDX
5493 matches the return value from standard_80387_constant_p. */
5496 standard_80387_constant_rtx (int idx
)
5500 if (! ext_80387_constants_init
)
5501 init_ext_80387_constants ();
5517 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5521 /* Return 1 if mode is a valid mode for sse. */
5523 standard_sse_mode_p (enum machine_mode mode
)
5540 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5543 standard_sse_constant_p (rtx x
)
5545 enum machine_mode mode
= GET_MODE (x
);
5547 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5549 if (vector_all_ones_operand (x
, mode
)
5550 && standard_sse_mode_p (mode
))
5551 return TARGET_SSE2
? 2 : -1;
5556 /* Return the opcode of the special instruction to be used to load
5560 standard_sse_constant_opcode (rtx insn
, rtx x
)
5562 switch (standard_sse_constant_p (x
))
5565 if (get_attr_mode (insn
) == MODE_V4SF
)
5566 return "xorps\t%0, %0";
5567 else if (get_attr_mode (insn
) == MODE_V2DF
)
5568 return "xorpd\t%0, %0";
5570 return "pxor\t%0, %0";
5572 return "pcmpeqd\t%0, %0";
5577 /* Returns 1 if OP contains a symbol reference */
5580 symbolic_reference_mentioned_p (rtx op
)
5585 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5588 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5589 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5595 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5596 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5600 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5607 /* Return 1 if it is appropriate to emit `ret' instructions in the
5608 body of a function. Do this only if the epilogue is simple, needing a
5609 couple of insns. Prior to reloading, we can't tell how many registers
5610 must be saved, so return 0 then. Return 0 if there is no frame
5611 marker to de-allocate. */
5614 ix86_can_use_return_insn_p (void)
5616 struct ix86_frame frame
;
5618 if (! reload_completed
|| frame_pointer_needed
)
5621 /* Don't allow more than 32 pop, since that's all we can do
5622 with one instruction. */
5623 if (current_function_pops_args
5624 && current_function_args_size
>= 32768)
5627 ix86_compute_frame_layout (&frame
);
5628 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5631 /* Value should be nonzero if functions must have frame pointers.
5632 Zero means the frame pointer need not be set up (and parms may
5633 be accessed via the stack pointer) in functions that seem suitable. */
5636 ix86_frame_pointer_required (void)
5638 /* If we accessed previous frames, then the generated code expects
5639 to be able to access the saved ebp value in our frame. */
5640 if (cfun
->machine
->accesses_prev_frame
)
5643 /* Several x86 os'es need a frame pointer for other reasons,
5644 usually pertaining to setjmp. */
5645 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5648 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5649 the frame pointer by default. Turn it back on now if we've not
5650 got a leaf function. */
5651 if (TARGET_OMIT_LEAF_FRAME_POINTER
5652 && (!current_function_is_leaf
5653 || ix86_current_function_calls_tls_descriptor
))
5656 if (current_function_profile
)
5662 /* Record that the current function accesses previous call frames. */
5665 ix86_setup_frame_addresses (void)
5667 cfun
->machine
->accesses_prev_frame
= 1;
5670 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5671 # define USE_HIDDEN_LINKONCE 1
5673 # define USE_HIDDEN_LINKONCE 0
5676 static int pic_labels_used
;
5678 /* Fills in the label name that should be used for a pc thunk for
5679 the given register. */
5682 get_pc_thunk_name (char name
[32], unsigned int regno
)
5684 gcc_assert (!TARGET_64BIT
);
5686 if (USE_HIDDEN_LINKONCE
)
5687 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5689 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5693 /* This function generates code for -fpic that loads %ebx with
5694 the return address of the caller and then returns. */
5697 ix86_file_end (void)
5702 for (regno
= 0; regno
< 8; ++regno
)
5706 if (! ((pic_labels_used
>> regno
) & 1))
5709 get_pc_thunk_name (name
, regno
);
5714 switch_to_section (darwin_sections
[text_coal_section
]);
5715 fputs ("\t.weak_definition\t", asm_out_file
);
5716 assemble_name (asm_out_file
, name
);
5717 fputs ("\n\t.private_extern\t", asm_out_file
);
5718 assemble_name (asm_out_file
, name
);
5719 fputs ("\n", asm_out_file
);
5720 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5724 if (USE_HIDDEN_LINKONCE
)
5728 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5730 TREE_PUBLIC (decl
) = 1;
5731 TREE_STATIC (decl
) = 1;
5732 DECL_ONE_ONLY (decl
) = 1;
5734 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5735 switch_to_section (get_named_section (decl
, NULL
, 0));
5737 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5738 fputs ("\t.hidden\t", asm_out_file
);
5739 assemble_name (asm_out_file
, name
);
5740 fputc ('\n', asm_out_file
);
5741 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5745 switch_to_section (text_section
);
5746 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5749 xops
[0] = gen_rtx_REG (SImode
, regno
);
5750 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5751 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5752 output_asm_insn ("ret", xops
);
5755 if (NEED_INDICATE_EXEC_STACK
)
5756 file_end_indicate_exec_stack ();
5759 /* Emit code for the SET_GOT patterns. */
5762 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5768 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5770 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5771 xops
[2] = gen_rtx_MEM (Pmode
,
5772 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5773 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5775 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5776 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5777 an unadorned address. */
5778 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5779 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5780 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5784 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5786 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5788 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5791 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5793 output_asm_insn ("call\t%a2", xops
);
5796 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5797 is what will be referenced by the Mach-O PIC subsystem. */
5799 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5802 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5803 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5806 output_asm_insn ("pop{l}\t%0", xops
);
5811 get_pc_thunk_name (name
, REGNO (dest
));
5812 pic_labels_used
|= 1 << REGNO (dest
);
5814 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5815 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5816 output_asm_insn ("call\t%X2", xops
);
5817 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5818 is what will be referenced by the Mach-O PIC subsystem. */
5821 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5823 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5824 CODE_LABEL_NUMBER (label
));
5831 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5832 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5834 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5839 /* Generate an "push" pattern for input ARG. */
5844 return gen_rtx_SET (VOIDmode
,
5846 gen_rtx_PRE_DEC (Pmode
,
5847 stack_pointer_rtx
)),
5851 /* Return >= 0 if there is an unused call-clobbered register available
5852 for the entire function. */
5855 ix86_select_alt_pic_regnum (void)
5857 if (current_function_is_leaf
&& !current_function_profile
5858 && !ix86_current_function_calls_tls_descriptor
)
5861 for (i
= 2; i
>= 0; --i
)
5862 if (!df_regs_ever_live_p (i
))
5866 return INVALID_REGNUM
;
5869 /* Return 1 if we need to save REGNO. */
5871 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5873 if (pic_offset_table_rtx
5874 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5875 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
5876 || current_function_profile
5877 || current_function_calls_eh_return
5878 || current_function_uses_const_pool
))
5880 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5885 if (current_function_calls_eh_return
&& maybe_eh_return
)
5890 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5891 if (test
== INVALID_REGNUM
)
5898 if (cfun
->machine
->force_align_arg_pointer
5899 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5902 return (df_regs_ever_live_p (regno
)
5903 && !call_used_regs
[regno
]
5904 && !fixed_regs
[regno
]
5905 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5908 /* Return number of registers to be saved on the stack. */
5911 ix86_nsaved_regs (void)
5916 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5917 if (ix86_save_reg (regno
, true))
5922 /* Return the offset between two registers, one to be eliminated, and the other
5923 its replacement, at the start of a routine. */
5926 ix86_initial_elimination_offset (int from
, int to
)
5928 struct ix86_frame frame
;
5929 ix86_compute_frame_layout (&frame
);
5931 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5932 return frame
.hard_frame_pointer_offset
;
5933 else if (from
== FRAME_POINTER_REGNUM
5934 && to
== HARD_FRAME_POINTER_REGNUM
)
5935 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5938 gcc_assert (to
== STACK_POINTER_REGNUM
);
5940 if (from
== ARG_POINTER_REGNUM
)
5941 return frame
.stack_pointer_offset
;
5943 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5944 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5948 /* Fill structure ix86_frame about frame of currently computed function. */
5951 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5953 HOST_WIDE_INT total_size
;
5954 unsigned int stack_alignment_needed
;
5955 HOST_WIDE_INT offset
;
5956 unsigned int preferred_alignment
;
5957 HOST_WIDE_INT size
= get_frame_size ();
5959 frame
->nregs
= ix86_nsaved_regs ();
5962 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5963 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5965 /* During reload iteration the amount of registers saved can change.
5966 Recompute the value as needed. Do not recompute when amount of registers
5967 didn't change as reload does multiple calls to the function and does not
5968 expect the decision to change within single iteration. */
5970 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5972 int count
= frame
->nregs
;
5974 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5975 /* The fast prologue uses move instead of push to save registers. This
5976 is significantly longer, but also executes faster as modern hardware
5977 can execute the moves in parallel, but can't do that for push/pop.
5979 Be careful about choosing what prologue to emit: When function takes
5980 many instructions to execute we may use slow version as well as in
5981 case function is known to be outside hot spot (this is known with
5982 feedback only). Weight the size of function by number of registers
5983 to save as it is cheap to use one or two push instructions but very
5984 slow to use many of them. */
5986 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5987 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5988 || (flag_branch_probabilities
5989 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5990 cfun
->machine
->use_fast_prologue_epilogue
= false;
5992 cfun
->machine
->use_fast_prologue_epilogue
5993 = !expensive_function_p (count
);
5995 if (TARGET_PROLOGUE_USING_MOVE
5996 && cfun
->machine
->use_fast_prologue_epilogue
)
5997 frame
->save_regs_using_mov
= true;
5999 frame
->save_regs_using_mov
= false;
6002 /* Skip return address and saved base pointer. */
6003 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
6005 frame
->hard_frame_pointer_offset
= offset
;
6007 /* Do some sanity checking of stack_alignment_needed and
6008 preferred_alignment, since i386 port is the only using those features
6009 that may break easily. */
6011 gcc_assert (!size
|| stack_alignment_needed
);
6012 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
6013 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
6014 gcc_assert (stack_alignment_needed
6015 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
6017 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
6018 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
6020 /* Register save area */
6021 offset
+= frame
->nregs
* UNITS_PER_WORD
;
6024 if (ix86_save_varrargs_registers
)
6026 offset
+= X86_64_VARARGS_SIZE
;
6027 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
6030 frame
->va_arg_size
= 0;
6032 /* Align start of frame for local function. */
6033 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
6034 & -stack_alignment_needed
) - offset
;
6036 offset
+= frame
->padding1
;
6038 /* Frame pointer points here. */
6039 frame
->frame_pointer_offset
= offset
;
6043 /* Add outgoing arguments area. Can be skipped if we eliminated
6044 all the function calls as dead code.
6045 Skipping is however impossible when function calls alloca. Alloca
6046 expander assumes that last current_function_outgoing_args_size
6047 of stack frame are unused. */
6048 if (ACCUMULATE_OUTGOING_ARGS
6049 && (!current_function_is_leaf
|| current_function_calls_alloca
6050 || ix86_current_function_calls_tls_descriptor
))
6052 offset
+= current_function_outgoing_args_size
;
6053 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
6056 frame
->outgoing_arguments_size
= 0;
6058 /* Align stack boundary. Only needed if we're calling another function
6060 if (!current_function_is_leaf
|| current_function_calls_alloca
6061 || ix86_current_function_calls_tls_descriptor
)
6062 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
6063 & -preferred_alignment
) - offset
;
6065 frame
->padding2
= 0;
6067 offset
+= frame
->padding2
;
6069 /* We've reached end of stack frame. */
6070 frame
->stack_pointer_offset
= offset
;
6072 /* Size prologue needs to allocate. */
6073 frame
->to_allocate
=
6074 (size
+ frame
->padding1
+ frame
->padding2
6075 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
6077 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
6078 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
6079 frame
->save_regs_using_mov
= false;
6081 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
6082 && current_function_is_leaf
6083 && !ix86_current_function_calls_tls_descriptor
)
6085 frame
->red_zone_size
= frame
->to_allocate
;
6086 if (frame
->save_regs_using_mov
)
6087 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
6088 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
6089 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
6092 frame
->red_zone_size
= 0;
6093 frame
->to_allocate
-= frame
->red_zone_size
;
6094 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
6096 fprintf (stderr
, "\n");
6097 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
6098 fprintf (stderr
, "size: %ld\n", (long)size
);
6099 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
6100 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
6101 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
6102 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
6103 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
6104 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
6105 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
6106 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
6107 (long)frame
->hard_frame_pointer_offset
);
6108 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
6109 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
6110 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
6111 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
6115 /* Emit code to save registers in the prologue. */
6118 ix86_emit_save_regs (void)
6123 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
6124 if (ix86_save_reg (regno
, true))
6126 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
6127 RTX_FRAME_RELATED_P (insn
) = 1;
6131 /* Emit code to save registers using MOV insns. First register
6132 is restored from POINTER + OFFSET. */
6134 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
6139 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6140 if (ix86_save_reg (regno
, true))
6142 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
6144 gen_rtx_REG (Pmode
, regno
));
6145 RTX_FRAME_RELATED_P (insn
) = 1;
6146 offset
+= UNITS_PER_WORD
;
6150 /* Expand prologue or epilogue stack adjustment.
6151 The pattern exist to put a dependency on all ebp-based memory accesses.
6152 STYLE should be negative if instructions should be marked as frame related,
6153 zero if %r11 register is live and cannot be freely used and positive
6157 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
6162 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
6163 else if (x86_64_immediate_operand (offset
, DImode
))
6164 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
6168 /* r11 is used by indirect sibcall return as well, set before the
6169 epilogue and used after the epilogue. ATM indirect sibcall
6170 shouldn't be used together with huge frame sizes in one
6171 function because of the frame_size check in sibcall.c. */
6173 r11
= gen_rtx_REG (DImode
, R11_REG
);
6174 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
6176 RTX_FRAME_RELATED_P (insn
) = 1;
6177 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
6181 RTX_FRAME_RELATED_P (insn
) = 1;
6184 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6187 ix86_internal_arg_pointer (void)
6189 bool has_force_align_arg_pointer
=
6190 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
6191 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
6192 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6193 && DECL_NAME (current_function_decl
)
6194 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
6195 && DECL_FILE_SCOPE_P (current_function_decl
))
6196 || ix86_force_align_arg_pointer
6197 || has_force_align_arg_pointer
)
6199 /* Nested functions can't realign the stack due to a register
6201 if (DECL_CONTEXT (current_function_decl
)
6202 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
6204 if (ix86_force_align_arg_pointer
)
6205 warning (0, "-mstackrealign ignored for nested functions");
6206 if (has_force_align_arg_pointer
)
6207 error ("%s not supported for nested functions",
6208 ix86_force_align_arg_pointer_string
);
6209 return virtual_incoming_args_rtx
;
6211 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, CX_REG
);
6212 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
6215 return virtual_incoming_args_rtx
;
/* NOTE(review): garbled extraction -- lines split mid-expression, switch
   header and closing lines missing.  Preserved byte-for-byte.
   Purpose (per the visible comment): TARGET_DWARF_HANDLE_FRAME_UNSPEC hook;
   dwarf2out.c calls it to emit call-frame instructions for frame-related
   insns wrapping UNSPEC_REG_SAVE / UNSPEC_DEF_CFA patterns. */
6218 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6219 This is called from dwarf2out.c to emit call frame instructions
6220 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6222 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
6224 rtx unspec
= SET_SRC (pattern
);
6225 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
6229 case UNSPEC_REG_SAVE
:
6230 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
6231 SET_DEST (pattern
));
6233 case UNSPEC_DEF_CFA
:
6234 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
6235 INTVAL (XVECEXP (unspec
, 0, 0)));
/* NOTE(review): garbled extraction -- statements split mid-expression,
   many original lines (declarations, braces, condition lines) missing.
   Preserved byte-for-byte.
   Purpose (per the visible comment): expands the function prologue into
   separate insns -- optional stack realignment with unwind-info fixups,
   frame-pointer push/setup, register saves (push or mov), stack
   allocation (direct adjust or allocate_stack_worker probe), and PIC
   register (GOT pointer) setup, ending with scheduling barriers when
   profiling. */
6242 /* Expand the prologue into a bunch of separate insns. */
6245 ix86_expand_prologue (void)
6249 struct ix86_frame frame
;
6250 HOST_WIDE_INT allocate
;
6252 ix86_compute_frame_layout (&frame
);
6254 if (cfun
->machine
->force_align_arg_pointer
)
6258 /* Grab the argument pointer. */
6259 x
= plus_constant (stack_pointer_rtx
, 4);
6260 y
= cfun
->machine
->force_align_arg_pointer
;
6261 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
6262 RTX_FRAME_RELATED_P (insn
) = 1;
6264 /* The unwind info consists of two parts: install the fafp as the cfa,
6265 and record the fafp as the "save register" of the stack pointer.
6266 The later is there in order that the unwinder can see where it
6267 should restore the stack pointer across the and insn. */
6268 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
6269 x
= gen_rtx_SET (VOIDmode
, y
, x
);
6270 RTX_FRAME_RELATED_P (x
) = 1;
6271 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
6273 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
6274 RTX_FRAME_RELATED_P (y
) = 1;
6275 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
6276 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6277 REG_NOTES (insn
) = x
;
6279 /* Align the stack. */
6280 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
6283 /* And here we cheat like madmen with the unwind info. We force the
6284 cfa register back to sp+4, which is exactly what it was at the
6285 start of the function. Re-pushing the return address results in
6286 the return at the same spot relative to the cfa, and thus is
6287 correct wrt the unwind info. */
6288 x
= cfun
->machine
->force_align_arg_pointer
;
6289 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
6290 insn
= emit_insn (gen_push (x
));
6291 RTX_FRAME_RELATED_P (insn
) = 1;
6294 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
6295 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
6296 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6297 REG_NOTES (insn
) = x
;
6300 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6301 slower on all targets. Also sdb doesn't like it. */
6303 if (frame_pointer_needed
)
6305 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
6306 RTX_FRAME_RELATED_P (insn
) = 1;
6308 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
6309 RTX_FRAME_RELATED_P (insn
) = 1;
6312 allocate
= frame
.to_allocate
;
6314 if (!frame
.save_regs_using_mov
)
6315 ix86_emit_save_regs ();
6317 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
6319 /* When using red zone we may start register saving before allocating
6320 the stack frame saving one cycle of the prologue. */
6321 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
6322 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
6323 : stack_pointer_rtx
,
6324 -frame
.nregs
* UNITS_PER_WORD
);
6328 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
6329 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6330 GEN_INT (-allocate
), -1);
6333 /* Only valid for Win32. */
6334 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
6338 gcc_assert (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
);
6340 if (TARGET_64BIT_MS_ABI
)
6343 eax_live
= ix86_eax_live_at_start_p ();
6347 emit_insn (gen_push (eax
));
6348 allocate
-= UNITS_PER_WORD
;
6351 emit_move_insn (eax
, GEN_INT (allocate
));
6354 insn
= gen_allocate_stack_worker_64 (eax
);
6356 insn
= gen_allocate_stack_worker_32 (eax
);
6357 insn
= emit_insn (insn
);
6358 RTX_FRAME_RELATED_P (insn
) = 1;
6359 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
6360 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
6361 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
6362 t
, REG_NOTES (insn
));
6366 if (frame_pointer_needed
)
6367 t
= plus_constant (hard_frame_pointer_rtx
,
6370 - frame
.nregs
* UNITS_PER_WORD
);
6372 t
= plus_constant (stack_pointer_rtx
, allocate
);
6373 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
6377 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6379 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6380 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6382 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6383 -frame
.nregs
* UNITS_PER_WORD
);
6386 pic_reg_used
= false;
6387 if (pic_offset_table_rtx
6388 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
6389 || current_function_profile
))
6391 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6393 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6394 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
6396 pic_reg_used
= true;
6403 if (ix86_cmodel
== CM_LARGE_PIC
)
6405 rtx tmp_reg
= gen_rtx_REG (DImode
, R11_REG
);
6406 rtx label
= gen_label_rtx ();
6408 LABEL_PRESERVE_P (label
) = 1;
6409 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6410 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6411 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6412 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6413 pic_offset_table_rtx
, tmp_reg
));
6416 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6419 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6422 /* Prevent function calls from being scheduled before the call to mcount.
6423 In the pic_reg_used case, make sure that the got load isn't deleted. */
6424 if (current_function_profile
)
6427 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
6428 emit_insn (gen_blockage ());
/* NOTE(review): garbled extraction -- lines split mid-expression; local
   declarations for regno/r11 are among the missing lines.  Preserved
   byte-for-byte.
   Purpose (per the visible comment): restore saved hard registers with MOV
   insns, the first from POINTER + OFFSET, stepping OFFSET by
   UNITS_PER_WORD.  On x86-64, offsets that do not fit in SImode are
   materialized in r11 first so adjust_address stays in range. */
6432 /* Emit code to restore saved registers using MOV insns. First register
6433 is restored from POINTER + OFFSET. */
6435 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6436 int maybe_eh_return
)
6439 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6441 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6442 if (ix86_save_reg (regno
, maybe_eh_return
))
6444 /* Ensure that adjust_address won't be forced to produce pointer
6445 out of range allowed by x86-64 instruction set. */
6446 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6450 r11
= gen_rtx_REG (DImode
, R11_REG
);
6451 emit_move_insn (r11
, GEN_INT (offset
));
6452 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6453 base_address
= gen_rtx_MEM (Pmode
, r11
);
6456 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6457 adjust_address (base_address
, Pmode
, offset
));
6458 offset
+= UNITS_PER_WORD
;
/* NOTE(review): garbled extraction -- statements split mid-expression, many
   original lines (braces, if-headers, arguments) missing.  Preserved
   byte-for-byte.
   Purpose (per the visible comment): restore stack, frame and registers on
   function exit.  Chooses between MOV-based restores plus leave/adjust,
   or deallocate-then-pop; handles eh_return's %ecx stack adjustment, and
   emits the final return (plain, pop-args, or indirect through %ecx when
   popping >= 64K).  style == 2 appears to mark the eh_return path and
   sibcall epilogues skip the return -- TODO confirm against the full
   source. */
6462 /* Restore function stack, frame, and registers. */
6465 ix86_expand_epilogue (int style
)
6468 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6469 struct ix86_frame frame
;
6470 HOST_WIDE_INT offset
;
6472 ix86_compute_frame_layout (&frame
);
6474 /* Calculate start of saved registers relative to ebp. Special care
6475 must be taken for the normal return case of a function using
6476 eh_return: the eax and edx registers are marked as saved, but not
6477 restored along this path. */
6478 offset
= frame
.nregs
;
6479 if (current_function_calls_eh_return
&& style
!= 2)
6481 offset
*= -UNITS_PER_WORD
;
6483 /* If we're only restoring one register and sp is not valid then
6484 using a move instruction to restore the register since it's
6485 less work than reloading sp and popping the register.
6487 The default code result in stack adjustment using add/lea instruction,
6488 while this code results in LEAVE instruction (or discrete equivalent),
6489 so it is profitable in some other cases as well. Especially when there
6490 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6491 and there is exactly one register to pop. This heuristic may need some
6492 tuning in future. */
6493 if ((!sp_valid
&& frame
.nregs
<= 1)
6494 || (TARGET_EPILOGUE_USING_MOVE
6495 && cfun
->machine
->use_fast_prologue_epilogue
6496 && (frame
.nregs
> 1 || frame
.to_allocate
))
6497 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6498 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6499 && cfun
->machine
->use_fast_prologue_epilogue
6500 && frame
.nregs
== 1)
6501 || current_function_calls_eh_return
)
6503 /* Restore registers. We can use ebp or esp to address the memory
6504 locations. If both are available, default to ebp, since offsets
6505 are known to be small. Only exception is esp pointing directly to the
6506 end of block of saved registers, where we may simplify addressing
6509 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6510 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6511 frame
.to_allocate
, style
== 2);
6513 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6514 offset
, style
== 2);
6516 /* eh_return epilogues need %ecx added to the stack pointer. */
6519 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6521 if (frame_pointer_needed
)
6523 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6524 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6525 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6527 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6528 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6530 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6535 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6536 tmp
= plus_constant (tmp
, (frame
.to_allocate
6537 + frame
.nregs
* UNITS_PER_WORD
));
6538 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6541 else if (!frame_pointer_needed
)
6542 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6543 GEN_INT (frame
.to_allocate
6544 + frame
.nregs
* UNITS_PER_WORD
),
6546 /* If not an i386, mov & pop is faster than "leave". */
6547 else if (TARGET_USE_LEAVE
|| optimize_size
6548 || !cfun
->machine
->use_fast_prologue_epilogue
)
6549 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6552 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6553 hard_frame_pointer_rtx
,
6556 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6558 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6563 /* First step is to deallocate the stack frame so that we can
6564 pop the registers. */
6567 gcc_assert (frame_pointer_needed
);
6568 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6569 hard_frame_pointer_rtx
,
6570 GEN_INT (offset
), style
);
6572 else if (frame
.to_allocate
)
6573 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6574 GEN_INT (frame
.to_allocate
), style
);
6576 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6577 if (ix86_save_reg (regno
, false))
6580 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6582 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6584 if (frame_pointer_needed
)
6586 /* Leave results in shorter dependency chains on CPUs that are
6587 able to grok it fast. */
6588 if (TARGET_USE_LEAVE
)
6589 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6590 else if (TARGET_64BIT
)
6591 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6593 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6597 if (cfun
->machine
->force_align_arg_pointer
)
6599 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6600 cfun
->machine
->force_align_arg_pointer
,
6604 /* Sibcall epilogues don't want a return instruction. */
6608 if (current_function_pops_args
&& current_function_args_size
)
6610 rtx popc
= GEN_INT (current_function_pops_args
);
6612 /* i386 can only pop 64K bytes. If asked to pop more, pop
6613 return address, do explicit add, and jump indirectly to the
6616 if (current_function_pops_args
>= 65536)
6618 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
6620 /* There is no "pascal" calling convention in any 64bit ABI. */
6621 gcc_assert (!TARGET_64BIT
);
6623 emit_insn (gen_popsi1 (ecx
));
6624 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6625 emit_jump_insn (gen_return_indirect_internal (ecx
));
6628 emit_jump_insn (gen_return_pop_internal (popc
));
6631 emit_jump_insn (gen_return_internal ());
/* NOTE(review): garbled extraction -- condition lines around the NOTE
   scanning loop are missing.  Preserved byte-for-byte.
   Purpose (per the visible code): after emitting an epilogue, restore
   pic_offset_table_rtx to the real PIC register number, and on Mach-O
   (per the visible comment) emit a trailing "nop" when the function would
   otherwise end in a deleted label, since Mach-O does not support labels
   at the end of objects. */
6634 /* Reset from the function's potential modifications. */
6637 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6638 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6640 if (pic_offset_table_rtx
)
6641 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
6643 /* Mach-O doesn't support labels at the end of objects, so if
6644 it looks like we might want one, insert a NOP. */
6646 rtx insn
= get_last_insn ();
6649 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
6650 insn
= PREV_INSN (insn
);
6654 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
6655 fputs ("\tnop\n", file
);
/* NOTE(review): garbled extraction -- large gaps (e.g. original lines
   6713..6740 of the PLUS-walk switch are missing) and statements split
   mid-expression.  Preserved byte-for-byte.
   Purpose (per the visible comment): decompose an address RTX into
   base/index/scale/displacement/segment parts in *out; returns 0 on
   malformed addresses and -1 when an ASHIFT is present (lea-length
   computation only).  The tail applies x86 encoding special cases:
   swapping base/index, %ebp-needs-disp, K6 [%esi] avoidance, reg*2 ->
   reg+reg, and scale-without-base-or-disp rejection. */
6661 /* Extract the parts of an RTL expression that is a valid memory address
6662 for an instruction. Return 0 if the structure of the address is
6663 grossly off. Return -1 if the address contains ASHIFT, so it is not
6664 strictly valid, but still used for computing length of lea instruction. */
6667 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6669 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6670 rtx base_reg
, index_reg
;
6671 HOST_WIDE_INT scale
= 1;
6672 rtx scale_rtx
= NULL_RTX
;
6674 enum ix86_address_seg seg
= SEG_DEFAULT
;
6676 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6678 else if (GET_CODE (addr
) == PLUS
)
6688 addends
[n
++] = XEXP (op
, 1);
6691 while (GET_CODE (op
) == PLUS
);
6696 for (i
= n
; i
>= 0; --i
)
6699 switch (GET_CODE (op
))
6704 index
= XEXP (op
, 0);
6705 scale_rtx
= XEXP (op
, 1);
6709 if (XINT (op
, 1) == UNSPEC_TP
6710 && TARGET_TLS_DIRECT_SEG_REFS
6711 && seg
== SEG_DEFAULT
)
6712 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6741 else if (GET_CODE (addr
) == MULT
)
6743 index
= XEXP (addr
, 0); /* index*scale */
6744 scale_rtx
= XEXP (addr
, 1);
6746 else if (GET_CODE (addr
) == ASHIFT
)
6750 /* We're called for lea too, which implements ashift on occasion. */
6751 index
= XEXP (addr
, 0);
6752 tmp
= XEXP (addr
, 1);
6753 if (!CONST_INT_P (tmp
))
6755 scale
= INTVAL (tmp
);
6756 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6762 disp
= addr
; /* displacement */
6764 /* Extract the integral value of scale. */
6767 if (!CONST_INT_P (scale_rtx
))
6769 scale
= INTVAL (scale_rtx
);
6772 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6773 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6775 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6776 if (base_reg
&& index_reg
&& scale
== 1
6777 && (index_reg
== arg_pointer_rtx
6778 || index_reg
== frame_pointer_rtx
6779 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6782 tmp
= base
, base
= index
, index
= tmp
;
6783 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6786 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6787 if ((base_reg
== hard_frame_pointer_rtx
6788 || base_reg
== frame_pointer_rtx
6789 || base_reg
== arg_pointer_rtx
) && !disp
)
6792 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6793 Avoid this by transforming to [%esi+0]. */
6794 if (TARGET_K6
&& !optimize_size
6795 && base_reg
&& !index_reg
&& !disp
6797 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6800 /* Special case: encode reg+reg instead of reg*2. */
6801 if (!base
&& index
&& scale
&& scale
== 2)
6802 base
= index
, base_reg
= index_reg
, scale
= 1;
6804 /* Special case: scaling cannot be encoded without base or displacement. */
6805 if (!base
&& !disp
&& index
&& scale
!= 1)
/* NOTE(review): garbled extraction -- the cost accumulator statements and
   several condition lines are missing; only the comments and partial
   conditions survive.  Preserved byte-for-byte.
   Purpose (per the visible comment): return the cost of memory address X
   by decomposing it and penalizing addresses that need two registers,
   plus AMD-K6-specific ModR/M 00_xxx_100b penalties. */
6817 /* Return cost of the memory address x.
6818 For i386, it is better to use a complex address than let gcc copy
6819 the address into a reg and make a new pseudo. But not if the address
6820 requires to two regs - that would mean more pseudos with longer
6823 ix86_address_cost (rtx x
)
6825 struct ix86_address parts
;
6827 int ok
= ix86_decompose_address (x
, &parts
);
6831 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6832 parts
.base
= SUBREG_REG (parts
.base
);
6833 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6834 parts
.index
= SUBREG_REG (parts
.index
);
6836 /* Attempt to minimize number of registers in the address. */
6838 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6840 && (!REG_P (parts
.index
)
6841 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6845 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6847 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6848 && parts
.base
!= parts
.index
)
6851 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6852 since it's predecode logic can't detect the length of instructions
6853 and it degenerates to vector decoded. Increase cost of such
6854 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6855 to split such addresses or even refuse such addresses at all.
6857 Following addressing modes are affected:
6862 The first and last case may be avoidable by explicitly coding the zero in
6863 memory address, but I don't have AMD-K6 machine handy to check this
6867 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6868 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6869 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
/* NOTE(review): garbled extraction -- return statements and closing braces
   missing.  Preserved byte-for-byte.
   Purpose (per the visible comment): recognize the Mach-O pattern
   {LABEL|SYMBOL}_REF minus the "<pic base>" SYMBOL_REF, used to address
   local data under -fPIC on Darwin. */
6875 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6876 this is used for to form addresses to local data when -fPIC is in
6880 darwin_local_data_pic (rtx disp
)
6882 if (GET_CODE (disp
) == MINUS
)
6884 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6885 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6886 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6888 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6889 if (! strcmp (sym_name
, "<pic base>"))
/* NOTE(review): garbled extraction -- case labels, returns and braces
   missing in places.  Preserved byte-for-byte.
   Purpose (per the visible comment): decide whether a CONSTANT_P RTX is a
   valid constant operand.  Visible rejections: non-CONST_INT PLUS
   offsets, most UNSPECs (only specific TLS/64-bit ones allowed), TLS
   symbols, and DLLIMPORT symbols. */
6897 /* Determine if a given RTX is a valid constant. We already know this
6898 satisfies CONSTANT_P. */
6901 legitimate_constant_p (rtx x
)
6903 switch (GET_CODE (x
))
6908 if (GET_CODE (x
) == PLUS
)
6910 if (!CONST_INT_P (XEXP (x
, 1)))
6915 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6918 /* Only some unspecs are valid as "constants". */
6919 if (GET_CODE (x
) == UNSPEC
)
6920 switch (XINT (x
, 1))
6925 return TARGET_64BIT
;
6928 x
= XVECEXP (x
, 0, 0);
6929 return (GET_CODE (x
) == SYMBOL_REF
6930 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6932 x
= XVECEXP (x
, 0, 0);
6933 return (GET_CODE (x
) == SYMBOL_REF
6934 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6939 /* We must have drilled down to a symbol. */
6940 if (GET_CODE (x
) == LABEL_REF
)
6942 if (GET_CODE (x
) != SYMBOL_REF
)
6947 /* TLS symbols are never valid. */
6948 if (SYMBOL_REF_TLS_MODEL (x
))
6951 /* DLLIMPORT symbols are never valid. */
6952 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6953 && SYMBOL_REF_DLLIMPORT_P (x
))
6958 if (GET_MODE (x
) == TImode
6959 && x
!= CONST0_RTX (TImode
)
6965 if (x
== CONST0_RTX (GET_MODE (x
)))
6973 /* Otherwise we handle everything else in the move patterns. */
/* NOTE(review): garbled extraction -- the switch's case labels and early
   returns are missing.  Preserved byte-for-byte.
   Purpose (per the visible comment): TARGET_CANNOT_FORCE_CONST_MEM-style
   predicate: integral constants/vectors can always go to memory; anything
   else is forbidden exactly when legitimate_constant_p rejects it. */
6977 /* Determine if it's legal to put X into the constant pool. This
6978 is not possible for the address of thread-local symbols, which
6979 is checked above. */
6982 ix86_cannot_force_const_mem (rtx x
)
6984 /* We can always put integral constants and vectors in memory. */
6985 switch (GET_CODE (x
))
6995 return !legitimate_constant_p (x
);
/* NOTE(review): garbled extraction (return type and braces missing);
   preserved byte-for-byte.  A constant address is any CONSTANT_P RTX that
   also passes strict legitimate_address_p in Pmode. */
6998 /* Determine if a given RTX is a valid constant address. */
7001 constant_address_p (rtx x
)
7003 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
/* NOTE(review): garbled extraction -- case labels, the `inner` declaration
   and several returns are missing.  Preserved byte-for-byte.
   Purpose (per the visible comment): nonzero iff constant X is a
   legitimate general operand under -fPIC; CONST operands are unwrapped
   (peeling a CONST_INT PLUS) and checked for the allowed UNSPECs, all
   else defers to legitimate_pic_address_disp_p. */
7006 /* Nonzero if the constant value X is a legitimate general operand
7007 when generating PIC code. It is given that flag_pic is on and
7008 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7011 legitimate_pic_operand_p (rtx x
)
7015 switch (GET_CODE (x
))
7018 inner
= XEXP (x
, 0);
7019 if (GET_CODE (inner
) == PLUS
7020 && CONST_INT_P (XEXP (inner
, 1)))
7021 inner
= XEXP (inner
, 0);
7023 /* Only some unspecs are valid as "constants". */
7024 if (GET_CODE (inner
) == UNSPEC
)
7025 switch (XINT (inner
, 1))
7030 return TARGET_64BIT
;
7032 x
= XVECEXP (inner
, 0, 0);
7033 return (GET_CODE (x
) == SYMBOL_REF
7034 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
7042 return legitimate_pic_address_disp_p (x
);
/* NOTE(review): garbled extraction -- case labels (LABEL_REF/SYMBOL_REF/
   CONST etc.), several returns and the TARGET_64BIT guard lines are
   missing.  Preserved byte-for-byte.
   Purpose (per the visible comments): validate a CONST RTX as a PIC
   memory displacement.  64-bit mode allows direct symbol+small-offset
   (|offset| < 16MB) references to local, non-TLS, non-far symbols
   outside the large PIC model; otherwise the displacement must be one of
   the recognized GOT/GOTOFF/PLTOFF or TLS (GOTTPOFF/NTPOFF/DTPOFF
   family) UNSPEC wrappers around a SYMBOL_REF or LABEL_REF. */
7049 /* Determine if a given CONST RTX is a valid memory displacement
7053 legitimate_pic_address_disp_p (rtx disp
)
7057 /* In 64bit mode we can allow direct addresses of symbols and labels
7058 when they are not dynamic symbols. */
7061 rtx op0
= disp
, op1
;
7063 switch (GET_CODE (disp
))
7069 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
7071 op0
= XEXP (XEXP (disp
, 0), 0);
7072 op1
= XEXP (XEXP (disp
, 0), 1);
7073 if (!CONST_INT_P (op1
)
7074 || INTVAL (op1
) >= 16*1024*1024
7075 || INTVAL (op1
) < -16*1024*1024)
7077 if (GET_CODE (op0
) == LABEL_REF
)
7079 if (GET_CODE (op0
) != SYMBOL_REF
)
7084 /* TLS references should always be enclosed in UNSPEC. */
7085 if (SYMBOL_REF_TLS_MODEL (op0
))
7087 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
7088 && ix86_cmodel
!= CM_LARGE_PIC
)
7096 if (GET_CODE (disp
) != CONST
)
7098 disp
= XEXP (disp
, 0);
7102 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7103 of GOT tables. We should not need these anyway. */
7104 if (GET_CODE (disp
) != UNSPEC
7105 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
7106 && XINT (disp
, 1) != UNSPEC_GOTOFF
7107 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
7110 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
7111 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
7117 if (GET_CODE (disp
) == PLUS
)
7119 if (!CONST_INT_P (XEXP (disp
, 1)))
7121 disp
= XEXP (disp
, 0);
7125 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
7128 if (GET_CODE (disp
) != UNSPEC
)
7131 switch (XINT (disp
, 1))
7136 /* We need to check for both symbols and labels because VxWorks loads
7137 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7139 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7140 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
7142 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7143 While ABI specify also 32bit relocation but we don't produce it in
7144 small PIC model at all. */
7145 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7146 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
7148 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
7150 case UNSPEC_GOTTPOFF
:
7151 case UNSPEC_GOTNTPOFF
:
7152 case UNSPEC_INDNTPOFF
:
7155 disp
= XVECEXP (disp
, 0, 0);
7156 return (GET_CODE (disp
) == SYMBOL_REF
7157 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
7159 disp
= XVECEXP (disp
, 0, 0);
7160 return (GET_CODE (disp
) == SYMBOL_REF
7161 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
7163 disp
= XVECEXP (disp
, 0, 0);
7164 return (GET_CODE (disp
) == SYMBOL_REF
7165 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
/* NOTE(review): garbled extraction -- the reject/report label block, the
   `reg` declarations, debug-dump lines and many if-headers are missing.
   Preserved byte-for-byte.
   Purpose (per the visible comment): the GO_IF_LEGITIMATE_ADDRESS worker.
   Decomposes ADDR and then validates in order: base register (Pmode,
   strict/non-strict REG_OK_FOR_BASE, word-sized SUBREGs only), index
   register (same rules), scale (needs an index; must be 2, 4 or 8
   beyond 1), and displacement (PIC unspecs, TLS dtpoff/ntpoff forms,
   constant-ness, and 64-bit immediate range).  `reason` carries the
   human-readable rejection cause for debug output. */
7171 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7172 memory address for an instruction. The MODE argument is the machine mode
7173 for the MEM expression that wants to use this address.
7175 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7176 convert common non-canonical forms to canonical form so that they will
7180 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
7181 rtx addr
, int strict
)
7183 struct ix86_address parts
;
7184 rtx base
, index
, disp
;
7185 HOST_WIDE_INT scale
;
7186 const char *reason
= NULL
;
7187 rtx reason_rtx
= NULL_RTX
;
7189 if (ix86_decompose_address (addr
, &parts
) <= 0)
7191 reason
= "decomposition failed";
7196 index
= parts
.index
;
7198 scale
= parts
.scale
;
7200 /* Validate base register.
7202 Don't allow SUBREG's that span more than a word here. It can lead to spill
7203 failures when the base is one word out of a two word structure, which is
7204 represented internally as a DImode int. */
7213 else if (GET_CODE (base
) == SUBREG
7214 && REG_P (SUBREG_REG (base
))
7215 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
7217 reg
= SUBREG_REG (base
);
7220 reason
= "base is not a register";
7224 if (GET_MODE (base
) != Pmode
)
7226 reason
= "base is not in Pmode";
7230 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
7231 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
7233 reason
= "base is not valid";
7238 /* Validate index register.
7240 Don't allow SUBREG's that span more than a word here -- same as above. */
7249 else if (GET_CODE (index
) == SUBREG
7250 && REG_P (SUBREG_REG (index
))
7251 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
7253 reg
= SUBREG_REG (index
);
7256 reason
= "index is not a register";
7260 if (GET_MODE (index
) != Pmode
)
7262 reason
= "index is not in Pmode";
7266 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
7267 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
7269 reason
= "index is not valid";
7274 /* Validate scale factor. */
7277 reason_rtx
= GEN_INT (scale
);
7280 reason
= "scale without index";
7284 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
7286 reason
= "scale is not a valid multiplier";
7291 /* Validate displacement. */
7296 if (GET_CODE (disp
) == CONST
7297 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
7298 switch (XINT (XEXP (disp
, 0), 1))
7300 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7301 used. While ABI specify also 32bit relocations, we don't produce
7302 them at all and use IP relative instead. */
7305 gcc_assert (flag_pic
);
7307 goto is_legitimate_pic
;
7308 reason
= "64bit address unspec";
7311 case UNSPEC_GOTPCREL
:
7312 gcc_assert (flag_pic
);
7313 goto is_legitimate_pic
;
7315 case UNSPEC_GOTTPOFF
:
7316 case UNSPEC_GOTNTPOFF
:
7317 case UNSPEC_INDNTPOFF
:
7323 reason
= "invalid address unspec";
7327 else if (SYMBOLIC_CONST (disp
)
7331 && MACHOPIC_INDIRECT
7332 && !machopic_operand_p (disp
)
7338 if (TARGET_64BIT
&& (index
|| base
))
7340 /* foo@dtpoff(%rX) is ok. */
7341 if (GET_CODE (disp
) != CONST
7342 || GET_CODE (XEXP (disp
, 0)) != PLUS
7343 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7344 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7345 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7346 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7348 reason
= "non-constant pic memory reference";
7352 else if (! legitimate_pic_address_disp_p (disp
))
7354 reason
= "displacement is an invalid pic construct";
7358 /* This code used to verify that a symbolic pic displacement
7359 includes the pic_offset_table_rtx register.
7361 While this is good idea, unfortunately these constructs may
7362 be created by "adds using lea" optimization for incorrect
7371 This code is nonsensical, but results in addressing
7372 GOT table with pic_offset_table_rtx base. We can't
7373 just refuse it easily, since it gets matched by
7374 "addsi3" pattern, that later gets split to lea in the
7375 case output register differs from input. While this
7376 can be handled by separate addsi pattern for this case
7377 that never results in lea, this seems to be easier and
7378 correct fix for crash to disable this test. */
7380 else if (GET_CODE (disp
) != LABEL_REF
7381 && !CONST_INT_P (disp
)
7382 && (GET_CODE (disp
) != CONST
7383 || !legitimate_constant_p (disp
))
7384 && (GET_CODE (disp
) != SYMBOL_REF
7385 || !legitimate_constant_p (disp
)))
7387 reason
= "displacement is not constant";
7390 else if (TARGET_64BIT
7391 && !x86_64_immediate_operand (disp
, VOIDmode
))
7393 reason
= "displacement is out of range";
7398 /* Everything looks valid. */
/* NOTE(review): garbled extraction -- the `if (set == -1)` guard and the
   return are among the missing lines.  Preserved byte-for-byte.
   Lazily creates and caches a dedicated alias set for GOT accesses. */
7405 /* Return a unique alias set for the GOT. */
7407 static alias_set_type
7408 ix86_GOT_alias_set (void)
7410 static alias_set_type set
= -1;
7412 set
= new_alias_set ();
7416 /* Return a legitimate reference for ORIG (an address) using the
7417 register REG. If REG is 0, a new pseudo is generated.
7419 There are two types of references that must be handled:
7421 1. Global data references must load the address from the GOT, via
7422 the PIC reg. An insn is emitted to do this load, and the reg is
7425 2. Static data references, constant pool addresses, and code labels
7426 compute the address as an offset from the GOT, whose base is in
7427 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7428 differentiate them from global data objects. The returned
7429 address is the PIC reg + an unspec constant.
7431 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7432 reg also appears in the address. */
7435 legitimize_pic_address (rtx orig
, rtx reg
)
7442 if (TARGET_MACHO
&& !TARGET_64BIT
)
7445 reg
= gen_reg_rtx (Pmode
);
7446 /* Use the generic Mach-O PIC machinery. */
7447 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7451 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7453 else if (TARGET_64BIT
7454 && ix86_cmodel
!= CM_SMALL_PIC
7455 && gotoff_operand (addr
, Pmode
))
7458 /* This symbol may be referenced via a displacement from the PIC
7459 base address (@GOTOFF). */
7461 if (reload_in_progress
)
7462 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7463 if (GET_CODE (addr
) == CONST
)
7464 addr
= XEXP (addr
, 0);
7465 if (GET_CODE (addr
) == PLUS
)
7467 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7469 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7472 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7473 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7475 tmpreg
= gen_reg_rtx (Pmode
);
7478 emit_move_insn (tmpreg
, new_rtx
);
7482 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7483 tmpreg
, 1, OPTAB_DIRECT
);
7486 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7488 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7490 /* This symbol may be referenced via a displacement from the PIC
7491 base address (@GOTOFF). */
7493 if (reload_in_progress
)
7494 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7495 if (GET_CODE (addr
) == CONST
)
7496 addr
= XEXP (addr
, 0);
7497 if (GET_CODE (addr
) == PLUS
)
7499 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7501 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7504 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7505 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7506 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7510 emit_move_insn (reg
, new_rtx
);
7514 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7515 /* We can't use @GOTOFF for text labels on VxWorks;
7516 see gotoff_operand. */
7517 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7519 /* Given that we've already handled dllimport variables separately
7520 in legitimize_address, and all other variables should satisfy
7521 legitimate_pic_address_disp_p, we should never arrive here. */
7522 gcc_assert (!TARGET_64BIT_MS_ABI
);
7524 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7526 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7527 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7528 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7529 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7532 reg
= gen_reg_rtx (Pmode
);
7533 /* Use directly gen_movsi, otherwise the address is loaded
7534 into register for CSE. We don't want to CSE this addresses,
7535 instead we CSE addresses from the GOT table, so skip this. */
7536 emit_insn (gen_movsi (reg
, new_rtx
));
7541 /* This symbol must be referenced via a load from the
7542 Global Offset Table (@GOT). */
7544 if (reload_in_progress
)
7545 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7546 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7547 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7549 new_rtx
= force_reg (Pmode
, new_rtx
);
7550 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7551 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7552 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7555 reg
= gen_reg_rtx (Pmode
);
7556 emit_move_insn (reg
, new_rtx
);
7562 if (CONST_INT_P (addr
)
7563 && !x86_64_immediate_operand (addr
, VOIDmode
))
7567 emit_move_insn (reg
, addr
);
7571 new_rtx
= force_reg (Pmode
, addr
);
7573 else if (GET_CODE (addr
) == CONST
)
7575 addr
= XEXP (addr
, 0);
7577 /* We must match stuff we generate before. Assume the only
7578 unspecs that can get here are ours. Not that we could do
7579 anything with them anyway.... */
7580 if (GET_CODE (addr
) == UNSPEC
7581 || (GET_CODE (addr
) == PLUS
7582 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7584 gcc_assert (GET_CODE (addr
) == PLUS
);
7586 if (GET_CODE (addr
) == PLUS
)
7588 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7590 /* Check first to see if this is a constant offset from a @GOTOFF
7591 symbol reference. */
7592 if (gotoff_operand (op0
, Pmode
)
7593 && CONST_INT_P (op1
))
7597 if (reload_in_progress
)
7598 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7599 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7601 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
7602 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7603 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7607 emit_move_insn (reg
, new_rtx
);
7613 if (INTVAL (op1
) < -16*1024*1024
7614 || INTVAL (op1
) >= 16*1024*1024)
7616 if (!x86_64_immediate_operand (op1
, Pmode
))
7617 op1
= force_reg (Pmode
, op1
);
7618 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7624 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7625 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
7626 base
== reg
? NULL_RTX
: reg
);
7628 if (CONST_INT_P (new_rtx
))
7629 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
7632 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
7634 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
7635 new_rtx
= XEXP (new_rtx
, 1);
7637 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
7645 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7648 get_thread_pointer (int to_reg
)
7652 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7656 reg
= gen_reg_rtx (Pmode
);
7657 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7658 insn
= emit_insn (insn
);
7663 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7664 false if we expect this to be used for a memory address and true if
7665 we expect to load the address into a register. */
7668 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7670 rtx dest
, base
, off
, pic
, tp
;
7675 case TLS_MODEL_GLOBAL_DYNAMIC
:
7676 dest
= gen_reg_rtx (Pmode
);
7677 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7679 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7681 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
7684 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7685 insns
= get_insns ();
7688 CONST_OR_PURE_CALL_P (insns
) = 1;
7689 emit_libcall_block (insns
, dest
, rax
, x
);
7691 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7692 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7694 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7696 if (TARGET_GNU2_TLS
)
7698 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7700 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7704 case TLS_MODEL_LOCAL_DYNAMIC
:
7705 base
= gen_reg_rtx (Pmode
);
7706 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7708 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7710 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, note
;
7713 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7714 insns
= get_insns ();
7717 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7718 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7719 CONST_OR_PURE_CALL_P (insns
) = 1;
7720 emit_libcall_block (insns
, base
, rax
, note
);
7722 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7723 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7725 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7727 if (TARGET_GNU2_TLS
)
7729 rtx x
= ix86_tls_module_base ();
7731 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7732 gen_rtx_MINUS (Pmode
, x
, tp
));
7735 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7736 off
= gen_rtx_CONST (Pmode
, off
);
7738 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7740 if (TARGET_GNU2_TLS
)
7742 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7744 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7749 case TLS_MODEL_INITIAL_EXEC
:
7753 type
= UNSPEC_GOTNTPOFF
;
7757 if (reload_in_progress
)
7758 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7759 pic
= pic_offset_table_rtx
;
7760 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7762 else if (!TARGET_ANY_GNU_TLS
)
7764 pic
= gen_reg_rtx (Pmode
);
7765 emit_insn (gen_set_got (pic
));
7766 type
= UNSPEC_GOTTPOFF
;
7771 type
= UNSPEC_INDNTPOFF
;
7774 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7775 off
= gen_rtx_CONST (Pmode
, off
);
7777 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7778 off
= gen_const_mem (Pmode
, off
);
7779 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7781 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7783 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7784 off
= force_reg (Pmode
, off
);
7785 return gen_rtx_PLUS (Pmode
, base
, off
);
7789 base
= get_thread_pointer (true);
7790 dest
= gen_reg_rtx (Pmode
);
7791 emit_insn (gen_subsi3 (dest
, base
, off
));
7795 case TLS_MODEL_LOCAL_EXEC
:
7796 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7797 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7798 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7799 off
= gen_rtx_CONST (Pmode
, off
);
7801 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7803 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7804 return gen_rtx_PLUS (Pmode
, base
, off
);
7808 base
= get_thread_pointer (true);
7809 dest
= gen_reg_rtx (Pmode
);
7810 emit_insn (gen_subsi3 (dest
, base
, off
));
7821 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7824 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7825 htab_t dllimport_map
;
7828 get_dllimport_decl (tree decl
)
7830 struct tree_map
*h
, in
;
7834 size_t namelen
, prefixlen
;
7840 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7842 in
.hash
= htab_hash_pointer (decl
);
7843 in
.base
.from
= decl
;
7844 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7845 h
= (struct tree_map
*) *loc
;
7849 *loc
= h
= GGC_NEW (struct tree_map
);
7851 h
->base
.from
= decl
;
7852 h
->to
= to
= build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7853 DECL_ARTIFICIAL (to
) = 1;
7854 DECL_IGNORED_P (to
) = 1;
7855 DECL_EXTERNAL (to
) = 1;
7856 TREE_READONLY (to
) = 1;
7858 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7859 name
= targetm
.strip_name_encoding (name
);
7860 prefix
= name
[0] == FASTCALL_PREFIX
? "*__imp_": "*__imp__";
7861 namelen
= strlen (name
);
7862 prefixlen
= strlen (prefix
);
7863 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
7864 memcpy (imp_name
, prefix
, prefixlen
);
7865 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
7867 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
7868 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
7869 SET_SYMBOL_REF_DECL (rtl
, to
);
7870 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7872 rtl
= gen_const_mem (Pmode
, rtl
);
7873 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7875 SET_DECL_RTL (to
, rtl
);
7876 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
7881 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7882 true if we require the result be a register. */
7885 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
7890 gcc_assert (SYMBOL_REF_DECL (symbol
));
7891 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
7893 x
= DECL_RTL (imp_decl
);
7895 x
= force_reg (Pmode
, x
);
7899 /* Try machine-dependent ways of modifying an illegitimate address
7900 to be legitimate. If we find one, return the new, valid address.
7901 This macro is used in only one place: `memory_address' in explow.c.
7903 OLDX is the address as it was before break_out_memory_refs was called.
7904 In some cases it is useful to look at this to decide what needs to be done.
7906 MODE and WIN are passed so that this macro can use
7907 GO_IF_LEGITIMATE_ADDRESS.
7909 It is always safe for this macro to do nothing. It exists to recognize
7910 opportunities to optimize the output.
7912 For the 80386, we handle X+REG by loading X into a register R and
7913 using R+REG. R will go in a general reg and indexing will be used.
7914 However, if REG is a broken-out memory address or multiplication,
7915 nothing needs to be done because REG can certainly go in a general reg.
7917 When -fpic is used, special handling is needed for symbolic references.
7918 See comments by legitimize_pic_address in i386.c for details. */
7921 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7926 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7928 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
7929 if (GET_CODE (x
) == CONST
7930 && GET_CODE (XEXP (x
, 0)) == PLUS
7931 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7932 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7934 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
7935 (enum tls_model
) log
, false);
7936 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7939 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7941 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
7942 return legitimize_dllimport_symbol (x
, true);
7943 if (GET_CODE (x
) == CONST
7944 && GET_CODE (XEXP (x
, 0)) == PLUS
7945 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7946 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7948 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7949 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7953 if (flag_pic
&& SYMBOLIC_CONST (x
))
7954 return legitimize_pic_address (x
, 0);
7956 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7957 if (GET_CODE (x
) == ASHIFT
7958 && CONST_INT_P (XEXP (x
, 1))
7959 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7962 log
= INTVAL (XEXP (x
, 1));
7963 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7964 GEN_INT (1 << log
));
7967 if (GET_CODE (x
) == PLUS
)
7969 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7971 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7972 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7973 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7976 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7977 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7978 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7979 GEN_INT (1 << log
));
7982 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7983 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7984 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7987 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7988 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7989 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7990 GEN_INT (1 << log
));
7993 /* Put multiply first if it isn't already. */
7994 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7996 rtx tmp
= XEXP (x
, 0);
7997 XEXP (x
, 0) = XEXP (x
, 1);
8002 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8003 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8004 created by virtual register instantiation, register elimination, and
8005 similar optimizations. */
8006 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
8009 x
= gen_rtx_PLUS (Pmode
,
8010 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8011 XEXP (XEXP (x
, 1), 0)),
8012 XEXP (XEXP (x
, 1), 1));
8016 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8017 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8018 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
8019 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8020 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
8021 && CONSTANT_P (XEXP (x
, 1)))
8024 rtx other
= NULL_RTX
;
8026 if (CONST_INT_P (XEXP (x
, 1)))
8028 constant
= XEXP (x
, 1);
8029 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
8031 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
8033 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
8034 other
= XEXP (x
, 1);
8042 x
= gen_rtx_PLUS (Pmode
,
8043 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
8044 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
8045 plus_constant (other
, INTVAL (constant
)));
8049 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
8052 if (GET_CODE (XEXP (x
, 0)) == MULT
)
8055 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
8058 if (GET_CODE (XEXP (x
, 1)) == MULT
)
8061 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
8065 && REG_P (XEXP (x
, 1))
8066 && REG_P (XEXP (x
, 0)))
8069 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
8072 x
= legitimize_pic_address (x
, 0);
8075 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
8078 if (REG_P (XEXP (x
, 0)))
8080 rtx temp
= gen_reg_rtx (Pmode
);
8081 rtx val
= force_operand (XEXP (x
, 1), temp
);
8083 emit_move_insn (temp
, val
);
8089 else if (REG_P (XEXP (x
, 1)))
8091 rtx temp
= gen_reg_rtx (Pmode
);
8092 rtx val
= force_operand (XEXP (x
, 0), temp
);
8094 emit_move_insn (temp
, val
);
8104 /* Print an integer constant expression in assembler syntax. Addition
8105 and subtraction are the only arithmetic that may appear in these
8106 expressions. FILE is the stdio stream to write to, X is the rtx, and
8107 CODE is the operand print code from the output string. */
8110 output_pic_addr_const (FILE *file
, rtx x
, int code
)
8114 switch (GET_CODE (x
))
8117 gcc_assert (flag_pic
);
8122 if (! TARGET_MACHO
|| TARGET_64BIT
)
8123 output_addr_const (file
, x
);
8126 const char *name
= XSTR (x
, 0);
8128 /* Mark the decl as referenced so that cgraph will
8129 output the function. */
8130 if (SYMBOL_REF_DECL (x
))
8131 mark_decl_referenced (SYMBOL_REF_DECL (x
));
8134 if (MACHOPIC_INDIRECT
8135 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
8136 name
= machopic_indirection_name (x
, /*stub_p=*/true);
8138 assemble_name (file
, name
);
8140 if (!TARGET_MACHO
&& !TARGET_64BIT_MS_ABI
8141 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
8142 fputs ("@PLT", file
);
8149 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
8150 assemble_name (asm_out_file
, buf
);
8154 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8158 /* This used to output parentheses around the expression,
8159 but that does not work on the 386 (either ATT or BSD assembler). */
8160 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8164 if (GET_MODE (x
) == VOIDmode
)
8166 /* We can use %d if the number is <32 bits and positive. */
8167 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
8168 fprintf (file
, "0x%lx%08lx",
8169 (unsigned long) CONST_DOUBLE_HIGH (x
),
8170 (unsigned long) CONST_DOUBLE_LOW (x
));
8172 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
8175 /* We can't handle floating point constants;
8176 PRINT_OPERAND must handle them. */
8177 output_operand_lossage ("floating constant misused");
8181 /* Some assemblers need integer constants to appear first. */
8182 if (CONST_INT_P (XEXP (x
, 0)))
8184 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8186 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8190 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
8191 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8193 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8199 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
8200 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8202 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8204 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
8208 gcc_assert (XVECLEN (x
, 0) == 1);
8209 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
8210 switch (XINT (x
, 1))
8213 fputs ("@GOT", file
);
8216 fputs ("@GOTOFF", file
);
8219 fputs ("@PLTOFF", file
);
8221 case UNSPEC_GOTPCREL
:
8222 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
8223 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
8225 case UNSPEC_GOTTPOFF
:
8226 /* FIXME: This might be @TPOFF in Sun ld too. */
8227 fputs ("@GOTTPOFF", file
);
8230 fputs ("@TPOFF", file
);
8234 fputs ("@TPOFF", file
);
8236 fputs ("@NTPOFF", file
);
8239 fputs ("@DTPOFF", file
);
8241 case UNSPEC_GOTNTPOFF
:
8243 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
8244 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file
);
8246 fputs ("@GOTNTPOFF", file
);
8248 case UNSPEC_INDNTPOFF
:
8249 fputs ("@INDNTPOFF", file
);
8252 output_operand_lossage ("invalid UNSPEC as operand");
8258 output_operand_lossage ("invalid expression as operand");
8262 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8263 We need to emit DTP-relative relocations. */
8265 static void ATTRIBUTE_UNUSED
8266 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8268 fputs (ASM_LONG
, file
);
8269 output_addr_const (file
, x
);
8270 fputs ("@DTPOFF", file
);
8276 fputs (", 0", file
);
8283 /* In the name of slightly smaller debug output, and to cater to
8284 general assembler lossage, recognize PIC+GOTOFF and turn it back
8285 into a direct symbol reference.
8287 On Darwin, this is necessary to avoid a crash, because Darwin
8288 has a different PIC label for each routine but the DWARF debugging
8289 information is not associated with any particular routine, so it's
8290 necessary to remove references to the PIC label from RTL stored by
8291 the DWARF output code. */
8294 ix86_delegitimize_address (rtx orig_x
)
8297 /* reg_addend is NULL or a multiple of some register. */
8298 rtx reg_addend
= NULL_RTX
;
8299 /* const_addend is NULL or a const_int. */
8300 rtx const_addend
= NULL_RTX
;
8301 /* This is the result, or NULL. */
8302 rtx result
= NULL_RTX
;
8309 if (GET_CODE (x
) != CONST
8310 || GET_CODE (XEXP (x
, 0)) != UNSPEC
8311 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
8314 return XVECEXP (XEXP (x
, 0), 0, 0);
8317 if (GET_CODE (x
) != PLUS
8318 || GET_CODE (XEXP (x
, 1)) != CONST
)
8321 if (REG_P (XEXP (x
, 0))
8322 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8323 /* %ebx + GOT/GOTOFF */
8325 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
8327 /* %ebx + %reg * scale + GOT/GOTOFF */
8328 reg_addend
= XEXP (x
, 0);
8329 if (REG_P (XEXP (reg_addend
, 0))
8330 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8331 reg_addend
= XEXP (reg_addend
, 1);
8332 else if (REG_P (XEXP (reg_addend
, 1))
8333 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
8334 reg_addend
= XEXP (reg_addend
, 0);
8337 if (!REG_P (reg_addend
)
8338 && GET_CODE (reg_addend
) != MULT
8339 && GET_CODE (reg_addend
) != ASHIFT
)
8345 x
= XEXP (XEXP (x
, 1), 0);
8346 if (GET_CODE (x
) == PLUS
8347 && CONST_INT_P (XEXP (x
, 1)))
8349 const_addend
= XEXP (x
, 1);
8353 if (GET_CODE (x
) == UNSPEC
8354 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
8355 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
8356 result
= XVECEXP (x
, 0, 0);
8358 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
8360 result
= XEXP (x
, 0);
8366 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
8368 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
8372 /* If X is a machine specific address (i.e. a symbol or label being
8373 referenced as a displacement from the GOT implemented using an
8374 UNSPEC), then return the base term. Otherwise return X. */
8377 ix86_find_base_term (rtx x
)
8383 if (GET_CODE (x
) != CONST
)
8386 if (GET_CODE (term
) == PLUS
8387 && (CONST_INT_P (XEXP (term
, 1))
8388 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8389 term
= XEXP (term
, 0);
8390 if (GET_CODE (term
) != UNSPEC
8391 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8394 term
= XVECEXP (term
, 0, 0);
8396 if (GET_CODE (term
) != SYMBOL_REF
8397 && GET_CODE (term
) != LABEL_REF
)
8403 term
= ix86_delegitimize_address (x
);
8405 if (GET_CODE (term
) != SYMBOL_REF
8406 && GET_CODE (term
) != LABEL_REF
)
8413 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8418 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
8420 enum rtx_code second_code
, bypass_code
;
8421 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8422 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
8423 code
= ix86_fp_compare_code_to_integer (code
);
8427 code
= reverse_condition (code
);
8478 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
8482 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8483 Those same assemblers have the same but opposite lossage on cmov. */
8485 suffix
= fp
? "nbe" : "a";
8486 else if (mode
== CCCmode
)
8509 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8531 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8532 suffix
= fp
? "nb" : "ae";
8535 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8542 else if (mode
== CCCmode
)
8543 suffix
= fp
? "nb" : "ae";
8548 suffix
= fp
? "u" : "p";
8551 suffix
= fp
? "nu" : "np";
8556 fputs (suffix
, file
);
8559 /* Print the name of register X to FILE based on its machine mode and number.
8560 If CODE is 'w', pretend the mode is HImode.
8561 If CODE is 'b', pretend the mode is QImode.
8562 If CODE is 'k', pretend the mode is SImode.
8563 If CODE is 'q', pretend the mode is DImode.
8564 If CODE is 'h', pretend the reg is the 'high' byte register.
8565 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8568 print_reg (rtx x
, int code
, FILE *file
)
8570 gcc_assert (x
== pc_rtx
8571 || (REGNO (x
) != ARG_POINTER_REGNUM
8572 && REGNO (x
) != FRAME_POINTER_REGNUM
8573 && REGNO (x
) != FLAGS_REG
8574 && REGNO (x
) != FPSR_REG
8575 && REGNO (x
) != FPCR_REG
));
8577 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8582 gcc_assert (TARGET_64BIT
);
8583 fputs ("rip", file
);
8587 if (code
== 'w' || MMX_REG_P (x
))
8589 else if (code
== 'b')
8591 else if (code
== 'k')
8593 else if (code
== 'q')
8595 else if (code
== 'y')
8597 else if (code
== 'h')
8600 code
= GET_MODE_SIZE (GET_MODE (x
));
8602 /* Irritatingly, AMD extended registers use different naming convention
8603 from the normal registers. */
8604 if (REX_INT_REG_P (x
))
8606 gcc_assert (TARGET_64BIT
);
8610 error ("extended registers have no high halves");
8613 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8616 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8619 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8622 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8625 error ("unsupported operand size for extended register");
8633 if (STACK_TOP_P (x
))
8635 fputs ("st(0)", file
);
8642 if (! ANY_FP_REG_P (x
))
8643 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8648 fputs (hi_reg_name
[REGNO (x
)], file
);
8651 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8653 fputs (qi_reg_name
[REGNO (x
)], file
);
8656 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8658 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8665 /* Locate some local-dynamic symbol still in use by this function
8666 so that we can print its name in some tls_local_dynamic_base
8670 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8674 if (GET_CODE (x
) == SYMBOL_REF
8675 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8677 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8685 get_some_local_dynamic_name (void)
8689 if (cfun
->machine
->some_ld_name
)
8690 return cfun
->machine
->some_ld_name
;
8692 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8694 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8695 return cfun
->machine
->some_ld_name
;
8701 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8702 C -- print opcode suffix for set/cmov insn.
8703 c -- like C, but print reversed condition
8704 F,f -- likewise, but for floating-point.
8705 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8707 R -- print the prefix for register names.
8708 z -- print the opcode suffix for the size of the current operand.
8709 * -- print a star (in certain assembler syntax)
8710 A -- print an absolute memory reference.
8711 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8712 s -- print a shift double count, followed by the assemblers argument
8714 b -- print the QImode name of the register for the indicated operand.
8715 %b0 would print %al if operands[0] is reg 0.
8716 w -- likewise, print the HImode name of the register.
8717 k -- likewise, print the SImode name of the register.
8718 q -- likewise, print the DImode name of the register.
8719 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8720 y -- print "st(0)" instead of "st" as a register.
8721 D -- print condition for SSE cmp instruction.
8722 P -- if PIC, print an @PLT suffix.
8723 X -- don't print any sort of PIC '@' suffix for a symbol.
8724 & -- print some in-use local-dynamic symbol name.
8725 H -- print a memory address offset by 8; used for sse high-parts
8726 Y -- print condition for SSE5 com* instruction.
8727 + -- print a branch hint as 'cs' or 'ds' prefix
8728 ; -- print a semicolon (after prefixes due to bug in older gas).
8732 print_operand (FILE *file
, rtx x
, int code
)
8739 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8744 assemble_name (file
, get_some_local_dynamic_name ());
8748 switch (ASSEMBLER_DIALECT
)
8755 /* Intel syntax. For absolute addresses, registers should not
8756 be surrounded by braces. */
8760 PRINT_OPERAND (file
, x
, 0);
8770 PRINT_OPERAND (file
, x
, 0);
8775 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8780 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8785 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8790 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8795 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8800 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8805 /* 387 opcodes don't get size suffixes if the operands are
8807 if (STACK_REG_P (x
))
8810 /* Likewise if using Intel opcodes. */
8811 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8814 /* This is the size of op from size of operand. */
8815 switch (GET_MODE_SIZE (GET_MODE (x
)))
8824 #ifdef HAVE_GAS_FILDS_FISTS
8834 if (GET_MODE (x
) == SFmode
)
8849 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8851 #ifdef GAS_MNEMONICS
8877 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8879 PRINT_OPERAND (file
, x
, 0);
8885 /* Little bit of braindamage here. The SSE compare instructions
8886 does use completely different names for the comparisons that the
8887 fp conditional moves. */
8888 switch (GET_CODE (x
))
8903 fputs ("unord", file
);
8907 fputs ("neq", file
);
8911 fputs ("nlt", file
);
8915 fputs ("nle", file
);
8918 fputs ("ord", file
);
8925 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8926 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8928 switch (GET_MODE (x
))
8930 case HImode
: putc ('w', file
); break;
8932 case SFmode
: putc ('l', file
); break;
8934 case DFmode
: putc ('q', file
); break;
8935 default: gcc_unreachable ();
8942 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8945 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8946 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8949 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8952 /* Like above, but reverse condition */
8954 /* Check to see if argument to %c is really a constant
8955 and not a condition code which needs to be reversed. */
8956 if (!COMPARISON_P (x
))
8958 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8961 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8964 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8965 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8968 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8972 /* It doesn't actually matter what mode we use here, as we're
8973 only going to use this for printing. */
8974 x
= adjust_address_nv (x
, DImode
, 8);
8981 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8984 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8987 int pred_val
= INTVAL (XEXP (x
, 0));
8989 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8990 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8992 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8993 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8995 /* Emit hints only in the case default branch prediction
8996 heuristics would fail. */
8997 if (taken
!= cputaken
)
8999 /* We use 3e (DS) prefix for taken branches and
9000 2e (CS) prefix for not taken branches. */
9002 fputs ("ds ; ", file
);
9004 fputs ("cs ; ", file
);
9012 switch (GET_CODE (x
))
9015 fputs ("neq", file
);
9022 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
9026 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
9037 fputs ("unord", file
);
9040 fputs ("ord", file
);
9043 fputs ("ueq", file
);
9046 fputs ("nlt", file
);
9049 fputs ("nle", file
);
9052 fputs ("ule", file
);
9055 fputs ("ult", file
);
9058 fputs ("une", file
);
9067 fputs (" ; ", file
);
9074 output_operand_lossage ("invalid operand code '%c'", code
);
9079 print_reg (x
, code
, file
);
9083 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9084 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
9085 && GET_MODE (x
) != BLKmode
)
9088 switch (GET_MODE_SIZE (GET_MODE (x
)))
9090 case 1: size
= "BYTE"; break;
9091 case 2: size
= "WORD"; break;
9092 case 4: size
= "DWORD"; break;
9093 case 8: size
= "QWORD"; break;
9094 case 12: size
= "XWORD"; break;
9096 if (GET_MODE (x
) == XFmode
)
9105 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9108 else if (code
== 'w')
9110 else if (code
== 'k')
9114 fputs (" PTR ", file
);
9118 /* Avoid (%rip) for call operands. */
9119 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
9120 && !CONST_INT_P (x
))
9121 output_addr_const (file
, x
);
9122 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
9123 output_operand_lossage ("invalid constraints for operand");
9128 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
9133 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9134 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
9136 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9138 fprintf (file
, "0x%08lx", l
);
9141 /* These float cases don't actually occur as immediate operands. */
9142 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
9146 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9147 fprintf (file
, "%s", dstr
);
9150 else if (GET_CODE (x
) == CONST_DOUBLE
9151 && GET_MODE (x
) == XFmode
)
9155 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9156 fprintf (file
, "%s", dstr
);
9161 /* We have patterns that allow zero sets of memory, for instance.
9162 In 64-bit mode, we should probably support all 8-byte vectors,
9163 since we can in fact encode that into an immediate. */
9164 if (GET_CODE (x
) == CONST_VECTOR
)
9166 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
9172 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
9174 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9177 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
9178 || GET_CODE (x
) == LABEL_REF
)
9180 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9183 fputs ("OFFSET FLAT:", file
);
9186 if (CONST_INT_P (x
))
9187 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
9189 output_pic_addr_const (file
, x
, code
);
9191 output_addr_const (file
, x
);
9195 /* Print a memory operand whose address is ADDR. */
9198 print_operand_address (FILE *file
, rtx addr
)
9200 struct ix86_address parts
;
9201 rtx base
, index
, disp
;
9203 int ok
= ix86_decompose_address (addr
, &parts
);
9208 index
= parts
.index
;
9210 scale
= parts
.scale
;
9218 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9220 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
9226 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9227 if (TARGET_64BIT
&& !base
&& !index
)
9231 if (GET_CODE (disp
) == CONST
9232 && GET_CODE (XEXP (disp
, 0)) == PLUS
9233 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9234 symbol
= XEXP (XEXP (disp
, 0), 0);
9236 if (GET_CODE (symbol
) == LABEL_REF
9237 || (GET_CODE (symbol
) == SYMBOL_REF
9238 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
9241 if (!base
&& !index
)
9243 /* Displacement only requires special attention. */
9245 if (CONST_INT_P (disp
))
9247 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
9248 fputs ("ds:", file
);
9249 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
9252 output_pic_addr_const (file
, disp
, 0);
9254 output_addr_const (file
, disp
);
9258 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9263 output_pic_addr_const (file
, disp
, 0);
9264 else if (GET_CODE (disp
) == LABEL_REF
)
9265 output_asm_label (disp
);
9267 output_addr_const (file
, disp
);
9272 print_reg (base
, 0, file
);
9276 print_reg (index
, 0, file
);
9278 fprintf (file
, ",%d", scale
);
9284 rtx offset
= NULL_RTX
;
9288 /* Pull out the offset of a symbol; print any symbol itself. */
9289 if (GET_CODE (disp
) == CONST
9290 && GET_CODE (XEXP (disp
, 0)) == PLUS
9291 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9293 offset
= XEXP (XEXP (disp
, 0), 1);
9294 disp
= gen_rtx_CONST (VOIDmode
,
9295 XEXP (XEXP (disp
, 0), 0));
9299 output_pic_addr_const (file
, disp
, 0);
9300 else if (GET_CODE (disp
) == LABEL_REF
)
9301 output_asm_label (disp
);
9302 else if (CONST_INT_P (disp
))
9305 output_addr_const (file
, disp
);
9311 print_reg (base
, 0, file
);
9314 if (INTVAL (offset
) >= 0)
9316 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9320 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9327 print_reg (index
, 0, file
);
9329 fprintf (file
, "*%d", scale
);
9337 output_addr_const_extra (FILE *file
, rtx x
)
9341 if (GET_CODE (x
) != UNSPEC
)
9344 op
= XVECEXP (x
, 0, 0);
9345 switch (XINT (x
, 1))
9347 case UNSPEC_GOTTPOFF
:
9348 output_addr_const (file
, op
);
9349 /* FIXME: This might be @TPOFF in Sun ld. */
9350 fputs ("@GOTTPOFF", file
);
9353 output_addr_const (file
, op
);
9354 fputs ("@TPOFF", file
);
9357 output_addr_const (file
, op
);
9359 fputs ("@TPOFF", file
);
9361 fputs ("@NTPOFF", file
);
9364 output_addr_const (file
, op
);
9365 fputs ("@DTPOFF", file
);
9367 case UNSPEC_GOTNTPOFF
:
9368 output_addr_const (file
, op
);
9370 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
9371 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file
);
9373 fputs ("@GOTNTPOFF", file
);
9375 case UNSPEC_INDNTPOFF
:
9376 output_addr_const (file
, op
);
9377 fputs ("@INDNTPOFF", file
);
9387 /* Split one or more DImode RTL references into pairs of SImode
9388 references. The RTL can be REG, offsettable MEM, integer constant, or
9389 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9390 split and "num" is its length. lo_half and hi_half are output arrays
9391 that parallel "operands". */
9394 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9398 rtx op
= operands
[num
];
9400 /* simplify_subreg refuse to split volatile memory addresses,
9401 but we still have to handle it. */
9404 lo_half
[num
] = adjust_address (op
, SImode
, 0);
9405 hi_half
[num
] = adjust_address (op
, SImode
, 4);
9409 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
9410 GET_MODE (op
) == VOIDmode
9411 ? DImode
: GET_MODE (op
), 0);
9412 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
9413 GET_MODE (op
) == VOIDmode
9414 ? DImode
: GET_MODE (op
), 4);
9418 /* Split one or more TImode RTL references into pairs of DImode
9419 references. The RTL can be REG, offsettable MEM, integer constant, or
9420 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9421 split and "num" is its length. lo_half and hi_half are output arrays
9422 that parallel "operands". */
9425 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9429 rtx op
= operands
[num
];
9431 /* simplify_subreg refuse to split volatile memory addresses, but we
9432 still have to handle it. */
9435 lo_half
[num
] = adjust_address (op
, DImode
, 0);
9436 hi_half
[num
] = adjust_address (op
, DImode
, 8);
9440 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
9441 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
9446 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9447 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9448 is the expression of the binary operation. The output may either be
9449 emitted here, or returned to the caller, like all output_* functions.
9451 There is no guarantee that the operands are the same mode, as they
9452 might be within FLOAT or FLOAT_EXTEND expressions. */
9454 #ifndef SYSV386_COMPAT
9455 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9456 wants to fix the assemblers because that causes incompatibility
9457 with gcc. No-one wants to fix gcc because that causes
9458 incompatibility with assemblers... You can use the option of
9459 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9460 #define SYSV386_COMPAT 1
9464 output_387_binary_op (rtx insn
, rtx
*operands
)
9466 static char buf
[30];
9469 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
9471 #ifdef ENABLE_CHECKING
9472 /* Even if we do not want to check the inputs, this documents input
9473 constraints. Which helps in understanding the following code. */
9474 if (STACK_REG_P (operands
[0])
9475 && ((REG_P (operands
[1])
9476 && REGNO (operands
[0]) == REGNO (operands
[1])
9477 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
9478 || (REG_P (operands
[2])
9479 && REGNO (operands
[0]) == REGNO (operands
[2])
9480 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
9481 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
9484 gcc_assert (is_sse
);
9487 switch (GET_CODE (operands
[3]))
9490 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9491 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9499 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9500 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9508 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9509 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9517 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9518 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9532 if (GET_MODE (operands
[0]) == SFmode
)
9533 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9535 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9540 switch (GET_CODE (operands
[3]))
9544 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9546 rtx temp
= operands
[2];
9547 operands
[2] = operands
[1];
9551 /* know operands[0] == operands[1]. */
9553 if (MEM_P (operands
[2]))
9559 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9561 if (STACK_TOP_P (operands
[0]))
9562 /* How is it that we are storing to a dead operand[2]?
9563 Well, presumably operands[1] is dead too. We can't
9564 store the result to st(0) as st(0) gets popped on this
9565 instruction. Instead store to operands[2] (which I
9566 think has to be st(1)). st(1) will be popped later.
9567 gcc <= 2.8.1 didn't have this check and generated
9568 assembly code that the Unixware assembler rejected. */
9569 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9571 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9575 if (STACK_TOP_P (operands
[0]))
9576 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9578 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9583 if (MEM_P (operands
[1]))
9589 if (MEM_P (operands
[2]))
9595 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9598 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9599 derived assemblers, confusingly reverse the direction of
9600 the operation for fsub{r} and fdiv{r} when the
9601 destination register is not st(0). The Intel assembler
9602 doesn't have this brain damage. Read !SYSV386_COMPAT to
9603 figure out what the hardware really does. */
9604 if (STACK_TOP_P (operands
[0]))
9605 p
= "{p\t%0, %2|rp\t%2, %0}";
9607 p
= "{rp\t%2, %0|p\t%0, %2}";
9609 if (STACK_TOP_P (operands
[0]))
9610 /* As above for fmul/fadd, we can't store to st(0). */
9611 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9613 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9618 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9621 if (STACK_TOP_P (operands
[0]))
9622 p
= "{rp\t%0, %1|p\t%1, %0}";
9624 p
= "{p\t%1, %0|rp\t%0, %1}";
9626 if (STACK_TOP_P (operands
[0]))
9627 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9629 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9634 if (STACK_TOP_P (operands
[0]))
9636 if (STACK_TOP_P (operands
[1]))
9637 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9639 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9642 else if (STACK_TOP_P (operands
[1]))
9645 p
= "{\t%1, %0|r\t%0, %1}";
9647 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9653 p
= "{r\t%2, %0|\t%0, %2}";
9655 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9668 /* Return needed mode for entity in optimize_mode_switching pass. */
9671 ix86_mode_needed (int entity
, rtx insn
)
9673 enum attr_i387_cw mode
;
9675 /* The mode UNINITIALIZED is used to store control word after a
9676 function call or ASM pattern. The mode ANY specify that function
9677 has no requirements on the control word and make no changes in the
9678 bits we are interested in. */
9681 || (NONJUMP_INSN_P (insn
)
9682 && (asm_noperands (PATTERN (insn
)) >= 0
9683 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9684 return I387_CW_UNINITIALIZED
;
9686 if (recog_memoized (insn
) < 0)
9689 mode
= get_attr_i387_cw (insn
);
9694 if (mode
== I387_CW_TRUNC
)
9699 if (mode
== I387_CW_FLOOR
)
9704 if (mode
== I387_CW_CEIL
)
9709 if (mode
== I387_CW_MASK_PM
)
9720 /* Output code to initialize control word copies used by trunc?f?i and
9721 rounding patterns. CURRENT_MODE is set to current control word,
9722 while NEW_MODE is set to new control word. */
9725 emit_i387_cw_initialization (int mode
)
9727 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9730 enum ix86_stack_slot slot
;
9732 rtx reg
= gen_reg_rtx (HImode
);
9734 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9735 emit_move_insn (reg
, copy_rtx (stored_mode
));
9737 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9742 /* round toward zero (truncate) */
9743 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9744 slot
= SLOT_CW_TRUNC
;
9748 /* round down toward -oo */
9749 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9750 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9751 slot
= SLOT_CW_FLOOR
;
9755 /* round up toward +oo */
9756 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9757 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9758 slot
= SLOT_CW_CEIL
;
9761 case I387_CW_MASK_PM
:
9762 /* mask precision exception for nearbyint() */
9763 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9764 slot
= SLOT_CW_MASK_PM
;
9776 /* round toward zero (truncate) */
9777 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9778 slot
= SLOT_CW_TRUNC
;
9782 /* round down toward -oo */
9783 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9784 slot
= SLOT_CW_FLOOR
;
9788 /* round up toward +oo */
9789 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9790 slot
= SLOT_CW_CEIL
;
9793 case I387_CW_MASK_PM
:
9794 /* mask precision exception for nearbyint() */
9795 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9796 slot
= SLOT_CW_MASK_PM
;
9804 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9806 new_mode
= assign_386_stack_local (HImode
, slot
);
9807 emit_move_insn (new_mode
, reg
);
9810 /* Output code for INSN to convert a float to a signed int. OPERANDS
9811 are the insn operands. The output may be [HSD]Imode and the input
9812 operand may be [SDX]Fmode. */
9815 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9817 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9818 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9819 int round_mode
= get_attr_i387_cw (insn
);
9821 /* Jump through a hoop or two for DImode, since the hardware has no
9822 non-popping instruction. We used to do this a different way, but
9823 that was somewhat fragile and broke with post-reload splitters. */
9824 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9825 output_asm_insn ("fld\t%y1", operands
);
9827 gcc_assert (STACK_TOP_P (operands
[1]));
9828 gcc_assert (MEM_P (operands
[0]));
9829 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9832 output_asm_insn ("fisttp%z0\t%0", operands
);
9835 if (round_mode
!= I387_CW_ANY
)
9836 output_asm_insn ("fldcw\t%3", operands
);
9837 if (stack_top_dies
|| dimode_p
)
9838 output_asm_insn ("fistp%z0\t%0", operands
);
9840 output_asm_insn ("fist%z0\t%0", operands
);
9841 if (round_mode
!= I387_CW_ANY
)
9842 output_asm_insn ("fldcw\t%2", operands
);
9848 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9849 have the values zero or one, indicates the ffreep insn's operand
9850 from the OPERANDS array. */
9853 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9855 if (TARGET_USE_FFREEP
)
9856 #if HAVE_AS_IX86_FFREEP
9857 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9860 static char retval
[] = ".word\t0xc_df";
9861 int regno
= REGNO (operands
[opno
]);
9863 gcc_assert (FP_REGNO_P (regno
));
9865 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9870 return opno
? "fstp\t%y1" : "fstp\t%y0";
9874 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9875 should be used. UNORDERED_P is true when fucom should be used. */
9878 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9881 rtx cmp_op0
, cmp_op1
;
9882 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9886 cmp_op0
= operands
[0];
9887 cmp_op1
= operands
[1];
9891 cmp_op0
= operands
[1];
9892 cmp_op1
= operands
[2];
9897 if (GET_MODE (operands
[0]) == SFmode
)
9899 return "ucomiss\t{%1, %0|%0, %1}";
9901 return "comiss\t{%1, %0|%0, %1}";
9904 return "ucomisd\t{%1, %0|%0, %1}";
9906 return "comisd\t{%1, %0|%0, %1}";
9909 gcc_assert (STACK_TOP_P (cmp_op0
));
9911 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9913 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9917 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9918 return output_387_ffreep (operands
, 1);
9921 return "ftst\n\tfnstsw\t%0";
9924 if (STACK_REG_P (cmp_op1
)
9926 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9927 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9929 /* If both the top of the 387 stack dies, and the other operand
9930 is also a stack register that dies, then this must be a
9931 `fcompp' float compare */
9935 /* There is no double popping fcomi variant. Fortunately,
9936 eflags is immune from the fstp's cc clobbering. */
9938 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9940 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9941 return output_387_ffreep (operands
, 0);
9946 return "fucompp\n\tfnstsw\t%0";
9948 return "fcompp\n\tfnstsw\t%0";
9953 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9955 static const char * const alt
[16] =
9957 "fcom%z2\t%y2\n\tfnstsw\t%0",
9958 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9959 "fucom%z2\t%y2\n\tfnstsw\t%0",
9960 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9962 "ficom%z2\t%y2\n\tfnstsw\t%0",
9963 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9967 "fcomi\t{%y1, %0|%0, %y1}",
9968 "fcomip\t{%y1, %0|%0, %y1}",
9969 "fucomi\t{%y1, %0|%0, %y1}",
9970 "fucomip\t{%y1, %0|%0, %y1}",
9981 mask
= eflags_p
<< 3;
9982 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9983 mask
|= unordered_p
<< 1;
9984 mask
|= stack_top_dies
;
9986 gcc_assert (mask
< 16);
9995 ix86_output_addr_vec_elt (FILE *file
, int value
)
9997 const char *directive
= ASM_LONG
;
10001 directive
= ASM_QUAD
;
10003 gcc_assert (!TARGET_64BIT
);
10006 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
10010 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
10012 const char *directive
= ASM_LONG
;
10015 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
10016 directive
= ASM_QUAD
;
10018 gcc_assert (!TARGET_64BIT
);
10020 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10021 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
10022 fprintf (file
, "%s%s%d-%s%d\n",
10023 directive
, LPREFIX
, value
, LPREFIX
, rel
);
10024 else if (HAVE_AS_GOTOFF_IN_DATA
)
10025 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
10027 else if (TARGET_MACHO
)
10029 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
10030 machopic_output_function_base_name (file
);
10031 fprintf(file
, "\n");
10035 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
10036 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
10039 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10043 ix86_expand_clear (rtx dest
)
10047 /* We play register width games, which are only valid after reload. */
10048 gcc_assert (reload_completed
);
10050 /* Avoid HImode and its attendant prefix byte. */
10051 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
10052 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
10053 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
10055 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10056 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
10058 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10059 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
10065 /* X is an unchanging MEM. If it is a constant pool reference, return
10066 the constant pool rtx, else NULL. */
10069 maybe_get_pool_constant (rtx x
)
10071 x
= ix86_delegitimize_address (XEXP (x
, 0));
10073 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
10074 return get_pool_constant (x
);
10080 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
10083 enum tls_model model
;
10088 if (GET_CODE (op1
) == SYMBOL_REF
)
10090 model
= SYMBOL_REF_TLS_MODEL (op1
);
10093 op1
= legitimize_tls_address (op1
, model
, true);
10094 op1
= force_operand (op1
, op0
);
10098 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10099 && SYMBOL_REF_DLLIMPORT_P (op1
))
10100 op1
= legitimize_dllimport_symbol (op1
, false);
10102 else if (GET_CODE (op1
) == CONST
10103 && GET_CODE (XEXP (op1
, 0)) == PLUS
10104 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
10106 rtx addend
= XEXP (XEXP (op1
, 0), 1);
10107 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
10110 model
= SYMBOL_REF_TLS_MODEL (symbol
);
10112 tmp
= legitimize_tls_address (symbol
, model
, true);
10113 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10114 && SYMBOL_REF_DLLIMPORT_P (symbol
))
10115 tmp
= legitimize_dllimport_symbol (symbol
, true);
10119 tmp
= force_operand (tmp
, NULL
);
10120 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
10121 op0
, 1, OPTAB_DIRECT
);
10127 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
10129 if (TARGET_MACHO
&& !TARGET_64BIT
)
10134 rtx temp
= ((reload_in_progress
10135 || ((op0
&& REG_P (op0
))
10137 ? op0
: gen_reg_rtx (Pmode
));
10138 op1
= machopic_indirect_data_reference (op1
, temp
);
10139 op1
= machopic_legitimize_pic_address (op1
, mode
,
10140 temp
== op1
? 0 : temp
);
10142 else if (MACHOPIC_INDIRECT
)
10143 op1
= machopic_indirect_data_reference (op1
, 0);
10151 op1
= force_reg (Pmode
, op1
);
10152 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
10154 rtx reg
= !can_create_pseudo_p () ? op0
: NULL_RTX
;
10155 op1
= legitimize_pic_address (op1
, reg
);
10164 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
10165 || !push_operand (op0
, mode
))
10167 op1
= force_reg (mode
, op1
);
10169 if (push_operand (op0
, mode
)
10170 && ! general_no_elim_operand (op1
, mode
))
10171 op1
= copy_to_mode_reg (mode
, op1
);
10173 /* Force large constants in 64bit compilation into register
10174 to get them CSEed. */
10175 if (can_create_pseudo_p ()
10176 && (mode
== DImode
) && TARGET_64BIT
10177 && immediate_operand (op1
, mode
)
10178 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
10179 && !register_operand (op0
, mode
)
10181 op1
= copy_to_mode_reg (mode
, op1
);
10183 if (can_create_pseudo_p ()
10184 && FLOAT_MODE_P (mode
)
10185 && GET_CODE (op1
) == CONST_DOUBLE
)
10187 /* If we are loading a floating point constant to a register,
10188 force the value to memory now, since we'll get better code
10189 out the back end. */
10191 op1
= validize_mem (force_const_mem (mode
, op1
));
10192 if (!register_operand (op0
, mode
))
10194 rtx temp
= gen_reg_rtx (mode
);
10195 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
10196 emit_move_insn (op0
, temp
);
10202 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
10206 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
10208 rtx op0
= operands
[0], op1
= operands
[1];
10209 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
10211 /* Force constants other than zero into memory. We do not know how
10212 the instructions used to build constants modify the upper 64 bits
10213 of the register, once we have that information we may be able
10214 to handle some of them more efficiently. */
10215 if (can_create_pseudo_p ()
10216 && register_operand (op0
, mode
)
10217 && (CONSTANT_P (op1
)
10218 || (GET_CODE (op1
) == SUBREG
10219 && CONSTANT_P (SUBREG_REG (op1
))))
10220 && standard_sse_constant_p (op1
) <= 0)
10221 op1
= validize_mem (force_const_mem (mode
, op1
));
10223 /* TDmode values are passed as TImode on the stack. TImode values
10224 are moved via xmm registers, and moving them to stack can result in
10225 unaligned memory access. Use ix86_expand_vector_move_misalign()
10226 if memory operand is not aligned correctly. */
10227 if (can_create_pseudo_p ()
10228 && (mode
== TImode
) && !TARGET_64BIT
10229 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
10230 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
10234 /* ix86_expand_vector_move_misalign() does not like constants ... */
10235 if (CONSTANT_P (op1
)
10236 || (GET_CODE (op1
) == SUBREG
10237 && CONSTANT_P (SUBREG_REG (op1
))))
10238 op1
= validize_mem (force_const_mem (mode
, op1
));
10240 /* ... nor both arguments in memory. */
10241 if (!register_operand (op0
, mode
)
10242 && !register_operand (op1
, mode
))
10243 op1
= force_reg (mode
, op1
);
10245 tmp
[0] = op0
; tmp
[1] = op1
;
10246 ix86_expand_vector_move_misalign (mode
, tmp
);
10250 /* Make operand1 a register if it isn't already. */
10251 if (can_create_pseudo_p ()
10252 && !register_operand (op0
, mode
)
10253 && !register_operand (op1
, mode
))
10255 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
10259 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
10262 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10263 straight to ix86_expand_vector_move. */
10264 /* Code generation for scalar reg-reg moves of single and double precision data:
10265 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10269 if (x86_sse_partial_reg_dependency == true)
10274 Code generation for scalar loads of double precision data:
10275 if (x86_sse_split_regs == true)
10276 movlpd mem, reg (gas syntax)
10280 Code generation for unaligned packed loads of single precision data
10281 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10282 if (x86_sse_unaligned_move_optimal)
10285 if (x86_sse_partial_reg_dependency == true)
10297 Code generation for unaligned packed loads of double precision data
10298 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10299 if (x86_sse_unaligned_move_optimal)
10302 if (x86_sse_split_regs == true)
10315 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
10324 /* If we're optimizing for size, movups is the smallest. */
10327 op0
= gen_lowpart (V4SFmode
, op0
);
10328 op1
= gen_lowpart (V4SFmode
, op1
);
10329 emit_insn (gen_sse_movups (op0
, op1
));
10333 /* ??? If we have typed data, then it would appear that using
10334 movdqu is the only way to get unaligned data loaded with
10336 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10338 op0
= gen_lowpart (V16QImode
, op0
);
10339 op1
= gen_lowpart (V16QImode
, op1
);
10340 emit_insn (gen_sse2_movdqu (op0
, op1
));
10344 if (TARGET_SSE2
&& mode
== V2DFmode
)
10348 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10350 op0
= gen_lowpart (V2DFmode
, op0
);
10351 op1
= gen_lowpart (V2DFmode
, op1
);
10352 emit_insn (gen_sse2_movupd (op0
, op1
));
10356 /* When SSE registers are split into halves, we can avoid
10357 writing to the top half twice. */
10358 if (TARGET_SSE_SPLIT_REGS
)
10360 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10365 /* ??? Not sure about the best option for the Intel chips.
10366 The following would seem to satisfy; the register is
10367 entirely cleared, breaking the dependency chain. We
10368 then store to the upper half, with a dependency depth
10369 of one. A rumor has it that Intel recommends two movsd
10370 followed by an unpacklpd, but this is unconfirmed. And
10371 given that the dependency depth of the unpacklpd would
10372 still be one, I'm not sure why this would be better. */
10373 zero
= CONST0_RTX (V2DFmode
);
10376 m
= adjust_address (op1
, DFmode
, 0);
10377 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
10378 m
= adjust_address (op1
, DFmode
, 8);
10379 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
10383 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10385 op0
= gen_lowpart (V4SFmode
, op0
);
10386 op1
= gen_lowpart (V4SFmode
, op1
);
10387 emit_insn (gen_sse_movups (op0
, op1
));
10391 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
10392 emit_move_insn (op0
, CONST0_RTX (mode
));
10394 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10396 if (mode
!= V4SFmode
)
10397 op0
= gen_lowpart (V4SFmode
, op0
);
10398 m
= adjust_address (op1
, V2SFmode
, 0);
10399 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
10400 m
= adjust_address (op1
, V2SFmode
, 8);
10401 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
10404 else if (MEM_P (op0
))
10406 /* If we're optimizing for size, movups is the smallest. */
10409 op0
= gen_lowpart (V4SFmode
, op0
);
10410 op1
= gen_lowpart (V4SFmode
, op1
);
10411 emit_insn (gen_sse_movups (op0
, op1
));
10415 /* ??? Similar to above, only less clear because of quote
10416 typeless stores unquote. */
10417 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
10418 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10420 op0
= gen_lowpart (V16QImode
, op0
);
10421 op1
= gen_lowpart (V16QImode
, op1
);
10422 emit_insn (gen_sse2_movdqu (op0
, op1
));
10426 if (TARGET_SSE2
&& mode
== V2DFmode
)
10428 m
= adjust_address (op0
, DFmode
, 0);
10429 emit_insn (gen_sse2_storelpd (m
, op1
));
10430 m
= adjust_address (op0
, DFmode
, 8);
10431 emit_insn (gen_sse2_storehpd (m
, op1
));
10435 if (mode
!= V4SFmode
)
10436 op1
= gen_lowpart (V4SFmode
, op1
);
10437 m
= adjust_address (op0
, V2SFmode
, 0);
10438 emit_insn (gen_sse_storelps (m
, op1
));
10439 m
= adjust_address (op0
, V2SFmode
, 8);
10440 emit_insn (gen_sse_storehps (m
, op1
));
10444 gcc_unreachable ();
10447 /* Expand a push in MODE. This is some mode for which we do not support
10448 proper push instructions, at least from the registers that we expect
10449 the value to live in. */
10452 ix86_expand_push (enum machine_mode mode
, rtx x
)
10456 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
10457 GEN_INT (-GET_MODE_SIZE (mode
)),
10458 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
10459 if (tmp
!= stack_pointer_rtx
)
10460 emit_move_insn (stack_pointer_rtx
, tmp
);
10462 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
10463 emit_move_insn (tmp
, x
);
10466 /* Helper function of ix86_fixup_binary_operands to canonicalize
10467 operand order. Returns true if the operands should be swapped. */
10470 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
10473 rtx dst
= operands
[0];
10474 rtx src1
= operands
[1];
10475 rtx src2
= operands
[2];
10477 /* If the operation is not commutative, we can't do anything. */
10478 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
10481 /* Highest priority is that src1 should match dst. */
10482 if (rtx_equal_p (dst
, src1
))
10484 if (rtx_equal_p (dst
, src2
))
10487 /* Next highest priority is that immediate constants come second. */
10488 if (immediate_operand (src2
, mode
))
10490 if (immediate_operand (src1
, mode
))
10493 /* Lowest priority is that memory references should come second. */
10503 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10504 destination to use for the operation. If different from the true
10505 destination in operands[0], a copy operation will be required. */
10508 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
10511 rtx dst
= operands
[0];
10512 rtx src1
= operands
[1];
10513 rtx src2
= operands
[2];
10515 /* Canonicalize operand order. */
10516 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10523 /* Both source operands cannot be in memory. */
10524 if (MEM_P (src1
) && MEM_P (src2
))
10526 /* Optimization: Only read from memory once. */
10527 if (rtx_equal_p (src1
, src2
))
10529 src2
= force_reg (mode
, src2
);
10533 src2
= force_reg (mode
, src2
);
10536 /* If the destination is memory, and we do not have matching source
10537 operands, do things in registers. */
10538 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10539 dst
= gen_reg_rtx (mode
);
10541 /* Source 1 cannot be a constant. */
10542 if (CONSTANT_P (src1
))
10543 src1
= force_reg (mode
, src1
);
10545 /* Source 1 cannot be a non-matching memory. */
10546 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10547 src1
= force_reg (mode
, src1
);
10549 operands
[1] = src1
;
10550 operands
[2] = src2
;
10554 /* Similarly, but assume that the destination has already been
10555 set up properly. */
10558 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10559 enum machine_mode mode
, rtx operands
[])
10561 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10562 gcc_assert (dst
== operands
[0]);
10565 /* Attempt to expand a binary operator. Make the expansion closer to the
10566 actual machine, then just general_operand, which will allow 3 separate
10567 memory references (one output, two input) in a single insn. */
10570 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10573 rtx src1
, src2
, dst
, op
, clob
;
10575 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10576 src1
= operands
[1];
10577 src2
= operands
[2];
10579 /* Emit the instruction. */
10581 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10582 if (reload_in_progress
)
10584 /* Reload doesn't know about the flags register, and doesn't know that
10585 it doesn't want to clobber it. We can only do this with PLUS. */
10586 gcc_assert (code
== PLUS
);
10591 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10592 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10595 /* Fix up the destination if needed. */
10596 if (dst
!= operands
[0])
10597 emit_move_insn (operands
[0], dst
);
10600 /* Return TRUE or FALSE depending on whether the binary operator meets the
10601 appropriate constraints. */
10604 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10607 rtx dst
= operands
[0];
10608 rtx src1
= operands
[1];
10609 rtx src2
= operands
[2];
10611 /* Both source operands cannot be in memory. */
10612 if (MEM_P (src1
) && MEM_P (src2
))
10615 /* Canonicalize operand order for commutative operators. */
10616 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10623 /* If the destination is memory, we must have a matching source operand. */
10624 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10627 /* Source 1 cannot be a constant. */
10628 if (CONSTANT_P (src1
))
10631 /* Source 1 cannot be a non-matching memory. */
10632 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10638 /* Attempt to expand a unary operator. Make the expansion closer to the
10639 actual machine, then just general_operand, which will allow 2 separate
10640 memory references (one output, one input) in a single insn. */
10643 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10646 int matching_memory
;
10647 rtx src
, dst
, op
, clob
;
10652 /* If the destination is memory, and we do not have matching source
10653 operands, do things in registers. */
10654 matching_memory
= 0;
10657 if (rtx_equal_p (dst
, src
))
10658 matching_memory
= 1;
10660 dst
= gen_reg_rtx (mode
);
10663 /* When source operand is memory, destination must match. */
10664 if (MEM_P (src
) && !matching_memory
)
10665 src
= force_reg (mode
, src
);
10667 /* Emit the instruction. */
10669 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10670 if (reload_in_progress
|| code
== NOT
)
10672 /* Reload doesn't know about the flags register, and doesn't know that
10673 it doesn't want to clobber it. */
10674 gcc_assert (code
== NOT
);
10679 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10680 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10683 /* Fix up the destination if needed. */
10684 if (dst
!= operands
[0])
10685 emit_move_insn (operands
[0], dst
);
10688 /* Return TRUE or FALSE depending on whether the unary operator meets the
10689 appropriate constraints. */
10692 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10693 enum machine_mode mode ATTRIBUTE_UNUSED
,
10694 rtx operands
[2] ATTRIBUTE_UNUSED
)
10696 /* If one of operands is memory, source and destination must match. */
10697 if ((MEM_P (operands
[0])
10698 || MEM_P (operands
[1]))
10699 && ! rtx_equal_p (operands
[0], operands
[1]))
10704 /* Post-reload splitter for converting an SF or DFmode value in an
10705 SSE register into an unsigned SImode. */
10708 ix86_split_convert_uns_si_sse (rtx operands
[])
10710 enum machine_mode vecmode
;
10711 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10713 large
= operands
[1];
10714 zero_or_two31
= operands
[2];
10715 input
= operands
[3];
10716 two31
= operands
[4];
10717 vecmode
= GET_MODE (large
);
10718 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10720 /* Load up the value into the low element. We must ensure that the other
10721 elements are valid floats -- zero is the easiest such value. */
10724 if (vecmode
== V4SFmode
)
10725 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10727 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10731 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10732 emit_move_insn (value
, CONST0_RTX (vecmode
));
10733 if (vecmode
== V4SFmode
)
10734 emit_insn (gen_sse_movss (value
, value
, input
));
10736 emit_insn (gen_sse2_movsd (value
, value
, input
));
10739 emit_move_insn (large
, two31
);
10740 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10742 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10743 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10745 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10746 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10748 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10749 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10751 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10752 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10754 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10755 if (vecmode
== V4SFmode
)
10756 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10758 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10761 emit_insn (gen_xorv4si3 (value
, value
, large
));
10764 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10765 Expects the 64-bit DImode to be supplied in a pair of integral
10766 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10767 -mfpmath=sse, !optimize_size only. */
10770 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10772 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10773 rtx int_xmm
, fp_xmm
;
10774 rtx biases
, exponents
;
10777 int_xmm
= gen_reg_rtx (V4SImode
);
10778 if (TARGET_INTER_UNIT_MOVES
)
10779 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10780 else if (TARGET_SSE_SPLIT_REGS
)
10782 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10783 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10787 x
= gen_reg_rtx (V2DImode
);
10788 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10789 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10792 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10793 gen_rtvec (4, GEN_INT (0x43300000UL
),
10794 GEN_INT (0x45300000UL
),
10795 const0_rtx
, const0_rtx
));
10796 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10798 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10799 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10801 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10802 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10803 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10804 (0x1.0p84 + double(fp_value_hi_xmm)).
10805 Note these exponents differ by 32. */
10807 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10809 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10810 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10811 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10812 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10813 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10814 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10815 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10816 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10817 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10819 /* Add the upper and lower DFmode values together. */
10821 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10824 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10825 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10826 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10829 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10832 /* Convert an unsigned SImode value into a DFmode. Only currently used
10833 for SSE, but applicable anywhere. */
10836 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10838 REAL_VALUE_TYPE TWO31r
;
10841 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10842 NULL
, 1, OPTAB_DIRECT
);
10844 fp
= gen_reg_rtx (DFmode
);
10845 emit_insn (gen_floatsidf2 (fp
, x
));
10847 real_ldexp (&TWO31r
, &dconst1
, 31);
10848 x
= const_double_from_real_value (TWO31r
, DFmode
);
10850 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10852 emit_move_insn (target
, x
);
10855 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10856 32-bit mode; otherwise we have a direct convert instruction. */
10859 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10861 REAL_VALUE_TYPE TWO32r
;
10862 rtx fp_lo
, fp_hi
, x
;
10864 fp_lo
= gen_reg_rtx (DFmode
);
10865 fp_hi
= gen_reg_rtx (DFmode
);
10867 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10869 real_ldexp (&TWO32r
, &dconst1
, 32);
10870 x
= const_double_from_real_value (TWO32r
, DFmode
);
10871 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10873 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10875 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10878 emit_move_insn (target
, x
);
10881 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10882 For x86_32, -mfpmath=sse, !optimize_size only. */
10884 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10886 REAL_VALUE_TYPE ONE16r
;
10887 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10889 real_ldexp (&ONE16r
, &dconst1
, 16);
10890 x
= const_double_from_real_value (ONE16r
, SFmode
);
10891 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10892 NULL
, 0, OPTAB_DIRECT
);
10893 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10894 NULL
, 0, OPTAB_DIRECT
);
10895 fp_hi
= gen_reg_rtx (SFmode
);
10896 fp_lo
= gen_reg_rtx (SFmode
);
10897 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10898 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10899 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10901 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10903 if (!rtx_equal_p (target
, fp_hi
))
10904 emit_move_insn (target
, fp_hi
);
10907 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10908 then replicate the value for all elements of the vector
10912 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10919 v
= gen_rtvec (4, value
, value
, value
, value
);
10920 return gen_rtx_CONST_VECTOR (V4SImode
, v
);
10924 v
= gen_rtvec (2, value
, value
);
10925 return gen_rtx_CONST_VECTOR (V2DImode
, v
);
10929 v
= gen_rtvec (4, value
, value
, value
, value
);
10931 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10932 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10933 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10937 v
= gen_rtvec (2, value
, value
);
10939 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10940 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10943 gcc_unreachable ();
10947 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10948 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10949 for an SSE register. If VECT is true, then replicate the mask for
10950 all elements of the vector register. If INVERT is true, then create
10951 a mask excluding the sign bit. */
10954 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10956 enum machine_mode vec_mode
, imode
;
10957 HOST_WIDE_INT hi
, lo
;
10962 /* Find the sign bit, sign extended to 2*HWI. */
10968 vec_mode
= (mode
== SImode
) ? V4SImode
: V4SFmode
;
10969 lo
= 0x80000000, hi
= lo
< 0;
10975 vec_mode
= (mode
== DImode
) ? V2DImode
: V2DFmode
;
10976 if (HOST_BITS_PER_WIDE_INT
>= 64)
10977 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10979 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10985 vec_mode
= VOIDmode
;
10986 gcc_assert (HOST_BITS_PER_WIDE_INT
>= 64);
10987 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
10991 gcc_unreachable ();
10995 lo
= ~lo
, hi
= ~hi
;
10997 /* Force this value into the low part of a fp vector constant. */
10998 mask
= immed_double_const (lo
, hi
, imode
);
10999 mask
= gen_lowpart (mode
, mask
);
11001 if (vec_mode
== VOIDmode
)
11002 return force_reg (mode
, mask
);
11004 v
= ix86_build_const_vector (mode
, vect
, mask
);
11005 return force_reg (vec_mode
, v
);
11008 /* Generate code for floating point ABS or NEG. */
11011 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
11014 rtx mask
, set
, use
, clob
, dst
, src
;
11015 bool matching_memory
;
11016 bool use_sse
= false;
11017 bool vector_mode
= VECTOR_MODE_P (mode
);
11018 enum machine_mode elt_mode
= mode
;
11022 elt_mode
= GET_MODE_INNER (mode
);
11025 else if (mode
== TFmode
)
11027 else if (TARGET_SSE_MATH
)
11028 use_sse
= SSE_FLOAT_MODE_P (mode
);
11030 /* NEG and ABS performed with SSE use bitwise mask operations.
11031 Create the appropriate mask now. */
11033 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
11040 /* If the destination is memory, and we don't have matching source
11041 operands or we're using the x87, do things in registers. */
11042 matching_memory
= false;
11045 if (use_sse
&& rtx_equal_p (dst
, src
))
11046 matching_memory
= true;
11048 dst
= gen_reg_rtx (mode
);
11050 if (MEM_P (src
) && !matching_memory
)
11051 src
= force_reg (mode
, src
);
11055 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
11056 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
11061 set
= gen_rtx_fmt_e (code
, mode
, src
);
11062 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
11065 use
= gen_rtx_USE (VOIDmode
, mask
);
11066 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
11067 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
11068 gen_rtvec (3, set
, use
, clob
)));
11074 if (dst
!= operands
[0])
11075 emit_move_insn (operands
[0], dst
);
11078 /* Expand a copysign operation. Special case operand 0 being a constant. */
11081 ix86_expand_copysign (rtx operands
[])
11083 enum machine_mode mode
, vmode
;
11084 rtx dest
, op0
, op1
, mask
, nmask
;
11086 dest
= operands
[0];
11090 mode
= GET_MODE (dest
);
11091 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
11093 if (GET_CODE (op0
) == CONST_DOUBLE
)
11095 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
11097 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
11098 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
11100 if (mode
== SFmode
|| mode
== DFmode
)
11102 if (op0
== CONST0_RTX (mode
))
11103 op0
= CONST0_RTX (vmode
);
11108 if (mode
== SFmode
)
11109 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
11110 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
11112 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
11113 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
11117 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11119 if (mode
== SFmode
)
11120 copysign_insn
= gen_copysignsf3_const
;
11121 else if (mode
== DFmode
)
11122 copysign_insn
= gen_copysigndf3_const
;
11124 copysign_insn
= gen_copysigntf3_const
;
11126 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
11130 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
11132 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
11133 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11135 if (mode
== SFmode
)
11136 copysign_insn
= gen_copysignsf3_var
;
11137 else if (mode
== DFmode
)
11138 copysign_insn
= gen_copysigndf3_var
;
11140 copysign_insn
= gen_copysigntf3_var
;
11142 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
11146 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11147 be a constant, and so has already been expanded into a vector constant. */
11150 ix86_split_copysign_const (rtx operands
[])
11152 enum machine_mode mode
, vmode
;
11153 rtx dest
, op0
, op1
, mask
, x
;
11155 dest
= operands
[0];
11158 mask
= operands
[3];
11160 mode
= GET_MODE (dest
);
11161 vmode
= GET_MODE (mask
);
11163 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
11164 x
= gen_rtx_AND (vmode
, dest
, mask
);
11165 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11167 if (op0
!= CONST0_RTX (vmode
))
11169 x
= gen_rtx_IOR (vmode
, dest
, op0
);
11170 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11174 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11175 so we have to do two masks. */
11178 ix86_split_copysign_var (rtx operands
[])
11180 enum machine_mode mode
, vmode
;
11181 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
11183 dest
= operands
[0];
11184 scratch
= operands
[1];
11187 nmask
= operands
[4];
11188 mask
= operands
[5];
11190 mode
= GET_MODE (dest
);
11191 vmode
= GET_MODE (mask
);
11193 if (rtx_equal_p (op0
, op1
))
11195 /* Shouldn't happen often (it's useless, obviously), but when it does
11196 we'd generate incorrect code if we continue below. */
11197 emit_move_insn (dest
, op0
);
11201 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
11203 gcc_assert (REGNO (op1
) == REGNO (scratch
));
11205 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11206 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11209 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11210 x
= gen_rtx_NOT (vmode
, dest
);
11211 x
= gen_rtx_AND (vmode
, x
, op0
);
11212 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11216 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
11218 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11220 else /* alternative 2,4 */
11222 gcc_assert (REGNO (mask
) == REGNO (scratch
));
11223 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
11224 x
= gen_rtx_AND (vmode
, scratch
, op1
);
11226 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11228 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
11230 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11231 x
= gen_rtx_AND (vmode
, dest
, nmask
);
11233 else /* alternative 3,4 */
11235 gcc_assert (REGNO (nmask
) == REGNO (dest
));
11237 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11238 x
= gen_rtx_AND (vmode
, dest
, op0
);
11240 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11243 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
11244 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11247 /* Return TRUE or FALSE depending on whether the first SET in INSN
11248 has source and destination with matching CC modes, and that the
11249 CC mode is at least as constrained as REQ_MODE. */
11252 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
11255 enum machine_mode set_mode
;
11257 set
= PATTERN (insn
);
11258 if (GET_CODE (set
) == PARALLEL
)
11259 set
= XVECEXP (set
, 0, 0);
11260 gcc_assert (GET_CODE (set
) == SET
);
11261 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
11263 set_mode
= GET_MODE (SET_DEST (set
));
11267 if (req_mode
!= CCNOmode
11268 && (req_mode
!= CCmode
11269 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
11273 if (req_mode
== CCGCmode
)
11277 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
11281 if (req_mode
== CCZmode
)
11288 gcc_unreachable ();
11291 return (GET_MODE (SET_SRC (set
)) == set_mode
);
11294 /* Generate insn patterns to do an integer compare of OPERANDS. */
11297 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
11299 enum machine_mode cmpmode
;
11302 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
11303 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
11305 /* This is very simple, but making the interface the same as in the
11306 FP case makes the rest of the code easier. */
11307 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
11308 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
11310 /* Return the test that should be put into the flags user, i.e.
11311 the bcc, scc, or cmov instruction. */
11312 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
11315 /* Figure out whether to use ordered or unordered fp comparisons.
11316 Return the appropriate mode to use. */
11319 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
11321 /* ??? In order to make all comparisons reversible, we do all comparisons
11322 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11323 all forms trapping and nontrapping comparisons, we can make inequality
11324 comparisons trapping again, since it results in better code when using
11325 FCOM based compares. */
11326 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
11330 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
11332 enum machine_mode mode
= GET_MODE (op0
);
11334 if (SCALAR_FLOAT_MODE_P (mode
))
11336 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11337 return ix86_fp_compare_mode (code
);
11342 /* Only zero flag is needed. */
11343 case EQ
: /* ZF=0 */
11344 case NE
: /* ZF!=0 */
11346 /* Codes needing carry flag. */
11347 case GEU
: /* CF=0 */
11348 case LTU
: /* CF=1 */
11349 /* Detect overflow checks. They need just the carry flag. */
11350 if (GET_CODE (op0
) == PLUS
11351 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11355 case GTU
: /* CF=0 & ZF=0 */
11356 case LEU
: /* CF=1 | ZF=1 */
11357 /* Detect overflow checks. They need just the carry flag. */
11358 if (GET_CODE (op0
) == MINUS
11359 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11363 /* Codes possibly doable only with sign flag when
11364 comparing against zero. */
11365 case GE
: /* SF=OF or SF=0 */
11366 case LT
: /* SF<>OF or SF=1 */
11367 if (op1
== const0_rtx
)
11370 /* For other cases Carry flag is not required. */
11372 /* Codes doable only with sign flag when comparing
11373 against zero, but we miss jump instruction for it
11374 so we need to use relational tests against overflow
11375 that thus needs to be zero. */
11376 case GT
: /* ZF=0 & SF=OF */
11377 case LE
: /* ZF=1 | SF<>OF */
11378 if (op1
== const0_rtx
)
11382 /* strcmp pattern do (use flags) and combine may ask us for proper
11387 gcc_unreachable ();
11391 /* Return the fixed registers used for condition codes. */
11394 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
11401 /* If two condition code modes are compatible, return a condition code
11402 mode which is compatible with both. Otherwise, return
11405 static enum machine_mode
11406 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
11411 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
11414 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
11415 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
11421 gcc_unreachable ();
11451 /* These are only compatible with themselves, which we already
11457 /* Split comparison code CODE into comparisons we can do using branch
11458 instructions. BYPASS_CODE is comparison code for branch that will
11459 branch around FIRST_CODE and SECOND_CODE. If some of branches
11460 is not required, set value to UNKNOWN.
11461 We never require more than two branches. */
11464 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
11465 enum rtx_code
*first_code
,
11466 enum rtx_code
*second_code
)
11468 *first_code
= code
;
11469 *bypass_code
= UNKNOWN
;
11470 *second_code
= UNKNOWN
;
11472 /* The fcomi comparison sets flags as follows:
11482 case GT
: /* GTU - CF=0 & ZF=0 */
11483 case GE
: /* GEU - CF=0 */
11484 case ORDERED
: /* PF=0 */
11485 case UNORDERED
: /* PF=1 */
11486 case UNEQ
: /* EQ - ZF=1 */
11487 case UNLT
: /* LTU - CF=1 */
11488 case UNLE
: /* LEU - CF=1 | ZF=1 */
11489 case LTGT
: /* EQ - ZF=0 */
11491 case LT
: /* LTU - CF=1 - fails on unordered */
11492 *first_code
= UNLT
;
11493 *bypass_code
= UNORDERED
;
11495 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
11496 *first_code
= UNLE
;
11497 *bypass_code
= UNORDERED
;
11499 case EQ
: /* EQ - ZF=1 - fails on unordered */
11500 *first_code
= UNEQ
;
11501 *bypass_code
= UNORDERED
;
11503 case NE
: /* NE - ZF=0 - fails on unordered */
11504 *first_code
= LTGT
;
11505 *second_code
= UNORDERED
;
11507 case UNGE
: /* GEU - CF=0 - fails on unordered */
11509 *second_code
= UNORDERED
;
11511 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
11513 *second_code
= UNORDERED
;
11516 gcc_unreachable ();
11518 if (!TARGET_IEEE_FP
)
11520 *second_code
= UNKNOWN
;
11521 *bypass_code
= UNKNOWN
;
11525 /* Return cost of comparison done fcom + arithmetics operations on AX.
11526 All following functions do use number of instructions as a cost metrics.
11527 In future this should be tweaked to compute bytes for optimize_size and
11528 take into account performance of various instructions on various CPUs. */
11530 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
11532 if (!TARGET_IEEE_FP
)
11534 /* The cost of code output by ix86_expand_fp_compare. */
11558 gcc_unreachable ();
11562 /* Return cost of comparison done using fcomi operation.
11563 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11565 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
11567 enum rtx_code bypass_code
, first_code
, second_code
;
11568 /* Return arbitrarily high cost when instruction is not supported - this
11569 prevents gcc from using it. */
11572 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11573 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
11576 /* Return cost of comparison done using sahf operation.
11577 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11579 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
11581 enum rtx_code bypass_code
, first_code
, second_code
;
11582 /* Return arbitrarily high cost when instruction is not preferred - this
11583 avoids gcc from using it. */
11584 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11586 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11587 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11590 /* Compute cost of the comparison done using any method.
11591 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11593 ix86_fp_comparison_cost (enum rtx_code code
)
11595 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11598 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11599 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11601 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11602 if (min
> sahf_cost
)
11604 if (min
> fcomi_cost
)
11609 /* Return true if we should use an FCOMI instruction for this
11613 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11615 enum rtx_code swapped_code
= swap_condition (code
);
11617 return ((ix86_fp_comparison_cost (code
)
11618 == ix86_fp_comparison_fcomi_cost (code
))
11619 || (ix86_fp_comparison_cost (swapped_code
)
11620 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11623 /* Swap, force into registers, or otherwise massage the two operands
11624 to a fp comparison. The operands are updated in place; the new
11625 comparison code is returned. */
11627 static enum rtx_code
11628 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11630 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11631 rtx op0
= *pop0
, op1
= *pop1
;
11632 enum machine_mode op_mode
= GET_MODE (op0
);
11633 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11635 /* All of the unordered compare instructions only work on registers.
11636 The same is true of the fcomi compare instructions. The XFmode
11637 compare instructions require registers except when comparing
11638 against zero or when converting operand 1 from fixed point to
11642 && (fpcmp_mode
== CCFPUmode
11643 || (op_mode
== XFmode
11644 && ! (standard_80387_constant_p (op0
) == 1
11645 || standard_80387_constant_p (op1
) == 1)
11646 && GET_CODE (op1
) != FLOAT
)
11647 || ix86_use_fcomi_compare (code
)))
11649 op0
= force_reg (op_mode
, op0
);
11650 op1
= force_reg (op_mode
, op1
);
11654 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11655 things around if they appear profitable, otherwise force op0
11656 into a register. */
11658 if (standard_80387_constant_p (op0
) == 0
11660 && ! (standard_80387_constant_p (op1
) == 0
11664 tmp
= op0
, op0
= op1
, op1
= tmp
;
11665 code
= swap_condition (code
);
11669 op0
= force_reg (op_mode
, op0
);
11671 if (CONSTANT_P (op1
))
11673 int tmp
= standard_80387_constant_p (op1
);
11675 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11679 op1
= force_reg (op_mode
, op1
);
11682 op1
= force_reg (op_mode
, op1
);
11686 /* Try to rearrange the comparison to make it cheaper. */
11687 if (ix86_fp_comparison_cost (code
)
11688 > ix86_fp_comparison_cost (swap_condition (code
))
11689 && (REG_P (op1
) || can_create_pseudo_p ()))
11692 tmp
= op0
, op0
= op1
, op1
= tmp
;
11693 code
= swap_condition (code
);
11695 op0
= force_reg (op_mode
, op0
);
11703 /* Convert comparison codes we use to represent FP comparison to integer
11704 code that will result in proper branch. Return UNKNOWN if no such code
11708 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11737 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11740 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11741 rtx
*second_test
, rtx
*bypass_test
)
11743 enum machine_mode fpcmp_mode
, intcmp_mode
;
11745 int cost
= ix86_fp_comparison_cost (code
);
11746 enum rtx_code bypass_code
, first_code
, second_code
;
11748 fpcmp_mode
= ix86_fp_compare_mode (code
);
11749 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11752 *second_test
= NULL_RTX
;
11754 *bypass_test
= NULL_RTX
;
11756 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11758 /* Do fcomi/sahf based test when profitable. */
11759 if (ix86_fp_comparison_arithmetics_cost (code
) > cost
11760 && (bypass_code
== UNKNOWN
|| bypass_test
)
11761 && (second_code
== UNKNOWN
|| second_test
))
11763 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11764 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11770 gcc_assert (TARGET_SAHF
);
11773 scratch
= gen_reg_rtx (HImode
);
11774 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11776 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
11779 /* The FP codes work out to act like unsigned. */
11780 intcmp_mode
= fpcmp_mode
;
11782 if (bypass_code
!= UNKNOWN
)
11783 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11784 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11786 if (second_code
!= UNKNOWN
)
11787 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11788 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11793 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11794 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11795 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11797 scratch
= gen_reg_rtx (HImode
);
11798 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11800 /* In the unordered case, we have to check C2 for NaN's, which
11801 doesn't happen to work out to anything nice combination-wise.
11802 So do some bit twiddling on the value we've got in AH to come
11803 up with an appropriate set of condition codes. */
11805 intcmp_mode
= CCNOmode
;
11810 if (code
== GT
|| !TARGET_IEEE_FP
)
11812 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11817 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11818 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11819 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11820 intcmp_mode
= CCmode
;
11826 if (code
== LT
&& TARGET_IEEE_FP
)
11828 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11829 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11830 intcmp_mode
= CCmode
;
11835 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11841 if (code
== GE
|| !TARGET_IEEE_FP
)
11843 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11848 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11849 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11856 if (code
== LE
&& TARGET_IEEE_FP
)
11858 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11859 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11860 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11861 intcmp_mode
= CCmode
;
11866 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11872 if (code
== EQ
&& TARGET_IEEE_FP
)
11874 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11875 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11876 intcmp_mode
= CCmode
;
11881 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11888 if (code
== NE
&& TARGET_IEEE_FP
)
11890 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11891 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11897 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11903 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11907 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11912 gcc_unreachable ();
11916 /* Return the test that should be put into the flags user, i.e.
11917 the bcc, scc, or cmov instruction. */
11918 return gen_rtx_fmt_ee (code
, VOIDmode
,
11919 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11924 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11927 op0
= ix86_compare_op0
;
11928 op1
= ix86_compare_op1
;
11931 *second_test
= NULL_RTX
;
11933 *bypass_test
= NULL_RTX
;
11935 if (ix86_compare_emitted
)
11937 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11938 ix86_compare_emitted
= NULL_RTX
;
11940 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11942 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11943 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11944 second_test
, bypass_test
);
11947 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11952 /* Return true if the CODE will result in nontrivial jump sequence. */
11954 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11956 enum rtx_code bypass_code
, first_code
, second_code
;
11959 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11960 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11964 ix86_expand_branch (enum rtx_code code
, rtx label
)
11968 /* If we have emitted a compare insn, go straight to simple.
11969 ix86_expand_compare won't emit anything if ix86_compare_emitted
11971 if (ix86_compare_emitted
)
11974 switch (GET_MODE (ix86_compare_op0
))
11980 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11981 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11982 gen_rtx_LABEL_REF (VOIDmode
, label
),
11984 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11993 enum rtx_code bypass_code
, first_code
, second_code
;
11995 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11996 &ix86_compare_op1
);
11998 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
12000 /* Check whether we will use the natural sequence with one jump. If
12001 so, we can expand jump early. Otherwise delay expansion by
12002 creating compound insn to not confuse optimizers. */
12003 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
)
12005 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
12006 gen_rtx_LABEL_REF (VOIDmode
, label
),
12007 pc_rtx
, NULL_RTX
, NULL_RTX
);
12011 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
12012 ix86_compare_op0
, ix86_compare_op1
);
12013 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
12014 gen_rtx_LABEL_REF (VOIDmode
, label
),
12016 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
12018 use_fcomi
= ix86_use_fcomi_compare (code
);
12019 vec
= rtvec_alloc (3 + !use_fcomi
);
12020 RTVEC_ELT (vec
, 0) = tmp
;
12022 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FPSR_REG
));
12024 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FLAGS_REG
));
12027 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
12029 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
12038 /* Expand DImode branch into multiple compare+branch. */
12040 rtx lo
[2], hi
[2], label2
;
12041 enum rtx_code code1
, code2
, code3
;
12042 enum machine_mode submode
;
12044 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
12046 tmp
= ix86_compare_op0
;
12047 ix86_compare_op0
= ix86_compare_op1
;
12048 ix86_compare_op1
= tmp
;
12049 code
= swap_condition (code
);
12051 if (GET_MODE (ix86_compare_op0
) == DImode
)
12053 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
12054 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
12059 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
12060 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
12064 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12065 avoid two branches. This costs one extra insn, so disable when
12066 optimizing for size. */
12068 if ((code
== EQ
|| code
== NE
)
12070 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
12075 if (hi
[1] != const0_rtx
)
12076 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
12077 NULL_RTX
, 0, OPTAB_WIDEN
);
12080 if (lo
[1] != const0_rtx
)
12081 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
12082 NULL_RTX
, 0, OPTAB_WIDEN
);
12084 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
12085 NULL_RTX
, 0, OPTAB_WIDEN
);
12087 ix86_compare_op0
= tmp
;
12088 ix86_compare_op1
= const0_rtx
;
12089 ix86_expand_branch (code
, label
);
12093 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12094 op1 is a constant and the low word is zero, then we can just
12095 examine the high word. */
12097 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
12100 case LT
: case LTU
: case GE
: case GEU
:
12101 ix86_compare_op0
= hi
[0];
12102 ix86_compare_op1
= hi
[1];
12103 ix86_expand_branch (code
, label
);
12109 /* Otherwise, we need two or three jumps. */
12111 label2
= gen_label_rtx ();
12114 code2
= swap_condition (code
);
12115 code3
= unsigned_condition (code
);
12119 case LT
: case GT
: case LTU
: case GTU
:
12122 case LE
: code1
= LT
; code2
= GT
; break;
12123 case GE
: code1
= GT
; code2
= LT
; break;
12124 case LEU
: code1
= LTU
; code2
= GTU
; break;
12125 case GEU
: code1
= GTU
; code2
= LTU
; break;
12127 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
12128 case NE
: code2
= UNKNOWN
; break;
12131 gcc_unreachable ();
12136 * if (hi(a) < hi(b)) goto true;
12137 * if (hi(a) > hi(b)) goto false;
12138 * if (lo(a) < lo(b)) goto true;
12142 ix86_compare_op0
= hi
[0];
12143 ix86_compare_op1
= hi
[1];
12145 if (code1
!= UNKNOWN
)
12146 ix86_expand_branch (code1
, label
);
12147 if (code2
!= UNKNOWN
)
12148 ix86_expand_branch (code2
, label2
);
12150 ix86_compare_op0
= lo
[0];
12151 ix86_compare_op1
= lo
[1];
12152 ix86_expand_branch (code3
, label
);
12154 if (code2
!= UNKNOWN
)
12155 emit_label (label2
);
12160 gcc_unreachable ();
12164 /* Split branch based on floating point condition. */
12166 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
12167 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
12169 rtx second
, bypass
;
12170 rtx label
= NULL_RTX
;
12172 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
12175 if (target2
!= pc_rtx
)
12178 code
= reverse_condition_maybe_unordered (code
);
12183 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
12184 tmp
, &second
, &bypass
);
12186 /* Remove pushed operand from stack. */
12188 ix86_free_from_memory (GET_MODE (pushed
));
12190 if (split_branch_probability
>= 0)
12192 /* Distribute the probabilities across the jumps.
12193 Assume the BYPASS and SECOND to be always test
12195 probability
= split_branch_probability
;
12197 /* Value of 1 is low enough to make no need for probability
12198 to be updated. Later we may run some experiments and see
12199 if unordered values are more frequent in practice. */
12201 bypass_probability
= 1;
12203 second_probability
= 1;
12205 if (bypass
!= NULL_RTX
)
12207 label
= gen_label_rtx ();
12208 i
= emit_jump_insn (gen_rtx_SET
12210 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12212 gen_rtx_LABEL_REF (VOIDmode
,
12215 if (bypass_probability
>= 0)
12217 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12218 GEN_INT (bypass_probability
),
12221 i
= emit_jump_insn (gen_rtx_SET
12223 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12224 condition
, target1
, target2
)));
12225 if (probability
>= 0)
12227 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12228 GEN_INT (probability
),
12230 if (second
!= NULL_RTX
)
12232 i
= emit_jump_insn (gen_rtx_SET
12234 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
12236 if (second_probability
>= 0)
12238 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12239 GEN_INT (second_probability
),
12242 if (label
!= NULL_RTX
)
12243 emit_label (label
);
12247 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
12249 rtx ret
, tmp
, tmpreg
, equiv
;
12250 rtx second_test
, bypass_test
;
12252 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
12253 return 0; /* FAIL */
12255 gcc_assert (GET_MODE (dest
) == QImode
);
12257 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12258 PUT_MODE (ret
, QImode
);
12263 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
12264 if (bypass_test
|| second_test
)
12266 rtx test
= second_test
;
12268 rtx tmp2
= gen_reg_rtx (QImode
);
12271 gcc_assert (!second_test
);
12272 test
= bypass_test
;
12274 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
12276 PUT_MODE (test
, QImode
);
12277 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
12280 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
12282 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
12285 /* Attach a REG_EQUAL note describing the comparison result. */
12286 if (ix86_compare_op0
&& ix86_compare_op1
)
12288 equiv
= simplify_gen_relational (code
, QImode
,
12289 GET_MODE (ix86_compare_op0
),
12290 ix86_compare_op0
, ix86_compare_op1
);
12291 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
12294 return 1; /* DONE */
12297 /* Expand comparison setting or clearing carry flag. Return true when
12298 successful and set pop for the operation. */
12300 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
12302 enum machine_mode mode
=
12303 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
12305 /* Do not handle DImode compares that go through special path. */
12306 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
12309 if (SCALAR_FLOAT_MODE_P (mode
))
12311 rtx second_test
= NULL
, bypass_test
= NULL
;
12312 rtx compare_op
, compare_seq
;
12314 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
12316 /* Shortcut: following common codes never translate
12317 into carry flag compares. */
12318 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
12319 || code
== ORDERED
|| code
== UNORDERED
)
12322 /* These comparisons require zero flag; swap operands so they won't. */
12323 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
12324 && !TARGET_IEEE_FP
)
12329 code
= swap_condition (code
);
12332 /* Try to expand the comparison and verify that we end up with
12333 carry flag based comparison. This fails to be true only when
12334 we decide to expand comparison using arithmetic that is not
12335 too common scenario. */
12337 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
12338 &second_test
, &bypass_test
);
12339 compare_seq
= get_insns ();
12342 if (second_test
|| bypass_test
)
12345 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12346 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12347 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
12349 code
= GET_CODE (compare_op
);
12351 if (code
!= LTU
&& code
!= GEU
)
12354 emit_insn (compare_seq
);
12359 if (!INTEGRAL_MODE_P (mode
))
12368 /* Convert a==0 into (unsigned)a<1. */
12371 if (op1
!= const0_rtx
)
12374 code
= (code
== EQ
? LTU
: GEU
);
12377 /* Convert a>b into b<a or a>=b-1. */
12380 if (CONST_INT_P (op1
))
12382 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
12383 /* Bail out on overflow. We still can swap operands but that
12384 would force loading of the constant into register. */
12385 if (op1
== const0_rtx
12386 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
12388 code
= (code
== GTU
? GEU
: LTU
);
12395 code
= (code
== GTU
? LTU
: GEU
);
12399 /* Convert a>=0 into (unsigned)a<0x80000000. */
12402 if (mode
== DImode
|| op1
!= const0_rtx
)
12404 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12405 code
= (code
== LT
? GEU
: LTU
);
12409 if (mode
== DImode
|| op1
!= constm1_rtx
)
12411 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12412 code
= (code
== LE
? GEU
: LTU
);
12418 /* Swapping operands may cause constant to appear as first operand. */
12419 if (!nonimmediate_operand (op0
, VOIDmode
))
12421 if (!can_create_pseudo_p ())
12423 op0
= force_reg (mode
, op0
);
12425 ix86_compare_op0
= op0
;
12426 ix86_compare_op1
= op1
;
12427 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
12428 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
12433 ix86_expand_int_movcc (rtx operands
[])
12435 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
12436 rtx compare_seq
, compare_op
;
12437 rtx second_test
, bypass_test
;
12438 enum machine_mode mode
= GET_MODE (operands
[0]);
12439 bool sign_bit_compare_p
= false;;
12442 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12443 compare_seq
= get_insns ();
12446 compare_code
= GET_CODE (compare_op
);
12448 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
12449 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
12450 sign_bit_compare_p
= true;
12452 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12453 HImode insns, we'd be swallowed in word prefix ops. */
12455 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
12456 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
12457 && CONST_INT_P (operands
[2])
12458 && CONST_INT_P (operands
[3]))
12460 rtx out
= operands
[0];
12461 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
12462 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
12463 HOST_WIDE_INT diff
;
12466 /* Sign bit compares are better done using shifts than we do by using
12468 if (sign_bit_compare_p
12469 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12470 ix86_compare_op1
, &compare_op
))
12472 /* Detect overlap between destination and compare sources. */
12475 if (!sign_bit_compare_p
)
12477 bool fpcmp
= false;
12479 compare_code
= GET_CODE (compare_op
);
12481 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12482 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12485 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
12488 /* To simplify rest of code, restrict to the GEU case. */
12489 if (compare_code
== LTU
)
12491 HOST_WIDE_INT tmp
= ct
;
12494 compare_code
= reverse_condition (compare_code
);
12495 code
= reverse_condition (code
);
12500 PUT_CODE (compare_op
,
12501 reverse_condition_maybe_unordered
12502 (GET_CODE (compare_op
)));
12504 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12508 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
12509 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
12510 tmp
= gen_reg_rtx (mode
);
12512 if (mode
== DImode
)
12513 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
12515 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
12519 if (code
== GT
|| code
== GE
)
12520 code
= reverse_condition (code
);
12523 HOST_WIDE_INT tmp
= ct
;
12528 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
12529 ix86_compare_op1
, VOIDmode
, 0, -1);
12542 tmp
= expand_simple_binop (mode
, PLUS
,
12544 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12555 tmp
= expand_simple_binop (mode
, IOR
,
12557 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12559 else if (diff
== -1 && ct
)
12569 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12571 tmp
= expand_simple_binop (mode
, PLUS
,
12572 copy_rtx (tmp
), GEN_INT (cf
),
12573 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12581 * andl cf - ct, dest
12591 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12594 tmp
= expand_simple_binop (mode
, AND
,
12596 gen_int_mode (cf
- ct
, mode
),
12597 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12599 tmp
= expand_simple_binop (mode
, PLUS
,
12600 copy_rtx (tmp
), GEN_INT (ct
),
12601 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12604 if (!rtx_equal_p (tmp
, out
))
12605 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
12607 return 1; /* DONE */
12612 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12615 tmp
= ct
, ct
= cf
, cf
= tmp
;
12618 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12620 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12622 /* We may be reversing unordered compare to normal compare, that
12623 is not valid in general (we may convert non-trapping condition
12624 to trapping one), however on i386 we currently emit all
12625 comparisons unordered. */
12626 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12627 code
= reverse_condition_maybe_unordered (code
);
12631 compare_code
= reverse_condition (compare_code
);
12632 code
= reverse_condition (code
);
12636 compare_code
= UNKNOWN
;
12637 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12638 && CONST_INT_P (ix86_compare_op1
))
12640 if (ix86_compare_op1
== const0_rtx
12641 && (code
== LT
|| code
== GE
))
12642 compare_code
= code
;
12643 else if (ix86_compare_op1
== constm1_rtx
)
12647 else if (code
== GT
)
12652 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12653 if (compare_code
!= UNKNOWN
12654 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12655 && (cf
== -1 || ct
== -1))
12657 /* If lea code below could be used, only optimize
12658 if it results in a 2 insn sequence. */
12660 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12661 || diff
== 3 || diff
== 5 || diff
== 9)
12662 || (compare_code
== LT
&& ct
== -1)
12663 || (compare_code
== GE
&& cf
== -1))
12666 * notl op1 (if necessary)
12674 code
= reverse_condition (code
);
12677 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12678 ix86_compare_op1
, VOIDmode
, 0, -1);
12680 out
= expand_simple_binop (mode
, IOR
,
12682 out
, 1, OPTAB_DIRECT
);
12683 if (out
!= operands
[0])
12684 emit_move_insn (operands
[0], out
);
12686 return 1; /* DONE */
12691 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12692 || diff
== 3 || diff
== 5 || diff
== 9)
12693 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12695 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12701 * lea cf(dest*(ct-cf)),dest
12705 * This also catches the degenerate setcc-only case.
12711 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12712 ix86_compare_op1
, VOIDmode
, 0, 1);
12715 /* On x86_64 the lea instruction operates on Pmode, so we need
12716 to get arithmetics done in proper mode to match. */
12718 tmp
= copy_rtx (out
);
12722 out1
= copy_rtx (out
);
12723 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12727 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12733 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12736 if (!rtx_equal_p (tmp
, out
))
12739 out
= force_operand (tmp
, copy_rtx (out
));
12741 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12743 if (!rtx_equal_p (out
, operands
[0]))
12744 emit_move_insn (operands
[0], copy_rtx (out
));
12746 return 1; /* DONE */
12750 * General case: Jumpful:
12751 * xorl dest,dest cmpl op1, op2
12752 * cmpl op1, op2 movl ct, dest
12753 * setcc dest jcc 1f
12754 * decl dest movl cf, dest
12755 * andl (cf-ct),dest 1:
12758 * Size 20. Size 14.
12760 * This is reasonably steep, but branch mispredict costs are
12761 * high on modern cpus, so consider failing only if optimizing
12765 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12766 && BRANCH_COST
>= 2)
12770 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12775 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12777 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12779 /* We may be reversing unordered compare to normal compare,
12780 that is not valid in general (we may convert non-trapping
12781 condition to trapping one), however on i386 we currently
12782 emit all comparisons unordered. */
12783 code
= reverse_condition_maybe_unordered (code
);
12787 code
= reverse_condition (code
);
12788 if (compare_code
!= UNKNOWN
)
12789 compare_code
= reverse_condition (compare_code
);
12793 if (compare_code
!= UNKNOWN
)
12795 /* notl op1 (if needed)
12800 For x < 0 (resp. x <= -1) there will be no notl,
12801 so if possible swap the constants to get rid of the
12803 True/false will be -1/0 while code below (store flag
12804 followed by decrement) is 0/-1, so the constants need
12805 to be exchanged once more. */
12807 if (compare_code
== GE
|| !cf
)
12809 code
= reverse_condition (code
);
12814 HOST_WIDE_INT tmp
= cf
;
12819 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12820 ix86_compare_op1
, VOIDmode
, 0, -1);
12824 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12825 ix86_compare_op1
, VOIDmode
, 0, 1);
12827 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12828 copy_rtx (out
), 1, OPTAB_DIRECT
);
12831 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12832 gen_int_mode (cf
- ct
, mode
),
12833 copy_rtx (out
), 1, OPTAB_DIRECT
);
12835 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12836 copy_rtx (out
), 1, OPTAB_DIRECT
);
12837 if (!rtx_equal_p (out
, operands
[0]))
12838 emit_move_insn (operands
[0], copy_rtx (out
));
12840 return 1; /* DONE */
12844 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12846 /* Try a few things more with specific constants and a variable. */
12849 rtx var
, orig_out
, out
, tmp
;
12851 if (BRANCH_COST
<= 2)
12852 return 0; /* FAIL */
12854 /* If one of the two operands is an interesting constant, load a
12855 constant with the above and mask it in with a logical operation. */
12857 if (CONST_INT_P (operands
[2]))
12860 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12861 operands
[3] = constm1_rtx
, op
= and_optab
;
12862 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12863 operands
[3] = const0_rtx
, op
= ior_optab
;
12865 return 0; /* FAIL */
12867 else if (CONST_INT_P (operands
[3]))
12870 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12871 operands
[2] = constm1_rtx
, op
= and_optab
;
12872 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12873 operands
[2] = const0_rtx
, op
= ior_optab
;
12875 return 0; /* FAIL */
12878 return 0; /* FAIL */
12880 orig_out
= operands
[0];
12881 tmp
= gen_reg_rtx (mode
);
12884 /* Recurse to get the constant loaded. */
12885 if (ix86_expand_int_movcc (operands
) == 0)
12886 return 0; /* FAIL */
12888 /* Mask in the interesting variable. */
12889 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12891 if (!rtx_equal_p (out
, orig_out
))
12892 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12894 return 1; /* DONE */
12898 * For comparison with above,
12908 if (! nonimmediate_operand (operands
[2], mode
))
12909 operands
[2] = force_reg (mode
, operands
[2]);
12910 if (! nonimmediate_operand (operands
[3], mode
))
12911 operands
[3] = force_reg (mode
, operands
[3]);
12913 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12915 rtx tmp
= gen_reg_rtx (mode
);
12916 emit_move_insn (tmp
, operands
[3]);
12919 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12921 rtx tmp
= gen_reg_rtx (mode
);
12922 emit_move_insn (tmp
, operands
[2]);
12926 if (! register_operand (operands
[2], VOIDmode
)
12928 || ! register_operand (operands
[3], VOIDmode
)))
12929 operands
[2] = force_reg (mode
, operands
[2]);
12932 && ! register_operand (operands
[3], VOIDmode
))
12933 operands
[3] = force_reg (mode
, operands
[3]);
12935 emit_insn (compare_seq
);
12936 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12937 gen_rtx_IF_THEN_ELSE (mode
,
12938 compare_op
, operands
[2],
12941 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12942 gen_rtx_IF_THEN_ELSE (mode
,
12944 copy_rtx (operands
[3]),
12945 copy_rtx (operands
[0]))));
12947 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12948 gen_rtx_IF_THEN_ELSE (mode
,
12950 copy_rtx (operands
[2]),
12951 copy_rtx (operands
[0]))));
12953 return 1; /* DONE */
12956 /* Swap, force into registers, or otherwise massage the two operands
12957 to an sse comparison with a mask result. Thus we differ a bit from
12958 ix86_prepare_fp_compare_args which expects to produce a flags result.
12960 The DEST operand exists to help determine whether to commute commutative
12961 operators. The POP0/POP1 operands are updated in place. The new
12962 comparison code is returned, or UNKNOWN if not implementable. */
12964 static enum rtx_code
12965 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12966 rtx
*pop0
, rtx
*pop1
)
12974 /* We have no LTGT as an operator. We could implement it with
12975 NE & ORDERED, but this requires an extra temporary. It's
12976 not clear that it's worth it. */
12983 /* These are supported directly. */
12990 /* For commutative operators, try to canonicalize the destination
12991 operand to be first in the comparison - this helps reload to
12992 avoid extra moves. */
12993 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
13001 /* These are not supported directly. Swap the comparison operands
13002 to transform into something that is supported. */
13006 code
= swap_condition (code
);
13010 gcc_unreachable ();
13016 /* Detect conditional moves that exactly match min/max operational
13017 semantics. Note that this is IEEE safe, as long as we don't
13018 interchange the operands.
13020 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13021 and TRUE if the operation is successful and instructions are emitted. */
13024 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
13025 rtx cmp_op1
, rtx if_true
, rtx if_false
)
13027 enum machine_mode mode
;
13033 else if (code
== UNGE
)
13036 if_true
= if_false
;
13042 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
13044 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
13049 mode
= GET_MODE (dest
);
13051 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13052 but MODE may be a vector mode and thus not appropriate. */
13053 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
13055 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
13058 if_true
= force_reg (mode
, if_true
);
13059 v
= gen_rtvec (2, if_true
, if_false
);
13060 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
13064 code
= is_min
? SMIN
: SMAX
;
13065 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
13068 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
13072 /* Expand an sse vector comparison. Return the register with the result. */
13075 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
13076 rtx op_true
, rtx op_false
)
13078 enum machine_mode mode
= GET_MODE (dest
);
13081 cmp_op0
= force_reg (mode
, cmp_op0
);
13082 if (!nonimmediate_operand (cmp_op1
, mode
))
13083 cmp_op1
= force_reg (mode
, cmp_op1
);
13086 || reg_overlap_mentioned_p (dest
, op_true
)
13087 || reg_overlap_mentioned_p (dest
, op_false
))
13088 dest
= gen_reg_rtx (mode
);
13090 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
13091 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13096 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13097 operations. This is used for both scalar and vector conditional moves. */
13100 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
13102 enum machine_mode mode
= GET_MODE (dest
);
13107 rtx pcmov
= gen_rtx_SET (mode
, dest
,
13108 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
13113 else if (op_false
== CONST0_RTX (mode
))
13115 op_true
= force_reg (mode
, op_true
);
13116 x
= gen_rtx_AND (mode
, cmp
, op_true
);
13117 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13119 else if (op_true
== CONST0_RTX (mode
))
13121 op_false
= force_reg (mode
, op_false
);
13122 x
= gen_rtx_NOT (mode
, cmp
);
13123 x
= gen_rtx_AND (mode
, x
, op_false
);
13124 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13128 op_true
= force_reg (mode
, op_true
);
13129 op_false
= force_reg (mode
, op_false
);
13131 t2
= gen_reg_rtx (mode
);
13133 t3
= gen_reg_rtx (mode
);
13137 x
= gen_rtx_AND (mode
, op_true
, cmp
);
13138 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
13140 x
= gen_rtx_NOT (mode
, cmp
);
13141 x
= gen_rtx_AND (mode
, x
, op_false
);
13142 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
13144 x
= gen_rtx_IOR (mode
, t3
, t2
);
13145 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13149 /* Expand a floating-point conditional move. Return true if successful. */
13152 ix86_expand_fp_movcc (rtx operands
[])
13154 enum machine_mode mode
= GET_MODE (operands
[0]);
13155 enum rtx_code code
= GET_CODE (operands
[1]);
13156 rtx tmp
, compare_op
, second_test
, bypass_test
;
13158 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
13160 enum machine_mode cmode
;
13162 /* Since we've no cmove for sse registers, don't force bad register
13163 allocation just to gain access to it. Deny movcc when the
13164 comparison mode doesn't match the move mode. */
13165 cmode
= GET_MODE (ix86_compare_op0
);
13166 if (cmode
== VOIDmode
)
13167 cmode
= GET_MODE (ix86_compare_op1
);
13171 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13173 &ix86_compare_op1
);
13174 if (code
== UNKNOWN
)
13177 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
13178 ix86_compare_op1
, operands
[2],
13182 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
13183 ix86_compare_op1
, operands
[2], operands
[3]);
13184 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
13188 /* The floating point conditional move instructions don't directly
13189 support conditions resulting from a signed integer comparison. */
13191 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13193 /* The floating point conditional move instructions don't directly
13194 support signed integer comparisons. */
13196 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
13198 gcc_assert (!second_test
&& !bypass_test
);
13199 tmp
= gen_reg_rtx (QImode
);
13200 ix86_expand_setcc (code
, tmp
);
13202 ix86_compare_op0
= tmp
;
13203 ix86_compare_op1
= const0_rtx
;
13204 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13206 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
13208 tmp
= gen_reg_rtx (mode
);
13209 emit_move_insn (tmp
, operands
[3]);
13212 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
13214 tmp
= gen_reg_rtx (mode
);
13215 emit_move_insn (tmp
, operands
[2]);
13219 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13220 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
13221 operands
[2], operands
[3])));
13223 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13224 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
13225 operands
[3], operands
[0])));
13227 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13228 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
13229 operands
[2], operands
[0])));
13234 /* Expand a floating-point vector conditional move; a vcond operation
13235 rather than a movcc operation. */
13238 ix86_expand_fp_vcond (rtx operands
[])
13240 enum rtx_code code
= GET_CODE (operands
[3]);
13243 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13244 &operands
[4], &operands
[5]);
13245 if (code
== UNKNOWN
)
13248 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
13249 operands
[5], operands
[1], operands
[2]))
13252 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
13253 operands
[1], operands
[2]);
13254 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
13258 /* Expand a signed/unsigned integral vector conditional move. */
13261 ix86_expand_int_vcond (rtx operands
[])
13263 enum machine_mode mode
= GET_MODE (operands
[0]);
13264 enum rtx_code code
= GET_CODE (operands
[3]);
13265 bool negate
= false;
13268 cop0
= operands
[4];
13269 cop1
= operands
[5];
13271 /* Canonicalize the comparison to EQ, GT, GTU. */
13282 code
= reverse_condition (code
);
13288 code
= reverse_condition (code
);
13294 code
= swap_condition (code
);
13295 x
= cop0
, cop0
= cop1
, cop1
= x
;
13299 gcc_unreachable ();
13302 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13303 if (mode
== V2DImode
)
13308 /* SSE4.1 supports EQ. */
13309 if (!TARGET_SSE4_1
)
13315 /* SSE4.2 supports GT/GTU. */
13316 if (!TARGET_SSE4_2
)
13321 gcc_unreachable ();
13325 /* Unsigned parallel compare is not supported by the hardware. Play some
13326 tricks to turn this into a signed comparison against 0. */
13329 cop0
= force_reg (mode
, cop0
);
13338 /* Perform a parallel modulo subtraction. */
13339 t1
= gen_reg_rtx (mode
);
13340 emit_insn ((mode
== V4SImode
13342 : gen_subv2di3
) (t1
, cop0
, cop1
));
13344 /* Extract the original sign bit of op0. */
13345 mask
= ix86_build_signbit_mask (GET_MODE_INNER (mode
),
13347 t2
= gen_reg_rtx (mode
);
13348 emit_insn ((mode
== V4SImode
13350 : gen_andv2di3
) (t2
, cop0
, mask
));
13352 /* XOR it back into the result of the subtraction. This results
13353 in the sign bit set iff we saw unsigned underflow. */
13354 x
= gen_reg_rtx (mode
);
13355 emit_insn ((mode
== V4SImode
13357 : gen_xorv2di3
) (x
, t1
, t2
));
13365 /* Perform a parallel unsigned saturating subtraction. */
13366 x
= gen_reg_rtx (mode
);
13367 emit_insn (gen_rtx_SET (VOIDmode
, x
,
13368 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
13375 gcc_unreachable ();
13379 cop1
= CONST0_RTX (mode
);
13382 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
13383 operands
[1+negate
], operands
[2-negate
]);
13385 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
13386 operands
[2-negate
]);
13390 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13391 true if we should do zero extension, else sign extension. HIGH_P is
13392 true if we want the N/2 high elements, else the low elements. */
13395 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13397 enum machine_mode imode
= GET_MODE (operands
[1]);
13398 rtx (*unpack
)(rtx
, rtx
, rtx
);
13405 unpack
= gen_vec_interleave_highv16qi
;
13407 unpack
= gen_vec_interleave_lowv16qi
;
13411 unpack
= gen_vec_interleave_highv8hi
;
13413 unpack
= gen_vec_interleave_lowv8hi
;
13417 unpack
= gen_vec_interleave_highv4si
;
13419 unpack
= gen_vec_interleave_lowv4si
;
13422 gcc_unreachable ();
13425 dest
= gen_lowpart (imode
, operands
[0]);
13428 se
= force_reg (imode
, CONST0_RTX (imode
));
13430 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
13431 operands
[1], pc_rtx
, pc_rtx
);
13433 emit_insn (unpack (dest
, operands
[1], se
));
13436 /* This function performs the same task as ix86_expand_sse_unpack,
13437 but with SSE4.1 instructions. */
13440 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13442 enum machine_mode imode
= GET_MODE (operands
[1]);
13443 rtx (*unpack
)(rtx
, rtx
);
13450 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
13452 unpack
= gen_sse4_1_extendv8qiv8hi2
;
13456 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
13458 unpack
= gen_sse4_1_extendv4hiv4si2
;
13462 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
13464 unpack
= gen_sse4_1_extendv2siv2di2
;
13467 gcc_unreachable ();
13470 dest
= operands
[0];
13473 /* Shift higher 8 bytes to lower 8 bytes. */
13474 src
= gen_reg_rtx (imode
);
13475 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode
, src
),
13476 gen_lowpart (TImode
, operands
[1]),
13482 emit_insn (unpack (dest
, src
));
13485 /* This function performs the same task as ix86_expand_sse_unpack,
13486 but with amdfam15 instructions. */
13488 #define PPERM_SRC 0x00 /* copy source */
13489 #define PPERM_INVERT 0x20 /* invert source */
13490 #define PPERM_REVERSE 0x40 /* bit reverse source */
13491 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13492 #define PPERM_ZERO 0x80 /* all 0's */
13493 #define PPERM_ONES 0xa0 /* all 1's */
13494 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13495 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13497 #define PPERM_SRC1 0x00 /* use first source byte */
13498 #define PPERM_SRC2 0x10 /* use second source byte */
13501 ix86_expand_sse5_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13503 enum machine_mode imode
= GET_MODE (operands
[1]);
13504 int pperm_bytes
[16];
13506 int h
= (high_p
) ? 8 : 0;
13509 rtvec v
= rtvec_alloc (16);
13512 rtx op0
= operands
[0], op1
= operands
[1];
13517 vs
= rtvec_alloc (8);
13518 h2
= (high_p
) ? 8 : 0;
13519 for (i
= 0; i
< 8; i
++)
13521 pperm_bytes
[2*i
+0] = PPERM_SRC
| PPERM_SRC2
| i
| h
;
13522 pperm_bytes
[2*i
+1] = ((unsigned_p
)
13524 : PPERM_SIGN
| PPERM_SRC2
| i
| h
);
13527 for (i
= 0; i
< 16; i
++)
13528 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13530 for (i
= 0; i
< 8; i
++)
13531 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13533 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13534 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13536 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0
, op1
, p
, x
));
13538 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0
, op1
, p
, x
));
13542 vs
= rtvec_alloc (4);
13543 h2
= (high_p
) ? 4 : 0;
13544 for (i
= 0; i
< 4; i
++)
13546 sign_extend
= ((unsigned_p
)
13548 : PPERM_SIGN
| PPERM_SRC2
| ((2*i
) + 1 + h
));
13549 pperm_bytes
[4*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 0 + h
);
13550 pperm_bytes
[4*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((2*i
) + 1 + h
);
13551 pperm_bytes
[4*i
+2] = sign_extend
;
13552 pperm_bytes
[4*i
+3] = sign_extend
;
13555 for (i
= 0; i
< 16; i
++)
13556 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13558 for (i
= 0; i
< 4; i
++)
13559 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13561 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13562 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13564 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0
, op1
, p
, x
));
13566 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0
, op1
, p
, x
));
13570 vs
= rtvec_alloc (2);
13571 h2
= (high_p
) ? 2 : 0;
13572 for (i
= 0; i
< 2; i
++)
13574 sign_extend
= ((unsigned_p
)
13576 : PPERM_SIGN
| PPERM_SRC2
| ((4*i
) + 3 + h
));
13577 pperm_bytes
[8*i
+0] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 0 + h
);
13578 pperm_bytes
[8*i
+1] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 1 + h
);
13579 pperm_bytes
[8*i
+2] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 2 + h
);
13580 pperm_bytes
[8*i
+3] = PPERM_SRC
| PPERM_SRC2
| ((4*i
) + 3 + h
);
13581 pperm_bytes
[8*i
+4] = sign_extend
;
13582 pperm_bytes
[8*i
+5] = sign_extend
;
13583 pperm_bytes
[8*i
+6] = sign_extend
;
13584 pperm_bytes
[8*i
+7] = sign_extend
;
13587 for (i
= 0; i
< 16; i
++)
13588 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13590 for (i
= 0; i
< 2; i
++)
13591 RTVEC_ELT (vs
, i
) = GEN_INT (i
+ h2
);
13593 p
= gen_rtx_PARALLEL (VOIDmode
, vs
);
13594 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13596 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0
, op1
, p
, x
));
13598 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0
, op1
, p
, x
));
13602 gcc_unreachable ();
13608 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13609 next narrower integer vector type */
13611 ix86_expand_sse5_pack (rtx operands
[3])
13613 enum machine_mode imode
= GET_MODE (operands
[0]);
13614 int pperm_bytes
[16];
13616 rtvec v
= rtvec_alloc (16);
13618 rtx op0
= operands
[0];
13619 rtx op1
= operands
[1];
13620 rtx op2
= operands
[2];
13625 for (i
= 0; i
< 8; i
++)
13627 pperm_bytes
[i
+0] = PPERM_SRC
| PPERM_SRC1
| (i
*2);
13628 pperm_bytes
[i
+8] = PPERM_SRC
| PPERM_SRC2
| (i
*2);
13631 for (i
= 0; i
< 16; i
++)
13632 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13634 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13635 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0
, op1
, op2
, x
));
13639 for (i
= 0; i
< 4; i
++)
13641 pperm_bytes
[(2*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 0);
13642 pperm_bytes
[(2*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*4) + 1);
13643 pperm_bytes
[(2*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 0);
13644 pperm_bytes
[(2*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*4) + 1);
13647 for (i
= 0; i
< 16; i
++)
13648 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13650 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13651 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0
, op1
, op2
, x
));
13655 for (i
= 0; i
< 2; i
++)
13657 pperm_bytes
[(4*i
)+0] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 0);
13658 pperm_bytes
[(4*i
)+1] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 1);
13659 pperm_bytes
[(4*i
)+2] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 2);
13660 pperm_bytes
[(4*i
)+3] = PPERM_SRC
| PPERM_SRC1
| ((i
*8) + 3);
13661 pperm_bytes
[(4*i
)+8] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 0);
13662 pperm_bytes
[(4*i
)+9] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 1);
13663 pperm_bytes
[(4*i
)+10] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 2);
13664 pperm_bytes
[(4*i
)+11] = PPERM_SRC
| PPERM_SRC2
| ((i
*8) + 3);
13667 for (i
= 0; i
< 16; i
++)
13668 RTVEC_ELT (v
, i
) = GEN_INT (pperm_bytes
[i
]);
13670 x
= force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, v
));
13671 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0
, op1
, op2
, x
));
13675 gcc_unreachable ();
13681 /* Expand conditional increment or decrement using adb/sbb instructions.
13682 The default case using setcc followed by the conditional move can be
13683 done by generic code. */
13685 ix86_expand_int_addcc (rtx operands
[])
13687 enum rtx_code code
= GET_CODE (operands
[1]);
13689 rtx val
= const0_rtx
;
13690 bool fpcmp
= false;
13691 enum machine_mode mode
= GET_MODE (operands
[0]);
13693 if (operands
[3] != const1_rtx
13694 && operands
[3] != constm1_rtx
)
13696 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
13697 ix86_compare_op1
, &compare_op
))
13699 code
= GET_CODE (compare_op
);
13701 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
13702 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
13705 code
= ix86_fp_compare_code_to_integer (code
);
13712 PUT_CODE (compare_op
,
13713 reverse_condition_maybe_unordered
13714 (GET_CODE (compare_op
)));
13716 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
13718 PUT_MODE (compare_op
, mode
);
13720 /* Construct either adc or sbb insn. */
13721 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
13723 switch (GET_MODE (operands
[0]))
13726 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13729 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13732 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13735 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13738 gcc_unreachable ();
13743 switch (GET_MODE (operands
[0]))
13746 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13749 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13752 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13755 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13758 gcc_unreachable ();
13761 return 1; /* DONE */
13765 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13766 works for floating pointer parameters and nonoffsetable memories.
13767 For pushes, it returns just stack offsets; the values will be saved
13768 in the right order. Maximally three parts are generated. */
13771 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
13776 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
13778 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
13780 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
13781 gcc_assert (size
>= 2 && size
<= 3);
13783 /* Optimize constant pool reference to immediates. This is used by fp
13784 moves, that force all constants to memory to allow combining. */
13785 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
13787 rtx tmp
= maybe_get_pool_constant (operand
);
13792 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
13794 /* The only non-offsetable memories we handle are pushes. */
13795 int ok
= push_operand (operand
, VOIDmode
);
13799 operand
= copy_rtx (operand
);
13800 PUT_MODE (operand
, Pmode
);
13801 parts
[0] = parts
[1] = parts
[2] = operand
;
13805 if (GET_CODE (operand
) == CONST_VECTOR
)
13807 enum machine_mode imode
= int_mode_for_mode (mode
);
13808 /* Caution: if we looked through a constant pool memory above,
13809 the operand may actually have a different mode now. That's
13810 ok, since we want to pun this all the way back to an integer. */
13811 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
13812 gcc_assert (operand
!= NULL
);
13818 if (mode
== DImode
)
13819 split_di (&operand
, 1, &parts
[0], &parts
[1]);
13822 if (REG_P (operand
))
13824 gcc_assert (reload_completed
);
13825 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
13826 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
13828 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
13830 else if (offsettable_memref_p (operand
))
13832 operand
= adjust_address (operand
, SImode
, 0);
13833 parts
[0] = operand
;
13834 parts
[1] = adjust_address (operand
, SImode
, 4);
13836 parts
[2] = adjust_address (operand
, SImode
, 8);
13838 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13843 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13847 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
13848 parts
[2] = gen_int_mode (l
[2], SImode
);
13851 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
13854 gcc_unreachable ();
13856 parts
[1] = gen_int_mode (l
[1], SImode
);
13857 parts
[0] = gen_int_mode (l
[0], SImode
);
13860 gcc_unreachable ();
13865 if (mode
== TImode
)
13866 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
13867 if (mode
== XFmode
|| mode
== TFmode
)
13869 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
13870 if (REG_P (operand
))
13872 gcc_assert (reload_completed
);
13873 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
13874 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
13876 else if (offsettable_memref_p (operand
))
13878 operand
= adjust_address (operand
, DImode
, 0);
13879 parts
[0] = operand
;
13880 parts
[1] = adjust_address (operand
, upper_mode
, 8);
13882 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13887 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13888 real_to_target (l
, &r
, mode
);
13890 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13891 if (HOST_BITS_PER_WIDE_INT
>= 64)
13894 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13895 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13898 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13900 if (upper_mode
== SImode
)
13901 parts
[1] = gen_int_mode (l
[2], SImode
);
13902 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13905 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13906 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13909 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13912 gcc_unreachable ();
13919 /* Emit insns to perform a move or push of DI, DF, and XF values.
13920 Return false when normal moves are needed; true when all required
13921 insns have been emitted. Operands 2-4 contain the input values
13922 int the correct order; operands 5-7 contain the output values. */
13925 ix86_split_long_move (rtx operands
[])
13930 int collisions
= 0;
13931 enum machine_mode mode
= GET_MODE (operands
[0]);
13933 /* The DFmode expanders may ask us to move double.
13934 For 64bit target this is single move. By hiding the fact
13935 here we simplify i386.md splitters. */
13936 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13938 /* Optimize constant pool reference to immediates. This is used by
13939 fp moves, that force all constants to memory to allow combining. */
13941 if (MEM_P (operands
[1])
13942 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13943 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13944 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13945 if (push_operand (operands
[0], VOIDmode
))
13947 operands
[0] = copy_rtx (operands
[0]);
13948 PUT_MODE (operands
[0], Pmode
);
13951 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13952 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13953 emit_move_insn (operands
[0], operands
[1]);
13957 /* The only non-offsettable memory we handle is push. */
13958 if (push_operand (operands
[0], VOIDmode
))
13961 gcc_assert (!MEM_P (operands
[0])
13962 || offsettable_memref_p (operands
[0]));
13964 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13965 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13967 /* When emitting push, take care for source operands on the stack. */
13968 if (push
&& MEM_P (operands
[1])
13969 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13972 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13973 XEXP (part
[1][2], 0));
13974 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13975 XEXP (part
[1][1], 0));
13978 /* We need to do copy in the right order in case an address register
13979 of the source overlaps the destination. */
13980 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13982 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13984 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13987 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13990 /* Collision in the middle part can be handled by reordering. */
13991 if (collisions
== 1 && nparts
== 3
13992 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13995 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13996 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13999 /* If there are more collisions, we can't handle it by reordering.
14000 Do an lea to the last part and use only one colliding move. */
14001 else if (collisions
> 1)
14007 base
= part
[0][nparts
- 1];
14009 /* Handle the case when the last part isn't valid for lea.
14010 Happens in 64-bit mode storing the 12-byte XFmode. */
14011 if (GET_MODE (base
) != Pmode
)
14012 base
= gen_rtx_REG (Pmode
, REGNO (base
));
14014 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
14015 part
[1][0] = replace_equiv_address (part
[1][0], base
);
14016 part
[1][1] = replace_equiv_address (part
[1][1],
14017 plus_constant (base
, UNITS_PER_WORD
));
14019 part
[1][2] = replace_equiv_address (part
[1][2],
14020 plus_constant (base
, 8));
14030 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
14031 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
14032 emit_move_insn (part
[0][2], part
[1][2]);
14037 /* In 64bit mode we don't have 32bit push available. In case this is
14038 register, it is OK - we will just use larger counterpart. We also
14039 retype memory - these comes from attempt to avoid REX prefix on
14040 moving of second half of TFmode value. */
14041 if (GET_MODE (part
[1][1]) == SImode
)
14043 switch (GET_CODE (part
[1][1]))
14046 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
14050 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
14054 gcc_unreachable ();
14057 if (GET_MODE (part
[1][0]) == SImode
)
14058 part
[1][0] = part
[1][1];
14061 emit_move_insn (part
[0][1], part
[1][1]);
14062 emit_move_insn (part
[0][0], part
[1][0]);
14066 /* Choose correct order to not overwrite the source before it is copied. */
14067 if ((REG_P (part
[0][0])
14068 && REG_P (part
[1][1])
14069 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
14071 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
14073 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
14077 operands
[2] = part
[0][2];
14078 operands
[3] = part
[0][1];
14079 operands
[4] = part
[0][0];
14080 operands
[5] = part
[1][2];
14081 operands
[6] = part
[1][1];
14082 operands
[7] = part
[1][0];
14086 operands
[2] = part
[0][1];
14087 operands
[3] = part
[0][0];
14088 operands
[5] = part
[1][1];
14089 operands
[6] = part
[1][0];
14096 operands
[2] = part
[0][0];
14097 operands
[3] = part
[0][1];
14098 operands
[4] = part
[0][2];
14099 operands
[5] = part
[1][0];
14100 operands
[6] = part
[1][1];
14101 operands
[7] = part
[1][2];
14105 operands
[2] = part
[0][0];
14106 operands
[3] = part
[0][1];
14107 operands
[5] = part
[1][0];
14108 operands
[6] = part
[1][1];
14112 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14115 if (CONST_INT_P (operands
[5])
14116 && operands
[5] != const0_rtx
14117 && REG_P (operands
[2]))
14119 if (CONST_INT_P (operands
[6])
14120 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
14121 operands
[6] = operands
[2];
14124 && CONST_INT_P (operands
[7])
14125 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
14126 operands
[7] = operands
[2];
14130 && CONST_INT_P (operands
[6])
14131 && operands
[6] != const0_rtx
14132 && REG_P (operands
[3])
14133 && CONST_INT_P (operands
[7])
14134 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
14135 operands
[7] = operands
[3];
14138 emit_move_insn (operands
[2], operands
[5]);
14139 emit_move_insn (operands
[3], operands
[6]);
14141 emit_move_insn (operands
[4], operands
[7]);
14146 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14147 left shift by a constant, either using a single shift or
14148 a sequence of add instructions. */
14151 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
14155 emit_insn ((mode
== DImode
14157 : gen_adddi3
) (operand
, operand
, operand
));
14159 else if (!optimize_size
14160 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
14163 for (i
=0; i
<count
; i
++)
14165 emit_insn ((mode
== DImode
14167 : gen_adddi3
) (operand
, operand
, operand
));
14171 emit_insn ((mode
== DImode
14173 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
14177 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14179 rtx low
[2], high
[2];
14181 const int single_width
= mode
== DImode
? 32 : 64;
14183 if (CONST_INT_P (operands
[2]))
14185 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14186 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14188 if (count
>= single_width
)
14190 emit_move_insn (high
[0], low
[1]);
14191 emit_move_insn (low
[0], const0_rtx
);
14193 if (count
> single_width
)
14194 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
14198 if (!rtx_equal_p (operands
[0], operands
[1]))
14199 emit_move_insn (operands
[0], operands
[1]);
14200 emit_insn ((mode
== DImode
14202 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
14203 ix86_expand_ashl_const (low
[0], count
, mode
);
14208 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14210 if (operands
[1] == const1_rtx
)
14212 /* Assuming we've chosen a QImode capable registers, then 1 << N
14213 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14214 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
14216 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
14218 ix86_expand_clear (low
[0]);
14219 ix86_expand_clear (high
[0]);
14220 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
14222 d
= gen_lowpart (QImode
, low
[0]);
14223 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
14224 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
14225 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
14227 d
= gen_lowpart (QImode
, high
[0]);
14228 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
14229 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
14230 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
14233 /* Otherwise, we can get the same results by manually performing
14234 a bit extract operation on bit 5/6, and then performing the two
14235 shifts. The two methods of getting 0/1 into low/high are exactly
14236 the same size. Avoiding the shift in the bit extract case helps
14237 pentium4 a bit; no one else seems to care much either way. */
14242 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
14243 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
14245 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
14246 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
14248 emit_insn ((mode
== DImode
14250 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
14251 emit_insn ((mode
== DImode
14253 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
14254 emit_move_insn (low
[0], high
[0]);
14255 emit_insn ((mode
== DImode
14257 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
14260 emit_insn ((mode
== DImode
14262 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
14263 emit_insn ((mode
== DImode
14265 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
14269 if (operands
[1] == constm1_rtx
)
14271 /* For -1 << N, we can avoid the shld instruction, because we
14272 know that we're shifting 0...31/63 ones into a -1. */
14273 emit_move_insn (low
[0], constm1_rtx
);
14275 emit_move_insn (high
[0], low
[0]);
14277 emit_move_insn (high
[0], constm1_rtx
);
14281 if (!rtx_equal_p (operands
[0], operands
[1]))
14282 emit_move_insn (operands
[0], operands
[1]);
14284 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14285 emit_insn ((mode
== DImode
14287 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
14290 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
14292 if (TARGET_CMOVE
&& scratch
)
14294 ix86_expand_clear (scratch
);
14295 emit_insn ((mode
== DImode
14296 ? gen_x86_shift_adj_1
14297 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
14300 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
14304 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14306 rtx low
[2], high
[2];
14308 const int single_width
= mode
== DImode
? 32 : 64;
14310 if (CONST_INT_P (operands
[2]))
14312 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14313 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14315 if (count
== single_width
* 2 - 1)
14317 emit_move_insn (high
[0], high
[1]);
14318 emit_insn ((mode
== DImode
14320 : gen_ashrdi3
) (high
[0], high
[0],
14321 GEN_INT (single_width
- 1)));
14322 emit_move_insn (low
[0], high
[0]);
14325 else if (count
>= single_width
)
14327 emit_move_insn (low
[0], high
[1]);
14328 emit_move_insn (high
[0], low
[0]);
14329 emit_insn ((mode
== DImode
14331 : gen_ashrdi3
) (high
[0], high
[0],
14332 GEN_INT (single_width
- 1)));
14333 if (count
> single_width
)
14334 emit_insn ((mode
== DImode
14336 : gen_ashrdi3
) (low
[0], low
[0],
14337 GEN_INT (count
- single_width
)));
14341 if (!rtx_equal_p (operands
[0], operands
[1]))
14342 emit_move_insn (operands
[0], operands
[1]);
14343 emit_insn ((mode
== DImode
14345 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14346 emit_insn ((mode
== DImode
14348 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14353 if (!rtx_equal_p (operands
[0], operands
[1]))
14354 emit_move_insn (operands
[0], operands
[1]);
14356 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14358 emit_insn ((mode
== DImode
14360 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14361 emit_insn ((mode
== DImode
14363 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
14365 if (TARGET_CMOVE
&& scratch
)
14367 emit_move_insn (scratch
, high
[0]);
14368 emit_insn ((mode
== DImode
14370 : gen_ashrdi3
) (scratch
, scratch
,
14371 GEN_INT (single_width
- 1)));
14372 emit_insn ((mode
== DImode
14373 ? gen_x86_shift_adj_1
14374 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14378 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
14383 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14385 rtx low
[2], high
[2];
14387 const int single_width
= mode
== DImode
? 32 : 64;
14389 if (CONST_INT_P (operands
[2]))
14391 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14392 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14394 if (count
>= single_width
)
14396 emit_move_insn (low
[0], high
[1]);
14397 ix86_expand_clear (high
[0]);
14399 if (count
> single_width
)
14400 emit_insn ((mode
== DImode
14402 : gen_lshrdi3
) (low
[0], low
[0],
14403 GEN_INT (count
- single_width
)));
14407 if (!rtx_equal_p (operands
[0], operands
[1]))
14408 emit_move_insn (operands
[0], operands
[1]);
14409 emit_insn ((mode
== DImode
14411 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14412 emit_insn ((mode
== DImode
14414 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14419 if (!rtx_equal_p (operands
[0], operands
[1]))
14420 emit_move_insn (operands
[0], operands
[1]);
14422 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14424 emit_insn ((mode
== DImode
14426 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14427 emit_insn ((mode
== DImode
14429 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
14431 /* Heh. By reversing the arguments, we can reuse this pattern. */
14432 if (TARGET_CMOVE
&& scratch
)
14434 ix86_expand_clear (scratch
);
14435 emit_insn ((mode
== DImode
14436 ? gen_x86_shift_adj_1
14437 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14441 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
14445 /* Predict just emitted jump instruction to be taken with probability PROB. */
14447 predict_jump (int prob
)
14449 rtx insn
= get_last_insn ();
14450 gcc_assert (JUMP_P (insn
));
14452 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
14457 /* Helper function for the string operations below. Dest VARIABLE whether
14458 it is aligned to VALUE bytes. If true, jump to the label. */
14460 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
14462 rtx label
= gen_label_rtx ();
14463 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
14464 if (GET_MODE (variable
) == DImode
)
14465 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
14467 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
14468 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
14471 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
14473 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14477 /* Adjust COUNTER by the VALUE. */
14479 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
14481 if (GET_MODE (countreg
) == DImode
)
14482 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
14484 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
14487 /* Zero extend possibly SImode EXP to Pmode register. */
14489 ix86_zero_extend_to_Pmode (rtx exp
)
14492 if (GET_MODE (exp
) == VOIDmode
)
14493 return force_reg (Pmode
, exp
);
14494 if (GET_MODE (exp
) == Pmode
)
14495 return copy_to_mode_reg (Pmode
, exp
);
14496 r
= gen_reg_rtx (Pmode
);
14497 emit_insn (gen_zero_extendsidi2 (r
, exp
));
14501 /* Divide COUNTREG by SCALE. */
14503 scale_counter (rtx countreg
, int scale
)
14506 rtx piece_size_mask
;
14510 if (CONST_INT_P (countreg
))
14511 return GEN_INT (INTVAL (countreg
) / scale
);
14512 gcc_assert (REG_P (countreg
));
14514 piece_size_mask
= GEN_INT (scale
- 1);
14515 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
14516 GEN_INT (exact_log2 (scale
)),
14517 NULL
, 1, OPTAB_DIRECT
);
14521 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14522 DImode for constant loop counts. */
14524 static enum machine_mode
14525 counter_mode (rtx count_exp
)
14527 if (GET_MODE (count_exp
) != VOIDmode
)
14528 return GET_MODE (count_exp
);
14529 if (GET_CODE (count_exp
) != CONST_INT
)
14531 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
14536 /* When SRCPTR is non-NULL, output simple loop to move memory
14537 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14538 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14539 equivalent loop to set memory by VALUE (supposed to be in MODE).
14541 The size is rounded down to whole number of chunk size moved at once.
14542 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14546 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
14547 rtx destptr
, rtx srcptr
, rtx value
,
14548 rtx count
, enum machine_mode mode
, int unroll
,
14551 rtx out_label
, top_label
, iter
, tmp
;
14552 enum machine_mode iter_mode
= counter_mode (count
);
14553 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
14554 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
14560 top_label
= gen_label_rtx ();
14561 out_label
= gen_label_rtx ();
14562 iter
= gen_reg_rtx (iter_mode
);
14564 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
14565 NULL
, 1, OPTAB_DIRECT
);
14566 /* Those two should combine. */
14567 if (piece_size
== const1_rtx
)
14569 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
14571 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
14573 emit_move_insn (iter
, const0_rtx
);
14575 emit_label (top_label
);
14577 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
14578 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
14579 destmem
= change_address (destmem
, mode
, x_addr
);
14583 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
14584 srcmem
= change_address (srcmem
, mode
, y_addr
);
14586 /* When unrolling for chips that reorder memory reads and writes,
14587 we can save registers by using single temporary.
14588 Also using 4 temporaries is overkill in 32bit mode. */
14589 if (!TARGET_64BIT
&& 0)
14591 for (i
= 0; i
< unroll
; i
++)
14596 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14598 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14600 emit_move_insn (destmem
, srcmem
);
14606 gcc_assert (unroll
<= 4);
14607 for (i
= 0; i
< unroll
; i
++)
14609 tmpreg
[i
] = gen_reg_rtx (mode
);
14613 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14615 emit_move_insn (tmpreg
[i
], srcmem
);
14617 for (i
= 0; i
< unroll
; i
++)
14622 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14624 emit_move_insn (destmem
, tmpreg
[i
]);
14629 for (i
= 0; i
< unroll
; i
++)
14633 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14634 emit_move_insn (destmem
, value
);
14637 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
14638 true, OPTAB_LIB_WIDEN
);
14640 emit_move_insn (iter
, tmp
);
14642 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
14644 if (expected_size
!= -1)
14646 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
14647 if (expected_size
== 0)
14649 else if (expected_size
> REG_BR_PROB_BASE
)
14650 predict_jump (REG_BR_PROB_BASE
- 1);
14652 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
14655 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
14656 iter
= ix86_zero_extend_to_Pmode (iter
);
14657 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
14658 true, OPTAB_LIB_WIDEN
);
14659 if (tmp
!= destptr
)
14660 emit_move_insn (destptr
, tmp
);
14663 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
14664 true, OPTAB_LIB_WIDEN
);
14666 emit_move_insn (srcptr
, tmp
);
14668 emit_label (out_label
);
14671 /* Output "rep; mov" instruction.
14672 Arguments have same meaning as for previous function */
14674 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
14675 rtx destptr
, rtx srcptr
,
14677 enum machine_mode mode
)
14683 /* If the size is known, it is shorter to use rep movs. */
14684 if (mode
== QImode
&& CONST_INT_P (count
)
14685 && !(INTVAL (count
) & 3))
14688 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14689 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14690 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
14691 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
14692 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14693 if (mode
!= QImode
)
14695 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14696 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14697 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14698 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14699 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14700 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
14704 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14705 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
14707 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
14711 /* Output "rep; stos" instruction.
14712 Arguments have same meaning as for previous function */
14714 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
14716 enum machine_mode mode
)
14721 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14722 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14723 value
= force_reg (mode
, gen_lowpart (mode
, value
));
14724 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14725 if (mode
!= QImode
)
14727 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14728 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14729 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14732 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14733 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
14737 emit_strmov (rtx destmem
, rtx srcmem
,
14738 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
14740 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
14741 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
14742 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14745 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14747 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
14748 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
14751 if (CONST_INT_P (count
))
14753 HOST_WIDE_INT countval
= INTVAL (count
);
14756 if ((countval
& 0x10) && max_size
> 16)
14760 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14761 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
14764 gcc_unreachable ();
14767 if ((countval
& 0x08) && max_size
> 8)
14770 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14773 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14774 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
14778 if ((countval
& 0x04) && max_size
> 4)
14780 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14783 if ((countval
& 0x02) && max_size
> 2)
14785 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
14788 if ((countval
& 0x01) && max_size
> 1)
14790 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
14797 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
14798 count
, 1, OPTAB_DIRECT
);
14799 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
14800 count
, QImode
, 1, 4);
14804 /* When there are stringops, we can cheaply increase dest and src pointers.
14805 Otherwise we save code size by maintaining offset (zero is readily
14806 available from preceding rep operation) and using x86 addressing modes.
14808 if (TARGET_SINGLE_STRINGOP
)
14812 rtx label
= ix86_expand_aligntest (count
, 4, true);
14813 src
= change_address (srcmem
, SImode
, srcptr
);
14814 dest
= change_address (destmem
, SImode
, destptr
);
14815 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14816 emit_label (label
);
14817 LABEL_NUSES (label
) = 1;
14821 rtx label
= ix86_expand_aligntest (count
, 2, true);
14822 src
= change_address (srcmem
, HImode
, srcptr
);
14823 dest
= change_address (destmem
, HImode
, destptr
);
14824 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14825 emit_label (label
);
14826 LABEL_NUSES (label
) = 1;
14830 rtx label
= ix86_expand_aligntest (count
, 1, true);
14831 src
= change_address (srcmem
, QImode
, srcptr
);
14832 dest
= change_address (destmem
, QImode
, destptr
);
14833 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14834 emit_label (label
);
14835 LABEL_NUSES (label
) = 1;
14840 rtx offset
= force_reg (Pmode
, const0_rtx
);
14845 rtx label
= ix86_expand_aligntest (count
, 4, true);
14846 src
= change_address (srcmem
, SImode
, srcptr
);
14847 dest
= change_address (destmem
, SImode
, destptr
);
14848 emit_move_insn (dest
, src
);
14849 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
14850 true, OPTAB_LIB_WIDEN
);
14852 emit_move_insn (offset
, tmp
);
14853 emit_label (label
);
14854 LABEL_NUSES (label
) = 1;
14858 rtx label
= ix86_expand_aligntest (count
, 2, true);
14859 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14860 src
= change_address (srcmem
, HImode
, tmp
);
14861 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14862 dest
= change_address (destmem
, HImode
, tmp
);
14863 emit_move_insn (dest
, src
);
14864 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
14865 true, OPTAB_LIB_WIDEN
);
14867 emit_move_insn (offset
, tmp
);
14868 emit_label (label
);
14869 LABEL_NUSES (label
) = 1;
14873 rtx label
= ix86_expand_aligntest (count
, 1, true);
14874 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14875 src
= change_address (srcmem
, QImode
, tmp
);
14876 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14877 dest
= change_address (destmem
, QImode
, tmp
);
14878 emit_move_insn (dest
, src
);
14879 emit_label (label
);
14880 LABEL_NUSES (label
) = 1;
14885 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14887 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
14888 rtx count
, int max_size
)
14891 expand_simple_binop (counter_mode (count
), AND
, count
,
14892 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14893 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14894 gen_lowpart (QImode
, value
), count
, QImode
,
14898 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14900 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
14904 if (CONST_INT_P (count
))
14906 HOST_WIDE_INT countval
= INTVAL (count
);
14909 if ((countval
& 0x10) && max_size
> 16)
14913 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14914 emit_insn (gen_strset (destptr
, dest
, value
));
14915 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14916 emit_insn (gen_strset (destptr
, dest
, value
));
14919 gcc_unreachable ();
14922 if ((countval
& 0x08) && max_size
> 8)
14926 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14927 emit_insn (gen_strset (destptr
, dest
, value
));
14931 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14932 emit_insn (gen_strset (destptr
, dest
, value
));
14933 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14934 emit_insn (gen_strset (destptr
, dest
, value
));
14938 if ((countval
& 0x04) && max_size
> 4)
14940 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14941 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14944 if ((countval
& 0x02) && max_size
> 2)
14946 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14947 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14950 if ((countval
& 0x01) && max_size
> 1)
14952 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14953 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14960 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
14965 rtx label
= ix86_expand_aligntest (count
, 16, true);
14968 dest
= change_address (destmem
, DImode
, destptr
);
14969 emit_insn (gen_strset (destptr
, dest
, value
));
14970 emit_insn (gen_strset (destptr
, dest
, value
));
14974 dest
= change_address (destmem
, SImode
, destptr
);
14975 emit_insn (gen_strset (destptr
, dest
, value
));
14976 emit_insn (gen_strset (destptr
, dest
, value
));
14977 emit_insn (gen_strset (destptr
, dest
, value
));
14978 emit_insn (gen_strset (destptr
, dest
, value
));
14980 emit_label (label
);
14981 LABEL_NUSES (label
) = 1;
14985 rtx label
= ix86_expand_aligntest (count
, 8, true);
14988 dest
= change_address (destmem
, DImode
, destptr
);
14989 emit_insn (gen_strset (destptr
, dest
, value
));
14993 dest
= change_address (destmem
, SImode
, destptr
);
14994 emit_insn (gen_strset (destptr
, dest
, value
));
14995 emit_insn (gen_strset (destptr
, dest
, value
));
14997 emit_label (label
);
14998 LABEL_NUSES (label
) = 1;
15002 rtx label
= ix86_expand_aligntest (count
, 4, true);
15003 dest
= change_address (destmem
, SImode
, destptr
);
15004 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
15005 emit_label (label
);
15006 LABEL_NUSES (label
) = 1;
15010 rtx label
= ix86_expand_aligntest (count
, 2, true);
15011 dest
= change_address (destmem
, HImode
, destptr
);
15012 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
15013 emit_label (label
);
15014 LABEL_NUSES (label
) = 1;
15018 rtx label
= ix86_expand_aligntest (count
, 1, true);
15019 dest
= change_address (destmem
, QImode
, destptr
);
15020 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
15021 emit_label (label
);
15022 LABEL_NUSES (label
) = 1;
15026 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15027 DESIRED_ALIGNMENT. */
15029 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
15030 rtx destptr
, rtx srcptr
, rtx count
,
15031 int align
, int desired_alignment
)
15033 if (align
<= 1 && desired_alignment
> 1)
15035 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
15036 srcmem
= change_address (srcmem
, QImode
, srcptr
);
15037 destmem
= change_address (destmem
, QImode
, destptr
);
15038 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15039 ix86_adjust_counter (count
, 1);
15040 emit_label (label
);
15041 LABEL_NUSES (label
) = 1;
15043 if (align
<= 2 && desired_alignment
> 2)
15045 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
15046 srcmem
= change_address (srcmem
, HImode
, srcptr
);
15047 destmem
= change_address (destmem
, HImode
, destptr
);
15048 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15049 ix86_adjust_counter (count
, 2);
15050 emit_label (label
);
15051 LABEL_NUSES (label
) = 1;
15053 if (align
<= 4 && desired_alignment
> 4)
15055 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
15056 srcmem
= change_address (srcmem
, SImode
, srcptr
);
15057 destmem
= change_address (destmem
, SImode
, destptr
);
15058 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
15059 ix86_adjust_counter (count
, 4);
15060 emit_label (label
);
15061 LABEL_NUSES (label
) = 1;
15063 gcc_assert (desired_alignment
<= 8);
15066 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15067 DESIRED_ALIGNMENT. */
15069 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
15070 int align
, int desired_alignment
)
15072 if (align
<= 1 && desired_alignment
> 1)
15074 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
15075 destmem
= change_address (destmem
, QImode
, destptr
);
15076 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
15077 ix86_adjust_counter (count
, 1);
15078 emit_label (label
);
15079 LABEL_NUSES (label
) = 1;
15081 if (align
<= 2 && desired_alignment
> 2)
15083 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
15084 destmem
= change_address (destmem
, HImode
, destptr
);
15085 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
15086 ix86_adjust_counter (count
, 2);
15087 emit_label (label
);
15088 LABEL_NUSES (label
) = 1;
15090 if (align
<= 4 && desired_alignment
> 4)
15092 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
15093 destmem
= change_address (destmem
, SImode
, destptr
);
15094 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
15095 ix86_adjust_counter (count
, 4);
15096 emit_label (label
);
15097 LABEL_NUSES (label
) = 1;
15099 gcc_assert (desired_alignment
<= 8);
15102 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15103 static enum stringop_alg
15104 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
15105 int *dynamic_check
)
15107 const struct stringop_algs
* algs
;
15108 /* Algorithms using the rep prefix want at least edi and ecx;
15109 additionally, memset wants eax and memcpy wants esi. Don't
15110 consider such algorithms if the user has appropriated those
15111 registers for their own purposes. */
15112 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
15114 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
15116 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15117 || (alg != rep_prefix_1_byte \
15118 && alg != rep_prefix_4_byte \
15119 && alg != rep_prefix_8_byte))
15121 *dynamic_check
= -1;
15123 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
15125 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
15126 if (stringop_alg
!= no_stringop
&& ALG_USABLE_P (stringop_alg
))
15127 return stringop_alg
;
15128 /* rep; movq or rep; movl is the smallest variant. */
15129 else if (optimize_size
)
15131 if (!count
|| (count
& 3))
15132 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
15134 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
15136 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15138 else if (expected_size
!= -1 && expected_size
< 4)
15139 return loop_1_byte
;
15140 else if (expected_size
!= -1)
15143 enum stringop_alg alg
= libcall
;
15144 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
15146 /* We get here if the algorithms that were not libcall-based
15147 were rep-prefix based and we are unable to use rep prefixes
15148 based on global register usage. Break out of the loop and
15149 use the heuristic below. */
15150 if (algs
->size
[i
].max
== 0)
15152 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
15154 enum stringop_alg candidate
= algs
->size
[i
].alg
;
15156 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
15158 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15159 last non-libcall inline algorithm. */
15160 if (TARGET_INLINE_ALL_STRINGOPS
)
15162 /* When the current size is best to be copied by a libcall,
15163 but we are still forced to inline, run the heuristic below
15164 that will pick code for medium sized blocks. */
15165 if (alg
!= libcall
)
15169 else if (ALG_USABLE_P (candidate
))
15173 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
15175 /* When asked to inline the call anyway, try to pick meaningful choice.
15176 We look for maximal size of block that is faster to copy by hand and
15177 take blocks of at most of that size guessing that average size will
15178 be roughly half of the block.
15180 If this turns out to be bad, we might simply specify the preferred
15181 choice in ix86_costs. */
15182 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15183 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
15186 enum stringop_alg alg
;
15188 bool any_alg_usable_p
= true;
15190 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
15192 enum stringop_alg candidate
= algs
->size
[i
].alg
;
15193 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
15195 if (candidate
!= libcall
&& candidate
15196 && ALG_USABLE_P (candidate
))
15197 max
= algs
->size
[i
].max
;
15199 /* If there aren't any usable algorithms, then recursing on
15200 smaller sizes isn't going to find anything. Just return the
15201 simple byte-at-a-time copy loop. */
15202 if (!any_alg_usable_p
)
15204 /* Pick something reasonable. */
15205 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15206 *dynamic_check
= 128;
15207 return loop_1_byte
;
15211 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
15212 gcc_assert (*dynamic_check
== -1);
15213 gcc_assert (alg
!= libcall
);
15214 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
15215 *dynamic_check
= max
;
15218 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
15219 #undef ALG_USABLE_P
15222 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15223 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15225 decide_alignment (int align
,
15226 enum stringop_alg alg
,
15229 int desired_align
= 0;
15233 gcc_unreachable ();
15235 case unrolled_loop
:
15236 desired_align
= GET_MODE_SIZE (Pmode
);
15238 case rep_prefix_8_byte
:
15241 case rep_prefix_4_byte
:
15242 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15243 copying whole cacheline at once. */
15244 if (TARGET_PENTIUMPRO
)
15249 case rep_prefix_1_byte
:
15250 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15251 copying whole cacheline at once. */
15252 if (TARGET_PENTIUMPRO
)
15266 if (desired_align
< align
)
15267 desired_align
= align
;
15268 if (expected_size
!= -1 && expected_size
< 4)
15269 desired_align
= align
;
15270 return desired_align
;
15273 /* Return the smallest power of 2 greater than VAL. */
15275 smallest_pow2_greater_than (int val
)
15283 /* Expand string move (memcpy) operation. Use i386 string operations when
15284 profitable. expand_clrmem contains similar code. The code depends upon
15285 architecture, block size and alignment, but always has the same
15288 1) Prologue guard: Conditional that jumps up to epilogues for small
15289 blocks that can be handled by epilogue alone. This is faster but
15290 also needed for correctness, since prologue assume the block is larger
15291 than the desired alignment.
15293 Optional dynamic check for size and libcall for large
15294 blocks is emitted here too, with -minline-stringops-dynamically.
15296 2) Prologue: copy first few bytes in order to get destination aligned
15297 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15298 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15299 We emit either a jump tree on power of two sized blocks, or a byte loop.
15301 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15302 with specified algorithm.
15304 4) Epilogue: code copying tail of the block that is too small to be
15305 handled by main body (or up to size guarded by prologue guard). */
15308 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
15309 rtx expected_align_exp
, rtx expected_size_exp
)
15315 rtx jump_around_label
= NULL
;
15316 HOST_WIDE_INT align
= 1;
15317 unsigned HOST_WIDE_INT count
= 0;
15318 HOST_WIDE_INT expected_size
= -1;
15319 int size_needed
= 0, epilogue_size_needed
;
15320 int desired_align
= 0;
15321 enum stringop_alg alg
;
15324 if (CONST_INT_P (align_exp
))
15325 align
= INTVAL (align_exp
);
15326 /* i386 can do misaligned access on reasonably increased cost. */
15327 if (CONST_INT_P (expected_align_exp
)
15328 && INTVAL (expected_align_exp
) > align
)
15329 align
= INTVAL (expected_align_exp
);
15330 if (CONST_INT_P (count_exp
))
15331 count
= expected_size
= INTVAL (count_exp
);
15332 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15333 expected_size
= INTVAL (expected_size_exp
);
15335 /* Step 0: Decide on preferred algorithm, desired alignment and
15336 size of chunks to be copied by main loop. */
15338 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
15339 desired_align
= decide_alignment (align
, alg
, expected_size
);
15341 if (!TARGET_ALIGN_STRINGOPS
)
15342 align
= desired_align
;
15344 if (alg
== libcall
)
15346 gcc_assert (alg
!= no_stringop
);
15348 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
15349 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15350 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
15355 gcc_unreachable ();
15357 size_needed
= GET_MODE_SIZE (Pmode
);
15359 case unrolled_loop
:
15360 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
15362 case rep_prefix_8_byte
:
15365 case rep_prefix_4_byte
:
15368 case rep_prefix_1_byte
:
15374 epilogue_size_needed
= size_needed
;
15376 /* Step 1: Prologue guard. */
15378 /* Alignment code needs count to be in register. */
15379 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15380 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
15381 gcc_assert (desired_align
>= 1 && align
>= 1);
15383 /* Ensure that alignment prologue won't copy past end of block. */
15384 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15386 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15387 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15388 Make sure it is power of 2. */
15389 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15391 if (CONST_INT_P (count_exp
))
15393 if (UINTVAL (count_exp
) < (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
15398 label
= gen_label_rtx ();
15399 emit_cmp_and_jump_insns (count_exp
,
15400 GEN_INT (epilogue_size_needed
),
15401 LTU
, 0, counter_mode (count_exp
), 1, label
);
15402 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
15403 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15405 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15409 /* Emit code to decide on runtime whether library call or inline should be
15411 if (dynamic_check
!= -1)
15413 if (CONST_INT_P (count_exp
))
15415 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
15417 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
15418 count_exp
= const0_rtx
;
15424 rtx hot_label
= gen_label_rtx ();
15425 jump_around_label
= gen_label_rtx ();
15426 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15427 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
15428 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15429 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
15430 emit_jump (jump_around_label
);
15431 emit_label (hot_label
);
15435 /* Step 2: Alignment prologue. */
15437 if (desired_align
> align
)
15439 /* Except for the first move in epilogue, we no longer know
15440 constant offset in aliasing info. It don't seems to worth
15441 the pain to maintain it for the first move, so throw away
15443 src
= change_address (src
, BLKmode
, srcreg
);
15444 dst
= change_address (dst
, BLKmode
, destreg
);
15445 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
15448 if (label
&& size_needed
== 1)
15450 emit_label (label
);
15451 LABEL_NUSES (label
) = 1;
15455 /* Step 3: Main loop. */
15461 gcc_unreachable ();
15463 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15464 count_exp
, QImode
, 1, expected_size
);
15467 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15468 count_exp
, Pmode
, 1, expected_size
);
15470 case unrolled_loop
:
15471 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15472 registers for 4 temporaries anyway. */
15473 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15474 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
15477 case rep_prefix_8_byte
:
15478 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15481 case rep_prefix_4_byte
:
15482 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15485 case rep_prefix_1_byte
:
15486 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15490 /* Adjust properly the offset of src and dest memory for aliasing. */
15491 if (CONST_INT_P (count_exp
))
15493 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
15494 (count
/ size_needed
) * size_needed
);
15495 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15496 (count
/ size_needed
) * size_needed
);
15500 src
= change_address (src
, BLKmode
, srcreg
);
15501 dst
= change_address (dst
, BLKmode
, destreg
);
15504 /* Step 4: Epilogue to copy the remaining bytes. */
15508 /* When the main loop is done, COUNT_EXP might hold original count,
15509 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15510 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15511 bytes. Compensate if needed. */
15513 if (size_needed
< epilogue_size_needed
)
15516 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15517 GEN_INT (size_needed
- 1), count_exp
, 1,
15519 if (tmp
!= count_exp
)
15520 emit_move_insn (count_exp
, tmp
);
15522 emit_label (label
);
15523 LABEL_NUSES (label
) = 1;
15526 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15527 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
15528 epilogue_size_needed
);
15529 if (jump_around_label
)
15530 emit_label (jump_around_label
);
15534 /* Helper function for memcpy. For QImode value 0xXY produce
15535 0xXYXYXYXY of wide specified by MODE. This is essentially
15536 a * 0x10101010, but we can do slightly better than
15537 synth_mult by unwinding the sequence by hand on CPUs with
15540 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
15542 enum machine_mode valmode
= GET_MODE (val
);
15544 int nops
= mode
== DImode
? 3 : 2;
15546 gcc_assert (mode
== SImode
|| mode
== DImode
);
15547 if (val
== const0_rtx
)
15548 return copy_to_mode_reg (mode
, const0_rtx
);
15549 if (CONST_INT_P (val
))
15551 HOST_WIDE_INT v
= INTVAL (val
) & 255;
15555 if (mode
== DImode
)
15556 v
|= (v
<< 16) << 16;
15557 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
15560 if (valmode
== VOIDmode
)
15562 if (valmode
!= QImode
)
15563 val
= gen_lowpart (QImode
, val
);
15564 if (mode
== QImode
)
15566 if (!TARGET_PARTIAL_REG_STALL
)
15568 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
15569 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
15570 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
15571 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
15573 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15574 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
15575 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
15580 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15582 if (!TARGET_PARTIAL_REG_STALL
)
15583 if (mode
== SImode
)
15584 emit_insn (gen_movsi_insv_1 (reg
, reg
));
15586 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
15589 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
15590 NULL
, 1, OPTAB_DIRECT
);
15592 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15594 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
15595 NULL
, 1, OPTAB_DIRECT
);
15596 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15597 if (mode
== SImode
)
15599 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
15600 NULL
, 1, OPTAB_DIRECT
);
15601 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15606 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15607 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15608 alignment from ALIGN to DESIRED_ALIGN. */
15610 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
15615 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
15616 promoted_val
= promote_duplicated_reg (DImode
, val
);
15617 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
15618 promoted_val
= promote_duplicated_reg (SImode
, val
);
15619 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
15620 promoted_val
= promote_duplicated_reg (HImode
, val
);
15622 promoted_val
= val
;
15624 return promoted_val
;
15627 /* Expand string clear operation (bzero). Use i386 string operations when
15628 profitable. See expand_movmem comment for explanation of individual
15629 steps performed. */
15631 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
15632 rtx expected_align_exp
, rtx expected_size_exp
)
15637 rtx jump_around_label
= NULL
;
15638 HOST_WIDE_INT align
= 1;
15639 unsigned HOST_WIDE_INT count
= 0;
15640 HOST_WIDE_INT expected_size
= -1;
15641 int size_needed
= 0, epilogue_size_needed
;
15642 int desired_align
= 0;
15643 enum stringop_alg alg
;
15644 rtx promoted_val
= NULL
;
15645 bool force_loopy_epilogue
= false;
15648 if (CONST_INT_P (align_exp
))
15649 align
= INTVAL (align_exp
);
15650 /* i386 can do misaligned access on reasonably increased cost. */
15651 if (CONST_INT_P (expected_align_exp
)
15652 && INTVAL (expected_align_exp
) > align
)
15653 align
= INTVAL (expected_align_exp
);
15654 if (CONST_INT_P (count_exp
))
15655 count
= expected_size
= INTVAL (count_exp
);
15656 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15657 expected_size
= INTVAL (expected_size_exp
);
15659 /* Step 0: Decide on preferred algorithm, desired alignment and
15660 size of chunks to be copied by main loop. */
15662 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
15663 desired_align
= decide_alignment (align
, alg
, expected_size
);
15665 if (!TARGET_ALIGN_STRINGOPS
)
15666 align
= desired_align
;
15668 if (alg
== libcall
)
15670 gcc_assert (alg
!= no_stringop
);
15672 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
15673 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15678 gcc_unreachable ();
15680 size_needed
= GET_MODE_SIZE (Pmode
);
15682 case unrolled_loop
:
15683 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
15685 case rep_prefix_8_byte
:
15688 case rep_prefix_4_byte
:
15691 case rep_prefix_1_byte
:
15696 epilogue_size_needed
= size_needed
;
15698 /* Step 1: Prologue guard. */
15700 /* Alignment code needs count to be in register. */
15701 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15703 enum machine_mode mode
= SImode
;
15704 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15706 count_exp
= force_reg (mode
, count_exp
);
15708 /* Do the cheap promotion to allow better CSE across the
15709 main loop and epilogue (ie one load of the big constant in the
15710 front of all code. */
15711 if (CONST_INT_P (val_exp
))
15712 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15713 desired_align
, align
);
15714 /* Ensure that alignment prologue won't copy past end of block. */
15715 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15717 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15718 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15719 Make sure it is power of 2. */
15720 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15722 /* To improve performance of small blocks, we jump around the VAL
15723 promoting mode. This mean that if the promoted VAL is not constant,
15724 we might not use it in the epilogue and have to use byte
15726 if (epilogue_size_needed
> 2 && !promoted_val
)
15727 force_loopy_epilogue
= true;
15728 label
= gen_label_rtx ();
15729 emit_cmp_and_jump_insns (count_exp
,
15730 GEN_INT (epilogue_size_needed
),
15731 LTU
, 0, counter_mode (count_exp
), 1, label
);
15732 if (GET_CODE (count_exp
) == CONST_INT
)
15734 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
15735 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15737 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15739 if (dynamic_check
!= -1)
15741 rtx hot_label
= gen_label_rtx ();
15742 jump_around_label
= gen_label_rtx ();
15743 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15744 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
15745 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15746 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
15747 emit_jump (jump_around_label
);
15748 emit_label (hot_label
);
15751 /* Step 2: Alignment prologue. */
15753 /* Do the expensive promotion once we branched off the small blocks. */
15755 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15756 desired_align
, align
);
15757 gcc_assert (desired_align
>= 1 && align
>= 1);
15759 if (desired_align
> align
)
15761 /* Except for the first move in epilogue, we no longer know
15762 constant offset in aliasing info. It don't seems to worth
15763 the pain to maintain it for the first move, so throw away
15765 dst
= change_address (dst
, BLKmode
, destreg
);
15766 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
15769 if (label
&& size_needed
== 1)
15771 emit_label (label
);
15772 LABEL_NUSES (label
) = 1;
15776 /* Step 3: Main loop. */
15782 gcc_unreachable ();
15784 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15785 count_exp
, QImode
, 1, expected_size
);
15788 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15789 count_exp
, Pmode
, 1, expected_size
);
15791 case unrolled_loop
:
15792 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15793 count_exp
, Pmode
, 4, expected_size
);
15795 case rep_prefix_8_byte
:
15796 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15799 case rep_prefix_4_byte
:
15800 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15803 case rep_prefix_1_byte
:
15804 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15808 /* Adjust properly the offset of src and dest memory for aliasing. */
15809 if (CONST_INT_P (count_exp
))
15810 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15811 (count
/ size_needed
) * size_needed
);
15813 dst
= change_address (dst
, BLKmode
, destreg
);
15815 /* Step 4: Epilogue to copy the remaining bytes. */
15819 /* When the main loop is done, COUNT_EXP might hold original count,
15820 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15821 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15822 bytes. Compensate if needed. */
15824 if (size_needed
< desired_align
- align
)
15827 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15828 GEN_INT (size_needed
- 1), count_exp
, 1,
15830 size_needed
= desired_align
- align
+ 1;
15831 if (tmp
!= count_exp
)
15832 emit_move_insn (count_exp
, tmp
);
15834 emit_label (label
);
15835 LABEL_NUSES (label
) = 1;
15837 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15839 if (force_loopy_epilogue
)
15840 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
15843 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
15846 if (jump_around_label
)
15847 emit_label (jump_around_label
);
15851 /* Expand the appropriate insns for doing strlen if not just doing
15854 out = result, initialized with the start address
15855 align_rtx = alignment of the address.
15856 scratch = scratch register, initialized with the startaddress when
15857 not aligned, otherwise undefined
15859 This is just the body. It needs the initializations mentioned above and
15860 some address computing at the end. These things are done in i386.md. */
15863 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
15867 rtx align_2_label
= NULL_RTX
;
15868 rtx align_3_label
= NULL_RTX
;
15869 rtx align_4_label
= gen_label_rtx ();
15870 rtx end_0_label
= gen_label_rtx ();
15872 rtx tmpreg
= gen_reg_rtx (SImode
);
15873 rtx scratch
= gen_reg_rtx (SImode
);
15877 if (CONST_INT_P (align_rtx
))
15878 align
= INTVAL (align_rtx
);
15880 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15882 /* Is there a known alignment and is it less than 4? */
15885 rtx scratch1
= gen_reg_rtx (Pmode
);
15886 emit_move_insn (scratch1
, out
);
15887 /* Is there a known alignment and is it not 2? */
15890 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
15891 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
15893 /* Leave just the 3 lower bits. */
15894 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
15895 NULL_RTX
, 0, OPTAB_WIDEN
);
15897 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15898 Pmode
, 1, align_4_label
);
15899 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
15900 Pmode
, 1, align_2_label
);
15901 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
15902 Pmode
, 1, align_3_label
);
15906 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15907 check if is aligned to 4 - byte. */
15909 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
15910 NULL_RTX
, 0, OPTAB_WIDEN
);
15912 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15913 Pmode
, 1, align_4_label
);
15916 mem
= change_address (src
, QImode
, out
);
15918 /* Now compare the bytes. */
15920 /* Compare the first n unaligned byte on a byte per byte basis. */
15921 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
15922 QImode
, 1, end_0_label
);
15924 /* Increment the address. */
15926 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15928 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15930 /* Not needed with an alignment of 2 */
15933 emit_label (align_2_label
);
15935 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15939 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15941 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15943 emit_label (align_3_label
);
15946 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15950 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15952 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15955 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15956 align this loop. It gives only huge programs, but does not help to
15958 emit_label (align_4_label
);
15960 mem
= change_address (src
, SImode
, out
);
15961 emit_move_insn (scratch
, mem
);
15963 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15965 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
15967 /* This formula yields a nonzero result iff one of the bytes is zero.
15968 This saves three branches inside loop and many cycles. */
15970 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15971 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15972 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15973 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15974 gen_int_mode (0x80808080, SImode
)));
15975 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15980 rtx reg
= gen_reg_rtx (SImode
);
15981 rtx reg2
= gen_reg_rtx (Pmode
);
15982 emit_move_insn (reg
, tmpreg
);
15983 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15985 /* If zero is not in the first two bytes, move two bytes forward. */
15986 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15987 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15988 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15989 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15990 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15993 /* Emit lea manually to avoid clobbering of flags. */
15994 emit_insn (gen_rtx_SET (SImode
, reg2
,
15995 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15997 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15998 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15999 emit_insn (gen_rtx_SET (VOIDmode
, out
,
16000 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
16007 rtx end_2_label
= gen_label_rtx ();
16008 /* Is zero in the first two bytes? */
16010 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
16011 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16012 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
16013 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
16014 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
16016 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
16017 JUMP_LABEL (tmp
) = end_2_label
;
16019 /* Not in the first two. Move two bytes forward. */
16020 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
16022 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
16024 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
16026 emit_label (end_2_label
);
16030 /* Avoid branch in fixing the byte. */
16031 tmpreg
= gen_lowpart (QImode
, tmpreg
);
16032 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
16033 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, FLAGS_REG
), const0_rtx
);
16035 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
16037 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
16039 emit_label (end_0_label
);
16042 /* Expand strlen. */
16045 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
16047 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
16049 /* The generic case of strlen expander is long. Avoid it's
16050 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16052 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
16053 && !TARGET_INLINE_ALL_STRINGOPS
16055 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
16058 addr
= force_reg (Pmode
, XEXP (src
, 0));
16059 scratch1
= gen_reg_rtx (Pmode
);
16061 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
16064 /* Well it seems that some optimizer does not combine a call like
16065 foo(strlen(bar), strlen(bar));
16066 when the move and the subtraction is done here. It does calculate
16067 the length just once when these instructions are done inside of
16068 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16069 often used and I use one fewer register for the lifetime of
16070 output_strlen_unroll() this is better. */
16072 emit_move_insn (out
, addr
);
16074 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
16076 /* strlensi_unroll_1 returns the address of the zero at the end of
16077 the string, like memchr(), so compute the length by subtracting
16078 the start address. */
16080 emit_insn (gen_subdi3 (out
, out
, addr
));
16082 emit_insn (gen_subsi3 (out
, out
, addr
));
16088 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16089 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
16092 scratch2
= gen_reg_rtx (Pmode
);
16093 scratch3
= gen_reg_rtx (Pmode
);
16094 scratch4
= force_reg (Pmode
, constm1_rtx
);
16096 emit_move_insn (scratch3
, addr
);
16097 eoschar
= force_reg (QImode
, eoschar
);
16099 src
= replace_equiv_address_nv (src
, scratch3
);
16101 /* If .md starts supporting :P, this can be done in .md. */
16102 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
16103 scratch4
), UNSPEC_SCAS
);
16104 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
16107 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
16108 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
16112 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
16113 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
16119 /* For given symbol (function) construct code to compute address of it's PLT
16120 entry in large x86-64 PIC model. */
16122 construct_plt_address (rtx symbol
)
16124 rtx tmp
= gen_reg_rtx (Pmode
);
16125 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
16127 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
16128 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
16130 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
16131 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
16136 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
16137 rtx callarg2 ATTRIBUTE_UNUSED
,
16138 rtx pop
, int sibcall
)
16140 rtx use
= NULL
, call
;
16142 if (pop
== const0_rtx
)
16144 gcc_assert (!TARGET_64BIT
|| !pop
);
16146 if (TARGET_MACHO
&& !TARGET_64BIT
)
16149 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
16150 fnaddr
= machopic_indirect_call_target (fnaddr
);
16155 /* Static functions and indirect calls don't need the pic register. */
16156 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
16157 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
16158 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
16159 use_reg (&use
, pic_offset_table_rtx
);
16162 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
16164 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
16165 emit_move_insn (al
, callarg2
);
16166 use_reg (&use
, al
);
16169 if (ix86_cmodel
== CM_LARGE_PIC
16170 && GET_CODE (fnaddr
) == MEM
16171 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
16172 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
16173 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
16174 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
16176 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
16177 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
16179 if (sibcall
&& TARGET_64BIT
16180 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
16183 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
16184 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
16185 emit_move_insn (fnaddr
, addr
);
16186 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
16189 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
16191 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
16194 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
16195 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
16196 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
16199 call
= emit_call_insn (call
);
16201 CALL_INSN_FUNCTION_USAGE (call
) = use
;
16205 /* Clear stack slot assignments remembered from previous functions.
16206 This is called from INIT_EXPANDERS once before RTL is emitted for each
16209 static struct machine_function
*
16210 ix86_init_machine_status (void)
16212 struct machine_function
*f
;
16214 f
= GGC_CNEW (struct machine_function
);
16215 f
->use_fast_prologue_epilogue_nregs
= -1;
16216 f
->tls_descriptor_call_expanded_p
= 0;
16221 /* Return a MEM corresponding to a stack slot with mode MODE.
16222 Allocate a new slot if necessary.
16224 The RTL for a function can have several slots available: N is
16225 which slot to use. */
16228 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
16230 struct stack_local_entry
*s
;
16232 gcc_assert (n
< MAX_386_STACK_LOCALS
);
16234 /* Virtual slot is valid only before vregs are instantiated. */
16235 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
16237 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
16238 if (s
->mode
== mode
&& s
->n
== n
)
16239 return copy_rtx (s
->rtl
);
16241 s
= (struct stack_local_entry
*)
16242 ggc_alloc (sizeof (struct stack_local_entry
));
16245 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
16247 s
->next
= ix86_stack_locals
;
16248 ix86_stack_locals
= s
;
16252 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16254 static GTY(()) rtx ix86_tls_symbol
;
16256 ix86_tls_get_addr (void)
16259 if (!ix86_tls_symbol
)
16261 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
16262 (TARGET_ANY_GNU_TLS
16264 ? "___tls_get_addr"
16265 : "__tls_get_addr");
16268 return ix86_tls_symbol
;
16271 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16273 static GTY(()) rtx ix86_tls_module_base_symbol
;
16275 ix86_tls_module_base (void)
16278 if (!ix86_tls_module_base_symbol
)
16280 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
16281 "_TLS_MODULE_BASE_");
16282 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
16283 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
16286 return ix86_tls_module_base_symbol
;
16289 /* Calculate the length of the memory address in the instruction
16290 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16293 memory_address_length (rtx addr
)
16295 struct ix86_address parts
;
16296 rtx base
, index
, disp
;
16300 if (GET_CODE (addr
) == PRE_DEC
16301 || GET_CODE (addr
) == POST_INC
16302 || GET_CODE (addr
) == PRE_MODIFY
16303 || GET_CODE (addr
) == POST_MODIFY
)
16306 ok
= ix86_decompose_address (addr
, &parts
);
16309 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
16310 parts
.base
= SUBREG_REG (parts
.base
);
16311 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
16312 parts
.index
= SUBREG_REG (parts
.index
);
16315 index
= parts
.index
;
16320 - esp as the base always wants an index,
16321 - ebp as the base always wants a displacement. */
16323 /* Register Indirect. */
16324 if (base
&& !index
&& !disp
)
16326 /* esp (for its index) and ebp (for its displacement) need
16327 the two-byte modrm form. */
16328 if (addr
== stack_pointer_rtx
16329 || addr
== arg_pointer_rtx
16330 || addr
== frame_pointer_rtx
16331 || addr
== hard_frame_pointer_rtx
)
16335 /* Direct Addressing. */
16336 else if (disp
&& !base
&& !index
)
16341 /* Find the length of the displacement constant. */
16344 if (base
&& satisfies_constraint_K (disp
))
16349 /* ebp always wants a displacement. */
16350 else if (base
== hard_frame_pointer_rtx
)
16353 /* An index requires the two-byte modrm form.... */
16355 /* ...like esp, which always wants an index. */
16356 || base
== stack_pointer_rtx
16357 || base
== arg_pointer_rtx
16358 || base
== frame_pointer_rtx
)
16365 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16366 is set, expect that insn have 8bit immediate alternative. */
16368 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
16372 extract_insn_cached (insn
);
16373 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16374 if (CONSTANT_P (recog_data
.operand
[i
]))
16377 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
16381 switch (get_attr_mode (insn
))
16392 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16397 fatal_insn ("unknown insn mode", insn
);
16403 /* Compute default value for "length_address" attribute. */
16405 ix86_attr_length_address_default (rtx insn
)
16409 if (get_attr_type (insn
) == TYPE_LEA
)
16411 rtx set
= PATTERN (insn
);
16413 if (GET_CODE (set
) == PARALLEL
)
16414 set
= XVECEXP (set
, 0, 0);
16416 gcc_assert (GET_CODE (set
) == SET
);
16418 return memory_address_length (SET_SRC (set
));
16421 extract_insn_cached (insn
);
16422 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16423 if (MEM_P (recog_data
.operand
[i
]))
16425 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
16431 /* Return the maximum number of instructions a cpu can issue. */
16434 ix86_issue_rate (void)
16438 case PROCESSOR_PENTIUM
:
16442 case PROCESSOR_PENTIUMPRO
:
16443 case PROCESSOR_PENTIUM4
:
16444 case PROCESSOR_ATHLON
:
16446 case PROCESSOR_AMDFAM10
:
16447 case PROCESSOR_NOCONA
:
16448 case PROCESSOR_GENERIC32
:
16449 case PROCESSOR_GENERIC64
:
16452 case PROCESSOR_CORE2
:
16460 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16461 by DEP_INSN and nothing set by DEP_INSN. */
16464 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16468 /* Simplify the test for uninteresting insns. */
16469 if (insn_type
!= TYPE_SETCC
16470 && insn_type
!= TYPE_ICMOV
16471 && insn_type
!= TYPE_FCMOV
16472 && insn_type
!= TYPE_IBR
)
16475 if ((set
= single_set (dep_insn
)) != 0)
16477 set
= SET_DEST (set
);
16480 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
16481 && XVECLEN (PATTERN (dep_insn
), 0) == 2
16482 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
16483 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
16485 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16486 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16491 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
16494 /* This test is true if the dependent insn reads the flags but
16495 not any other potentially set register. */
16496 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
16499 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
16505 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16506 address with operands set by DEP_INSN. */
16509 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16513 if (insn_type
== TYPE_LEA
16516 addr
= PATTERN (insn
);
16518 if (GET_CODE (addr
) == PARALLEL
)
16519 addr
= XVECEXP (addr
, 0, 0);
16521 gcc_assert (GET_CODE (addr
) == SET
);
16523 addr
= SET_SRC (addr
);
16528 extract_insn_cached (insn
);
16529 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16530 if (MEM_P (recog_data
.operand
[i
]))
16532 addr
= XEXP (recog_data
.operand
[i
], 0);
16539 return modified_in_p (addr
, dep_insn
);
16543 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
16545 enum attr_type insn_type
, dep_insn_type
;
16546 enum attr_memory memory
;
16548 int dep_insn_code_number
;
16550 /* Anti and output dependencies have zero cost on all CPUs. */
16551 if (REG_NOTE_KIND (link
) != 0)
16554 dep_insn_code_number
= recog_memoized (dep_insn
);
16556 /* If we can't recognize the insns, we can't really do anything. */
16557 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
16560 insn_type
= get_attr_type (insn
);
16561 dep_insn_type
= get_attr_type (dep_insn
);
16565 case PROCESSOR_PENTIUM
:
16566 /* Address Generation Interlock adds a cycle of latency. */
16567 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16570 /* ??? Compares pair with jump/setcc. */
16571 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
16574 /* Floating point stores require value to be ready one cycle earlier. */
16575 if (insn_type
== TYPE_FMOV
16576 && get_attr_memory (insn
) == MEMORY_STORE
16577 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16581 case PROCESSOR_PENTIUMPRO
:
16582 memory
= get_attr_memory (insn
);
16584 /* INT->FP conversion is expensive. */
16585 if (get_attr_fp_int_src (dep_insn
))
16588 /* There is one cycle extra latency between an FP op and a store. */
16589 if (insn_type
== TYPE_FMOV
16590 && (set
= single_set (dep_insn
)) != NULL_RTX
16591 && (set2
= single_set (insn
)) != NULL_RTX
16592 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
16593 && MEM_P (SET_DEST (set2
)))
16596 /* Show ability of reorder buffer to hide latency of load by executing
16597 in parallel with previous instruction in case
16598 previous instruction is not needed to compute the address. */
16599 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16600 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16602 /* Claim moves to take one cycle, as core can issue one load
16603 at time and the next load can start cycle later. */
16604 if (dep_insn_type
== TYPE_IMOV
16605 || dep_insn_type
== TYPE_FMOV
)
16613 memory
= get_attr_memory (insn
);
16615 /* The esp dependency is resolved before the instruction is really
16617 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
16618 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
16621 /* INT->FP conversion is expensive. */
16622 if (get_attr_fp_int_src (dep_insn
))
16625 /* Show ability of reorder buffer to hide latency of load by executing
16626 in parallel with previous instruction in case
16627 previous instruction is not needed to compute the address. */
16628 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16629 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16631 /* Claim moves to take one cycle, as core can issue one load
16632 at time and the next load can start cycle later. */
16633 if (dep_insn_type
== TYPE_IMOV
16634 || dep_insn_type
== TYPE_FMOV
)
16643 case PROCESSOR_ATHLON
:
16645 case PROCESSOR_AMDFAM10
:
16646 case PROCESSOR_GENERIC32
:
16647 case PROCESSOR_GENERIC64
:
16648 memory
= get_attr_memory (insn
);
16650 /* Show ability of reorder buffer to hide latency of load by executing
16651 in parallel with previous instruction in case
16652 previous instruction is not needed to compute the address. */
16653 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16654 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16656 enum attr_unit unit
= get_attr_unit (insn
);
16659 /* Because of the difference between the length of integer and
16660 floating unit pipeline preparation stages, the memory operands
16661 for floating point are cheaper.
16663 ??? For Athlon it the difference is most probably 2. */
16664 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
16667 loadcost
= TARGET_ATHLON
? 2 : 0;
16669 if (cost
>= loadcost
)
16682 /* How many alternative schedules to try. This should be as wide as the
16683 scheduling freedom in the DFA, but no wider. Making this value too
16684 large results extra work for the scheduler. */
16687 ia32_multipass_dfa_lookahead (void)
16691 case PROCESSOR_PENTIUM
:
16694 case PROCESSOR_PENTIUMPRO
:
16704 /* Compute the alignment given to a constant that is being placed in memory.
16705 EXP is the constant and ALIGN is the alignment that the object would
16707 The value of this function is used instead of that alignment to align
16711 ix86_constant_alignment (tree exp
, int align
)
16713 if (TREE_CODE (exp
) == REAL_CST
)
16715 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
16717 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
16720 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16721 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16722 return BITS_PER_WORD
;
16727 /* Compute the alignment for a static variable.
16728 TYPE is the data type, and ALIGN is the alignment that
16729 the object would ordinarily have. The value of this function is used
16730 instead of that alignment to align the object. */
16733 ix86_data_alignment (tree type
, int align
)
16735 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
16737 if (AGGREGATE_TYPE_P (type
)
16738 && TYPE_SIZE (type
)
16739 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16740 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
16741 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
16742 && align
< max_align
)
16745 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16746 to 16byte boundary. */
16749 if (AGGREGATE_TYPE_P (type
)
16750 && TYPE_SIZE (type
)
16751 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16752 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
16753 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16757 if (TREE_CODE (type
) == ARRAY_TYPE
)
16759 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16761 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16764 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16767 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16769 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16772 else if ((TREE_CODE (type
) == RECORD_TYPE
16773 || TREE_CODE (type
) == UNION_TYPE
16774 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16775 && TYPE_FIELDS (type
))
16777 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16779 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16782 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16783 || TREE_CODE (type
) == INTEGER_TYPE
)
16785 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16787 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16794 /* Compute the alignment for a local variable.
16795 TYPE is the data type, and ALIGN is the alignment that
16796 the object would ordinarily have. The value of this macro is used
16797 instead of that alignment to align the object. */
16800 ix86_local_alignment (tree type
, int align
)
16802 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16803 to 16byte boundary. */
16806 if (AGGREGATE_TYPE_P (type
)
16807 && TYPE_SIZE (type
)
16808 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16809 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
16810 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16813 if (TREE_CODE (type
) == ARRAY_TYPE
)
16815 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16817 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16820 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16822 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16824 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16827 else if ((TREE_CODE (type
) == RECORD_TYPE
16828 || TREE_CODE (type
) == UNION_TYPE
16829 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16830 && TYPE_FIELDS (type
))
16832 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16834 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16837 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16838 || TREE_CODE (type
) == INTEGER_TYPE
)
16841 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16843 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16849 /* Emit RTL insns to initialize the variable parts of a trampoline.
16850 FNADDR is an RTX for the address of the function's pure code.
16851 CXT is an RTX for the static chain value for the function. */
16853 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
16857 /* Compute offset from the end of the jmp to the target function. */
16858 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16859 plus_constant (tramp
, 10),
16860 NULL_RTX
, 1, OPTAB_DIRECT
);
16861 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
16862 gen_int_mode (0xb9, QImode
));
16863 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
16864 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
16865 gen_int_mode (0xe9, QImode
));
16866 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
16871 /* Try to load address using shorter movl instead of movabs.
16872 We may want to support movq for kernel mode, but kernel does not use
16873 trampolines at the moment. */
16874 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16876 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
16877 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16878 gen_int_mode (0xbb41, HImode
));
16879 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
16880 gen_lowpart (SImode
, fnaddr
));
16885 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16886 gen_int_mode (0xbb49, HImode
));
16887 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16891 /* Load static chain using movabs to r10. */
16892 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16893 gen_int_mode (0xba49, HImode
));
16894 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16897 /* Jump to the r11 */
16898 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16899 gen_int_mode (0xff49, HImode
));
16900 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
16901 gen_int_mode (0xe3, QImode
));
16903 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
16906 #ifdef ENABLE_EXECUTE_STACK
16907 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16908 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
16912 /* Codes for all the SSE/MMX builtins. */
16915 IX86_BUILTIN_ADDPS
,
16916 IX86_BUILTIN_ADDSS
,
16917 IX86_BUILTIN_DIVPS
,
16918 IX86_BUILTIN_DIVSS
,
16919 IX86_BUILTIN_MULPS
,
16920 IX86_BUILTIN_MULSS
,
16921 IX86_BUILTIN_SUBPS
,
16922 IX86_BUILTIN_SUBSS
,
16924 IX86_BUILTIN_CMPEQPS
,
16925 IX86_BUILTIN_CMPLTPS
,
16926 IX86_BUILTIN_CMPLEPS
,
16927 IX86_BUILTIN_CMPGTPS
,
16928 IX86_BUILTIN_CMPGEPS
,
16929 IX86_BUILTIN_CMPNEQPS
,
16930 IX86_BUILTIN_CMPNLTPS
,
16931 IX86_BUILTIN_CMPNLEPS
,
16932 IX86_BUILTIN_CMPNGTPS
,
16933 IX86_BUILTIN_CMPNGEPS
,
16934 IX86_BUILTIN_CMPORDPS
,
16935 IX86_BUILTIN_CMPUNORDPS
,
16936 IX86_BUILTIN_CMPEQSS
,
16937 IX86_BUILTIN_CMPLTSS
,
16938 IX86_BUILTIN_CMPLESS
,
16939 IX86_BUILTIN_CMPNEQSS
,
16940 IX86_BUILTIN_CMPNLTSS
,
16941 IX86_BUILTIN_CMPNLESS
,
16942 IX86_BUILTIN_CMPNGTSS
,
16943 IX86_BUILTIN_CMPNGESS
,
16944 IX86_BUILTIN_CMPORDSS
,
16945 IX86_BUILTIN_CMPUNORDSS
,
16947 IX86_BUILTIN_COMIEQSS
,
16948 IX86_BUILTIN_COMILTSS
,
16949 IX86_BUILTIN_COMILESS
,
16950 IX86_BUILTIN_COMIGTSS
,
16951 IX86_BUILTIN_COMIGESS
,
16952 IX86_BUILTIN_COMINEQSS
,
16953 IX86_BUILTIN_UCOMIEQSS
,
16954 IX86_BUILTIN_UCOMILTSS
,
16955 IX86_BUILTIN_UCOMILESS
,
16956 IX86_BUILTIN_UCOMIGTSS
,
16957 IX86_BUILTIN_UCOMIGESS
,
16958 IX86_BUILTIN_UCOMINEQSS
,
16960 IX86_BUILTIN_CVTPI2PS
,
16961 IX86_BUILTIN_CVTPS2PI
,
16962 IX86_BUILTIN_CVTSI2SS
,
16963 IX86_BUILTIN_CVTSI642SS
,
16964 IX86_BUILTIN_CVTSS2SI
,
16965 IX86_BUILTIN_CVTSS2SI64
,
16966 IX86_BUILTIN_CVTTPS2PI
,
16967 IX86_BUILTIN_CVTTSS2SI
,
16968 IX86_BUILTIN_CVTTSS2SI64
,
16970 IX86_BUILTIN_MAXPS
,
16971 IX86_BUILTIN_MAXSS
,
16972 IX86_BUILTIN_MINPS
,
16973 IX86_BUILTIN_MINSS
,
16975 IX86_BUILTIN_LOADUPS
,
16976 IX86_BUILTIN_STOREUPS
,
16977 IX86_BUILTIN_MOVSS
,
16979 IX86_BUILTIN_MOVHLPS
,
16980 IX86_BUILTIN_MOVLHPS
,
16981 IX86_BUILTIN_LOADHPS
,
16982 IX86_BUILTIN_LOADLPS
,
16983 IX86_BUILTIN_STOREHPS
,
16984 IX86_BUILTIN_STORELPS
,
16986 IX86_BUILTIN_MASKMOVQ
,
16987 IX86_BUILTIN_MOVMSKPS
,
16988 IX86_BUILTIN_PMOVMSKB
,
16990 IX86_BUILTIN_MOVNTPS
,
16991 IX86_BUILTIN_MOVNTQ
,
16993 IX86_BUILTIN_LOADDQU
,
16994 IX86_BUILTIN_STOREDQU
,
16996 IX86_BUILTIN_PACKSSWB
,
16997 IX86_BUILTIN_PACKSSDW
,
16998 IX86_BUILTIN_PACKUSWB
,
17000 IX86_BUILTIN_PADDB
,
17001 IX86_BUILTIN_PADDW
,
17002 IX86_BUILTIN_PADDD
,
17003 IX86_BUILTIN_PADDQ
,
17004 IX86_BUILTIN_PADDSB
,
17005 IX86_BUILTIN_PADDSW
,
17006 IX86_BUILTIN_PADDUSB
,
17007 IX86_BUILTIN_PADDUSW
,
17008 IX86_BUILTIN_PSUBB
,
17009 IX86_BUILTIN_PSUBW
,
17010 IX86_BUILTIN_PSUBD
,
17011 IX86_BUILTIN_PSUBQ
,
17012 IX86_BUILTIN_PSUBSB
,
17013 IX86_BUILTIN_PSUBSW
,
17014 IX86_BUILTIN_PSUBUSB
,
17015 IX86_BUILTIN_PSUBUSW
,
17018 IX86_BUILTIN_PANDN
,
17022 IX86_BUILTIN_PAVGB
,
17023 IX86_BUILTIN_PAVGW
,
17025 IX86_BUILTIN_PCMPEQB
,
17026 IX86_BUILTIN_PCMPEQW
,
17027 IX86_BUILTIN_PCMPEQD
,
17028 IX86_BUILTIN_PCMPGTB
,
17029 IX86_BUILTIN_PCMPGTW
,
17030 IX86_BUILTIN_PCMPGTD
,
17032 IX86_BUILTIN_PMADDWD
,
17034 IX86_BUILTIN_PMAXSW
,
17035 IX86_BUILTIN_PMAXUB
,
17036 IX86_BUILTIN_PMINSW
,
17037 IX86_BUILTIN_PMINUB
,
17039 IX86_BUILTIN_PMULHUW
,
17040 IX86_BUILTIN_PMULHW
,
17041 IX86_BUILTIN_PMULLW
,
17043 IX86_BUILTIN_PSADBW
,
17044 IX86_BUILTIN_PSHUFW
,
17046 IX86_BUILTIN_PSLLW
,
17047 IX86_BUILTIN_PSLLD
,
17048 IX86_BUILTIN_PSLLQ
,
17049 IX86_BUILTIN_PSRAW
,
17050 IX86_BUILTIN_PSRAD
,
17051 IX86_BUILTIN_PSRLW
,
17052 IX86_BUILTIN_PSRLD
,
17053 IX86_BUILTIN_PSRLQ
,
17054 IX86_BUILTIN_PSLLWI
,
17055 IX86_BUILTIN_PSLLDI
,
17056 IX86_BUILTIN_PSLLQI
,
17057 IX86_BUILTIN_PSRAWI
,
17058 IX86_BUILTIN_PSRADI
,
17059 IX86_BUILTIN_PSRLWI
,
17060 IX86_BUILTIN_PSRLDI
,
17061 IX86_BUILTIN_PSRLQI
,
17063 IX86_BUILTIN_PUNPCKHBW
,
17064 IX86_BUILTIN_PUNPCKHWD
,
17065 IX86_BUILTIN_PUNPCKHDQ
,
17066 IX86_BUILTIN_PUNPCKLBW
,
17067 IX86_BUILTIN_PUNPCKLWD
,
17068 IX86_BUILTIN_PUNPCKLDQ
,
17070 IX86_BUILTIN_SHUFPS
,
17072 IX86_BUILTIN_RCPPS
,
17073 IX86_BUILTIN_RCPSS
,
17074 IX86_BUILTIN_RSQRTPS
,
17075 IX86_BUILTIN_RSQRTSS
,
17076 IX86_BUILTIN_RSQRTF
,
17077 IX86_BUILTIN_SQRTPS
,
17078 IX86_BUILTIN_SQRTSS
,
17080 IX86_BUILTIN_UNPCKHPS
,
17081 IX86_BUILTIN_UNPCKLPS
,
17083 IX86_BUILTIN_ANDPS
,
17084 IX86_BUILTIN_ANDNPS
,
17086 IX86_BUILTIN_XORPS
,
17089 IX86_BUILTIN_LDMXCSR
,
17090 IX86_BUILTIN_STMXCSR
,
17091 IX86_BUILTIN_SFENCE
,
17093 /* 3DNow! Original */
17094 IX86_BUILTIN_FEMMS
,
17095 IX86_BUILTIN_PAVGUSB
,
17096 IX86_BUILTIN_PF2ID
,
17097 IX86_BUILTIN_PFACC
,
17098 IX86_BUILTIN_PFADD
,
17099 IX86_BUILTIN_PFCMPEQ
,
17100 IX86_BUILTIN_PFCMPGE
,
17101 IX86_BUILTIN_PFCMPGT
,
17102 IX86_BUILTIN_PFMAX
,
17103 IX86_BUILTIN_PFMIN
,
17104 IX86_BUILTIN_PFMUL
,
17105 IX86_BUILTIN_PFRCP
,
17106 IX86_BUILTIN_PFRCPIT1
,
17107 IX86_BUILTIN_PFRCPIT2
,
17108 IX86_BUILTIN_PFRSQIT1
,
17109 IX86_BUILTIN_PFRSQRT
,
17110 IX86_BUILTIN_PFSUB
,
17111 IX86_BUILTIN_PFSUBR
,
17112 IX86_BUILTIN_PI2FD
,
17113 IX86_BUILTIN_PMULHRW
,
17115 /* 3DNow! Athlon Extensions */
17116 IX86_BUILTIN_PF2IW
,
17117 IX86_BUILTIN_PFNACC
,
17118 IX86_BUILTIN_PFPNACC
,
17119 IX86_BUILTIN_PI2FW
,
17120 IX86_BUILTIN_PSWAPDSI
,
17121 IX86_BUILTIN_PSWAPDSF
,
17124 IX86_BUILTIN_ADDPD
,
17125 IX86_BUILTIN_ADDSD
,
17126 IX86_BUILTIN_DIVPD
,
17127 IX86_BUILTIN_DIVSD
,
17128 IX86_BUILTIN_MULPD
,
17129 IX86_BUILTIN_MULSD
,
17130 IX86_BUILTIN_SUBPD
,
17131 IX86_BUILTIN_SUBSD
,
17133 IX86_BUILTIN_CMPEQPD
,
17134 IX86_BUILTIN_CMPLTPD
,
17135 IX86_BUILTIN_CMPLEPD
,
17136 IX86_BUILTIN_CMPGTPD
,
17137 IX86_BUILTIN_CMPGEPD
,
17138 IX86_BUILTIN_CMPNEQPD
,
17139 IX86_BUILTIN_CMPNLTPD
,
17140 IX86_BUILTIN_CMPNLEPD
,
17141 IX86_BUILTIN_CMPNGTPD
,
17142 IX86_BUILTIN_CMPNGEPD
,
17143 IX86_BUILTIN_CMPORDPD
,
17144 IX86_BUILTIN_CMPUNORDPD
,
17145 IX86_BUILTIN_CMPEQSD
,
17146 IX86_BUILTIN_CMPLTSD
,
17147 IX86_BUILTIN_CMPLESD
,
17148 IX86_BUILTIN_CMPNEQSD
,
17149 IX86_BUILTIN_CMPNLTSD
,
17150 IX86_BUILTIN_CMPNLESD
,
17151 IX86_BUILTIN_CMPORDSD
,
17152 IX86_BUILTIN_CMPUNORDSD
,
17154 IX86_BUILTIN_COMIEQSD
,
17155 IX86_BUILTIN_COMILTSD
,
17156 IX86_BUILTIN_COMILESD
,
17157 IX86_BUILTIN_COMIGTSD
,
17158 IX86_BUILTIN_COMIGESD
,
17159 IX86_BUILTIN_COMINEQSD
,
17160 IX86_BUILTIN_UCOMIEQSD
,
17161 IX86_BUILTIN_UCOMILTSD
,
17162 IX86_BUILTIN_UCOMILESD
,
17163 IX86_BUILTIN_UCOMIGTSD
,
17164 IX86_BUILTIN_UCOMIGESD
,
17165 IX86_BUILTIN_UCOMINEQSD
,
17167 IX86_BUILTIN_MAXPD
,
17168 IX86_BUILTIN_MAXSD
,
17169 IX86_BUILTIN_MINPD
,
17170 IX86_BUILTIN_MINSD
,
17172 IX86_BUILTIN_ANDPD
,
17173 IX86_BUILTIN_ANDNPD
,
17175 IX86_BUILTIN_XORPD
,
17177 IX86_BUILTIN_SQRTPD
,
17178 IX86_BUILTIN_SQRTSD
,
17180 IX86_BUILTIN_UNPCKHPD
,
17181 IX86_BUILTIN_UNPCKLPD
,
17183 IX86_BUILTIN_SHUFPD
,
17185 IX86_BUILTIN_LOADUPD
,
17186 IX86_BUILTIN_STOREUPD
,
17187 IX86_BUILTIN_MOVSD
,
17189 IX86_BUILTIN_LOADHPD
,
17190 IX86_BUILTIN_LOADLPD
,
17192 IX86_BUILTIN_CVTDQ2PD
,
17193 IX86_BUILTIN_CVTDQ2PS
,
17195 IX86_BUILTIN_CVTPD2DQ
,
17196 IX86_BUILTIN_CVTPD2PI
,
17197 IX86_BUILTIN_CVTPD2PS
,
17198 IX86_BUILTIN_CVTTPD2DQ
,
17199 IX86_BUILTIN_CVTTPD2PI
,
17201 IX86_BUILTIN_CVTPI2PD
,
17202 IX86_BUILTIN_CVTSI2SD
,
17203 IX86_BUILTIN_CVTSI642SD
,
17205 IX86_BUILTIN_CVTSD2SI
,
17206 IX86_BUILTIN_CVTSD2SI64
,
17207 IX86_BUILTIN_CVTSD2SS
,
17208 IX86_BUILTIN_CVTSS2SD
,
17209 IX86_BUILTIN_CVTTSD2SI
,
17210 IX86_BUILTIN_CVTTSD2SI64
,
17212 IX86_BUILTIN_CVTPS2DQ
,
17213 IX86_BUILTIN_CVTPS2PD
,
17214 IX86_BUILTIN_CVTTPS2DQ
,
17216 IX86_BUILTIN_MOVNTI
,
17217 IX86_BUILTIN_MOVNTPD
,
17218 IX86_BUILTIN_MOVNTDQ
,
17221 IX86_BUILTIN_MASKMOVDQU
,
17222 IX86_BUILTIN_MOVMSKPD
,
17223 IX86_BUILTIN_PMOVMSKB128
,
17225 IX86_BUILTIN_PACKSSWB128
,
17226 IX86_BUILTIN_PACKSSDW128
,
17227 IX86_BUILTIN_PACKUSWB128
,
17229 IX86_BUILTIN_PADDB128
,
17230 IX86_BUILTIN_PADDW128
,
17231 IX86_BUILTIN_PADDD128
,
17232 IX86_BUILTIN_PADDQ128
,
17233 IX86_BUILTIN_PADDSB128
,
17234 IX86_BUILTIN_PADDSW128
,
17235 IX86_BUILTIN_PADDUSB128
,
17236 IX86_BUILTIN_PADDUSW128
,
17237 IX86_BUILTIN_PSUBB128
,
17238 IX86_BUILTIN_PSUBW128
,
17239 IX86_BUILTIN_PSUBD128
,
17240 IX86_BUILTIN_PSUBQ128
,
17241 IX86_BUILTIN_PSUBSB128
,
17242 IX86_BUILTIN_PSUBSW128
,
17243 IX86_BUILTIN_PSUBUSB128
,
17244 IX86_BUILTIN_PSUBUSW128
,
17246 IX86_BUILTIN_PAND128
,
17247 IX86_BUILTIN_PANDN128
,
17248 IX86_BUILTIN_POR128
,
17249 IX86_BUILTIN_PXOR128
,
17251 IX86_BUILTIN_PAVGB128
,
17252 IX86_BUILTIN_PAVGW128
,
17254 IX86_BUILTIN_PCMPEQB128
,
17255 IX86_BUILTIN_PCMPEQW128
,
17256 IX86_BUILTIN_PCMPEQD128
,
17257 IX86_BUILTIN_PCMPGTB128
,
17258 IX86_BUILTIN_PCMPGTW128
,
17259 IX86_BUILTIN_PCMPGTD128
,
17261 IX86_BUILTIN_PMADDWD128
,
17263 IX86_BUILTIN_PMAXSW128
,
17264 IX86_BUILTIN_PMAXUB128
,
17265 IX86_BUILTIN_PMINSW128
,
17266 IX86_BUILTIN_PMINUB128
,
17268 IX86_BUILTIN_PMULUDQ
,
17269 IX86_BUILTIN_PMULUDQ128
,
17270 IX86_BUILTIN_PMULHUW128
,
17271 IX86_BUILTIN_PMULHW128
,
17272 IX86_BUILTIN_PMULLW128
,
17274 IX86_BUILTIN_PSADBW128
,
17275 IX86_BUILTIN_PSHUFHW
,
17276 IX86_BUILTIN_PSHUFLW
,
17277 IX86_BUILTIN_PSHUFD
,
17279 IX86_BUILTIN_PSLLDQI128
,
17280 IX86_BUILTIN_PSLLWI128
,
17281 IX86_BUILTIN_PSLLDI128
,
17282 IX86_BUILTIN_PSLLQI128
,
17283 IX86_BUILTIN_PSRAWI128
,
17284 IX86_BUILTIN_PSRADI128
,
17285 IX86_BUILTIN_PSRLDQI128
,
17286 IX86_BUILTIN_PSRLWI128
,
17287 IX86_BUILTIN_PSRLDI128
,
17288 IX86_BUILTIN_PSRLQI128
,
17290 IX86_BUILTIN_PSLLDQ128
,
17291 IX86_BUILTIN_PSLLW128
,
17292 IX86_BUILTIN_PSLLD128
,
17293 IX86_BUILTIN_PSLLQ128
,
17294 IX86_BUILTIN_PSRAW128
,
17295 IX86_BUILTIN_PSRAD128
,
17296 IX86_BUILTIN_PSRLW128
,
17297 IX86_BUILTIN_PSRLD128
,
17298 IX86_BUILTIN_PSRLQ128
,
17300 IX86_BUILTIN_PUNPCKHBW128
,
17301 IX86_BUILTIN_PUNPCKHWD128
,
17302 IX86_BUILTIN_PUNPCKHDQ128
,
17303 IX86_BUILTIN_PUNPCKHQDQ128
,
17304 IX86_BUILTIN_PUNPCKLBW128
,
17305 IX86_BUILTIN_PUNPCKLWD128
,
17306 IX86_BUILTIN_PUNPCKLDQ128
,
17307 IX86_BUILTIN_PUNPCKLQDQ128
,
17309 IX86_BUILTIN_CLFLUSH
,
17310 IX86_BUILTIN_MFENCE
,
17311 IX86_BUILTIN_LFENCE
,
17313 /* Prescott New Instructions. */
17314 IX86_BUILTIN_ADDSUBPS
,
17315 IX86_BUILTIN_HADDPS
,
17316 IX86_BUILTIN_HSUBPS
,
17317 IX86_BUILTIN_MOVSHDUP
,
17318 IX86_BUILTIN_MOVSLDUP
,
17319 IX86_BUILTIN_ADDSUBPD
,
17320 IX86_BUILTIN_HADDPD
,
17321 IX86_BUILTIN_HSUBPD
,
17322 IX86_BUILTIN_LDDQU
,
17324 IX86_BUILTIN_MONITOR
,
17325 IX86_BUILTIN_MWAIT
,
17328 IX86_BUILTIN_PHADDW
,
17329 IX86_BUILTIN_PHADDD
,
17330 IX86_BUILTIN_PHADDSW
,
17331 IX86_BUILTIN_PHSUBW
,
17332 IX86_BUILTIN_PHSUBD
,
17333 IX86_BUILTIN_PHSUBSW
,
17334 IX86_BUILTIN_PMADDUBSW
,
17335 IX86_BUILTIN_PMULHRSW
,
17336 IX86_BUILTIN_PSHUFB
,
17337 IX86_BUILTIN_PSIGNB
,
17338 IX86_BUILTIN_PSIGNW
,
17339 IX86_BUILTIN_PSIGND
,
17340 IX86_BUILTIN_PALIGNR
,
17341 IX86_BUILTIN_PABSB
,
17342 IX86_BUILTIN_PABSW
,
17343 IX86_BUILTIN_PABSD
,
17345 IX86_BUILTIN_PHADDW128
,
17346 IX86_BUILTIN_PHADDD128
,
17347 IX86_BUILTIN_PHADDSW128
,
17348 IX86_BUILTIN_PHSUBW128
,
17349 IX86_BUILTIN_PHSUBD128
,
17350 IX86_BUILTIN_PHSUBSW128
,
17351 IX86_BUILTIN_PMADDUBSW128
,
17352 IX86_BUILTIN_PMULHRSW128
,
17353 IX86_BUILTIN_PSHUFB128
,
17354 IX86_BUILTIN_PSIGNB128
,
17355 IX86_BUILTIN_PSIGNW128
,
17356 IX86_BUILTIN_PSIGND128
,
17357 IX86_BUILTIN_PALIGNR128
,
17358 IX86_BUILTIN_PABSB128
,
17359 IX86_BUILTIN_PABSW128
,
17360 IX86_BUILTIN_PABSD128
,
17362 /* AMDFAM10 - SSE4A New Instructions. */
17363 IX86_BUILTIN_MOVNTSD
,
17364 IX86_BUILTIN_MOVNTSS
,
17365 IX86_BUILTIN_EXTRQI
,
17366 IX86_BUILTIN_EXTRQ
,
17367 IX86_BUILTIN_INSERTQI
,
17368 IX86_BUILTIN_INSERTQ
,
17371 IX86_BUILTIN_BLENDPD
,
17372 IX86_BUILTIN_BLENDPS
,
17373 IX86_BUILTIN_BLENDVPD
,
17374 IX86_BUILTIN_BLENDVPS
,
17375 IX86_BUILTIN_PBLENDVB128
,
17376 IX86_BUILTIN_PBLENDW128
,
17381 IX86_BUILTIN_INSERTPS128
,
17383 IX86_BUILTIN_MOVNTDQA
,
17384 IX86_BUILTIN_MPSADBW128
,
17385 IX86_BUILTIN_PACKUSDW128
,
17386 IX86_BUILTIN_PCMPEQQ
,
17387 IX86_BUILTIN_PHMINPOSUW128
,
17389 IX86_BUILTIN_PMAXSB128
,
17390 IX86_BUILTIN_PMAXSD128
,
17391 IX86_BUILTIN_PMAXUD128
,
17392 IX86_BUILTIN_PMAXUW128
,
17394 IX86_BUILTIN_PMINSB128
,
17395 IX86_BUILTIN_PMINSD128
,
17396 IX86_BUILTIN_PMINUD128
,
17397 IX86_BUILTIN_PMINUW128
,
17399 IX86_BUILTIN_PMOVSXBW128
,
17400 IX86_BUILTIN_PMOVSXBD128
,
17401 IX86_BUILTIN_PMOVSXBQ128
,
17402 IX86_BUILTIN_PMOVSXWD128
,
17403 IX86_BUILTIN_PMOVSXWQ128
,
17404 IX86_BUILTIN_PMOVSXDQ128
,
17406 IX86_BUILTIN_PMOVZXBW128
,
17407 IX86_BUILTIN_PMOVZXBD128
,
17408 IX86_BUILTIN_PMOVZXBQ128
,
17409 IX86_BUILTIN_PMOVZXWD128
,
17410 IX86_BUILTIN_PMOVZXWQ128
,
17411 IX86_BUILTIN_PMOVZXDQ128
,
17413 IX86_BUILTIN_PMULDQ128
,
17414 IX86_BUILTIN_PMULLD128
,
17416 IX86_BUILTIN_ROUNDPD
,
17417 IX86_BUILTIN_ROUNDPS
,
17418 IX86_BUILTIN_ROUNDSD
,
17419 IX86_BUILTIN_ROUNDSS
,
17421 IX86_BUILTIN_PTESTZ
,
17422 IX86_BUILTIN_PTESTC
,
17423 IX86_BUILTIN_PTESTNZC
,
17425 IX86_BUILTIN_VEC_INIT_V2SI
,
17426 IX86_BUILTIN_VEC_INIT_V4HI
,
17427 IX86_BUILTIN_VEC_INIT_V8QI
,
17428 IX86_BUILTIN_VEC_EXT_V2DF
,
17429 IX86_BUILTIN_VEC_EXT_V2DI
,
17430 IX86_BUILTIN_VEC_EXT_V4SF
,
17431 IX86_BUILTIN_VEC_EXT_V4SI
,
17432 IX86_BUILTIN_VEC_EXT_V8HI
,
17433 IX86_BUILTIN_VEC_EXT_V2SI
,
17434 IX86_BUILTIN_VEC_EXT_V4HI
,
17435 IX86_BUILTIN_VEC_EXT_V16QI
,
17436 IX86_BUILTIN_VEC_SET_V2DI
,
17437 IX86_BUILTIN_VEC_SET_V4SF
,
17438 IX86_BUILTIN_VEC_SET_V4SI
,
17439 IX86_BUILTIN_VEC_SET_V8HI
,
17440 IX86_BUILTIN_VEC_SET_V4HI
,
17441 IX86_BUILTIN_VEC_SET_V16QI
,
17443 IX86_BUILTIN_VEC_PACK_SFIX
,
17446 IX86_BUILTIN_CRC32QI
,
17447 IX86_BUILTIN_CRC32HI
,
17448 IX86_BUILTIN_CRC32SI
,
17449 IX86_BUILTIN_CRC32DI
,
17451 IX86_BUILTIN_PCMPESTRI128
,
17452 IX86_BUILTIN_PCMPESTRM128
,
17453 IX86_BUILTIN_PCMPESTRA128
,
17454 IX86_BUILTIN_PCMPESTRC128
,
17455 IX86_BUILTIN_PCMPESTRO128
,
17456 IX86_BUILTIN_PCMPESTRS128
,
17457 IX86_BUILTIN_PCMPESTRZ128
,
17458 IX86_BUILTIN_PCMPISTRI128
,
17459 IX86_BUILTIN_PCMPISTRM128
,
17460 IX86_BUILTIN_PCMPISTRA128
,
17461 IX86_BUILTIN_PCMPISTRC128
,
17462 IX86_BUILTIN_PCMPISTRO128
,
17463 IX86_BUILTIN_PCMPISTRS128
,
17464 IX86_BUILTIN_PCMPISTRZ128
,
17466 IX86_BUILTIN_PCMPGTQ
,
17468 /* TFmode support builtins. */
17470 IX86_BUILTIN_FABSQ
,
17471 IX86_BUILTIN_COPYSIGNQ
,
17473 /* SSE5 instructions */
17474 IX86_BUILTIN_FMADDSS
,
17475 IX86_BUILTIN_FMADDSD
,
17476 IX86_BUILTIN_FMADDPS
,
17477 IX86_BUILTIN_FMADDPD
,
17478 IX86_BUILTIN_FMSUBSS
,
17479 IX86_BUILTIN_FMSUBSD
,
17480 IX86_BUILTIN_FMSUBPS
,
17481 IX86_BUILTIN_FMSUBPD
,
17482 IX86_BUILTIN_FNMADDSS
,
17483 IX86_BUILTIN_FNMADDSD
,
17484 IX86_BUILTIN_FNMADDPS
,
17485 IX86_BUILTIN_FNMADDPD
,
17486 IX86_BUILTIN_FNMSUBSS
,
17487 IX86_BUILTIN_FNMSUBSD
,
17488 IX86_BUILTIN_FNMSUBPS
,
17489 IX86_BUILTIN_FNMSUBPD
,
17490 IX86_BUILTIN_PCMOV_V2DI
,
17491 IX86_BUILTIN_PCMOV_V4SI
,
17492 IX86_BUILTIN_PCMOV_V8HI
,
17493 IX86_BUILTIN_PCMOV_V16QI
,
17494 IX86_BUILTIN_PCMOV_V4SF
,
17495 IX86_BUILTIN_PCMOV_V2DF
,
17496 IX86_BUILTIN_PPERM
,
17497 IX86_BUILTIN_PERMPS
,
17498 IX86_BUILTIN_PERMPD
,
17499 IX86_BUILTIN_PMACSSWW
,
17500 IX86_BUILTIN_PMACSWW
,
17501 IX86_BUILTIN_PMACSSWD
,
17502 IX86_BUILTIN_PMACSWD
,
17503 IX86_BUILTIN_PMACSSDD
,
17504 IX86_BUILTIN_PMACSDD
,
17505 IX86_BUILTIN_PMACSSDQL
,
17506 IX86_BUILTIN_PMACSSDQH
,
17507 IX86_BUILTIN_PMACSDQL
,
17508 IX86_BUILTIN_PMACSDQH
,
17509 IX86_BUILTIN_PMADCSSWD
,
17510 IX86_BUILTIN_PMADCSWD
,
17511 IX86_BUILTIN_PHADDBW
,
17512 IX86_BUILTIN_PHADDBD
,
17513 IX86_BUILTIN_PHADDBQ
,
17514 IX86_BUILTIN_PHADDWD
,
17515 IX86_BUILTIN_PHADDWQ
,
17516 IX86_BUILTIN_PHADDDQ
,
17517 IX86_BUILTIN_PHADDUBW
,
17518 IX86_BUILTIN_PHADDUBD
,
17519 IX86_BUILTIN_PHADDUBQ
,
17520 IX86_BUILTIN_PHADDUWD
,
17521 IX86_BUILTIN_PHADDUWQ
,
17522 IX86_BUILTIN_PHADDUDQ
,
17523 IX86_BUILTIN_PHSUBBW
,
17524 IX86_BUILTIN_PHSUBWD
,
17525 IX86_BUILTIN_PHSUBDQ
,
17526 IX86_BUILTIN_PROTB
,
17527 IX86_BUILTIN_PROTW
,
17528 IX86_BUILTIN_PROTD
,
17529 IX86_BUILTIN_PROTQ
,
17530 IX86_BUILTIN_PROTB_IMM
,
17531 IX86_BUILTIN_PROTW_IMM
,
17532 IX86_BUILTIN_PROTD_IMM
,
17533 IX86_BUILTIN_PROTQ_IMM
,
17534 IX86_BUILTIN_PSHLB
,
17535 IX86_BUILTIN_PSHLW
,
17536 IX86_BUILTIN_PSHLD
,
17537 IX86_BUILTIN_PSHLQ
,
17538 IX86_BUILTIN_PSHAB
,
17539 IX86_BUILTIN_PSHAW
,
17540 IX86_BUILTIN_PSHAD
,
17541 IX86_BUILTIN_PSHAQ
,
17542 IX86_BUILTIN_FRCZSS
,
17543 IX86_BUILTIN_FRCZSD
,
17544 IX86_BUILTIN_FRCZPS
,
17545 IX86_BUILTIN_FRCZPD
,
17546 IX86_BUILTIN_CVTPH2PS
,
17547 IX86_BUILTIN_CVTPS2PH
,
17549 IX86_BUILTIN_COMEQSS
,
17550 IX86_BUILTIN_COMNESS
,
17551 IX86_BUILTIN_COMLTSS
,
17552 IX86_BUILTIN_COMLESS
,
17553 IX86_BUILTIN_COMGTSS
,
17554 IX86_BUILTIN_COMGESS
,
17555 IX86_BUILTIN_COMUEQSS
,
17556 IX86_BUILTIN_COMUNESS
,
17557 IX86_BUILTIN_COMULTSS
,
17558 IX86_BUILTIN_COMULESS
,
17559 IX86_BUILTIN_COMUGTSS
,
17560 IX86_BUILTIN_COMUGESS
,
17561 IX86_BUILTIN_COMORDSS
,
17562 IX86_BUILTIN_COMUNORDSS
,
17563 IX86_BUILTIN_COMFALSESS
,
17564 IX86_BUILTIN_COMTRUESS
,
17566 IX86_BUILTIN_COMEQSD
,
17567 IX86_BUILTIN_COMNESD
,
17568 IX86_BUILTIN_COMLTSD
,
17569 IX86_BUILTIN_COMLESD
,
17570 IX86_BUILTIN_COMGTSD
,
17571 IX86_BUILTIN_COMGESD
,
17572 IX86_BUILTIN_COMUEQSD
,
17573 IX86_BUILTIN_COMUNESD
,
17574 IX86_BUILTIN_COMULTSD
,
17575 IX86_BUILTIN_COMULESD
,
17576 IX86_BUILTIN_COMUGTSD
,
17577 IX86_BUILTIN_COMUGESD
,
17578 IX86_BUILTIN_COMORDSD
,
17579 IX86_BUILTIN_COMUNORDSD
,
17580 IX86_BUILTIN_COMFALSESD
,
17581 IX86_BUILTIN_COMTRUESD
,
17583 IX86_BUILTIN_COMEQPS
,
17584 IX86_BUILTIN_COMNEPS
,
17585 IX86_BUILTIN_COMLTPS
,
17586 IX86_BUILTIN_COMLEPS
,
17587 IX86_BUILTIN_COMGTPS
,
17588 IX86_BUILTIN_COMGEPS
,
17589 IX86_BUILTIN_COMUEQPS
,
17590 IX86_BUILTIN_COMUNEPS
,
17591 IX86_BUILTIN_COMULTPS
,
17592 IX86_BUILTIN_COMULEPS
,
17593 IX86_BUILTIN_COMUGTPS
,
17594 IX86_BUILTIN_COMUGEPS
,
17595 IX86_BUILTIN_COMORDPS
,
17596 IX86_BUILTIN_COMUNORDPS
,
17597 IX86_BUILTIN_COMFALSEPS
,
17598 IX86_BUILTIN_COMTRUEPS
,
17600 IX86_BUILTIN_COMEQPD
,
17601 IX86_BUILTIN_COMNEPD
,
17602 IX86_BUILTIN_COMLTPD
,
17603 IX86_BUILTIN_COMLEPD
,
17604 IX86_BUILTIN_COMGTPD
,
17605 IX86_BUILTIN_COMGEPD
,
17606 IX86_BUILTIN_COMUEQPD
,
17607 IX86_BUILTIN_COMUNEPD
,
17608 IX86_BUILTIN_COMULTPD
,
17609 IX86_BUILTIN_COMULEPD
,
17610 IX86_BUILTIN_COMUGTPD
,
17611 IX86_BUILTIN_COMUGEPD
,
17612 IX86_BUILTIN_COMORDPD
,
17613 IX86_BUILTIN_COMUNORDPD
,
17614 IX86_BUILTIN_COMFALSEPD
,
17615 IX86_BUILTIN_COMTRUEPD
,
17617 IX86_BUILTIN_PCOMEQUB
,
17618 IX86_BUILTIN_PCOMNEUB
,
17619 IX86_BUILTIN_PCOMLTUB
,
17620 IX86_BUILTIN_PCOMLEUB
,
17621 IX86_BUILTIN_PCOMGTUB
,
17622 IX86_BUILTIN_PCOMGEUB
,
17623 IX86_BUILTIN_PCOMFALSEUB
,
17624 IX86_BUILTIN_PCOMTRUEUB
,
17625 IX86_BUILTIN_PCOMEQUW
,
17626 IX86_BUILTIN_PCOMNEUW
,
17627 IX86_BUILTIN_PCOMLTUW
,
17628 IX86_BUILTIN_PCOMLEUW
,
17629 IX86_BUILTIN_PCOMGTUW
,
17630 IX86_BUILTIN_PCOMGEUW
,
17631 IX86_BUILTIN_PCOMFALSEUW
,
17632 IX86_BUILTIN_PCOMTRUEUW
,
17633 IX86_BUILTIN_PCOMEQUD
,
17634 IX86_BUILTIN_PCOMNEUD
,
17635 IX86_BUILTIN_PCOMLTUD
,
17636 IX86_BUILTIN_PCOMLEUD
,
17637 IX86_BUILTIN_PCOMGTUD
,
17638 IX86_BUILTIN_PCOMGEUD
,
17639 IX86_BUILTIN_PCOMFALSEUD
,
17640 IX86_BUILTIN_PCOMTRUEUD
,
17641 IX86_BUILTIN_PCOMEQUQ
,
17642 IX86_BUILTIN_PCOMNEUQ
,
17643 IX86_BUILTIN_PCOMLTUQ
,
17644 IX86_BUILTIN_PCOMLEUQ
,
17645 IX86_BUILTIN_PCOMGTUQ
,
17646 IX86_BUILTIN_PCOMGEUQ
,
17647 IX86_BUILTIN_PCOMFALSEUQ
,
17648 IX86_BUILTIN_PCOMTRUEUQ
,
17650 IX86_BUILTIN_PCOMEQB
,
17651 IX86_BUILTIN_PCOMNEB
,
17652 IX86_BUILTIN_PCOMLTB
,
17653 IX86_BUILTIN_PCOMLEB
,
17654 IX86_BUILTIN_PCOMGTB
,
17655 IX86_BUILTIN_PCOMGEB
,
17656 IX86_BUILTIN_PCOMFALSEB
,
17657 IX86_BUILTIN_PCOMTRUEB
,
17658 IX86_BUILTIN_PCOMEQW
,
17659 IX86_BUILTIN_PCOMNEW
,
17660 IX86_BUILTIN_PCOMLTW
,
17661 IX86_BUILTIN_PCOMLEW
,
17662 IX86_BUILTIN_PCOMGTW
,
17663 IX86_BUILTIN_PCOMGEW
,
17664 IX86_BUILTIN_PCOMFALSEW
,
17665 IX86_BUILTIN_PCOMTRUEW
,
17666 IX86_BUILTIN_PCOMEQD
,
17667 IX86_BUILTIN_PCOMNED
,
17668 IX86_BUILTIN_PCOMLTD
,
17669 IX86_BUILTIN_PCOMLED
,
17670 IX86_BUILTIN_PCOMGTD
,
17671 IX86_BUILTIN_PCOMGED
,
17672 IX86_BUILTIN_PCOMFALSED
,
17673 IX86_BUILTIN_PCOMTRUED
,
17674 IX86_BUILTIN_PCOMEQQ
,
17675 IX86_BUILTIN_PCOMNEQ
,
17676 IX86_BUILTIN_PCOMLTQ
,
17677 IX86_BUILTIN_PCOMLEQ
,
17678 IX86_BUILTIN_PCOMGTQ
,
17679 IX86_BUILTIN_PCOMGEQ
,
17680 IX86_BUILTIN_PCOMFALSEQ
,
17681 IX86_BUILTIN_PCOMTRUEQ
,
17686 /* Table for the ix86 builtin decls. */
17687 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
17689 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17690 * if the target_flags include one of MASK. Stores the function decl
17691 * in the ix86_builtins array.
17692 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17695 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
17697 tree decl
= NULL_TREE
;
17699 if (mask
& ix86_isa_flags
17700 && (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
))
17702 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
17704 ix86_builtins
[(int) code
] = decl
;
17710 /* Like def_builtin, but also marks the function decl "const". */
17713 def_builtin_const (int mask
, const char *name
, tree type
,
17714 enum ix86_builtins code
)
17716 tree decl
= def_builtin (mask
, name
, type
, code
);
17718 TREE_READONLY (decl
) = 1;
17722 /* Bits for builtin_description.flag. */
17724 /* Set when we don't support the comparison natively, and should
17725 swap_comparison in order to support it. */
17726 #define BUILTIN_DESC_SWAP_OPERANDS 1
17728 struct builtin_description
17730 const unsigned int mask
;
17731 const enum insn_code icode
;
17732 const char *const name
;
17733 const enum ix86_builtins code
;
17734 const enum rtx_code comparison
;
17738 static const struct builtin_description bdesc_comi
[] =
17740 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
17741 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
17742 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
17743 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
17744 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
17745 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
17746 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
17747 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
17748 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
17749 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
17750 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
17751 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
17752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
17753 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
17754 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
17755 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
17756 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
17757 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
17758 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
17759 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
17760 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
17761 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
17762 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
17763 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
17766 static const struct builtin_description bdesc_ptest
[] =
17769 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, 0 },
17770 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, 0 },
17771 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, 0 },
17774 static const struct builtin_description bdesc_pcmpestr
[] =
17777 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
17778 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
17779 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
17780 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
17781 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
17782 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
17783 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
17786 static const struct builtin_description bdesc_pcmpistr
[] =
17789 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
17790 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
17791 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
17792 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
17793 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
17794 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
17795 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
17798 static const struct builtin_description bdesc_crc32
[] =
17801 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32qi
, 0, IX86_BUILTIN_CRC32QI
, UNKNOWN
, 0 },
17802 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32hi
, 0, IX86_BUILTIN_CRC32HI
, UNKNOWN
, 0 },
17803 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32si
, 0, IX86_BUILTIN_CRC32SI
, UNKNOWN
, 0 },
17804 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32di
, 0, IX86_BUILTIN_CRC32DI
, UNKNOWN
, 0 },
17807 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17808 static const struct builtin_description bdesc_sse_3arg
[] =
17811 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, 0 },
17812 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, 0 },
17813 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, 0 },
17814 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, 0 },
17815 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, 0 },
17816 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, 0 },
17817 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, 0 },
17818 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, 0 },
17819 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, 0 },
17820 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, 0 },
17821 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, 0, IX86_BUILTIN_ROUNDSD
, UNKNOWN
, 0 },
17822 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, 0, IX86_BUILTIN_ROUNDSS
, UNKNOWN
, 0 },
17825 static const struct builtin_description bdesc_2arg
[] =
17828 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, 0 },
17829 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, 0 },
17830 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, 0 },
17831 { OPTION_MASK_ISA_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, 0 },
17832 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, 0 },
17833 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, 0 },
17834 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, 0 },
17835 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, 0 },
17837 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
17838 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
17839 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
17840 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17841 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17842 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
17843 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
17844 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
17845 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
17846 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17847 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17848 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
17849 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
17850 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
17851 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
17852 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
17853 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
17854 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
17855 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
17856 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17857 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17858 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, 0 },
17860 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, 0 },
17861 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, 0 },
17862 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, 0 },
17863 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, 0 },
17865 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, 0 },
17866 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, 0 },
17867 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, 0 },
17868 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, 0 },
17870 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, 0 },
17871 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, 0 },
17872 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, 0 },
17873 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, 0 },
17874 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, 0 },
17877 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, 0 },
17878 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, 0 },
17879 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, 0 },
17880 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, 0 },
17881 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, 0 },
17882 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, 0 },
17883 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, 0 },
17884 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, 0 },
17886 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, 0 },
17887 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, 0 },
17888 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, 0 },
17889 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, 0 },
17890 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, 0 },
17891 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, 0 },
17892 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, 0 },
17893 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, 0 },
17895 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, 0 },
17896 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, 0 },
17897 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, 0 },
17899 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, 0 },
17900 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, 0 },
17901 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, 0 },
17902 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, 0 },
17904 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, 0 },
17905 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, 0 },
17907 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, 0 },
17908 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, 0 },
17909 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, 0 },
17910 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, 0 },
17911 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, 0 },
17912 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, 0 },
17914 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, 0 },
17915 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, 0 },
17916 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, 0 },
17917 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, 0 },
17919 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, 0 },
17920 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, 0 },
17921 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, 0 },
17922 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, 0 },
17923 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, 0 },
17924 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, 0 },
17927 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, UNKNOWN
, 0 },
17928 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, UNKNOWN
, 0 },
17929 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, UNKNOWN
, 0 },
17931 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, 0 },
17932 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, 0 },
17933 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, 0 },
17935 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, UNKNOWN
, 0 },
17936 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, UNKNOWN
, 0 },
17937 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, UNKNOWN
, 0 },
17938 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, UNKNOWN
, 0 },
17939 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, UNKNOWN
, 0 },
17940 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, UNKNOWN
, 0 },
17942 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, UNKNOWN
, 0 },
17943 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, UNKNOWN
, 0 },
17944 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, UNKNOWN
, 0 },
17945 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, UNKNOWN
, 0 },
17946 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, UNKNOWN
, 0 },
17947 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, UNKNOWN
, 0 },
17949 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, UNKNOWN
, 0 },
17950 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, UNKNOWN
, 0 },
17951 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, UNKNOWN
, 0 },
17952 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, UNKNOWN
, 0 },
17954 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, UNKNOWN
, 0 },
17955 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, UNKNOWN
, 0 },
17958 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, 0 },
17959 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, 0 },
17960 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, 0 },
17961 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, 0 },
17962 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, 0 },
17963 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, 0 },
17964 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, 0 },
17965 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, 0 },
17967 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
17968 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
17969 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
17970 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17971 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17972 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
17973 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
17974 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
17975 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
17976 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17977 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17978 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
17979 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
17980 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
17981 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
17982 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
17983 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
17984 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
17985 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
17986 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
17988 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, 0 },
17989 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, 0 },
17990 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, 0 },
17991 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, 0 },
17993 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, 0 },
17994 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, 0 },
17995 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, 0 },
17996 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, 0 },
17998 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, 0 },
17999 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, 0 },
18000 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, 0 },
18002 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, 0 },
18005 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, 0 },
18006 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, 0 },
18007 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, 0 },
18008 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, 0 },
18009 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, 0 },
18010 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, 0 },
18011 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, 0 },
18012 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, 0 },
18014 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, 0 },
18015 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, 0 },
18016 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, 0 },
18017 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, 0 },
18018 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, 0 },
18019 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, 0 },
18020 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, 0 },
18021 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, 0 },
18023 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, 0 },
18024 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, 0 },
18026 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, 0 },
18027 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, 0 },
18028 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, 0 },
18029 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, 0 },
18031 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, 0 },
18032 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, 0 },
18034 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, 0 },
18035 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, 0 },
18036 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, 0 },
18037 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, 0 },
18038 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, 0 },
18039 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, 0 },
18041 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, 0 },
18042 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, 0 },
18043 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, 0 },
18044 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, 0 },
18046 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, 0 },
18047 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, 0 },
18048 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, 0 },
18049 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, 0 },
18050 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, 0 },
18051 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, 0 },
18052 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, 0 },
18053 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, 0 },
18055 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, 0 },
18056 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, 0 },
18057 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, 0 },
18059 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, 0 },
18060 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, UNKNOWN
, 0 },
18062 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, UNKNOWN
, 0 },
18063 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, 0 },
18065 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, UNKNOWN
, 0 },
18066 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, UNKNOWN
, 0 },
18067 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, UNKNOWN
, 0 },
18069 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, UNKNOWN
, 0 },
18070 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, UNKNOWN
, 0 },
18071 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, UNKNOWN
, 0 },
18073 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, UNKNOWN
, 0 },
18074 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, UNKNOWN
, 0 },
18076 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, UNKNOWN
, 0 },
18078 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, 0 },
18079 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, 0 },
18080 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, 0 },
18081 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, 0 },
18084 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, 0 },
18085 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, 0 },
18086 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, 0 },
18087 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, 0 },
18088 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, 0 },
18089 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, 0 },
18092 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, 0 },
18093 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, 0 },
18094 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, 0 },
18095 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, 0 },
18096 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, 0 },
18097 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, 0 },
18098 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, 0 },
18099 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, 0 },
18100 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, 0 },
18101 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, 0 },
18102 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, 0 },
18103 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, 0 },
18104 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, 0 },
18105 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, 0 },
18106 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, 0 },
18107 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, 0 },
18108 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, 0 },
18109 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, 0 },
18110 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, 0 },
18111 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, 0 },
18112 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, 0 },
18113 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, 0 },
18114 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, 0 },
18115 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, 0 },
18118 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, 0 },
18119 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, 0 },
18120 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, 0 },
18121 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, 0 },
18122 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, 0 },
18123 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, 0 },
18124 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, 0 },
18125 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, 0 },
18126 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, 0 },
18127 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, 0 },
18128 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, 0, IX86_BUILTIN_PMULDQ128
, UNKNOWN
, 0 },
18129 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, 0 },
18132 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, 0 },
18135 static const struct builtin_description bdesc_1arg
[] =
18137 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, 0 },
18138 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, 0 },
18140 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, UNKNOWN
, 0 },
18141 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, UNKNOWN
, 0 },
18142 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, UNKNOWN
, 0 },
18144 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, 0 },
18145 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, 0 },
18146 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, 0 },
18147 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, 0 },
18148 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, 0 },
18149 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, 0 },
18151 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, 0 },
18152 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, 0 },
18154 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, UNKNOWN
, 0 },
18156 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, 0 },
18157 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, 0 },
18159 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, 0 },
18160 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, 0 },
18161 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, 0 },
18162 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, 0 },
18163 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, 0 },
18165 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, 0 },
18167 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, 0 },
18168 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, 0 },
18169 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, 0 },
18170 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, 0 },
18172 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, 0 },
18173 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, 0 },
18174 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, 0 },
18177 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, 0 },
18178 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, 0 },
18181 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, 0 },
18182 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, 0 },
18183 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, 0 },
18184 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, 0 },
18185 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, 0 },
18186 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, 0 },
18189 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, 0 },
18190 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, 0 },
18191 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, 0 },
18192 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, 0 },
18193 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, 0 },
18194 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, 0 },
18195 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, 0 },
18196 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, 0 },
18197 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, 0 },
18198 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, 0 },
18199 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, 0 },
18200 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, 0 },
18201 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, 0 },
18203 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18204 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundpd
, 0, IX86_BUILTIN_ROUNDPD
, UNKNOWN
, 0 },
18205 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundps
, 0, IX86_BUILTIN_ROUNDPS
, UNKNOWN
, 0 },
18209 enum multi_arg_type
{
/* Three-operand permute forms (paired with sse5_permv4sf / sse5_permv2df
   in bdesc_multi_arg below).  */
18219 MULTI_ARG_3_PERMPS
,
18220 MULTI_ARG_3_PERMPD
,
/* Two-operand forms whose second operand is an immediate; used by the
   rotate-by-immediate builtins (CODE_FOR_rotlv*3, __builtin_ia32_prot*i).  */
18227 MULTI_ARG_2_DI_IMM
,
18228 MULTI_ARG_2_SI_IMM
,
18229 MULTI_ARG_2_HI_IMM
,
18230 MULTI_ARG_2_QI_IMM
,
/* Two-operand comparison forms; the RTX comparison code (EQ, NE, LT, ...)
   is carried in the builtin_description comparison field, as seen in the
   sse5_maskcmp* entries of bdesc_multi_arg below.  */
18231 MULTI_ARG_2_SF_CMP
,
18232 MULTI_ARG_2_DF_CMP
,
18233 MULTI_ARG_2_DI_CMP
,
18234 MULTI_ARG_2_SI_CMP
,
18235 MULTI_ARG_2_HI_CMP
,
18236 MULTI_ARG_2_QI_CMP
,
18259 static const struct builtin_description bdesc_multi_arg
[] =
18261 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv4sf4
, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS
, 0, (int)MULTI_ARG_3_SF
},
18262 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmaddv2df4
, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD
, 0, (int)MULTI_ARG_3_DF
},
18263 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv4sf4
, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS
, 0, (int)MULTI_ARG_3_SF
},
18264 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmaddv2df4
, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD
, 0, (int)MULTI_ARG_3_DF
},
18265 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv4sf4
, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS
, 0, (int)MULTI_ARG_3_SF
},
18266 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfmsubv2df4
, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD
, 0, (int)MULTI_ARG_3_DF
},
18267 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv4sf4
, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS
, 0, (int)MULTI_ARG_3_SF
},
18268 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fmsubv2df4
, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD
, 0, (int)MULTI_ARG_3_DF
},
18269 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv4sf4
, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS
, 0, (int)MULTI_ARG_3_SF
},
18270 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmaddv2df4
, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD
, 0, (int)MULTI_ARG_3_DF
},
18271 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv4sf4
, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS
, 0, (int)MULTI_ARG_3_SF
},
18272 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmaddv2df4
, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD
, 0, (int)MULTI_ARG_3_DF
},
18273 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv4sf4
, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS
, 0, (int)MULTI_ARG_3_SF
},
18274 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_vmfnmsubv2df4
, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD
, 0, (int)MULTI_ARG_3_DF
},
18275 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv4sf4
, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS
, 0, (int)MULTI_ARG_3_SF
},
18276 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5i_fnmsubv2df4
, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD
, 0, (int)MULTI_ARG_3_DF
},
18277 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI
, 0, (int)MULTI_ARG_3_DI
},
18278 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2di
, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI
, 0, (int)MULTI_ARG_3_DI
},
18279 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4si
, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI
, 0, (int)MULTI_ARG_3_SI
},
18280 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v8hi
, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI
, 0, (int)MULTI_ARG_3_HI
},
18281 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v16qi
, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI
,0, (int)MULTI_ARG_3_QI
},
18282 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v2df
, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF
, 0, (int)MULTI_ARG_3_DF
},
18283 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcmov_v4sf
, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF
, 0, (int)MULTI_ARG_3_SF
},
18284 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pperm
, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM
, 0, (int)MULTI_ARG_3_QI
},
18285 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv4sf
, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS
, 0, (int)MULTI_ARG_3_PERMPS
},
18286 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_permv2df
, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD
, 0, (int)MULTI_ARG_3_PERMPD
},
18287 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssww
, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW
, 0, (int)MULTI_ARG_3_HI
},
18288 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsww
, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW
, 0, (int)MULTI_ARG_3_HI
},
18289 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsswd
, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18290 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacswd
, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18291 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdd
, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD
, 0, (int)MULTI_ARG_3_SI
},
18292 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdd
, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD
, 0, (int)MULTI_ARG_3_SI
},
18293 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdql
, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL
, 0, (int)MULTI_ARG_3_SI_DI
},
18294 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacssdqh
, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH
, 0, (int)MULTI_ARG_3_SI_DI
},
18295 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdql
, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL
, 0, (int)MULTI_ARG_3_SI_DI
},
18296 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmacsdqh
, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH
, 0, (int)MULTI_ARG_3_SI_DI
},
18297 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmadcsswd
, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18298 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pmadcswd
, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD
, 0, (int)MULTI_ARG_3_HI_SI
},
18299 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv2di3
, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ
, 0, (int)MULTI_ARG_2_DI
},
18300 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv4si3
, "__builtin_ia32_protd", IX86_BUILTIN_PROTD
, 0, (int)MULTI_ARG_2_SI
},
18301 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv8hi3
, "__builtin_ia32_protw", IX86_BUILTIN_PROTW
, 0, (int)MULTI_ARG_2_HI
},
18302 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_rotlv16qi3
, "__builtin_ia32_protb", IX86_BUILTIN_PROTB
, 0, (int)MULTI_ARG_2_QI
},
18303 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv2di3
, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM
, 0, (int)MULTI_ARG_2_DI_IMM
},
18304 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv4si3
, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM
, 0, (int)MULTI_ARG_2_SI_IMM
},
18305 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv8hi3
, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM
, 0, (int)MULTI_ARG_2_HI_IMM
},
18306 { OPTION_MASK_ISA_SSE5
, CODE_FOR_rotlv16qi3
, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM
, 0, (int)MULTI_ARG_2_QI_IMM
},
18307 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv2di3
, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ
, 0, (int)MULTI_ARG_2_DI
},
18308 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv4si3
, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD
, 0, (int)MULTI_ARG_2_SI
},
18309 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv8hi3
, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW
, 0, (int)MULTI_ARG_2_HI
},
18310 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_ashlv16qi3
, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB
, 0, (int)MULTI_ARG_2_QI
},
18311 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv2di3
, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ
, 0, (int)MULTI_ARG_2_DI
},
18312 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv4si3
, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD
, 0, (int)MULTI_ARG_2_SI
},
18313 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv8hi3
, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW
, 0, (int)MULTI_ARG_2_HI
},
18314 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_lshlv16qi3
, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB
, 0, (int)MULTI_ARG_2_QI
},
18315 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmfrczv4sf2
, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS
, 0, (int)MULTI_ARG_2_SF
},
18316 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmfrczv2df2
, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD
, 0, (int)MULTI_ARG_2_DF
},
18317 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_frczv4sf2
, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS
, 0, (int)MULTI_ARG_1_SF
},
18318 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_frczv2df2
, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD
, 0, (int)MULTI_ARG_1_DF
},
18319 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_cvtph2ps
, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS
, 0, (int)MULTI_ARG_1_PH2PS
},
18320 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_cvtps2ph
, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH
, 0, (int)MULTI_ARG_1_PS2PH
},
18321 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbw
, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW
, 0, (int)MULTI_ARG_1_QI_HI
},
18322 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbd
, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD
, 0, (int)MULTI_ARG_1_QI_SI
},
18323 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddbq
, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ
, 0, (int)MULTI_ARG_1_QI_DI
},
18324 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddwd
, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD
, 0, (int)MULTI_ARG_1_HI_SI
},
18325 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddwq
, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ
, 0, (int)MULTI_ARG_1_HI_DI
},
18326 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadddq
, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ
, 0, (int)MULTI_ARG_1_SI_DI
},
18327 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubw
, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW
, 0, (int)MULTI_ARG_1_QI_HI
},
18328 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubd
, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD
, 0, (int)MULTI_ARG_1_QI_SI
},
18329 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddubq
, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ
, 0, (int)MULTI_ARG_1_QI_DI
},
18330 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadduwd
, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD
, 0, (int)MULTI_ARG_1_HI_SI
},
18331 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phadduwq
, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ
, 0, (int)MULTI_ARG_1_HI_DI
},
18332 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phaddudq
, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ
, 0, (int)MULTI_ARG_1_SI_DI
},
18333 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubbw
, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW
, 0, (int)MULTI_ARG_1_QI_HI
},
18334 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubwd
, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD
, 0, (int)MULTI_ARG_1_HI_SI
},
18335 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_phsubdq
, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ
, 0, (int)MULTI_ARG_1_SI_DI
},
18337 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS
, EQ
, (int)MULTI_ARG_2_SF_CMP
},
18338 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18339 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18340 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS
, LT
, (int)MULTI_ARG_2_SF_CMP
},
18341 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS
, LE
, (int)MULTI_ARG_2_SF_CMP
},
18342 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS
, GT
, (int)MULTI_ARG_2_SF_CMP
},
18343 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS
, GE
, (int)MULTI_ARG_2_SF_CMP
},
18344 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS
, UNEQ
, (int)MULTI_ARG_2_SF_CMP
},
18345 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18346 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18347 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS
, UNLT
, (int)MULTI_ARG_2_SF_CMP
},
18348 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS
, UNLE
, (int)MULTI_ARG_2_SF_CMP
},
18349 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS
, UNGT
, (int)MULTI_ARG_2_SF_CMP
},
18350 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS
, UNGE
, (int)MULTI_ARG_2_SF_CMP
},
18351 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS
, ORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18352 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv4sf3
, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS
, UNORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18354 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD
, EQ
, (int)MULTI_ARG_2_DF_CMP
},
18355 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18356 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18357 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD
, LT
, (int)MULTI_ARG_2_DF_CMP
},
18358 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD
, LE
, (int)MULTI_ARG_2_DF_CMP
},
18359 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD
, GT
, (int)MULTI_ARG_2_DF_CMP
},
18360 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD
, GE
, (int)MULTI_ARG_2_DF_CMP
},
18361 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD
, UNEQ
, (int)MULTI_ARG_2_DF_CMP
},
18362 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18363 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18364 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD
, UNLT
, (int)MULTI_ARG_2_DF_CMP
},
18365 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD
, UNLE
, (int)MULTI_ARG_2_DF_CMP
},
18366 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD
, UNGT
, (int)MULTI_ARG_2_DF_CMP
},
18367 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD
, UNGE
, (int)MULTI_ARG_2_DF_CMP
},
18368 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD
, ORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18369 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_vmmaskcmpv2df3
, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD
, UNORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18371 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS
, EQ
, (int)MULTI_ARG_2_SF_CMP
},
18372 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18373 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS
, NE
, (int)MULTI_ARG_2_SF_CMP
},
18374 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS
, LT
, (int)MULTI_ARG_2_SF_CMP
},
18375 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS
, LE
, (int)MULTI_ARG_2_SF_CMP
},
18376 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS
, GT
, (int)MULTI_ARG_2_SF_CMP
},
18377 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS
, GE
, (int)MULTI_ARG_2_SF_CMP
},
18378 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS
, UNEQ
, (int)MULTI_ARG_2_SF_CMP
},
18379 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18380 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS
, LTGT
, (int)MULTI_ARG_2_SF_CMP
},
18381 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS
, UNLT
, (int)MULTI_ARG_2_SF_CMP
},
18382 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS
, UNLE
, (int)MULTI_ARG_2_SF_CMP
},
18383 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS
, UNGT
, (int)MULTI_ARG_2_SF_CMP
},
18384 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS
, UNGE
, (int)MULTI_ARG_2_SF_CMP
},
18385 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS
, ORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18386 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4sf3
, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS
, UNORDERED
, (int)MULTI_ARG_2_SF_CMP
},
18388 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD
, EQ
, (int)MULTI_ARG_2_DF_CMP
},
18389 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18390 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD
, NE
, (int)MULTI_ARG_2_DF_CMP
},
18391 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD
, LT
, (int)MULTI_ARG_2_DF_CMP
},
18392 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD
, LE
, (int)MULTI_ARG_2_DF_CMP
},
18393 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD
, GT
, (int)MULTI_ARG_2_DF_CMP
},
18394 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD
, GE
, (int)MULTI_ARG_2_DF_CMP
},
18395 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD
, UNEQ
, (int)MULTI_ARG_2_DF_CMP
},
18396 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18397 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD
, LTGT
, (int)MULTI_ARG_2_DF_CMP
},
18398 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD
, UNLT
, (int)MULTI_ARG_2_DF_CMP
},
18399 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD
, UNLE
, (int)MULTI_ARG_2_DF_CMP
},
18400 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD
, UNGT
, (int)MULTI_ARG_2_DF_CMP
},
18401 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD
, UNGE
, (int)MULTI_ARG_2_DF_CMP
},
18402 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD
, ORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18403 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2df3
, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD
, UNORDERED
, (int)MULTI_ARG_2_DF_CMP
},
18405 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
18406 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18407 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18408 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
18409 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
18410 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
18411 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv16qi3
, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
18413 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
18414 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18415 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18416 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
18417 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
18418 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
18419 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv8hi3
, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
18421 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
18422 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18423 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18424 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
18425 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
18426 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
18427 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv4si3
, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
18429 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
18430 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18431 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18432 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
18433 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
18434 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
18435 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmpv2di3
, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
18437 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
18438 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18439 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v16qi3
,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
18440 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
18441 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
18442 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
18443 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv16qi3
, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
18445 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
18446 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18447 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v8hi3
, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
18448 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
18449 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
18450 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
18451 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv8hi3
, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
18453 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
18454 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18455 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v4si3
, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
18456 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
18457 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
18458 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
18459 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv4si3
, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
18461 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
18462 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18463 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_uns2v2di3
, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
18464 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
18465 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
18466 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
18467 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_maskcmp_unsv2di3
, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
18469 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS
, COM_FALSE_S
, (int)MULTI_ARG_2_SF_TF
},
18470 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS
, COM_TRUE_S
, (int)MULTI_ARG_2_SF_TF
},
18471 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS
, COM_FALSE_P
, (int)MULTI_ARG_2_SF_TF
},
18472 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv4sf3
, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS
, COM_TRUE_P
, (int)MULTI_ARG_2_SF_TF
},
18473 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD
, COM_FALSE_S
, (int)MULTI_ARG_2_DF_TF
},
18474 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD
, COM_TRUE_S
, (int)MULTI_ARG_2_DF_TF
},
18475 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD
, COM_FALSE_P
, (int)MULTI_ARG_2_DF_TF
},
18476 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_com_tfv2df3
, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD
, COM_TRUE_P
, (int)MULTI_ARG_2_DF_TF
},
18478 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB
, PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
18479 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW
, PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
18480 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED
, PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
18481 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ
, PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
18482 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB
,PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
18483 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW
,PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
18484 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD
,PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
18485 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ
,PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
18487 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB
, PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
18488 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW
, PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
18489 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED
, PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
18490 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ
, PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
18491 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv16qi3
, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB
, PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
18492 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv8hi3
, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW
, PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
18493 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv4si3
, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD
, PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
18494 { OPTION_MASK_ISA_SSE5
, CODE_FOR_sse5_pcom_tfv2di3
, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ
, PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
18497 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18498 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18501 ix86_init_mmx_sse_builtins (void)
18503 const struct builtin_description
* d
;
18506 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
18507 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
18508 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
18509 tree V2DI_type_node
18510 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
18511 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
18512 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
18513 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
18514 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
18515 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
18516 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
18518 tree pchar_type_node
= build_pointer_type (char_type_node
);
18519 tree pcchar_type_node
= build_pointer_type (
18520 build_type_variant (char_type_node
, 1, 0));
18521 tree pfloat_type_node
= build_pointer_type (float_type_node
);
18522 tree pcfloat_type_node
= build_pointer_type (
18523 build_type_variant (float_type_node
, 1, 0));
18524 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
18525 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
18526 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
18529 tree int_ftype_v4sf_v4sf
18530 = build_function_type_list (integer_type_node
,
18531 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18532 tree v4si_ftype_v4sf_v4sf
18533 = build_function_type_list (V4SI_type_node
,
18534 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18535 /* MMX/SSE/integer conversions. */
18536 tree int_ftype_v4sf
18537 = build_function_type_list (integer_type_node
,
18538 V4SF_type_node
, NULL_TREE
);
18539 tree int64_ftype_v4sf
18540 = build_function_type_list (long_long_integer_type_node
,
18541 V4SF_type_node
, NULL_TREE
);
18542 tree int_ftype_v8qi
18543 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
18544 tree v4sf_ftype_v4sf_int
18545 = build_function_type_list (V4SF_type_node
,
18546 V4SF_type_node
, integer_type_node
, NULL_TREE
);
18547 tree v4sf_ftype_v4sf_int64
18548 = build_function_type_list (V4SF_type_node
,
18549 V4SF_type_node
, long_long_integer_type_node
,
18551 tree v4sf_ftype_v4sf_v2si
18552 = build_function_type_list (V4SF_type_node
,
18553 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
18555 /* Miscellaneous. */
18556 tree v8qi_ftype_v4hi_v4hi
18557 = build_function_type_list (V8QI_type_node
,
18558 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18559 tree v4hi_ftype_v2si_v2si
18560 = build_function_type_list (V4HI_type_node
,
18561 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18562 tree v4sf_ftype_v4sf_v4sf_int
18563 = build_function_type_list (V4SF_type_node
,
18564 V4SF_type_node
, V4SF_type_node
,
18565 integer_type_node
, NULL_TREE
);
18566 tree v2si_ftype_v4hi_v4hi
18567 = build_function_type_list (V2SI_type_node
,
18568 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18569 tree v4hi_ftype_v4hi_int
18570 = build_function_type_list (V4HI_type_node
,
18571 V4HI_type_node
, integer_type_node
, NULL_TREE
);
18572 tree v4hi_ftype_v4hi_di
18573 = build_function_type_list (V4HI_type_node
,
18574 V4HI_type_node
, long_long_unsigned_type_node
,
18576 tree v2si_ftype_v2si_di
18577 = build_function_type_list (V2SI_type_node
,
18578 V2SI_type_node
, long_long_unsigned_type_node
,
18580 tree void_ftype_void
18581 = build_function_type (void_type_node
, void_list_node
);
18582 tree void_ftype_unsigned
18583 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
18584 tree void_ftype_unsigned_unsigned
18585 = build_function_type_list (void_type_node
, unsigned_type_node
,
18586 unsigned_type_node
, NULL_TREE
);
18587 tree void_ftype_pcvoid_unsigned_unsigned
18588 = build_function_type_list (void_type_node
, const_ptr_type_node
,
18589 unsigned_type_node
, unsigned_type_node
,
18591 tree unsigned_ftype_void
18592 = build_function_type (unsigned_type_node
, void_list_node
);
18593 tree v2si_ftype_v4sf
18594 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
18595 /* Loads/stores. */
18596 tree void_ftype_v8qi_v8qi_pchar
18597 = build_function_type_list (void_type_node
,
18598 V8QI_type_node
, V8QI_type_node
,
18599 pchar_type_node
, NULL_TREE
);
18600 tree v4sf_ftype_pcfloat
18601 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
18602 /* @@@ the type is bogus */
18603 tree v4sf_ftype_v4sf_pv2si
18604 = build_function_type_list (V4SF_type_node
,
18605 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
18606 tree void_ftype_pv2si_v4sf
18607 = build_function_type_list (void_type_node
,
18608 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
18609 tree void_ftype_pfloat_v4sf
18610 = build_function_type_list (void_type_node
,
18611 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
18612 tree void_ftype_pdi_di
18613 = build_function_type_list (void_type_node
,
18614 pdi_type_node
, long_long_unsigned_type_node
,
18616 tree void_ftype_pv2di_v2di
18617 = build_function_type_list (void_type_node
,
18618 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
18619 /* Normal vector unops. */
18620 tree v4sf_ftype_v4sf
18621 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18622 tree v16qi_ftype_v16qi
18623 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18624 tree v8hi_ftype_v8hi
18625 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18626 tree v4si_ftype_v4si
18627 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18628 tree v8qi_ftype_v8qi
18629 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18630 tree v4hi_ftype_v4hi
18631 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18633 /* Normal vector binops. */
18634 tree v4sf_ftype_v4sf_v4sf
18635 = build_function_type_list (V4SF_type_node
,
18636 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18637 tree v8qi_ftype_v8qi_v8qi
18638 = build_function_type_list (V8QI_type_node
,
18639 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18640 tree v4hi_ftype_v4hi_v4hi
18641 = build_function_type_list (V4HI_type_node
,
18642 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18643 tree v2si_ftype_v2si_v2si
18644 = build_function_type_list (V2SI_type_node
,
18645 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18646 tree di_ftype_di_di
18647 = build_function_type_list (long_long_unsigned_type_node
,
18648 long_long_unsigned_type_node
,
18649 long_long_unsigned_type_node
, NULL_TREE
);
18651 tree di_ftype_di_di_int
18652 = build_function_type_list (long_long_unsigned_type_node
,
18653 long_long_unsigned_type_node
,
18654 long_long_unsigned_type_node
,
18655 integer_type_node
, NULL_TREE
);
18657 tree v2si_ftype_v2sf
18658 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
18659 tree v2sf_ftype_v2si
18660 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
18661 tree v2si_ftype_v2si
18662 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18663 tree v2sf_ftype_v2sf
18664 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
18665 tree v2sf_ftype_v2sf_v2sf
18666 = build_function_type_list (V2SF_type_node
,
18667 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
18668 tree v2si_ftype_v2sf_v2sf
18669 = build_function_type_list (V2SI_type_node
,
18670 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
18671 tree pint_type_node
= build_pointer_type (integer_type_node
);
18672 tree pdouble_type_node
= build_pointer_type (double_type_node
);
18673 tree pcdouble_type_node
= build_pointer_type (
18674 build_type_variant (double_type_node
, 1, 0));
18675 tree int_ftype_v2df_v2df
18676 = build_function_type_list (integer_type_node
,
18677 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18679 tree void_ftype_pcvoid
18680 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
18681 tree v4sf_ftype_v4si
18682 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
18683 tree v4si_ftype_v4sf
18684 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
18685 tree v2df_ftype_v4si
18686 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
18687 tree v4si_ftype_v2df
18688 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
18689 tree v4si_ftype_v2df_v2df
18690 = build_function_type_list (V4SI_type_node
,
18691 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18692 tree v2si_ftype_v2df
18693 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
18694 tree v4sf_ftype_v2df
18695 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
18696 tree v2df_ftype_v2si
18697 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
18698 tree v2df_ftype_v4sf
18699 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
18700 tree int_ftype_v2df
18701 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
18702 tree int64_ftype_v2df
18703 = build_function_type_list (long_long_integer_type_node
,
18704 V2DF_type_node
, NULL_TREE
);
18705 tree v2df_ftype_v2df_int
18706 = build_function_type_list (V2DF_type_node
,
18707 V2DF_type_node
, integer_type_node
, NULL_TREE
);
18708 tree v2df_ftype_v2df_int64
18709 = build_function_type_list (V2DF_type_node
,
18710 V2DF_type_node
, long_long_integer_type_node
,
18712 tree v4sf_ftype_v4sf_v2df
18713 = build_function_type_list (V4SF_type_node
,
18714 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
18715 tree v2df_ftype_v2df_v4sf
18716 = build_function_type_list (V2DF_type_node
,
18717 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
18718 tree v2df_ftype_v2df_v2df_int
18719 = build_function_type_list (V2DF_type_node
,
18720 V2DF_type_node
, V2DF_type_node
,
18723 tree v2df_ftype_v2df_pcdouble
18724 = build_function_type_list (V2DF_type_node
,
18725 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
18726 tree void_ftype_pdouble_v2df
18727 = build_function_type_list (void_type_node
,
18728 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
18729 tree void_ftype_pint_int
18730 = build_function_type_list (void_type_node
,
18731 pint_type_node
, integer_type_node
, NULL_TREE
);
18732 tree void_ftype_v16qi_v16qi_pchar
18733 = build_function_type_list (void_type_node
,
18734 V16QI_type_node
, V16QI_type_node
,
18735 pchar_type_node
, NULL_TREE
);
18736 tree v2df_ftype_pcdouble
18737 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
18738 tree v2df_ftype_v2df_v2df
18739 = build_function_type_list (V2DF_type_node
,
18740 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18741 tree v16qi_ftype_v16qi_v16qi
18742 = build_function_type_list (V16QI_type_node
,
18743 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18744 tree v8hi_ftype_v8hi_v8hi
18745 = build_function_type_list (V8HI_type_node
,
18746 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18747 tree v4si_ftype_v4si_v4si
18748 = build_function_type_list (V4SI_type_node
,
18749 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18750 tree v2di_ftype_v2di_v2di
18751 = build_function_type_list (V2DI_type_node
,
18752 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
18753 tree v2di_ftype_v2df_v2df
18754 = build_function_type_list (V2DI_type_node
,
18755 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18756 tree v2df_ftype_v2df
18757 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18758 tree v2di_ftype_v2di_int
18759 = build_function_type_list (V2DI_type_node
,
18760 V2DI_type_node
, integer_type_node
, NULL_TREE
);
18761 tree v2di_ftype_v2di_v2di_int
18762 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18763 V2DI_type_node
, integer_type_node
, NULL_TREE
);
18764 tree v4si_ftype_v4si_int
18765 = build_function_type_list (V4SI_type_node
,
18766 V4SI_type_node
, integer_type_node
, NULL_TREE
);
18767 tree v8hi_ftype_v8hi_int
18768 = build_function_type_list (V8HI_type_node
,
18769 V8HI_type_node
, integer_type_node
, NULL_TREE
);
18770 tree v4si_ftype_v8hi_v8hi
18771 = build_function_type_list (V4SI_type_node
,
18772 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18773 tree di_ftype_v8qi_v8qi
18774 = build_function_type_list (long_long_unsigned_type_node
,
18775 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18776 tree di_ftype_v2si_v2si
18777 = build_function_type_list (long_long_unsigned_type_node
,
18778 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18779 tree v2di_ftype_v16qi_v16qi
18780 = build_function_type_list (V2DI_type_node
,
18781 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18782 tree v2di_ftype_v4si_v4si
18783 = build_function_type_list (V2DI_type_node
,
18784 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18785 tree int_ftype_v16qi
18786 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
18787 tree v16qi_ftype_pcchar
18788 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
18789 tree void_ftype_pchar_v16qi
18790 = build_function_type_list (void_type_node
,
18791 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
18793 tree v2di_ftype_v2di_unsigned_unsigned
18794 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18795 unsigned_type_node
, unsigned_type_node
,
18797 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18798 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
18799 unsigned_type_node
, unsigned_type_node
,
18801 tree v2di_ftype_v2di_v16qi
18802 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
18804 tree v2df_ftype_v2df_v2df_v2df
18805 = build_function_type_list (V2DF_type_node
,
18806 V2DF_type_node
, V2DF_type_node
,
18807 V2DF_type_node
, NULL_TREE
);
18808 tree v4sf_ftype_v4sf_v4sf_v4sf
18809 = build_function_type_list (V4SF_type_node
,
18810 V4SF_type_node
, V4SF_type_node
,
18811 V4SF_type_node
, NULL_TREE
);
18812 tree v8hi_ftype_v16qi
18813 = build_function_type_list (V8HI_type_node
, V16QI_type_node
,
18815 tree v4si_ftype_v16qi
18816 = build_function_type_list (V4SI_type_node
, V16QI_type_node
,
18818 tree v2di_ftype_v16qi
18819 = build_function_type_list (V2DI_type_node
, V16QI_type_node
,
18821 tree v4si_ftype_v8hi
18822 = build_function_type_list (V4SI_type_node
, V8HI_type_node
,
18824 tree v2di_ftype_v8hi
18825 = build_function_type_list (V2DI_type_node
, V8HI_type_node
,
18827 tree v2di_ftype_v4si
18828 = build_function_type_list (V2DI_type_node
, V4SI_type_node
,
18830 tree v2di_ftype_pv2di
18831 = build_function_type_list (V2DI_type_node
, pv2di_type_node
,
18833 tree v16qi_ftype_v16qi_v16qi_int
18834 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18835 V16QI_type_node
, integer_type_node
,
18837 tree v16qi_ftype_v16qi_v16qi_v16qi
18838 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18839 V16QI_type_node
, V16QI_type_node
,
18841 tree v8hi_ftype_v8hi_v8hi_int
18842 = build_function_type_list (V8HI_type_node
, V8HI_type_node
,
18843 V8HI_type_node
, integer_type_node
,
18845 tree v4si_ftype_v4si_v4si_int
18846 = build_function_type_list (V4SI_type_node
, V4SI_type_node
,
18847 V4SI_type_node
, integer_type_node
,
18849 tree int_ftype_v2di_v2di
18850 = build_function_type_list (integer_type_node
,
18851 V2DI_type_node
, V2DI_type_node
,
18853 tree int_ftype_v16qi_int_v16qi_int_int
18854 = build_function_type_list (integer_type_node
,
18861 tree v16qi_ftype_v16qi_int_v16qi_int_int
18862 = build_function_type_list (V16QI_type_node
,
18869 tree int_ftype_v16qi_v16qi_int
18870 = build_function_type_list (integer_type_node
,
18876 /* SSE5 instructions */
18877 tree v2di_ftype_v2di_v2di_v2di
18878 = build_function_type_list (V2DI_type_node
,
18884 tree v4si_ftype_v4si_v4si_v4si
18885 = build_function_type_list (V4SI_type_node
,
18891 tree v4si_ftype_v4si_v4si_v2di
18892 = build_function_type_list (V4SI_type_node
,
18898 tree v8hi_ftype_v8hi_v8hi_v8hi
18899 = build_function_type_list (V8HI_type_node
,
18905 tree v8hi_ftype_v8hi_v8hi_v4si
18906 = build_function_type_list (V8HI_type_node
,
18912 tree v2df_ftype_v2df_v2df_v16qi
18913 = build_function_type_list (V2DF_type_node
,
18919 tree v4sf_ftype_v4sf_v4sf_v16qi
18920 = build_function_type_list (V4SF_type_node
,
18926 tree v2di_ftype_v2di_si
18927 = build_function_type_list (V2DI_type_node
,
18932 tree v4si_ftype_v4si_si
18933 = build_function_type_list (V4SI_type_node
,
18938 tree v8hi_ftype_v8hi_si
18939 = build_function_type_list (V8HI_type_node
,
18944 tree v16qi_ftype_v16qi_si
18945 = build_function_type_list (V16QI_type_node
,
18949 tree v4sf_ftype_v4hi
18950 = build_function_type_list (V4SF_type_node
,
18954 tree v4hi_ftype_v4sf
18955 = build_function_type_list (V4HI_type_node
,
18959 tree v2di_ftype_v2di
18960 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
18964 /* The __float80 type. */
18965 if (TYPE_MODE (long_double_type_node
) == XFmode
)
18966 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
18970 /* The __float80 type. */
18971 tree float80_type_node
= make_node (REAL_TYPE
);
18973 TYPE_PRECISION (float80_type_node
) = 80;
18974 layout_type (float80_type_node
);
18975 (*lang_hooks
.types
.register_builtin_type
) (float80_type_node
,
18981 tree float128_type_node
= make_node (REAL_TYPE
);
18983 TYPE_PRECISION (float128_type_node
) = 128;
18984 layout_type (float128_type_node
);
18985 (*lang_hooks
.types
.register_builtin_type
) (float128_type_node
,
18988 /* TFmode support builtins. */
18989 ftype
= build_function_type (float128_type_node
,
18991 def_builtin (OPTION_MASK_ISA_64BIT
, "__builtin_infq", ftype
, IX86_BUILTIN_INFQ
);
18993 ftype
= build_function_type_list (float128_type_node
,
18994 float128_type_node
,
18996 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_fabsq", ftype
, IX86_BUILTIN_FABSQ
);
18998 ftype
= build_function_type_list (float128_type_node
,
18999 float128_type_node
,
19000 float128_type_node
,
19002 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_copysignq", ftype
, IX86_BUILTIN_COPYSIGNQ
);
19005 /* Add all SSE builtins that are more or less simple operations on
19007 for (i
= 0, d
= bdesc_sse_3arg
;
19008 i
< ARRAY_SIZE (bdesc_sse_3arg
);
19011 /* Use one of the operands; the target can have a different mode for
19012 mask-generating compares. */
19013 enum machine_mode mode
;
19018 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19023 type
= v16qi_ftype_v16qi_v16qi_int
;
19026 type
= v8hi_ftype_v8hi_v8hi_int
;
19029 type
= v4si_ftype_v4si_v4si_int
;
19032 type
= v2di_ftype_v2di_v2di_int
;
19035 type
= v2df_ftype_v2df_v2df_int
;
19038 type
= v4sf_ftype_v4sf_v4sf_int
;
19041 gcc_unreachable ();
19044 /* Override for variable blends. */
19047 case CODE_FOR_sse4_1_blendvpd
:
19048 type
= v2df_ftype_v2df_v2df_v2df
;
19050 case CODE_FOR_sse4_1_blendvps
:
19051 type
= v4sf_ftype_v4sf_v4sf_v4sf
;
19053 case CODE_FOR_sse4_1_pblendvb
:
19054 type
= v16qi_ftype_v16qi_v16qi_v16qi
;
19060 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
19063 /* Add all builtins that are more or less simple operations on two
19065 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19067 /* Use one of the operands; the target can have a different mode for
19068 mask-generating compares. */
19069 enum machine_mode mode
;
19074 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19079 type
= v16qi_ftype_v16qi_v16qi
;
19082 type
= v8hi_ftype_v8hi_v8hi
;
19085 type
= v4si_ftype_v4si_v4si
;
19088 type
= v2di_ftype_v2di_v2di
;
19091 type
= v2df_ftype_v2df_v2df
;
19094 type
= v4sf_ftype_v4sf_v4sf
;
19097 type
= v8qi_ftype_v8qi_v8qi
;
19100 type
= v4hi_ftype_v4hi_v4hi
;
19103 type
= v2si_ftype_v2si_v2si
;
19106 type
= di_ftype_di_di
;
19110 gcc_unreachable ();
19113 /* Override for comparisons. */
19114 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
19115 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
19116 type
= v4si_ftype_v4sf_v4sf
;
19118 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
19119 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
19120 type
= v2di_ftype_v2df_v2df
;
19122 if (d
->icode
== CODE_FOR_vec_pack_sfix_v2df
)
19123 type
= v4si_ftype_v2df_v2df
;
19125 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
19128 /* Add all builtins that are more or less simple operations on 1 operand. */
19129 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19131 enum machine_mode mode
;
19136 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19141 type
= v16qi_ftype_v16qi
;
19144 type
= v8hi_ftype_v8hi
;
19147 type
= v4si_ftype_v4si
;
19150 type
= v2df_ftype_v2df
;
19153 type
= v4sf_ftype_v4sf
;
19156 type
= v8qi_ftype_v8qi
;
19159 type
= v4hi_ftype_v4hi
;
19162 type
= v2si_ftype_v2si
;
19169 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
19172 /* pcmpestr[im] insns. */
19173 for (i
= 0, d
= bdesc_pcmpestr
;
19174 i
< ARRAY_SIZE (bdesc_pcmpestr
);
19177 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
19178 ftype
= v16qi_ftype_v16qi_int_v16qi_int_int
;
19180 ftype
= int_ftype_v16qi_int_v16qi_int_int
;
19181 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
19184 /* pcmpistr[im] insns. */
19185 for (i
= 0, d
= bdesc_pcmpistr
;
19186 i
< ARRAY_SIZE (bdesc_pcmpistr
);
19189 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
19190 ftype
= v16qi_ftype_v16qi_v16qi_int
;
19192 ftype
= int_ftype_v16qi_v16qi_int
;
19193 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
19196 /* Add the remaining MMX insns with somewhat more complicated types. */
19197 def_builtin (OPTION_MASK_ISA_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
19198 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
19199 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
19200 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
19202 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
19203 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
19204 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
19206 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
19207 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
19209 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
19210 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
19212 /* comi/ucomi insns. */
19213 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
19214 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
19215 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
19217 def_builtin_const (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
19220 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
19221 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2di_v2di
, d
->code
);
19223 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
19224 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
19225 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
19227 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
19228 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
19229 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
19230 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
19231 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
19232 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
19233 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
19234 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
19235 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
19236 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
19237 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
19239 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
19241 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
19242 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
19244 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
19245 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
19246 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
19247 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
19249 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
19250 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
19251 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
19252 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
19254 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
19256 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
19258 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
19259 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
19260 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
19261 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
19262 ftype
= build_function_type_list (float_type_node
,
19265 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtf", ftype
, IX86_BUILTIN_RSQRTF
);
19266 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
19267 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
19269 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
19271 /* Original 3DNow! */
19272 def_builtin (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
19273 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
19274 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
19275 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
19276 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
19277 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
19278 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
19279 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
19280 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
19281 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
19282 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
19283 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
19284 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
19285 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
19286 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
19287 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
19288 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
19289 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
19290 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
19291 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
19293 /* 3DNow! extension as used in the Athlon CPU. */
19294 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
19295 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
19296 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
19297 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
19298 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
19299 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
19302 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
19304 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
19305 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
19307 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
19308 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
19310 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
19311 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
19312 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
19313 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
19314 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
19316 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
19317 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
19318 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
19319 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
19321 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
19322 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
19324 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
19326 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
19327 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
19329 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
19330 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
19331 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
19332 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
19333 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
19335 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
19337 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
19338 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
19339 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
19340 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
19342 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
19343 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
19344 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
19346 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
19347 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
19348 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
19349 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
19351 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
19352 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
19353 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
19355 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
19356 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
19358 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
19359 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
19361 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
19362 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
19363 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
19364 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
19365 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
19366 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
19367 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
19369 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
19370 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
19371 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
19372 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
19373 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
19374 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
19375 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
19377 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
19378 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
19379 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
19380 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
19382 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
19384 /* Prescott New Instructions. */
19385 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned
, IX86_BUILTIN_MONITOR
);
19386 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned
, IX86_BUILTIN_MWAIT
);
19387 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_lddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
19390 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
19391 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
, IX86_BUILTIN_PALIGNR
);
19394 def_builtin (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_movntdqa", v2di_ftype_pv2di
, IX86_BUILTIN_MOVNTDQA
);
19395 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVSXBW128
);
19396 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVSXBD128
);
19397 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVSXBQ128
);
19398 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVSXWD128
);
19399 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVSXWQ128
);
19400 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVSXDQ128
);
19401 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVZXBW128
);
19402 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVZXBD128
);
19403 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVZXBQ128
);
19404 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVZXWD128
);
19405 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVZXWQ128
);
19406 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVZXDQ128
);
19407 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULDQ128
);
19409 /* SSE4.1 and SSE5 */
19410 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundpd", v2df_ftype_v2df_int
, IX86_BUILTIN_ROUNDPD
);
19411 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int
, IX86_BUILTIN_ROUNDPS
);
19412 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_ROUNDSD
);
19413 def_builtin_const (OPTION_MASK_ISA_ROUND
, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_ROUNDSS
);
19416 ftype
= build_function_type_list (unsigned_type_node
,
19417 unsigned_type_node
,
19418 unsigned_char_type_node
,
19420 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32qi", ftype
, IX86_BUILTIN_CRC32QI
);
19421 ftype
= build_function_type_list (unsigned_type_node
,
19422 unsigned_type_node
,
19423 short_unsigned_type_node
,
19425 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32hi", ftype
, IX86_BUILTIN_CRC32HI
);
19426 ftype
= build_function_type_list (unsigned_type_node
,
19427 unsigned_type_node
,
19428 unsigned_type_node
,
19430 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32si", ftype
, IX86_BUILTIN_CRC32SI
);
19431 ftype
= build_function_type_list (long_long_unsigned_type_node
,
19432 long_long_unsigned_type_node
,
19433 long_long_unsigned_type_node
,
19435 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32di", ftype
, IX86_BUILTIN_CRC32DI
);
19437 /* AMDFAM10 SSE4A New built-ins */
19438 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
19439 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
19440 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
19441 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
19442 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
19443 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
19445 /* Access to the vec_init patterns. */
19446 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
19447 integer_type_node
, NULL_TREE
);
19448 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si", ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
19450 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
19451 short_integer_type_node
,
19452 short_integer_type_node
,
19453 short_integer_type_node
, NULL_TREE
);
19454 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi", ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
19456 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
19457 char_type_node
, char_type_node
,
19458 char_type_node
, char_type_node
,
19459 char_type_node
, char_type_node
,
19460 char_type_node
, NULL_TREE
);
19461 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi", ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
19463 /* Access to the vec_extract patterns. */
19464 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
19465 integer_type_node
, NULL_TREE
);
19466 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df", ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
19468 ftype
= build_function_type_list (long_long_integer_type_node
,
19469 V2DI_type_node
, integer_type_node
,
19471 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di", ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
19473 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
19474 integer_type_node
, NULL_TREE
);
19475 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf", ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
19477 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
19478 integer_type_node
, NULL_TREE
);
19479 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si", ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
19481 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
19482 integer_type_node
, NULL_TREE
);
19483 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi", ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
19485 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
19486 integer_type_node
, NULL_TREE
);
19487 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi", ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
19489 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
19490 integer_type_node
, NULL_TREE
);
19491 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si", ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
19493 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
19494 integer_type_node
, NULL_TREE
);
19495 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi", ftype
, IX86_BUILTIN_VEC_EXT_V16QI
);
19497 /* Access to the vec_set patterns. */
19498 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
19500 integer_type_node
, NULL_TREE
);
19501 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_vec_set_v2di", ftype
, IX86_BUILTIN_VEC_SET_V2DI
);
19503 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
19505 integer_type_node
, NULL_TREE
);
19506 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf", ftype
, IX86_BUILTIN_VEC_SET_V4SF
);
19508 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
19510 integer_type_node
, NULL_TREE
);
19511 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si", ftype
, IX86_BUILTIN_VEC_SET_V4SI
);
19513 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
19515 integer_type_node
, NULL_TREE
);
19516 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi", ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
19518 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
19520 integer_type_node
, NULL_TREE
);
19521 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_set_v4hi", ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
19523 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
19525 integer_type_node
, NULL_TREE
);
19526 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi", ftype
, IX86_BUILTIN_VEC_SET_V16QI
);
19528 /* Add SSE5 multi-arg argument instructions */
19529 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
19531 tree mtype
= NULL_TREE
;
19536 switch ((enum multi_arg_type
)d
->flag
)
19538 case MULTI_ARG_3_SF
: mtype
= v4sf_ftype_v4sf_v4sf_v4sf
; break;
19539 case MULTI_ARG_3_DF
: mtype
= v2df_ftype_v2df_v2df_v2df
; break;
19540 case MULTI_ARG_3_DI
: mtype
= v2di_ftype_v2di_v2di_v2di
; break;
19541 case MULTI_ARG_3_SI
: mtype
= v4si_ftype_v4si_v4si_v4si
; break;
19542 case MULTI_ARG_3_SI_DI
: mtype
= v4si_ftype_v4si_v4si_v2di
; break;
19543 case MULTI_ARG_3_HI
: mtype
= v8hi_ftype_v8hi_v8hi_v8hi
; break;
19544 case MULTI_ARG_3_HI_SI
: mtype
= v8hi_ftype_v8hi_v8hi_v4si
; break;
19545 case MULTI_ARG_3_QI
: mtype
= v16qi_ftype_v16qi_v16qi_v16qi
; break;
19546 case MULTI_ARG_3_PERMPS
: mtype
= v4sf_ftype_v4sf_v4sf_v16qi
; break;
19547 case MULTI_ARG_3_PERMPD
: mtype
= v2df_ftype_v2df_v2df_v16qi
; break;
19548 case MULTI_ARG_2_SF
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
19549 case MULTI_ARG_2_DF
: mtype
= v2df_ftype_v2df_v2df
; break;
19550 case MULTI_ARG_2_DI
: mtype
= v2di_ftype_v2di_v2di
; break;
19551 case MULTI_ARG_2_SI
: mtype
= v4si_ftype_v4si_v4si
; break;
19552 case MULTI_ARG_2_HI
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
19553 case MULTI_ARG_2_QI
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
19554 case MULTI_ARG_2_DI_IMM
: mtype
= v2di_ftype_v2di_si
; break;
19555 case MULTI_ARG_2_SI_IMM
: mtype
= v4si_ftype_v4si_si
; break;
19556 case MULTI_ARG_2_HI_IMM
: mtype
= v8hi_ftype_v8hi_si
; break;
19557 case MULTI_ARG_2_QI_IMM
: mtype
= v16qi_ftype_v16qi_si
; break;
19558 case MULTI_ARG_2_SF_CMP
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
19559 case MULTI_ARG_2_DF_CMP
: mtype
= v2df_ftype_v2df_v2df
; break;
19560 case MULTI_ARG_2_DI_CMP
: mtype
= v2di_ftype_v2di_v2di
; break;
19561 case MULTI_ARG_2_SI_CMP
: mtype
= v4si_ftype_v4si_v4si
; break;
19562 case MULTI_ARG_2_HI_CMP
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
19563 case MULTI_ARG_2_QI_CMP
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
19564 case MULTI_ARG_2_SF_TF
: mtype
= v4sf_ftype_v4sf_v4sf
; break;
19565 case MULTI_ARG_2_DF_TF
: mtype
= v2df_ftype_v2df_v2df
; break;
19566 case MULTI_ARG_2_DI_TF
: mtype
= v2di_ftype_v2di_v2di
; break;
19567 case MULTI_ARG_2_SI_TF
: mtype
= v4si_ftype_v4si_v4si
; break;
19568 case MULTI_ARG_2_HI_TF
: mtype
= v8hi_ftype_v8hi_v8hi
; break;
19569 case MULTI_ARG_2_QI_TF
: mtype
= v16qi_ftype_v16qi_v16qi
; break;
19570 case MULTI_ARG_1_SF
: mtype
= v4sf_ftype_v4sf
; break;
19571 case MULTI_ARG_1_DF
: mtype
= v2df_ftype_v2df
; break;
19572 case MULTI_ARG_1_DI
: mtype
= v2di_ftype_v2di
; break;
19573 case MULTI_ARG_1_SI
: mtype
= v4si_ftype_v4si
; break;
19574 case MULTI_ARG_1_HI
: mtype
= v8hi_ftype_v8hi
; break;
19575 case MULTI_ARG_1_QI
: mtype
= v16qi_ftype_v16qi
; break;
19576 case MULTI_ARG_1_SI_DI
: mtype
= v2di_ftype_v4si
; break;
19577 case MULTI_ARG_1_HI_DI
: mtype
= v2di_ftype_v8hi
; break;
19578 case MULTI_ARG_1_HI_SI
: mtype
= v4si_ftype_v8hi
; break;
19579 case MULTI_ARG_1_QI_DI
: mtype
= v2di_ftype_v16qi
; break;
19580 case MULTI_ARG_1_QI_SI
: mtype
= v4si_ftype_v16qi
; break;
19581 case MULTI_ARG_1_QI_HI
: mtype
= v8hi_ftype_v16qi
; break;
19582 case MULTI_ARG_1_PH2PS
: mtype
= v4sf_ftype_v4hi
; break;
19583 case MULTI_ARG_1_PS2PH
: mtype
= v4hi_ftype_v4sf
; break;
19584 case MULTI_ARG_UNKNOWN
:
19586 gcc_unreachable ();
19590 def_builtin_const (d
->mask
, d
->name
, mtype
, d
->code
);
19595 ix86_init_builtins (void)
19598 ix86_init_mmx_sse_builtins ();
19601 /* Errors in the source file can cause expand_expr to return const0_rtx
19602 where we expect a vector. To avoid crashing, use one of the vector
19603 clear instructions. */
19605 safe_vector_operand (rtx x
, enum machine_mode mode
)
19607 if (x
== const0_rtx
)
19608 x
= CONST0_RTX (mode
);
19612 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19613 4 operands. The third argument must be a constant smaller than 8
19617 ix86_expand_sse_4_operands_builtin (enum insn_code icode
, tree exp
,
19621 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19622 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19623 tree arg2
= CALL_EXPR_ARG (exp
, 2);
19624 rtx op0
= expand_normal (arg0
);
19625 rtx op1
= expand_normal (arg1
);
19626 rtx op2
= expand_normal (arg2
);
19627 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19628 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
19629 enum machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
19630 enum machine_mode mode3
= insn_data
[icode
].operand
[3].mode
;
19632 if (VECTOR_MODE_P (mode1
))
19633 op0
= safe_vector_operand (op0
, mode1
);
19634 if (VECTOR_MODE_P (mode2
))
19635 op1
= safe_vector_operand (op1
, mode2
);
19636 if (VECTOR_MODE_P (mode3
))
19637 op2
= safe_vector_operand (op2
, mode3
);
19641 || GET_MODE (target
) != tmode
19642 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19643 target
= gen_reg_rtx (tmode
);
19645 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19646 op0
= copy_to_mode_reg (mode1
, op0
);
19647 if ((optimize
&& !register_operand (op1
, mode2
))
19648 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19649 op1
= copy_to_mode_reg (mode2
, op1
);
19651 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19654 case CODE_FOR_sse4_1_blendvpd
:
19655 case CODE_FOR_sse4_1_blendvps
:
19656 case CODE_FOR_sse4_1_pblendvb
:
19657 op2
= copy_to_mode_reg (mode3
, op2
);
19660 case CODE_FOR_sse4_1_roundsd
:
19661 case CODE_FOR_sse4_1_roundss
:
19662 error ("the third argument must be a 4-bit immediate");
19666 error ("the third argument must be an 8-bit immediate");
19670 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19677 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19680 ix86_expand_crc32 (enum insn_code icode
, tree exp
, rtx target
)
19683 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19684 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19685 rtx op0
= expand_normal (arg0
);
19686 rtx op1
= expand_normal (arg1
);
19687 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19688 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19689 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19693 || GET_MODE (target
) != tmode
19694 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19695 target
= gen_reg_rtx (tmode
);
19697 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19698 op0
= copy_to_mode_reg (mode0
, op0
);
19699 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19701 op1
= copy_to_reg (op1
);
19702 op1
= simplify_gen_subreg (mode1
, op1
, GET_MODE (op1
), 0);
19705 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19712 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19715 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
19718 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19719 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19720 rtx op0
= expand_normal (arg0
);
19721 rtx op1
= expand_normal (arg1
);
19722 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19723 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19724 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19726 if (VECTOR_MODE_P (mode0
))
19727 op0
= safe_vector_operand (op0
, mode0
);
19728 if (VECTOR_MODE_P (mode1
))
19729 op1
= safe_vector_operand (op1
, mode1
);
19731 if (optimize
|| !target
19732 || GET_MODE (target
) != tmode
19733 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19734 target
= gen_reg_rtx (tmode
);
19736 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
19738 rtx x
= gen_reg_rtx (V4SImode
);
19739 emit_insn (gen_sse2_loadd (x
, op1
));
19740 op1
= gen_lowpart (TImode
, x
);
19743 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19744 op0
= copy_to_mode_reg (mode0
, op0
);
19745 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19746 op1
= copy_to_mode_reg (mode1
, op1
);
19748 /* ??? Using ix86_fixup_binary_operands is problematic when
19749 we've got mismatched modes. Fake it. */
19755 if (tmode
== mode0
&& tmode
== mode1
)
19757 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
19761 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
19763 op0
= force_reg (mode0
, op0
);
19764 op1
= force_reg (mode1
, op1
);
19765 target
= gen_reg_rtx (tmode
);
19768 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19775 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19778 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
19779 enum multi_arg_type m_type
,
19780 enum insn_code sub_code
)
19785 bool comparison_p
= false;
19787 bool last_arg_constant
= false;
19788 int num_memory
= 0;
19791 enum machine_mode mode
;
19794 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19798 case MULTI_ARG_3_SF
:
19799 case MULTI_ARG_3_DF
:
19800 case MULTI_ARG_3_DI
:
19801 case MULTI_ARG_3_SI
:
19802 case MULTI_ARG_3_SI_DI
:
19803 case MULTI_ARG_3_HI
:
19804 case MULTI_ARG_3_HI_SI
:
19805 case MULTI_ARG_3_QI
:
19806 case MULTI_ARG_3_PERMPS
:
19807 case MULTI_ARG_3_PERMPD
:
19811 case MULTI_ARG_2_SF
:
19812 case MULTI_ARG_2_DF
:
19813 case MULTI_ARG_2_DI
:
19814 case MULTI_ARG_2_SI
:
19815 case MULTI_ARG_2_HI
:
19816 case MULTI_ARG_2_QI
:
19820 case MULTI_ARG_2_DI_IMM
:
19821 case MULTI_ARG_2_SI_IMM
:
19822 case MULTI_ARG_2_HI_IMM
:
19823 case MULTI_ARG_2_QI_IMM
:
19825 last_arg_constant
= true;
19828 case MULTI_ARG_1_SF
:
19829 case MULTI_ARG_1_DF
:
19830 case MULTI_ARG_1_DI
:
19831 case MULTI_ARG_1_SI
:
19832 case MULTI_ARG_1_HI
:
19833 case MULTI_ARG_1_QI
:
19834 case MULTI_ARG_1_SI_DI
:
19835 case MULTI_ARG_1_HI_DI
:
19836 case MULTI_ARG_1_HI_SI
:
19837 case MULTI_ARG_1_QI_DI
:
19838 case MULTI_ARG_1_QI_SI
:
19839 case MULTI_ARG_1_QI_HI
:
19840 case MULTI_ARG_1_PH2PS
:
19841 case MULTI_ARG_1_PS2PH
:
19845 case MULTI_ARG_2_SF_CMP
:
19846 case MULTI_ARG_2_DF_CMP
:
19847 case MULTI_ARG_2_DI_CMP
:
19848 case MULTI_ARG_2_SI_CMP
:
19849 case MULTI_ARG_2_HI_CMP
:
19850 case MULTI_ARG_2_QI_CMP
:
19852 comparison_p
= true;
19855 case MULTI_ARG_2_SF_TF
:
19856 case MULTI_ARG_2_DF_TF
:
19857 case MULTI_ARG_2_DI_TF
:
19858 case MULTI_ARG_2_SI_TF
:
19859 case MULTI_ARG_2_HI_TF
:
19860 case MULTI_ARG_2_QI_TF
:
19865 case MULTI_ARG_UNKNOWN
:
19867 gcc_unreachable ();
19870 if (optimize
|| !target
19871 || GET_MODE (target
) != tmode
19872 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19873 target
= gen_reg_rtx (tmode
);
19875 gcc_assert (nargs
<= 4);
19877 for (i
= 0; i
< nargs
; i
++)
19879 tree arg
= CALL_EXPR_ARG (exp
, i
);
19880 rtx op
= expand_normal (arg
);
19881 int adjust
= (comparison_p
) ? 1 : 0;
19882 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
19884 if (last_arg_constant
&& i
== nargs
-1)
19886 if (GET_CODE (op
) != CONST_INT
)
19888 error ("last argument must be an immediate");
19889 return gen_reg_rtx (tmode
);
19894 if (VECTOR_MODE_P (mode
))
19895 op
= safe_vector_operand (op
, mode
);
19897 /* If we aren't optimizing, only allow one memory operand to be
19899 if (memory_operand (op
, mode
))
19902 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
19905 || ! (*insn_data
[icode
].operand
[i
+adjust
+1].predicate
) (op
, mode
)
19907 op
= force_reg (mode
, op
);
19911 args
[i
].mode
= mode
;
19917 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
19922 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
19923 GEN_INT ((int)sub_code
));
19924 else if (! comparison_p
)
19925 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
19928 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
19932 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
19937 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
19941 gcc_unreachable ();
19951 /* Subroutine of ix86_expand_builtin to take care of stores. */
19954 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
19957 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19958 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19959 rtx op0
= expand_normal (arg0
);
19960 rtx op1
= expand_normal (arg1
);
19961 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
19962 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
19964 if (VECTOR_MODE_P (mode1
))
19965 op1
= safe_vector_operand (op1
, mode1
);
19967 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19968 op1
= copy_to_mode_reg (mode1
, op1
);
19970 pat
= GEN_FCN (icode
) (op0
, op1
);
19976 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
19979 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
19980 rtx target
, int do_load
)
19983 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19984 rtx op0
= expand_normal (arg0
);
19985 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19986 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19988 if (optimize
|| !target
19989 || GET_MODE (target
) != tmode
19990 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19991 target
= gen_reg_rtx (tmode
);
19993 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19996 if (VECTOR_MODE_P (mode0
))
19997 op0
= safe_vector_operand (op0
, mode0
);
19999 if ((optimize
&& !register_operand (op0
, mode0
))
20000 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20001 op0
= copy_to_mode_reg (mode0
, op0
);
20006 case CODE_FOR_sse4_1_roundpd
:
20007 case CODE_FOR_sse4_1_roundps
:
20009 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20010 rtx op1
= expand_normal (arg1
);
20011 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
20013 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20015 error ("the second argument must be a 4-bit immediate");
20018 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20022 pat
= GEN_FCN (icode
) (target
, op0
);
20032 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20033 sqrtss, rsqrtss, rcpss. */
20036 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
20039 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20040 rtx op1
, op0
= expand_normal (arg0
);
20041 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20042 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
20044 if (optimize
|| !target
20045 || GET_MODE (target
) != tmode
20046 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20047 target
= gen_reg_rtx (tmode
);
20049 if (VECTOR_MODE_P (mode0
))
20050 op0
= safe_vector_operand (op0
, mode0
);
20052 if ((optimize
&& !register_operand (op0
, mode0
))
20053 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20054 op0
= copy_to_mode_reg (mode0
, op0
);
20057 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
20058 op1
= copy_to_mode_reg (mode0
, op1
);
20060 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20067 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20070 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
20074 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20075 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20076 rtx op0
= expand_normal (arg0
);
20077 rtx op1
= expand_normal (arg1
);
20079 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
20080 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
20081 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
20082 enum rtx_code comparison
= d
->comparison
;
20084 if (VECTOR_MODE_P (mode0
))
20085 op0
= safe_vector_operand (op0
, mode0
);
20086 if (VECTOR_MODE_P (mode1
))
20087 op1
= safe_vector_operand (op1
, mode1
);
20089 /* Swap operands if we have a comparison that isn't available in
20091 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
20093 rtx tmp
= gen_reg_rtx (mode1
);
20094 emit_move_insn (tmp
, op1
);
20099 if (optimize
|| !target
20100 || GET_MODE (target
) != tmode
20101 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
20102 target
= gen_reg_rtx (tmode
);
20104 if ((optimize
&& !register_operand (op0
, mode0
))
20105 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
20106 op0
= copy_to_mode_reg (mode0
, op0
);
20107 if ((optimize
&& !register_operand (op1
, mode1
))
20108 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
20109 op1
= copy_to_mode_reg (mode1
, op1
);
20111 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
20112 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
20119 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20122 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
20126 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20127 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20128 rtx op0
= expand_normal (arg0
);
20129 rtx op1
= expand_normal (arg1
);
20130 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
20131 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
20132 enum rtx_code comparison
= d
->comparison
;
20134 if (VECTOR_MODE_P (mode0
))
20135 op0
= safe_vector_operand (op0
, mode0
);
20136 if (VECTOR_MODE_P (mode1
))
20137 op1
= safe_vector_operand (op1
, mode1
);
20139 /* Swap operands if we have a comparison that isn't available in
20141 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
20148 target
= gen_reg_rtx (SImode
);
20149 emit_move_insn (target
, const0_rtx
);
20150 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20152 if ((optimize
&& !register_operand (op0
, mode0
))
20153 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
20154 op0
= copy_to_mode_reg (mode0
, op0
);
20155 if ((optimize
&& !register_operand (op1
, mode1
))
20156 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
20157 op1
= copy_to_mode_reg (mode1
, op1
);
20159 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
20163 emit_insn (gen_rtx_SET (VOIDmode
,
20164 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20165 gen_rtx_fmt_ee (comparison
, QImode
,
20169 return SUBREG_REG (target
);
20172 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20175 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
20179 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20180 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20181 rtx op0
= expand_normal (arg0
);
20182 rtx op1
= expand_normal (arg1
);
20183 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
20184 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
20185 enum rtx_code comparison
= d
->comparison
;
20187 if (VECTOR_MODE_P (mode0
))
20188 op0
= safe_vector_operand (op0
, mode0
);
20189 if (VECTOR_MODE_P (mode1
))
20190 op1
= safe_vector_operand (op1
, mode1
);
20192 target
= gen_reg_rtx (SImode
);
20193 emit_move_insn (target
, const0_rtx
);
20194 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20196 if ((optimize
&& !register_operand (op0
, mode0
))
20197 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
20198 op0
= copy_to_mode_reg (mode0
, op0
);
20199 if ((optimize
&& !register_operand (op1
, mode1
))
20200 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
20201 op1
= copy_to_mode_reg (mode1
, op1
);
20203 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
20207 emit_insn (gen_rtx_SET (VOIDmode
,
20208 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20209 gen_rtx_fmt_ee (comparison
, QImode
,
20213 return SUBREG_REG (target
);
20216 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20219 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
20220 tree exp
, rtx target
)
20223 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20224 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20225 tree arg2
= CALL_EXPR_ARG (exp
, 2);
20226 tree arg3
= CALL_EXPR_ARG (exp
, 3);
20227 tree arg4
= CALL_EXPR_ARG (exp
, 4);
20228 rtx scratch0
, scratch1
;
20229 rtx op0
= expand_normal (arg0
);
20230 rtx op1
= expand_normal (arg1
);
20231 rtx op2
= expand_normal (arg2
);
20232 rtx op3
= expand_normal (arg3
);
20233 rtx op4
= expand_normal (arg4
);
20234 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
20236 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
20237 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
20238 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
20239 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
20240 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
20241 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
20242 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
20244 if (VECTOR_MODE_P (modev2
))
20245 op0
= safe_vector_operand (op0
, modev2
);
20246 if (VECTOR_MODE_P (modev4
))
20247 op2
= safe_vector_operand (op2
, modev4
);
20249 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
20250 op0
= copy_to_mode_reg (modev2
, op0
);
20251 if (! (*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modei3
))
20252 op1
= copy_to_mode_reg (modei3
, op1
);
20253 if ((optimize
&& !register_operand (op2
, modev4
))
20254 || !(*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modev4
))
20255 op2
= copy_to_mode_reg (modev4
, op2
);
20256 if (! (*insn_data
[d
->icode
].operand
[5].predicate
) (op3
, modei5
))
20257 op3
= copy_to_mode_reg (modei5
, op3
);
20259 if (! (*insn_data
[d
->icode
].operand
[6].predicate
) (op4
, modeimm
))
20261 error ("the fifth argument must be a 8-bit immediate");
20265 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
20267 if (optimize
|| !target
20268 || GET_MODE (target
) != tmode0
20269 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
20270 target
= gen_reg_rtx (tmode0
);
20272 scratch1
= gen_reg_rtx (tmode1
);
20274 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
20276 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
20278 if (optimize
|| !target
20279 || GET_MODE (target
) != tmode1
20280 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
20281 target
= gen_reg_rtx (tmode1
);
20283 scratch0
= gen_reg_rtx (tmode0
);
20285 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
20289 gcc_assert (d
->flag
);
20291 scratch0
= gen_reg_rtx (tmode0
);
20292 scratch1
= gen_reg_rtx (tmode1
);
20294 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
20304 target
= gen_reg_rtx (SImode
);
20305 emit_move_insn (target
, const0_rtx
);
20306 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20309 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20310 gen_rtx_fmt_ee (EQ
, QImode
,
20311 gen_rtx_REG ((enum machine_mode
) d
->flag
,
20314 return SUBREG_REG (target
);
20321 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20324 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
20325 tree exp
, rtx target
)
20328 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20329 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20330 tree arg2
= CALL_EXPR_ARG (exp
, 2);
20331 rtx scratch0
, scratch1
;
20332 rtx op0
= expand_normal (arg0
);
20333 rtx op1
= expand_normal (arg1
);
20334 rtx op2
= expand_normal (arg2
);
20335 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
20337 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
20338 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
20339 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
20340 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
20341 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
20343 if (VECTOR_MODE_P (modev2
))
20344 op0
= safe_vector_operand (op0
, modev2
);
20345 if (VECTOR_MODE_P (modev3
))
20346 op1
= safe_vector_operand (op1
, modev3
);
20348 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
20349 op0
= copy_to_mode_reg (modev2
, op0
);
20350 if ((optimize
&& !register_operand (op1
, modev3
))
20351 || !(*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modev3
))
20352 op1
= copy_to_mode_reg (modev3
, op1
);
20354 if (! (*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modeimm
))
20356 error ("the third argument must be a 8-bit immediate");
20360 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
20362 if (optimize
|| !target
20363 || GET_MODE (target
) != tmode0
20364 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
20365 target
= gen_reg_rtx (tmode0
);
20367 scratch1
= gen_reg_rtx (tmode1
);
20369 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
20371 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
20373 if (optimize
|| !target
20374 || GET_MODE (target
) != tmode1
20375 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
20376 target
= gen_reg_rtx (tmode1
);
20378 scratch0
= gen_reg_rtx (tmode0
);
20380 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
20384 gcc_assert (d
->flag
);
20386 scratch0
= gen_reg_rtx (tmode0
);
20387 scratch1
= gen_reg_rtx (tmode1
);
20389 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
20399 target
= gen_reg_rtx (SImode
);
20400 emit_move_insn (target
, const0_rtx
);
20401 target
= gen_rtx_SUBREG (QImode
, target
, 0);
20404 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
20405 gen_rtx_fmt_ee (EQ
, QImode
,
20406 gen_rtx_REG ((enum machine_mode
) d
->flag
,
20409 return SUBREG_REG (target
);
20415 /* Return the integer constant in ARG. Constrain it to be in the range
20416 of the subparts of VEC_TYPE; issue an error if not. */
20419 get_element_number (tree vec_type
, tree arg
)
20421 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
20423 if (!host_integerp (arg
, 1)
20424 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
20426 error ("selector must be an integer constant in the range 0..%wi", max
);
20433 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20434 ix86_expand_vector_init. We DO have language-level syntax for this, in
20435 the form of (type){ init-list }. Except that since we can't place emms
20436 instructions from inside the compiler, we can't allow the use of MMX
20437 registers unless the user explicitly asks for it. So we do *not* define
20438 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20439 we have builtins invoked by mmintrin.h that gives us license to emit
20440 these sorts of instructions. */
20443 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
20445 enum machine_mode tmode
= TYPE_MODE (type
);
20446 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
20447 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
20448 rtvec v
= rtvec_alloc (n_elt
);
20450 gcc_assert (VECTOR_MODE_P (tmode
));
20451 gcc_assert (call_expr_nargs (exp
) == n_elt
);
20453 for (i
= 0; i
< n_elt
; ++i
)
20455 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
20456 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
20459 if (!target
|| !register_operand (target
, tmode
))
20460 target
= gen_reg_rtx (tmode
);
20462 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
20466 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20467 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20468 had a language-level syntax for referencing vector elements. */
20471 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
20473 enum machine_mode tmode
, mode0
;
20478 arg0
= CALL_EXPR_ARG (exp
, 0);
20479 arg1
= CALL_EXPR_ARG (exp
, 1);
20481 op0
= expand_normal (arg0
);
20482 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
20484 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
20485 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
20486 gcc_assert (VECTOR_MODE_P (mode0
));
20488 op0
= force_reg (mode0
, op0
);
20490 if (optimize
|| !target
|| !register_operand (target
, tmode
))
20491 target
= gen_reg_rtx (tmode
);
20493 ix86_expand_vector_extract (true, target
, op0
, elt
);
20498 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20499 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20500 a language-level syntax for referencing vector elements. */
20503 ix86_expand_vec_set_builtin (tree exp
)
20505 enum machine_mode tmode
, mode1
;
20506 tree arg0
, arg1
, arg2
;
20508 rtx op0
, op1
, target
;
20510 arg0
= CALL_EXPR_ARG (exp
, 0);
20511 arg1
= CALL_EXPR_ARG (exp
, 1);
20512 arg2
= CALL_EXPR_ARG (exp
, 2);
20514 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
20515 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
20516 gcc_assert (VECTOR_MODE_P (tmode
));
20518 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
20519 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
20520 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
20522 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
20523 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
20525 op0
= force_reg (tmode
, op0
);
20526 op1
= force_reg (mode1
, op1
);
20528 /* OP0 is the source of these builtin functions and shouldn't be
20529 modified. Create a copy, use it and return it as target. */
20530 target
= gen_reg_rtx (tmode
);
20531 emit_move_insn (target
, op0
);
20532 ix86_expand_vector_set (true, target
, op1
, elt
);
20537 /* Expand an expression EXP that calls a built-in function,
20538 with result going to TARGET if that's convenient
20539 (and in mode MODE if that's convenient).
20540 SUBTARGET may be used as the target for computing one of EXP's operands.
20541 IGNORE is nonzero if the value is to be ignored. */
20544 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
20545 enum machine_mode mode ATTRIBUTE_UNUSED
,
20546 int ignore ATTRIBUTE_UNUSED
)
20548 const struct builtin_description
*d
;
20550 enum insn_code icode
;
20551 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
20552 tree arg0
, arg1
, arg2
, arg3
;
20553 rtx op0
, op1
, op2
, op3
, pat
;
20554 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
20555 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
20559 case IX86_BUILTIN_EMMS
:
20560 emit_insn (gen_mmx_emms ());
20563 case IX86_BUILTIN_SFENCE
:
20564 emit_insn (gen_sse_sfence ());
20567 case IX86_BUILTIN_MASKMOVQ
:
20568 case IX86_BUILTIN_MASKMOVDQU
:
20569 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
20570 ? CODE_FOR_mmx_maskmovq
20571 : CODE_FOR_sse2_maskmovdqu
);
20572 /* Note the arg order is different from the operand order. */
20573 arg1
= CALL_EXPR_ARG (exp
, 0);
20574 arg2
= CALL_EXPR_ARG (exp
, 1);
20575 arg0
= CALL_EXPR_ARG (exp
, 2);
20576 op0
= expand_normal (arg0
);
20577 op1
= expand_normal (arg1
);
20578 op2
= expand_normal (arg2
);
20579 mode0
= insn_data
[icode
].operand
[0].mode
;
20580 mode1
= insn_data
[icode
].operand
[1].mode
;
20581 mode2
= insn_data
[icode
].operand
[2].mode
;
20583 op0
= force_reg (Pmode
, op0
);
20584 op0
= gen_rtx_MEM (mode1
, op0
);
20586 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
20587 op0
= copy_to_mode_reg (mode0
, op0
);
20588 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
20589 op1
= copy_to_mode_reg (mode1
, op1
);
20590 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
20591 op2
= copy_to_mode_reg (mode2
, op2
);
20592 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
20598 case IX86_BUILTIN_RSQRTF
:
20599 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
20601 case IX86_BUILTIN_SQRTSS
:
20602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
20603 case IX86_BUILTIN_RSQRTSS
:
20604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
20605 case IX86_BUILTIN_RCPSS
:
20606 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
20608 case IX86_BUILTIN_LOADUPS
:
20609 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
20611 case IX86_BUILTIN_STOREUPS
:
20612 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
20614 case IX86_BUILTIN_LOADHPS
:
20615 case IX86_BUILTIN_LOADLPS
:
20616 case IX86_BUILTIN_LOADHPD
:
20617 case IX86_BUILTIN_LOADLPD
:
20618 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
20619 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
20620 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
20621 : CODE_FOR_sse2_loadlpd
);
20622 arg0
= CALL_EXPR_ARG (exp
, 0);
20623 arg1
= CALL_EXPR_ARG (exp
, 1);
20624 op0
= expand_normal (arg0
);
20625 op1
= expand_normal (arg1
);
20626 tmode
= insn_data
[icode
].operand
[0].mode
;
20627 mode0
= insn_data
[icode
].operand
[1].mode
;
20628 mode1
= insn_data
[icode
].operand
[2].mode
;
20630 op0
= force_reg (mode0
, op0
);
20631 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
20632 if (optimize
|| target
== 0
20633 || GET_MODE (target
) != tmode
20634 || !register_operand (target
, tmode
))
20635 target
= gen_reg_rtx (tmode
);
20636 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20642 case IX86_BUILTIN_STOREHPS
:
20643 case IX86_BUILTIN_STORELPS
:
20644 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
20645 : CODE_FOR_sse_storelps
);
20646 arg0
= CALL_EXPR_ARG (exp
, 0);
20647 arg1
= CALL_EXPR_ARG (exp
, 1);
20648 op0
= expand_normal (arg0
);
20649 op1
= expand_normal (arg1
);
20650 mode0
= insn_data
[icode
].operand
[0].mode
;
20651 mode1
= insn_data
[icode
].operand
[1].mode
;
20653 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
20654 op1
= force_reg (mode1
, op1
);
20656 pat
= GEN_FCN (icode
) (op0
, op1
);
20662 case IX86_BUILTIN_MOVNTPS
:
20663 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
20664 case IX86_BUILTIN_MOVNTQ
:
20665 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
20667 case IX86_BUILTIN_LDMXCSR
:
20668 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
20669 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
20670 emit_move_insn (target
, op0
);
20671 emit_insn (gen_sse_ldmxcsr (target
));
20674 case IX86_BUILTIN_STMXCSR
:
20675 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
20676 emit_insn (gen_sse_stmxcsr (target
));
20677 return copy_to_mode_reg (SImode
, target
);
20679 case IX86_BUILTIN_SHUFPS
:
20680 case IX86_BUILTIN_SHUFPD
:
20681 icode
= (fcode
== IX86_BUILTIN_SHUFPS
20682 ? CODE_FOR_sse_shufps
20683 : CODE_FOR_sse2_shufpd
);
20684 arg0
= CALL_EXPR_ARG (exp
, 0);
20685 arg1
= CALL_EXPR_ARG (exp
, 1);
20686 arg2
= CALL_EXPR_ARG (exp
, 2);
20687 op0
= expand_normal (arg0
);
20688 op1
= expand_normal (arg1
);
20689 op2
= expand_normal (arg2
);
20690 tmode
= insn_data
[icode
].operand
[0].mode
;
20691 mode0
= insn_data
[icode
].operand
[1].mode
;
20692 mode1
= insn_data
[icode
].operand
[2].mode
;
20693 mode2
= insn_data
[icode
].operand
[3].mode
;
20695 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20696 op0
= copy_to_mode_reg (mode0
, op0
);
20697 if ((optimize
&& !register_operand (op1
, mode1
))
20698 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20699 op1
= copy_to_mode_reg (mode1
, op1
);
20700 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
20702 /* @@@ better error message */
20703 error ("mask must be an immediate");
20704 return gen_reg_rtx (tmode
);
20706 if (optimize
|| target
== 0
20707 || GET_MODE (target
) != tmode
20708 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20709 target
= gen_reg_rtx (tmode
);
20710 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
20716 case IX86_BUILTIN_PSHUFW
:
20717 case IX86_BUILTIN_PSHUFD
:
20718 case IX86_BUILTIN_PSHUFHW
:
20719 case IX86_BUILTIN_PSHUFLW
:
20720 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
20721 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
20722 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
20723 : CODE_FOR_mmx_pshufw
);
20724 arg0
= CALL_EXPR_ARG (exp
, 0);
20725 arg1
= CALL_EXPR_ARG (exp
, 1);
20726 op0
= expand_normal (arg0
);
20727 op1
= expand_normal (arg1
);
20728 tmode
= insn_data
[icode
].operand
[0].mode
;
20729 mode1
= insn_data
[icode
].operand
[1].mode
;
20730 mode2
= insn_data
[icode
].operand
[2].mode
;
20732 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20733 op0
= copy_to_mode_reg (mode1
, op0
);
20734 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
20736 /* @@@ better error message */
20737 error ("mask must be an immediate");
20741 || GET_MODE (target
) != tmode
20742 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20743 target
= gen_reg_rtx (tmode
);
20744 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20750 case IX86_BUILTIN_PSLLW128
:
20751 case IX86_BUILTIN_PSLLWI128
:
20752 icode
= CODE_FOR_ashlv8hi3
;
20754 case IX86_BUILTIN_PSLLD128
:
20755 case IX86_BUILTIN_PSLLDI128
:
20756 icode
= CODE_FOR_ashlv4si3
;
20758 case IX86_BUILTIN_PSLLQ128
:
20759 case IX86_BUILTIN_PSLLQI128
:
20760 icode
= CODE_FOR_ashlv2di3
;
20762 case IX86_BUILTIN_PSRAW128
:
20763 case IX86_BUILTIN_PSRAWI128
:
20764 icode
= CODE_FOR_ashrv8hi3
;
20766 case IX86_BUILTIN_PSRAD128
:
20767 case IX86_BUILTIN_PSRADI128
:
20768 icode
= CODE_FOR_ashrv4si3
;
20770 case IX86_BUILTIN_PSRLW128
:
20771 case IX86_BUILTIN_PSRLWI128
:
20772 icode
= CODE_FOR_lshrv8hi3
;
20774 case IX86_BUILTIN_PSRLD128
:
20775 case IX86_BUILTIN_PSRLDI128
:
20776 icode
= CODE_FOR_lshrv4si3
;
20778 case IX86_BUILTIN_PSRLQ128
:
20779 case IX86_BUILTIN_PSRLQI128
:
20780 icode
= CODE_FOR_lshrv2di3
;
20783 arg0
= CALL_EXPR_ARG (exp
, 0);
20784 arg1
= CALL_EXPR_ARG (exp
, 1);
20785 op0
= expand_normal (arg0
);
20786 op1
= expand_normal (arg1
);
20788 tmode
= insn_data
[icode
].operand
[0].mode
;
20789 mode1
= insn_data
[icode
].operand
[1].mode
;
20791 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20792 op0
= copy_to_reg (op0
);
20794 if (!CONST_INT_P (op1
))
20795 op1
= simplify_gen_subreg (SImode
, op1
, GET_MODE (op1
), 0);
20797 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, SImode
))
20798 op1
= copy_to_reg (op1
);
20800 target
= gen_reg_rtx (tmode
);
20801 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20807 case IX86_BUILTIN_PSLLDQI128
:
20808 case IX86_BUILTIN_PSRLDQI128
:
20809 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
20810 : CODE_FOR_sse2_lshrti3
);
20811 arg0
= CALL_EXPR_ARG (exp
, 0);
20812 arg1
= CALL_EXPR_ARG (exp
, 1);
20813 op0
= expand_normal (arg0
);
20814 op1
= expand_normal (arg1
);
20815 tmode
= insn_data
[icode
].operand
[0].mode
;
20816 mode1
= insn_data
[icode
].operand
[1].mode
;
20817 mode2
= insn_data
[icode
].operand
[2].mode
;
20819 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20821 op0
= copy_to_reg (op0
);
20822 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
20824 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
20826 error ("shift must be an immediate");
20829 target
= gen_reg_rtx (V2DImode
);
20830 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
20837 case IX86_BUILTIN_FEMMS
:
20838 emit_insn (gen_mmx_femms ());
20841 case IX86_BUILTIN_PAVGUSB
:
20842 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
20844 case IX86_BUILTIN_PF2ID
:
20845 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
20847 case IX86_BUILTIN_PFACC
:
20848 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
20850 case IX86_BUILTIN_PFADD
:
20851 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
20853 case IX86_BUILTIN_PFCMPEQ
:
20854 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
20856 case IX86_BUILTIN_PFCMPGE
:
20857 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
20859 case IX86_BUILTIN_PFCMPGT
:
20860 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
20862 case IX86_BUILTIN_PFMAX
:
20863 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
20865 case IX86_BUILTIN_PFMIN
:
20866 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
20868 case IX86_BUILTIN_PFMUL
:
20869 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
20871 case IX86_BUILTIN_PFRCP
:
20872 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
20874 case IX86_BUILTIN_PFRCPIT1
:
20875 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
20877 case IX86_BUILTIN_PFRCPIT2
:
20878 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
20880 case IX86_BUILTIN_PFRSQIT1
:
20881 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
20883 case IX86_BUILTIN_PFRSQRT
:
20884 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
20886 case IX86_BUILTIN_PFSUB
:
20887 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
20889 case IX86_BUILTIN_PFSUBR
:
20890 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
20892 case IX86_BUILTIN_PI2FD
:
20893 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
20895 case IX86_BUILTIN_PMULHRW
:
20896 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
20898 case IX86_BUILTIN_PF2IW
:
20899 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
20901 case IX86_BUILTIN_PFNACC
:
20902 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
20904 case IX86_BUILTIN_PFPNACC
:
20905 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
20907 case IX86_BUILTIN_PI2FW
:
20908 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
20910 case IX86_BUILTIN_PSWAPDSI
:
20911 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
20913 case IX86_BUILTIN_PSWAPDSF
:
20914 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
20916 case IX86_BUILTIN_SQRTSD
:
20917 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
20918 case IX86_BUILTIN_LOADUPD
:
20919 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
20920 case IX86_BUILTIN_STOREUPD
:
20921 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
20923 case IX86_BUILTIN_MFENCE
:
20924 emit_insn (gen_sse2_mfence ());
20926 case IX86_BUILTIN_LFENCE
:
20927 emit_insn (gen_sse2_lfence ());
20930 case IX86_BUILTIN_CLFLUSH
:
20931 arg0
= CALL_EXPR_ARG (exp
, 0);
20932 op0
= expand_normal (arg0
);
20933 icode
= CODE_FOR_sse2_clflush
;
20934 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
20935 op0
= copy_to_mode_reg (Pmode
, op0
);
20937 emit_insn (gen_sse2_clflush (op0
));
20940 case IX86_BUILTIN_MOVNTPD
:
20941 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
20942 case IX86_BUILTIN_MOVNTDQ
:
20943 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
20944 case IX86_BUILTIN_MOVNTI
:
20945 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
20947 case IX86_BUILTIN_LOADDQU
:
20948 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
20949 case IX86_BUILTIN_STOREDQU
:
20950 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
20952 case IX86_BUILTIN_MONITOR
:
20953 arg0
= CALL_EXPR_ARG (exp
, 0);
20954 arg1
= CALL_EXPR_ARG (exp
, 1);
20955 arg2
= CALL_EXPR_ARG (exp
, 2);
20956 op0
= expand_normal (arg0
);
20957 op1
= expand_normal (arg1
);
20958 op2
= expand_normal (arg2
);
20960 op0
= copy_to_mode_reg (Pmode
, op0
);
20962 op1
= copy_to_mode_reg (SImode
, op1
);
20964 op2
= copy_to_mode_reg (SImode
, op2
);
20966 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
20968 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
20971 case IX86_BUILTIN_MWAIT
:
20972 arg0
= CALL_EXPR_ARG (exp
, 0);
20973 arg1
= CALL_EXPR_ARG (exp
, 1);
20974 op0
= expand_normal (arg0
);
20975 op1
= expand_normal (arg1
);
20977 op0
= copy_to_mode_reg (SImode
, op0
);
20979 op1
= copy_to_mode_reg (SImode
, op1
);
20980 emit_insn (gen_sse3_mwait (op0
, op1
));
20983 case IX86_BUILTIN_LDDQU
:
20984 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
20987 case IX86_BUILTIN_PALIGNR
:
20988 case IX86_BUILTIN_PALIGNR128
:
20989 if (fcode
== IX86_BUILTIN_PALIGNR
)
20991 icode
= CODE_FOR_ssse3_palignrdi
;
20996 icode
= CODE_FOR_ssse3_palignrti
;
20999 arg0
= CALL_EXPR_ARG (exp
, 0);
21000 arg1
= CALL_EXPR_ARG (exp
, 1);
21001 arg2
= CALL_EXPR_ARG (exp
, 2);
21002 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
21003 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
21004 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
21005 tmode
= insn_data
[icode
].operand
[0].mode
;
21006 mode1
= insn_data
[icode
].operand
[1].mode
;
21007 mode2
= insn_data
[icode
].operand
[2].mode
;
21008 mode3
= insn_data
[icode
].operand
[3].mode
;
21010 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21012 op0
= copy_to_reg (op0
);
21013 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
21015 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21017 op1
= copy_to_reg (op1
);
21018 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
21020 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
21022 error ("shift must be an immediate");
21025 target
= gen_reg_rtx (mode
);
21026 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
21033 case IX86_BUILTIN_MOVNTDQA
:
21034 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa
, exp
,
21037 case IX86_BUILTIN_MOVNTSD
:
21038 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
21040 case IX86_BUILTIN_MOVNTSS
:
21041 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
21043 case IX86_BUILTIN_INSERTQ
:
21044 case IX86_BUILTIN_EXTRQ
:
21045 icode
= (fcode
== IX86_BUILTIN_EXTRQ
21046 ? CODE_FOR_sse4a_extrq
21047 : CODE_FOR_sse4a_insertq
);
21048 arg0
= CALL_EXPR_ARG (exp
, 0);
21049 arg1
= CALL_EXPR_ARG (exp
, 1);
21050 op0
= expand_normal (arg0
);
21051 op1
= expand_normal (arg1
);
21052 tmode
= insn_data
[icode
].operand
[0].mode
;
21053 mode1
= insn_data
[icode
].operand
[1].mode
;
21054 mode2
= insn_data
[icode
].operand
[2].mode
;
21055 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21056 op0
= copy_to_mode_reg (mode1
, op0
);
21057 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21058 op1
= copy_to_mode_reg (mode2
, op1
);
21059 if (optimize
|| target
== 0
21060 || GET_MODE (target
) != tmode
21061 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21062 target
= gen_reg_rtx (tmode
);
21063 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
21069 case IX86_BUILTIN_EXTRQI
:
21070 icode
= CODE_FOR_sse4a_extrqi
;
21071 arg0
= CALL_EXPR_ARG (exp
, 0);
21072 arg1
= CALL_EXPR_ARG (exp
, 1);
21073 arg2
= CALL_EXPR_ARG (exp
, 2);
21074 op0
= expand_normal (arg0
);
21075 op1
= expand_normal (arg1
);
21076 op2
= expand_normal (arg2
);
21077 tmode
= insn_data
[icode
].operand
[0].mode
;
21078 mode1
= insn_data
[icode
].operand
[1].mode
;
21079 mode2
= insn_data
[icode
].operand
[2].mode
;
21080 mode3
= insn_data
[icode
].operand
[3].mode
;
21081 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21082 op0
= copy_to_mode_reg (mode1
, op0
);
21083 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21085 error ("index mask must be an immediate");
21086 return gen_reg_rtx (tmode
);
21088 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
21090 error ("length mask must be an immediate");
21091 return gen_reg_rtx (tmode
);
21093 if (optimize
|| target
== 0
21094 || GET_MODE (target
) != tmode
21095 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21096 target
= gen_reg_rtx (tmode
);
21097 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21103 case IX86_BUILTIN_INSERTQI
:
21104 icode
= CODE_FOR_sse4a_insertqi
;
21105 arg0
= CALL_EXPR_ARG (exp
, 0);
21106 arg1
= CALL_EXPR_ARG (exp
, 1);
21107 arg2
= CALL_EXPR_ARG (exp
, 2);
21108 arg3
= CALL_EXPR_ARG (exp
, 3);
21109 op0
= expand_normal (arg0
);
21110 op1
= expand_normal (arg1
);
21111 op2
= expand_normal (arg2
);
21112 op3
= expand_normal (arg3
);
21113 tmode
= insn_data
[icode
].operand
[0].mode
;
21114 mode1
= insn_data
[icode
].operand
[1].mode
;
21115 mode2
= insn_data
[icode
].operand
[2].mode
;
21116 mode3
= insn_data
[icode
].operand
[3].mode
;
21117 mode4
= insn_data
[icode
].operand
[4].mode
;
21119 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21120 op0
= copy_to_mode_reg (mode1
, op0
);
21122 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21123 op1
= copy_to_mode_reg (mode2
, op1
);
21125 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
21127 error ("index mask must be an immediate");
21128 return gen_reg_rtx (tmode
);
21130 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
21132 error ("length mask must be an immediate");
21133 return gen_reg_rtx (tmode
);
21135 if (optimize
|| target
== 0
21136 || GET_MODE (target
) != tmode
21137 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21138 target
= gen_reg_rtx (tmode
);
21139 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
21145 case IX86_BUILTIN_VEC_INIT_V2SI
:
21146 case IX86_BUILTIN_VEC_INIT_V4HI
:
21147 case IX86_BUILTIN_VEC_INIT_V8QI
:
21148 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
21150 case IX86_BUILTIN_VEC_EXT_V2DF
:
21151 case IX86_BUILTIN_VEC_EXT_V2DI
:
21152 case IX86_BUILTIN_VEC_EXT_V4SF
:
21153 case IX86_BUILTIN_VEC_EXT_V4SI
:
21154 case IX86_BUILTIN_VEC_EXT_V8HI
:
21155 case IX86_BUILTIN_VEC_EXT_V2SI
:
21156 case IX86_BUILTIN_VEC_EXT_V4HI
:
21157 case IX86_BUILTIN_VEC_EXT_V16QI
:
21158 return ix86_expand_vec_ext_builtin (exp
, target
);
21160 case IX86_BUILTIN_VEC_SET_V2DI
:
21161 case IX86_BUILTIN_VEC_SET_V4SF
:
21162 case IX86_BUILTIN_VEC_SET_V4SI
:
21163 case IX86_BUILTIN_VEC_SET_V8HI
:
21164 case IX86_BUILTIN_VEC_SET_V4HI
:
21165 case IX86_BUILTIN_VEC_SET_V16QI
:
21166 return ix86_expand_vec_set_builtin (exp
);
21168 case IX86_BUILTIN_INFQ
:
21170 REAL_VALUE_TYPE inf
;
21174 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
21176 tmp
= validize_mem (force_const_mem (mode
, tmp
));
21179 target
= gen_reg_rtx (mode
);
21181 emit_move_insn (target
, tmp
);
21185 case IX86_BUILTIN_FABSQ
:
21186 return ix86_expand_unop_builtin (CODE_FOR_abstf2
, exp
, target
, 0);
21188 case IX86_BUILTIN_COPYSIGNQ
:
21189 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3
, exp
, target
);
21195 for (i
= 0, d
= bdesc_sse_3arg
;
21196 i
< ARRAY_SIZE (bdesc_sse_3arg
);
21198 if (d
->code
== fcode
)
21199 return ix86_expand_sse_4_operands_builtin (d
->icode
, exp
,
21202 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
21203 if (d
->code
== fcode
)
21205 /* Compares are treated specially. */
21206 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
21207 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
21208 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
21209 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
21210 return ix86_expand_sse_compare (d
, exp
, target
);
21212 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
21215 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
21216 if (d
->code
== fcode
)
21217 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
21219 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
21220 if (d
->code
== fcode
)
21221 return ix86_expand_sse_comi (d
, exp
, target
);
21223 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
21224 if (d
->code
== fcode
)
21225 return ix86_expand_sse_ptest (d
, exp
, target
);
21227 for (i
= 0, d
= bdesc_crc32
; i
< ARRAY_SIZE (bdesc_crc32
); i
++, d
++)
21228 if (d
->code
== fcode
)
21229 return ix86_expand_crc32 (d
->icode
, exp
, target
);
21231 for (i
= 0, d
= bdesc_pcmpestr
;
21232 i
< ARRAY_SIZE (bdesc_pcmpestr
);
21234 if (d
->code
== fcode
)
21235 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
21237 for (i
= 0, d
= bdesc_pcmpistr
;
21238 i
< ARRAY_SIZE (bdesc_pcmpistr
);
21240 if (d
->code
== fcode
)
21241 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
21243 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
21244 if (d
->code
== fcode
)
21245 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
21246 (enum multi_arg_type
)d
->flag
,
21249 gcc_unreachable ();
21252 /* Returns a function decl for a vectorized version of the builtin function
21253 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21254 if it is not available. */
21257 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
21260 enum machine_mode in_mode
, out_mode
;
21263 if (TREE_CODE (type_out
) != VECTOR_TYPE
21264 || TREE_CODE (type_in
) != VECTOR_TYPE
)
21267 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
21268 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
21269 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
21270 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
21274 case BUILT_IN_SQRT
:
21275 if (out_mode
== DFmode
&& out_n
== 2
21276 && in_mode
== DFmode
&& in_n
== 2)
21277 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
21280 case BUILT_IN_SQRTF
:
21281 if (out_mode
== SFmode
&& out_n
== 4
21282 && in_mode
== SFmode
&& in_n
== 4)
21283 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
21286 case BUILT_IN_LRINT
:
21287 if (out_mode
== SImode
&& out_n
== 4
21288 && in_mode
== DFmode
&& in_n
== 2)
21289 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
21292 case BUILT_IN_LRINTF
:
21293 if (out_mode
== SImode
&& out_n
== 4
21294 && in_mode
== SFmode
&& in_n
== 4)
21295 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
21302 /* Dispatch to a handler for a vectorization library. */
21303 if (ix86_veclib_handler
)
21304 return (*ix86_veclib_handler
)(fn
, type_out
, type_in
);
21309 /* Handler for an ACML-style interface to a library with vectorized
21313 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
21315 char name
[20] = "__vr.._";
21316 tree fntype
, new_fndecl
, args
;
21319 enum machine_mode el_mode
, in_mode
;
21322 /* The ACML is 64bits only and suitable for unsafe math only as
21323 it does not correctly support parts of IEEE with the required
21324 precision such as denormals. */
21326 || !flag_unsafe_math_optimizations
)
21329 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
21330 n
= TYPE_VECTOR_SUBPARTS (type_out
);
21331 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
21332 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
21333 if (el_mode
!= in_mode
21343 case BUILT_IN_LOG2
:
21344 case BUILT_IN_LOG10
:
21347 if (el_mode
!= DFmode
21352 case BUILT_IN_SINF
:
21353 case BUILT_IN_COSF
:
21354 case BUILT_IN_EXPF
:
21355 case BUILT_IN_POWF
:
21356 case BUILT_IN_LOGF
:
21357 case BUILT_IN_LOG2F
:
21358 case BUILT_IN_LOG10F
:
21361 if (el_mode
!= SFmode
21370 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
21371 sprintf (name
+ 7, "%s", bname
+10);
21374 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
21375 args
= TREE_CHAIN (args
))
21379 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
21381 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
21383 /* Build a function declaration for the vectorized function. */
21384 new_fndecl
= build_decl (FUNCTION_DECL
, get_identifier (name
), fntype
);
21385 TREE_PUBLIC (new_fndecl
) = 1;
21386 DECL_EXTERNAL (new_fndecl
) = 1;
21387 DECL_IS_NOVOPS (new_fndecl
) = 1;
21388 TREE_READONLY (new_fndecl
) = 1;
21394 /* Returns a decl of a function that implements conversion of the
21395 input vector of type TYPE, or NULL_TREE if it is not available. */
21398 ix86_vectorize_builtin_conversion (unsigned int code
, tree type
)
21400 if (TREE_CODE (type
) != VECTOR_TYPE
)
21406 switch (TYPE_MODE (type
))
21409 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
21414 case FIX_TRUNC_EXPR
:
21415 switch (TYPE_MODE (type
))
21418 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
21428 /* Returns a code for a target-specific builtin that implements
21429 reciprocal of the function, or NULL_TREE if not available. */
21432 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
21433 bool sqrt ATTRIBUTE_UNUSED
)
21435 if (! (TARGET_SSE_MATH
&& TARGET_RECIP
&& !optimize_size
21436 && flag_finite_math_only
&& !flag_trapping_math
21437 && flag_unsafe_math_optimizations
))
21441 /* Machine dependent builtins. */
21444 /* Vectorized version of sqrt to rsqrt conversion. */
21445 case IX86_BUILTIN_SQRTPS
:
21446 return ix86_builtins
[IX86_BUILTIN_RSQRTPS
];
21452 /* Normal builtins. */
21455 /* Sqrt to rsqrt conversion. */
21456 case BUILT_IN_SQRTF
:
21457 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
21464 /* Store OPERAND to the memory after reload is completed. This means
21465 that we can't easily use assign_stack_local. */
21467 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
21471 gcc_assert (reload_completed
);
21472 if (TARGET_RED_ZONE
)
21474 result
= gen_rtx_MEM (mode
,
21475 gen_rtx_PLUS (Pmode
,
21477 GEN_INT (-RED_ZONE_SIZE
)));
21478 emit_move_insn (result
, operand
);
21480 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
21486 operand
= gen_lowpart (DImode
, operand
);
21490 gen_rtx_SET (VOIDmode
,
21491 gen_rtx_MEM (DImode
,
21492 gen_rtx_PRE_DEC (DImode
,
21493 stack_pointer_rtx
)),
21497 gcc_unreachable ();
21499 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
21508 split_di (&operand
, 1, operands
, operands
+ 1);
21510 gen_rtx_SET (VOIDmode
,
21511 gen_rtx_MEM (SImode
,
21512 gen_rtx_PRE_DEC (Pmode
,
21513 stack_pointer_rtx
)),
21516 gen_rtx_SET (VOIDmode
,
21517 gen_rtx_MEM (SImode
,
21518 gen_rtx_PRE_DEC (Pmode
,
21519 stack_pointer_rtx
)),
21524 /* Store HImodes as SImodes. */
21525 operand
= gen_lowpart (SImode
, operand
);
21529 gen_rtx_SET (VOIDmode
,
21530 gen_rtx_MEM (GET_MODE (operand
),
21531 gen_rtx_PRE_DEC (SImode
,
21532 stack_pointer_rtx
)),
21536 gcc_unreachable ();
21538 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
21543 /* Free operand from the memory. */
21545 ix86_free_from_memory (enum machine_mode mode
)
21547 if (!TARGET_RED_ZONE
)
21551 if (mode
== DImode
|| TARGET_64BIT
)
21555 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21556 to pop or add instruction if registers are available. */
21557 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21558 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
21563 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21564 QImode must go into class Q_REGS.
21565 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21566 movdf to do mem-to-mem moves through integer regs. */
21568 ix86_preferred_reload_class (rtx x
, enum reg_class regclass
)
21570 enum machine_mode mode
= GET_MODE (x
);
21572 /* We're only allowed to return a subclass of CLASS. Many of the
21573 following checks fail for NO_REGS, so eliminate that early. */
21574 if (regclass
== NO_REGS
)
21577 /* All classes can load zeros. */
21578 if (x
== CONST0_RTX (mode
))
21581 /* Force constants into memory if we are loading a (nonzero) constant into
21582 an MMX or SSE register. This is because there are no MMX/SSE instructions
21583 to load from a constant. */
21585 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
21588 /* Prefer SSE regs only, if we can use them for math. */
21589 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
21590 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
21592 /* Floating-point constants need more complex checks. */
21593 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
21595 /* General regs can load everything. */
21596 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
21599 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21600 zero above. We only want to wind up preferring 80387 registers if
21601 we plan on doing computation with them. */
21603 && standard_80387_constant_p (x
))
21605 /* Limit class to non-sse. */
21606 if (regclass
== FLOAT_SSE_REGS
)
21608 if (regclass
== FP_TOP_SSE_REGS
)
21610 if (regclass
== FP_SECOND_SSE_REGS
)
21611 return FP_SECOND_REG
;
21612 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
21619 /* Generally when we see PLUS here, it's the function invariant
21620 (plus soft-fp const_int). Which can only be computed into general
21622 if (GET_CODE (x
) == PLUS
)
21623 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
21625 /* QImode constants are easy to load, but non-constant QImode data
21626 must go into Q_REGS. */
21627 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
21629 if (reg_class_subset_p (regclass
, Q_REGS
))
21631 if (reg_class_subset_p (Q_REGS
, regclass
))
21639 /* Discourage putting floating-point values in SSE registers unless
21640 SSE math is being used, and likewise for the 387 registers. */
21642 ix86_preferred_output_reload_class (rtx x
, enum reg_class regclass
)
21644 enum machine_mode mode
= GET_MODE (x
);
21646 /* Restrict the output reload class to the register bank that we are doing
21647 math on. If we would like not to return a subset of CLASS, reject this
21648 alternative: if reload cannot do this, it will still use its choice. */
21649 mode
= GET_MODE (x
);
21650 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
21651 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
21653 if (X87_FLOAT_MODE_P (mode
))
21655 if (regclass
== FP_TOP_SSE_REGS
)
21657 else if (regclass
== FP_SECOND_SSE_REGS
)
21658 return FP_SECOND_REG
;
21660 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
21666 /* If we are copying between general and FP registers, we need a memory
21667 location. The same is true for SSE and MMX registers.
21669 To optimize register_move_cost performance, allow inline variant.
21671 The macro can't work reliably when one of the CLASSES is class containing
21672 registers from multiple units (SSE, MMX, integer). We avoid this by never
21673 combining those units in single alternative in the machine description.
21674 Ensure that this constraint holds to avoid unexpected surprises.
21676 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21677 enforce these sanity checks. */
21680 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
21681 enum machine_mode mode
, int strict
)
21683 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
21684 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
21685 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
21686 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
21687 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
21688 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
21690 gcc_assert (!strict
);
21694 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
21697 /* ??? This is a lie. We do have moves between mmx/general, and for
21698 mmx/sse2. But by saying we need secondary memory we discourage the
21699 register allocator from using the mmx registers unless needed. */
21700 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
21703 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
21705 /* SSE1 doesn't have any direct moves from other classes. */
21709 /* If the target says that inter-unit moves are more expensive
21710 than moving through memory, then don't generate them. */
21711 if (!TARGET_INTER_UNIT_MOVES
)
21714 /* Between SSE and general, we have moves no larger than word size. */
21715 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21723 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
21724 enum machine_mode mode
, int strict
)
21726 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
21729 /* Return true if the registers in CLASS cannot represent the change from
21730 modes FROM to TO. */
21733 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
21734 enum reg_class regclass
)
21739 /* x87 registers can't do subreg at all, as all values are reformatted
21740 to extended precision. */
21741 if (MAYBE_FLOAT_CLASS_P (regclass
))
21744 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
21746 /* Vector registers do not support QI or HImode loads. If we don't
21747 disallow a change to these modes, reload will assume it's ok to
21748 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21749 the vec_dupv4hi pattern. */
21750 if (GET_MODE_SIZE (from
) < 4)
21753 /* Vector registers do not support subreg with nonzero offsets, which
21754 are otherwise valid for integer registers. Since we can't see
21755 whether we have a nonzero offset from here, prohibit all
21756 nonparadoxical subregs changing size. */
21757 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
21764 /* Return the cost of moving data of mode M between a
21765 register and memory. A value of 2 is the default; this cost is
21766 relative to those in `REGISTER_MOVE_COST'.
21768 This function is used extensively by register_move_cost that is used to
21769 build tables at startup. Make it inline in this case.
21770 When IN is 2, return maximum of in and out move cost.
21772 If moving between registers and memory is more expensive than
21773 between two registers, you should define this macro to express the
21776 Model also increased moving costs of QImode registers in non
21780 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
21784 if (FLOAT_CLASS_P (regclass
))
21802 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
21803 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
21805 if (SSE_CLASS_P (regclass
))
21808 switch (GET_MODE_SIZE (mode
))
21823 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
21824 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
21826 if (MMX_CLASS_P (regclass
))
21829 switch (GET_MODE_SIZE (mode
))
21841 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
21842 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
21844 switch (GET_MODE_SIZE (mode
))
21847 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
21850 return ix86_cost
->int_store
[0];
21851 if (TARGET_PARTIAL_REG_DEPENDENCY
&& !optimize_size
)
21852 cost
= ix86_cost
->movzbl_load
;
21854 cost
= ix86_cost
->int_load
[0];
21856 return MAX (cost
, ix86_cost
->int_store
[0]);
21862 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
21864 return ix86_cost
->movzbl_load
;
21866 return ix86_cost
->int_store
[0] + 4;
21871 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
21872 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
21874 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21875 if (mode
== TFmode
)
21878 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
21880 cost
= ix86_cost
->int_load
[2];
21882 cost
= ix86_cost
->int_store
[2];
21883 return (cost
* (((int) GET_MODE_SIZE (mode
)
21884 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
21889 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
, int in
)
21891 return inline_memory_move_cost (mode
, regclass
, in
);
21895 /* Return the cost of moving data from a register in class CLASS1 to
21896 one in class CLASS2.
21898 It is not required that the cost always equal 2 when FROM is the same as TO;
21899 on some machines it is expensive to move between registers if they are not
21900 general registers. */
21903 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
21904 enum reg_class class2
)
21906 /* In case we require secondary memory, compute cost of the store followed
21907 by load. In order to avoid bad register allocation choices, we need
21908 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21910 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
21914 cost
+= inline_memory_move_cost (mode
, class1
, 2);
21915 cost
+= inline_memory_move_cost (mode
, class2
, 2);
21917 /* In case of copying from general_purpose_register we may emit multiple
21918 stores followed by single load causing memory size mismatch stall.
21919 Count this as arbitrarily high cost of 20. */
21920 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
21923 /* In the case of FP/MMX moves, the registers actually overlap, and we
21924 have to switch modes in order to treat them differently. */
21925 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
21926 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
21932 /* Moves between SSE/MMX and integer unit are expensive. */
21933 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
21934 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
21936 /* ??? By keeping returned value relatively high, we limit the number
21937 of moves between integer and MMX/SSE registers for all targets.
21938 Additionally, high value prevents problem with x86_modes_tieable_p(),
21939 where integer modes in MMX/SSE registers are not tieable
21940 because of missing QImode and HImode moves to, from or between
21941 MMX/SSE registers. */
21942 return MAX (ix86_cost
->mmxsse_to_integer
, 8);
21944 if (MAYBE_FLOAT_CLASS_P (class1
))
21945 return ix86_cost
->fp_move
;
21946 if (MAYBE_SSE_CLASS_P (class1
))
21947 return ix86_cost
->sse_move
;
21948 if (MAYBE_MMX_CLASS_P (class1
))
21949 return ix86_cost
->mmx_move
;
21953 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21956 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
21958 /* Flags and only flags can only hold CCmode values. */
21959 if (CC_REGNO_P (regno
))
21960 return GET_MODE_CLASS (mode
) == MODE_CC
;
21961 if (GET_MODE_CLASS (mode
) == MODE_CC
21962 || GET_MODE_CLASS (mode
) == MODE_RANDOM
21963 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
21965 if (FP_REGNO_P (regno
))
21966 return VALID_FP_MODE_P (mode
);
21967 if (SSE_REGNO_P (regno
))
21969 /* We implement the move patterns for all vector modes into and
21970 out of SSE registers, even when no operation instructions
21972 return (VALID_SSE_REG_MODE (mode
)
21973 || VALID_SSE2_REG_MODE (mode
)
21974 || VALID_MMX_REG_MODE (mode
)
21975 || VALID_MMX_REG_MODE_3DNOW (mode
));
21977 if (MMX_REGNO_P (regno
))
21979 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21980 so if the register is available at all, then we can move data of
21981 the given mode into or out of it. */
21982 return (VALID_MMX_REG_MODE (mode
)
21983 || VALID_MMX_REG_MODE_3DNOW (mode
));
21986 if (mode
== QImode
)
21988 /* Take care for QImode values - they can be in non-QI regs,
21989 but then they do cause partial register stalls. */
21990 if (regno
< 4 || TARGET_64BIT
)
21992 if (!TARGET_PARTIAL_REG_STALL
)
21994 return reload_in_progress
|| reload_completed
;
21996 /* We handle both integer and floats in the general purpose registers. */
21997 else if (VALID_INT_MODE_P (mode
))
21999 else if (VALID_FP_MODE_P (mode
))
22001 else if (VALID_DFP_MODE_P (mode
))
22003 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22004 on to use that value in smaller contexts, this can easily force a
22005 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22006 supporting DImode, allow it. */
22007 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
22013 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22014 tieable integer mode. */
22017 ix86_tieable_integer_mode_p (enum machine_mode mode
)
22026 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
22029 return TARGET_64BIT
;
22036 /* Return true if MODE1 is accessible in a register that can hold MODE2
22037 without copying. That is, all register classes that can hold MODE2
22038 can also hold MODE1. */
22041 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22043 if (mode1
== mode2
)
22046 if (ix86_tieable_integer_mode_p (mode1
)
22047 && ix86_tieable_integer_mode_p (mode2
))
22050 /* MODE2 being XFmode implies fp stack or general regs, which means we
22051 can tie any smaller floating point modes to it. Note that we do not
22052 tie this with TFmode. */
22053 if (mode2
== XFmode
)
22054 return mode1
== SFmode
|| mode1
== DFmode
;
22056 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22057 that we can tie it with SFmode. */
22058 if (mode2
== DFmode
)
22059 return mode1
== SFmode
;
22061 /* If MODE2 is only appropriate for an SSE register, then tie with
22062 any other mode acceptable to SSE registers. */
22063 if (GET_MODE_SIZE (mode2
) == 16
22064 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
22065 return (GET_MODE_SIZE (mode1
) == 16
22066 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
22068 /* If MODE2 is appropriate for an MMX register, then tie
22069 with any other mode acceptable to MMX registers. */
22070 if (GET_MODE_SIZE (mode2
) == 8
22071 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
22072 return (GET_MODE_SIZE (mode1
) == 8
22073 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
22078 /* Compute a (partial) cost for rtx X. Return true if the complete
22079 cost has been computed, and false if subexpressions should be
22080 scanned. In either case, *TOTAL contains the cost result. */
22083 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
)
22085 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
22086 enum machine_mode mode
= GET_MODE (x
);
22094 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
22096 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
22098 else if (flag_pic
&& SYMBOLIC_CONST (x
)
22100 || (!GET_CODE (x
) != LABEL_REF
22101 && (GET_CODE (x
) != SYMBOL_REF
22102 || !SYMBOL_REF_LOCAL_P (x
)))))
22109 if (mode
== VOIDmode
)
22112 switch (standard_80387_constant_p (x
))
22117 default: /* Other constants */
22122 /* Start with (MEM (SYMBOL_REF)), since that's where
22123 it'll probably end up. Add a penalty for size. */
22124 *total
= (COSTS_N_INSNS (1)
22125 + (flag_pic
!= 0 && !TARGET_64BIT
)
22126 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
22132 /* The zero extensions is often completely free on x86_64, so make
22133 it as cheap as possible. */
22134 if (TARGET_64BIT
&& mode
== DImode
22135 && GET_MODE (XEXP (x
, 0)) == SImode
)
22137 else if (TARGET_ZERO_EXTEND_WITH_AND
)
22138 *total
= ix86_cost
->add
;
22140 *total
= ix86_cost
->movzx
;
22144 *total
= ix86_cost
->movsx
;
22148 if (CONST_INT_P (XEXP (x
, 1))
22149 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
22151 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
22154 *total
= ix86_cost
->add
;
22157 if ((value
== 2 || value
== 3)
22158 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
22160 *total
= ix86_cost
->lea
;
22170 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
22172 if (CONST_INT_P (XEXP (x
, 1)))
22174 if (INTVAL (XEXP (x
, 1)) > 32)
22175 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
22177 *total
= ix86_cost
->shift_const
* 2;
22181 if (GET_CODE (XEXP (x
, 1)) == AND
)
22182 *total
= ix86_cost
->shift_var
* 2;
22184 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
22189 if (CONST_INT_P (XEXP (x
, 1)))
22190 *total
= ix86_cost
->shift_const
;
22192 *total
= ix86_cost
->shift_var
;
22197 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22199 /* ??? SSE scalar cost should be used here. */
22200 *total
= ix86_cost
->fmul
;
22203 else if (X87_FLOAT_MODE_P (mode
))
22205 *total
= ix86_cost
->fmul
;
22208 else if (FLOAT_MODE_P (mode
))
22210 /* ??? SSE vector cost should be used here. */
22211 *total
= ix86_cost
->fmul
;
22216 rtx op0
= XEXP (x
, 0);
22217 rtx op1
= XEXP (x
, 1);
22219 if (CONST_INT_P (XEXP (x
, 1)))
22221 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
22222 for (nbits
= 0; value
!= 0; value
&= value
- 1)
22226 /* This is arbitrary. */
22229 /* Compute costs correctly for widening multiplication. */
22230 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
22231 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
22232 == GET_MODE_SIZE (mode
))
22234 int is_mulwiden
= 0;
22235 enum machine_mode inner_mode
= GET_MODE (op0
);
22237 if (GET_CODE (op0
) == GET_CODE (op1
))
22238 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
22239 else if (CONST_INT_P (op1
))
22241 if (GET_CODE (op0
) == SIGN_EXTEND
)
22242 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
22245 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
22249 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
22252 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
22253 + nbits
* ix86_cost
->mult_bit
22254 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
22263 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22264 /* ??? SSE cost should be used here. */
22265 *total
= ix86_cost
->fdiv
;
22266 else if (X87_FLOAT_MODE_P (mode
))
22267 *total
= ix86_cost
->fdiv
;
22268 else if (FLOAT_MODE_P (mode
))
22269 /* ??? SSE vector cost should be used here. */
22270 *total
= ix86_cost
->fdiv
;
22272 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
22276 if (GET_MODE_CLASS (mode
) == MODE_INT
22277 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
22279 if (GET_CODE (XEXP (x
, 0)) == PLUS
22280 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
22281 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
22282 && CONSTANT_P (XEXP (x
, 1)))
22284 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
22285 if (val
== 2 || val
== 4 || val
== 8)
22287 *total
= ix86_cost
->lea
;
22288 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
22289 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
22291 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22295 else if (GET_CODE (XEXP (x
, 0)) == MULT
22296 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
22298 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
22299 if (val
== 2 || val
== 4 || val
== 8)
22301 *total
= ix86_cost
->lea
;
22302 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
22303 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22307 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
22309 *total
= ix86_cost
->lea
;
22310 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
22311 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
22312 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
22319 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22321 /* ??? SSE cost should be used here. */
22322 *total
= ix86_cost
->fadd
;
22325 else if (X87_FLOAT_MODE_P (mode
))
22327 *total
= ix86_cost
->fadd
;
22330 else if (FLOAT_MODE_P (mode
))
22332 /* ??? SSE vector cost should be used here. */
22333 *total
= ix86_cost
->fadd
;
22341 if (!TARGET_64BIT
&& mode
== DImode
)
22343 *total
= (ix86_cost
->add
* 2
22344 + (rtx_cost (XEXP (x
, 0), outer_code
)
22345 << (GET_MODE (XEXP (x
, 0)) != DImode
))
22346 + (rtx_cost (XEXP (x
, 1), outer_code
)
22347 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
22353 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22355 /* ??? SSE cost should be used here. */
22356 *total
= ix86_cost
->fchs
;
22359 else if (X87_FLOAT_MODE_P (mode
))
22361 *total
= ix86_cost
->fchs
;
22364 else if (FLOAT_MODE_P (mode
))
22366 /* ??? SSE vector cost should be used here. */
22367 *total
= ix86_cost
->fchs
;
22373 if (!TARGET_64BIT
&& mode
== DImode
)
22374 *total
= ix86_cost
->add
* 2;
22376 *total
= ix86_cost
->add
;
22380 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
22381 && XEXP (XEXP (x
, 0), 1) == const1_rtx
22382 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
22383 && XEXP (x
, 1) == const0_rtx
)
22385 /* This kind of construct is implemented using test[bwl].
22386 Treat it as if we had an AND. */
22387 *total
= (ix86_cost
->add
22388 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
22389 + rtx_cost (const1_rtx
, outer_code
));
22395 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
22400 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22401 /* ??? SSE cost should be used here. */
22402 *total
= ix86_cost
->fabs
;
22403 else if (X87_FLOAT_MODE_P (mode
))
22404 *total
= ix86_cost
->fabs
;
22405 else if (FLOAT_MODE_P (mode
))
22406 /* ??? SSE vector cost should be used here. */
22407 *total
= ix86_cost
->fabs
;
22411 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22412 /* ??? SSE cost should be used here. */
22413 *total
= ix86_cost
->fsqrt
;
22414 else if (X87_FLOAT_MODE_P (mode
))
22415 *total
= ix86_cost
->fsqrt
;
22416 else if (FLOAT_MODE_P (mode
))
22417 /* ??? SSE vector cost should be used here. */
22418 *total
= ix86_cost
->fsqrt
;
22422 if (XINT (x
, 1) == UNSPEC_TP
)
22433 static int current_machopic_label_num
;
22435 /* Given a symbol name and its associated stub, write out the
22436 definition of the stub. */
22439 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
22441 unsigned int length
;
22442 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
22443 int label
= ++current_machopic_label_num
;
22445 /* For 64-bit we shouldn't get here. */
22446 gcc_assert (!TARGET_64BIT
);
22448 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22449 symb
= (*targetm
.strip_name_encoding
) (symb
);
22451 length
= strlen (stub
);
22452 binder_name
= alloca (length
+ 32);
22453 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
22455 length
= strlen (symb
);
22456 symbol_name
= alloca (length
+ 32);
22457 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
22459 sprintf (lazy_ptr_name
, "L%d$lz", label
);
22462 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
22464 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
22466 fprintf (file
, "%s:\n", stub
);
22467 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
22471 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
22472 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
22473 fprintf (file
, "\tjmp\t*%%edx\n");
22476 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
22478 fprintf (file
, "%s:\n", binder_name
);
22482 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
22483 fprintf (file
, "\tpushl\t%%eax\n");
22486 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
22488 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
22490 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
22491 fprintf (file
, "%s:\n", lazy_ptr_name
);
22492 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
22493 fprintf (file
, "\t.long %s\n", binder_name
);
22497 darwin_x86_file_end (void)
22499 darwin_file_end ();
22502 #endif /* TARGET_MACHO */
22504 /* Order the registers for register allocator. */
22507 x86_order_regs_for_local_alloc (void)
22512 /* First allocate the local general purpose registers. */
22513 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
22514 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
22515 reg_alloc_order
[pos
++] = i
;
22517 /* Global general purpose registers. */
22518 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
22519 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
22520 reg_alloc_order
[pos
++] = i
;
22522 /* x87 registers come first in case we are doing FP math
22524 if (!TARGET_SSE_MATH
)
22525 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
22526 reg_alloc_order
[pos
++] = i
;
22528 /* SSE registers. */
22529 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
22530 reg_alloc_order
[pos
++] = i
;
22531 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
22532 reg_alloc_order
[pos
++] = i
;
22534 /* x87 registers. */
22535 if (TARGET_SSE_MATH
)
22536 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
22537 reg_alloc_order
[pos
++] = i
;
22539 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
22540 reg_alloc_order
[pos
++] = i
;
22542 /* Initialize the rest of array as we do not allocate some registers
22544 while (pos
< FIRST_PSEUDO_REGISTER
)
22545 reg_alloc_order
[pos
++] = 0;
22548 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22549 struct attribute_spec.handler. */
22551 ix86_handle_struct_attribute (tree
*node
, tree name
,
22552 tree args ATTRIBUTE_UNUSED
,
22553 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
22556 if (DECL_P (*node
))
22558 if (TREE_CODE (*node
) == TYPE_DECL
)
22559 type
= &TREE_TYPE (*node
);
22564 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
22565 || TREE_CODE (*type
) == UNION_TYPE
)))
22567 warning (OPT_Wattributes
, "%qs attribute ignored",
22568 IDENTIFIER_POINTER (name
));
22569 *no_add_attrs
= true;
22572 else if ((is_attribute_p ("ms_struct", name
)
22573 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
22574 || ((is_attribute_p ("gcc_struct", name
)
22575 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
22577 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
22578 IDENTIFIER_POINTER (name
));
22579 *no_add_attrs
= true;
22586 ix86_ms_bitfield_layout_p (const_tree record_type
)
22588 return (TARGET_MS_BITFIELD_LAYOUT
&&
22589 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
22590 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
22593 /* Returns an expression indicating where the this parameter is
22594 located on entry to the FUNCTION. */
22597 x86_this_parameter (tree function
)
22599 tree type
= TREE_TYPE (function
);
22600 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
22604 const int *parm_regs
;
22606 if (TARGET_64BIT_MS_ABI
)
22607 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
22609 parm_regs
= x86_64_int_parameter_registers
;
22610 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
22613 if (ix86_function_regparm (type
, function
) > 0 && !stdarg_p (type
))
22615 int regno
= AX_REG
;
22616 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
22618 return gen_rtx_REG (SImode
, regno
);
22621 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
22624 /* Determine whether x86_output_mi_thunk can succeed. */
22627 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
22628 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
22629 HOST_WIDE_INT vcall_offset
, const_tree function
)
22631 /* 64-bit can handle anything. */
22635 /* For 32-bit, everything's fine if we have one free register. */
22636 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
22639 /* Need a free register for vcall_offset. */
22643 /* Need a free register for GOT references. */
22644 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
22647 /* Otherwise ok. */
22651 /* Output the assembler code for a thunk function. THUNK_DECL is the
22652 declaration for the thunk function itself, FUNCTION is the decl for
22653 the target function. DELTA is an immediate constant offset to be
22654 added to THIS. If VCALL_OFFSET is nonzero, the word at
22655 *(*this + vcall_offset) should be added to THIS. */
22658 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
22659 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
22660 HOST_WIDE_INT vcall_offset
, tree function
)
22663 rtx this_param
= x86_this_parameter (function
);
22666 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22667 pull it in now and let DELTA benefit. */
22668 if (REG_P (this_param
))
22669 this_reg
= this_param
;
22670 else if (vcall_offset
)
22672 /* Put the this parameter into %eax. */
22673 xops
[0] = this_param
;
22674 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
22675 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22678 this_reg
= NULL_RTX
;
22680 /* Adjust the this parameter by a fixed constant. */
22683 xops
[0] = GEN_INT (delta
);
22684 xops
[1] = this_reg
? this_reg
: this_param
;
22687 if (!x86_64_general_operand (xops
[0], DImode
))
22689 tmp
= gen_rtx_REG (DImode
, R10_REG
);
22691 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
22693 xops
[1] = this_param
;
22695 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
22698 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
22701 /* Adjust the this parameter by a value stored in the vtable. */
22705 tmp
= gen_rtx_REG (DImode
, R10_REG
);
22708 int tmp_regno
= CX_REG
;
22709 if (lookup_attribute ("fastcall",
22710 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
22711 tmp_regno
= AX_REG
;
22712 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
22715 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
22718 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
22720 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22722 /* Adjust the this parameter. */
22723 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
22724 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
22726 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
22727 xops
[0] = GEN_INT (vcall_offset
);
22729 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
22730 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
22732 xops
[1] = this_reg
;
22734 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
22736 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
22739 /* If necessary, drop THIS back to its stack slot. */
22740 if (this_reg
&& this_reg
!= this_param
)
22742 xops
[0] = this_reg
;
22743 xops
[1] = this_param
;
22744 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
22747 xops
[0] = XEXP (DECL_RTL (function
), 0);
22750 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
22751 output_asm_insn ("jmp\t%P0", xops
);
22752 /* All thunks should be in the same object as their target,
22753 and thus binds_local_p should be true. */
22754 else if (TARGET_64BIT_MS_ABI
)
22755 gcc_unreachable ();
22758 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
22759 tmp
= gen_rtx_CONST (Pmode
, tmp
);
22760 tmp
= gen_rtx_MEM (QImode
, tmp
);
22762 output_asm_insn ("jmp\t%A0", xops
);
22767 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
22768 output_asm_insn ("jmp\t%P0", xops
);
22773 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
22774 tmp
= (gen_rtx_SYMBOL_REF
22776 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
22777 tmp
= gen_rtx_MEM (QImode
, tmp
);
22779 output_asm_insn ("jmp\t%0", xops
);
22782 #endif /* TARGET_MACHO */
22784 tmp
= gen_rtx_REG (SImode
, CX_REG
);
22785 output_set_got (tmp
, NULL_RTX
);
22788 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
22789 output_asm_insn ("jmp\t{*}%1", xops
);
22795 x86_file_start (void)
22797 default_file_start ();
22799 darwin_file_start ();
22801 if (X86_FILE_START_VERSION_DIRECTIVE
)
22802 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
22803 if (X86_FILE_START_FLTUSED
)
22804 fputs ("\t.global\t__fltused\n", asm_out_file
);
22805 if (ix86_asm_dialect
== ASM_INTEL
)
22806 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
22810 x86_field_alignment (tree field
, int computed
)
22812 enum machine_mode mode
;
22813 tree type
= TREE_TYPE (field
);
22815 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
22817 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
22818 ? get_inner_array_type (type
) : type
);
22819 if (mode
== DFmode
|| mode
== DCmode
22820 || GET_MODE_CLASS (mode
) == MODE_INT
22821 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
22822 return MIN (32, computed
);
22826 /* Output assembler code to FILE to increment profiler label # LABELNO
22827 for profiling a function entry. */
22829 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
22833 #ifndef NO_PROFILE_COUNTERS
22834 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
22837 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
22838 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
22840 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
22844 #ifndef NO_PROFILE_COUNTERS
22845 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22846 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
22848 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
22852 #ifndef NO_PROFILE_COUNTERS
22853 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
22854 PROFILE_COUNT_REGISTER
);
22856 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
22860 /* We don't have exact information about the insn sizes, but we may assume
22861 quite safely that we are informed about all 1 byte insns and memory
22862 address sizes. This is enough to eliminate unnecessary padding in
22866 min_insn_size (rtx insn
)
22870 if (!INSN_P (insn
) || !active_insn_p (insn
))
22873 /* Discard alignments we've emit and jump instructions. */
22874 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
22875 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
22878 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
22879 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
22882 /* Important case - calls are always 5 bytes.
22883 It is common to have many calls in the row. */
22885 && symbolic_reference_mentioned_p (PATTERN (insn
))
22886 && !SIBLING_CALL_P (insn
))
22888 if (get_attr_length (insn
) <= 1)
22891 /* For normal instructions we may rely on the sizes of addresses
22892 and the presence of symbol to require 4 bytes of encoding.
22893 This is not the case for jumps where references are PC relative. */
22894 if (!JUMP_P (insn
))
22896 l
= get_attr_length_address (insn
);
22897 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
22906 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22910 ix86_avoid_jump_misspredicts (void)
22912 rtx insn
, start
= get_insns ();
22913 int nbytes
= 0, njumps
= 0;
22916 /* Look for all minimal intervals of instructions containing 4 jumps.
22917 The intervals are bounded by START and INSN. NBYTES is the total
22918 size of instructions in the interval including INSN and not including
22919 START. When the NBYTES is smaller than 16 bytes, it is possible
22920 that the end of START and INSN ends up in the same 16byte page.
22922 The smallest offset in the page INSN can start is the case where START
22923 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22924 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22926 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22929 nbytes
+= min_insn_size (insn
);
22931 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
22932 INSN_UID (insn
), min_insn_size (insn
));
22934 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
22935 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
22943 start
= NEXT_INSN (start
);
22944 if ((JUMP_P (start
)
22945 && GET_CODE (PATTERN (start
)) != ADDR_VEC
22946 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
22948 njumps
--, isjump
= 1;
22951 nbytes
-= min_insn_size (start
);
22953 gcc_assert (njumps
>= 0);
22955 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
22956 INSN_UID (start
), INSN_UID (insn
), nbytes
);
22958 if (njumps
== 3 && isjump
&& nbytes
< 16)
22960 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
22963 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
22964 INSN_UID (insn
), padsize
);
22965 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
22970 /* AMD Athlon works faster
22971 when RET is not destination of conditional jump or directly preceded
22972 by other jump instruction. We avoid the penalty by inserting NOP just
22973 before the RET instructions in such cases. */
22975 ix86_pad_returns (void)
22980 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
22982 basic_block bb
= e
->src
;
22983 rtx ret
= BB_END (bb
);
22985 bool replace
= false;
22987 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
22988 || !maybe_hot_bb_p (bb
))
22990 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
22991 if (active_insn_p (prev
) || LABEL_P (prev
))
22993 if (prev
&& LABEL_P (prev
))
22998 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
22999 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
23000 && !(e
->flags
& EDGE_FALLTHRU
))
23005 prev
= prev_active_insn (ret
);
23007 && ((JUMP_P (prev
) && any_condjump_p (prev
))
23010 /* Empty functions get branch mispredict even when the jump destination
23011 is not visible to us. */
23012 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
23017 emit_insn_before (gen_return_internal_long (), ret
);
23023 /* Implement machine specific optimizations. We implement padding of returns
23024 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23028 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
23029 ix86_pad_returns ();
23030 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
23031 ix86_avoid_jump_misspredicts ();
23034 /* Return nonzero when QImode register that must be represented via REX prefix
23037 x86_extended_QIreg_mentioned_p (rtx insn
)
23040 extract_insn_cached (insn
);
23041 for (i
= 0; i
< recog_data
.n_operands
; i
++)
23042 if (REG_P (recog_data
.operand
[i
])
23043 && REGNO (recog_data
.operand
[i
]) >= 4)
23048 /* Return nonzero when P points to register encoded via REX prefix.
23049 Called via for_each_rtx. */
23051 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
23053 unsigned int regno
;
23056 regno
= REGNO (*p
);
23057 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
23060 /* Return true when INSN mentions register that must be encoded using REX
23063 x86_extended_reg_mentioned_p (rtx insn
)
23065 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
23068 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23069 optabs would emit if we didn't have TFmode patterns. */
23072 x86_emit_floatuns (rtx operands
[2])
23074 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
23075 enum machine_mode mode
, inmode
;
23077 inmode
= GET_MODE (operands
[1]);
23078 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
23081 in
= force_reg (inmode
, operands
[1]);
23082 mode
= GET_MODE (out
);
23083 neglab
= gen_label_rtx ();
23084 donelab
= gen_label_rtx ();
23085 f0
= gen_reg_rtx (mode
);
23087 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
23089 expand_float (out
, in
, 0);
23091 emit_jump_insn (gen_jump (donelab
));
23094 emit_label (neglab
);
23096 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
23098 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
23100 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
23102 expand_float (f0
, i0
, 0);
23104 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
23106 emit_label (donelab
);
23109 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23110 with all elements equal to VAR. Return true if successful. */
23113 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
23114 rtx target
, rtx val
)
23116 enum machine_mode smode
, wsmode
, wvmode
;
23131 val
= force_reg (GET_MODE_INNER (mode
), val
);
23132 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
23133 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
23139 if (TARGET_SSE
|| TARGET_3DNOW_A
)
23141 val
= gen_lowpart (SImode
, val
);
23142 x
= gen_rtx_TRUNCATE (HImode
, val
);
23143 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
23144 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
23166 /* Extend HImode to SImode using a paradoxical SUBREG. */
23167 tmp1
= gen_reg_rtx (SImode
);
23168 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
23169 /* Insert the SImode value as low element of V4SImode vector. */
23170 tmp2
= gen_reg_rtx (V4SImode
);
23171 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
23172 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
23173 CONST0_RTX (V4SImode
),
23175 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
23176 /* Cast the V4SImode vector back to a V8HImode vector. */
23177 tmp1
= gen_reg_rtx (V8HImode
);
23178 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
23179 /* Duplicate the low short through the whole low SImode word. */
23180 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
23181 /* Cast the V8HImode vector back to a V4SImode vector. */
23182 tmp2
= gen_reg_rtx (V4SImode
);
23183 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
23184 /* Replicate the low element of the V4SImode vector. */
23185 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
23186 /* Cast the V2SImode back to V8HImode, and store in target. */
23187 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
23198 /* Extend QImode to SImode using a paradoxical SUBREG. */
23199 tmp1
= gen_reg_rtx (SImode
);
23200 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
23201 /* Insert the SImode value as low element of V4SImode vector. */
23202 tmp2
= gen_reg_rtx (V4SImode
);
23203 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
23204 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
23205 CONST0_RTX (V4SImode
),
23207 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
23208 /* Cast the V4SImode vector back to a V16QImode vector. */
23209 tmp1
= gen_reg_rtx (V16QImode
);
23210 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
23211 /* Duplicate the low byte through the whole low SImode word. */
23212 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
23213 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
23214 /* Cast the V16QImode vector back to a V4SImode vector. */
23215 tmp2
= gen_reg_rtx (V4SImode
);
23216 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
23217 /* Replicate the low element of the V4SImode vector. */
23218 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
23219 /* Cast the V2SImode back to V16QImode, and store in target. */
23220 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
23228 /* Replicate the value once into the next wider mode and recurse. */
23229 val
= convert_modes (wsmode
, smode
, val
, true);
23230 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
23231 GEN_INT (GET_MODE_BITSIZE (smode
)),
23232 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23233 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
23235 x
= gen_reg_rtx (wvmode
);
23236 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
23237 gcc_unreachable ();
23238 emit_move_insn (target
, gen_lowpart (mode
, x
));
23246 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23247 whose ONE_VAR element is VAR, and other elements are zero. Return true
23251 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
23252 rtx target
, rtx var
, int one_var
)
23254 enum machine_mode vsimode
;
23270 var
= force_reg (GET_MODE_INNER (mode
), var
);
23271 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
23272 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
23277 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
23278 new_target
= gen_reg_rtx (mode
);
23280 new_target
= target
;
23281 var
= force_reg (GET_MODE_INNER (mode
), var
);
23282 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
23283 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
23284 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
23287 /* We need to shuffle the value to the correct position, so
23288 create a new pseudo to store the intermediate result. */
23290 /* With SSE2, we can use the integer shuffle insns. */
23291 if (mode
!= V4SFmode
&& TARGET_SSE2
)
23293 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
23295 GEN_INT (one_var
== 1 ? 0 : 1),
23296 GEN_INT (one_var
== 2 ? 0 : 1),
23297 GEN_INT (one_var
== 3 ? 0 : 1)));
23298 if (target
!= new_target
)
23299 emit_move_insn (target
, new_target
);
23303 /* Otherwise convert the intermediate result to V4SFmode and
23304 use the SSE1 shuffle instructions. */
23305 if (mode
!= V4SFmode
)
23307 tmp
= gen_reg_rtx (V4SFmode
);
23308 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
23313 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
23315 GEN_INT (one_var
== 1 ? 0 : 1),
23316 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
23317 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
23319 if (mode
!= V4SFmode
)
23320 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
23321 else if (tmp
!= target
)
23322 emit_move_insn (target
, tmp
);
23324 else if (target
!= new_target
)
23325 emit_move_insn (target
, new_target
);
23330 vsimode
= V4SImode
;
23336 vsimode
= V2SImode
;
23342 /* Zero extend the variable element to SImode and recurse. */
23343 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
23345 x
= gen_reg_rtx (vsimode
);
23346 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
23348 gcc_unreachable ();
23350 emit_move_insn (target
, gen_lowpart (mode
, x
));
23358 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23359 consisting of the values in VALS. It is known that all elements
23360 except ONE_VAR are constants. Return true if successful. */
23363 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
23364 rtx target
, rtx vals
, int one_var
)
23366 rtx var
= XVECEXP (vals
, 0, one_var
);
23367 enum machine_mode wmode
;
23370 const_vec
= copy_rtx (vals
);
23371 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
23372 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
23380 /* For the two element vectors, it's just as easy to use
23381 the general case. */
23397 /* There's no way to set one QImode entry easily. Combine
23398 the variable value with its adjacent constant value, and
23399 promote to an HImode set. */
23400 x
= XVECEXP (vals
, 0, one_var
^ 1);
23403 var
= convert_modes (HImode
, QImode
, var
, true);
23404 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
23405 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23406 x
= GEN_INT (INTVAL (x
) & 0xff);
23410 var
= convert_modes (HImode
, QImode
, var
, true);
23411 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
23413 if (x
!= const0_rtx
)
23414 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
23415 1, OPTAB_LIB_WIDEN
);
23417 x
= gen_reg_rtx (wmode
);
23418 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
23419 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
23421 emit_move_insn (target
, gen_lowpart (mode
, x
));
23428 emit_move_insn (target
, const_vec
);
23429 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
23433 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23434 all values variable, and none identical. */
23437 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
23438 rtx target
, rtx vals
)
23440 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
23441 rtx op0
= NULL
, op1
= NULL
;
23442 bool use_vec_concat
= false;
23448 if (!mmx_ok
&& !TARGET_SSE
)
23454 /* For the two element vectors, we always implement VEC_CONCAT. */
23455 op0
= XVECEXP (vals
, 0, 0);
23456 op1
= XVECEXP (vals
, 0, 1);
23457 use_vec_concat
= true;
23461 half_mode
= V2SFmode
;
23464 half_mode
= V2SImode
;
23470 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23471 Recurse to load the two halves. */
23473 op0
= gen_reg_rtx (half_mode
);
23474 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
23475 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
23477 op1
= gen_reg_rtx (half_mode
);
23478 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
23479 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
23481 use_vec_concat
= true;
23492 gcc_unreachable ();
23495 if (use_vec_concat
)
23497 if (!register_operand (op0
, half_mode
))
23498 op0
= force_reg (half_mode
, op0
);
23499 if (!register_operand (op1
, half_mode
))
23500 op1
= force_reg (half_mode
, op1
);
23502 emit_insn (gen_rtx_SET (VOIDmode
, target
,
23503 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
23507 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
23508 enum machine_mode inner_mode
;
23509 rtx words
[4], shift
;
23511 inner_mode
= GET_MODE_INNER (mode
);
23512 n_elts
= GET_MODE_NUNITS (mode
);
23513 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
23514 n_elt_per_word
= n_elts
/ n_words
;
23515 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
23517 for (i
= 0; i
< n_words
; ++i
)
23519 rtx word
= NULL_RTX
;
23521 for (j
= 0; j
< n_elt_per_word
; ++j
)
23523 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
23524 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
23530 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
23531 word
, 1, OPTAB_LIB_WIDEN
);
23532 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
23533 word
, 1, OPTAB_LIB_WIDEN
);
23541 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
23542 else if (n_words
== 2)
23544 rtx tmp
= gen_reg_rtx (mode
);
23545 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
23546 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
23547 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
23548 emit_move_insn (target
, tmp
);
23550 else if (n_words
== 4)
23552 rtx tmp
= gen_reg_rtx (V4SImode
);
23553 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
23554 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
23555 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
23558 gcc_unreachable ();
23562 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23563 instructions unless MMX_OK is true. */
23566 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
23568 enum machine_mode mode
= GET_MODE (target
);
23569 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23570 int n_elts
= GET_MODE_NUNITS (mode
);
23571 int n_var
= 0, one_var
= -1;
23572 bool all_same
= true, all_const_zero
= true;
23576 for (i
= 0; i
< n_elts
; ++i
)
23578 x
= XVECEXP (vals
, 0, i
);
23579 if (!CONSTANT_P (x
))
23580 n_var
++, one_var
= i
;
23581 else if (x
!= CONST0_RTX (inner_mode
))
23582 all_const_zero
= false;
23583 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
23587 /* Constants are best loaded from the constant pool. */
23590 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
23594 /* If all values are identical, broadcast the value. */
23596 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
23597 XVECEXP (vals
, 0, 0)))
23600 /* Values where only one field is non-constant are best loaded from
23601 the pool and overwritten via move later. */
23605 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
23606 XVECEXP (vals
, 0, one_var
),
23610 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
23614 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
23618 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
23620 enum machine_mode mode
= GET_MODE (target
);
23621 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23622 bool use_vec_merge
= false;
23631 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
23632 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
23634 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
23636 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
23637 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23643 use_vec_merge
= TARGET_SSE4_1
;
23651 /* For the two element vectors, we implement a VEC_CONCAT with
23652 the extraction of the other element. */
23654 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
23655 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
23658 op0
= val
, op1
= tmp
;
23660 op0
= tmp
, op1
= val
;
23662 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
23663 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23668 use_vec_merge
= TARGET_SSE4_1
;
23675 use_vec_merge
= true;
23679 /* tmp = target = A B C D */
23680 tmp
= copy_to_reg (target
);
23681 /* target = A A B B */
23682 emit_insn (gen_sse_unpcklps (target
, target
, target
));
23683 /* target = X A B B */
23684 ix86_expand_vector_set (false, target
, val
, 0);
23685 /* target = A X C D */
23686 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23687 GEN_INT (1), GEN_INT (0),
23688 GEN_INT (2+4), GEN_INT (3+4)));
23692 /* tmp = target = A B C D */
23693 tmp
= copy_to_reg (target
);
23694 /* tmp = X B C D */
23695 ix86_expand_vector_set (false, tmp
, val
, 0);
23696 /* target = A B X D */
23697 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23698 GEN_INT (0), GEN_INT (1),
23699 GEN_INT (0+4), GEN_INT (3+4)));
23703 /* tmp = target = A B C D */
23704 tmp
= copy_to_reg (target
);
23705 /* tmp = X B C D */
23706 ix86_expand_vector_set (false, tmp
, val
, 0);
23707 /* target = A B X D */
23708 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
23709 GEN_INT (0), GEN_INT (1),
23710 GEN_INT (2+4), GEN_INT (0+4)));
23714 gcc_unreachable ();
23719 use_vec_merge
= TARGET_SSE4_1
;
23723 /* Element 0 handled by vec_merge below. */
23726 use_vec_merge
= true;
23732 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23733 store into element 0, then shuffle them back. */
23737 order
[0] = GEN_INT (elt
);
23738 order
[1] = const1_rtx
;
23739 order
[2] = const2_rtx
;
23740 order
[3] = GEN_INT (3);
23741 order
[elt
] = const0_rtx
;
23743 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
23744 order
[1], order
[2], order
[3]));
23746 ix86_expand_vector_set (false, target
, val
, 0);
23748 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
23749 order
[1], order
[2], order
[3]));
23753 /* For SSE1, we have to reuse the V4SF code. */
23754 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
23755 gen_lowpart (SFmode
, val
), elt
);
23760 use_vec_merge
= TARGET_SSE2
;
23763 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
23767 use_vec_merge
= TARGET_SSE4_1
;
23777 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
23778 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
23779 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23783 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
23785 emit_move_insn (mem
, target
);
23787 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
23788 emit_move_insn (tmp
, val
);
23790 emit_move_insn (target
, mem
);
23795 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
23797 enum machine_mode mode
= GET_MODE (vec
);
23798 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
23799 bool use_vec_extr
= false;
23812 use_vec_extr
= true;
23816 use_vec_extr
= TARGET_SSE4_1
;
23828 tmp
= gen_reg_rtx (mode
);
23829 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
23830 GEN_INT (elt
), GEN_INT (elt
),
23831 GEN_INT (elt
+4), GEN_INT (elt
+4)));
23835 tmp
= gen_reg_rtx (mode
);
23836 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
23840 gcc_unreachable ();
23843 use_vec_extr
= true;
23848 use_vec_extr
= TARGET_SSE4_1
;
23862 tmp
= gen_reg_rtx (mode
);
23863 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
23864 GEN_INT (elt
), GEN_INT (elt
),
23865 GEN_INT (elt
), GEN_INT (elt
)));
23869 tmp
= gen_reg_rtx (mode
);
23870 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
23874 gcc_unreachable ();
23877 use_vec_extr
= true;
23882 /* For SSE1, we have to reuse the V4SF code. */
23883 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
23884 gen_lowpart (V4SFmode
, vec
), elt
);
23890 use_vec_extr
= TARGET_SSE2
;
23893 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
23897 use_vec_extr
= TARGET_SSE4_1
;
23901 /* ??? Could extract the appropriate HImode element and shift. */
23908 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
23909 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
23911 /* Let the rtl optimizers know about the zero extension performed. */
23912 if (inner_mode
== QImode
|| inner_mode
== HImode
)
23914 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
23915 target
= gen_lowpart (SImode
, target
);
23918 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
23922 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
23924 emit_move_insn (mem
, vec
);
23926 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
23927 emit_move_insn (target
, tmp
);
23931 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23932 pattern to reduce; DEST is the destination; IN is the input vector. */
23935 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
23937 rtx tmp1
, tmp2
, tmp3
;
23939 tmp1
= gen_reg_rtx (V4SFmode
);
23940 tmp2
= gen_reg_rtx (V4SFmode
);
23941 tmp3
= gen_reg_rtx (V4SFmode
);
23943 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
23944 emit_insn (fn (tmp2
, tmp1
, in
));
23946 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
23947 GEN_INT (1), GEN_INT (1),
23948 GEN_INT (1+4), GEN_INT (1+4)));
23949 emit_insn (fn (dest
, tmp2
, tmp3
));
23952 /* Target hook for scalar_mode_supported_p. */
23954 ix86_scalar_mode_supported_p (enum machine_mode mode
)
23956 if (DECIMAL_FLOAT_MODE_P (mode
))
23958 else if (mode
== TFmode
)
23959 return TARGET_64BIT
;
23961 return default_scalar_mode_supported_p (mode
);
23964 /* Implements target hook vector_mode_supported_p. */
23966 ix86_vector_mode_supported_p (enum machine_mode mode
)
23968 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
23970 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
23972 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
23974 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
23979 /* Target hook for c_mode_for_suffix. */
23980 static enum machine_mode
23981 ix86_c_mode_for_suffix (char suffix
)
23983 if (TARGET_64BIT
&& suffix
== 'q')
23985 if (TARGET_MMX
&& suffix
== 'w')
23991 /* Worker function for TARGET_MD_ASM_CLOBBERS.
23993 We do this in the new i386 backend to maintain source compatibility
23994 with the old cc0-based compiler. */
23997 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
23998 tree inputs ATTRIBUTE_UNUSED
,
24001 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
24003 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
24008 /* Implements target vector targetm.asm.encode_section_info. This
24009 is not used by netware. */
24011 static void ATTRIBUTE_UNUSED
24012 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
24014 default_encode_section_info (decl
, rtl
, first
);
24016 if (TREE_CODE (decl
) == VAR_DECL
24017 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
24018 && ix86_in_large_data_p (decl
))
24019 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
24022 /* Worker function for REVERSE_CONDITION. */
24025 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
24027 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
24028 ? reverse_condition (code
)
24029 : reverse_condition_maybe_unordered (code
));
24032 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24036 output_387_reg_move (rtx insn
, rtx
*operands
)
24038 if (REG_P (operands
[0]))
24040 if (REG_P (operands
[1])
24041 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
24043 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
24044 return output_387_ffreep (operands
, 0);
24045 return "fstp\t%y0";
24047 if (STACK_TOP_P (operands
[0]))
24048 return "fld%z1\t%y1";
24051 else if (MEM_P (operands
[0]))
24053 gcc_assert (REG_P (operands
[1]));
24054 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
24055 return "fstp%z0\t%y0";
24058 /* There is no non-popping store to memory for XFmode.
24059 So if we need one, follow the store with a load. */
24060 if (GET_MODE (operands
[0]) == XFmode
)
24061 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24063 return "fst%z0\t%y0";
24070 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24071 FP status register is set. */
24074 ix86_emit_fp_unordered_jump (rtx label
)
24076 rtx reg
= gen_reg_rtx (HImode
);
24079 emit_insn (gen_x86_fnstsw_1 (reg
));
24081 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
24083 emit_insn (gen_x86_sahf_1 (reg
));
24085 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
24086 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
24090 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
24092 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24093 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
24096 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
24097 gen_rtx_LABEL_REF (VOIDmode
, label
),
24099 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
24101 emit_jump_insn (temp
);
24102 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
24105 /* Output code to perform a log1p XFmode calculation. */
24107 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
24109 rtx label1
= gen_label_rtx ();
24110 rtx label2
= gen_label_rtx ();
24112 rtx tmp
= gen_reg_rtx (XFmode
);
24113 rtx tmp2
= gen_reg_rtx (XFmode
);
24115 emit_insn (gen_absxf2 (tmp
, op1
));
24116 emit_insn (gen_cmpxf (tmp
,
24117 CONST_DOUBLE_FROM_REAL_VALUE (
24118 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
24120 emit_jump_insn (gen_bge (label1
));
24122 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
24123 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
24124 emit_jump (label2
);
24126 emit_label (label1
);
24127 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
24128 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
24129 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
24130 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
24132 emit_label (label2
);
24135 /* Output code to perform a Newton-Rhapson approximation of a single precision
24136 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24138 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
24140 rtx x0
, x1
, e0
, e1
, two
;
24142 x0
= gen_reg_rtx (mode
);
24143 e0
= gen_reg_rtx (mode
);
24144 e1
= gen_reg_rtx (mode
);
24145 x1
= gen_reg_rtx (mode
);
24147 two
= CONST_DOUBLE_FROM_REAL_VALUE (dconst2
, SFmode
);
24149 if (VECTOR_MODE_P (mode
))
24150 two
= ix86_build_const_vector (SFmode
, true, two
);
24152 two
= force_reg (mode
, two
);
24154 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24156 /* x0 = 1./b estimate */
24157 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
24158 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
24161 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
24162 gen_rtx_MULT (mode
, x0
, b
)));
24164 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
24165 gen_rtx_MINUS (mode
, two
, e0
)));
24167 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
24168 gen_rtx_MULT (mode
, x0
, e1
)));
24170 emit_insn (gen_rtx_SET (VOIDmode
, res
,
24171 gen_rtx_MULT (mode
, a
, x1
)));
24174 /* Output code to perform a Newton-Rhapson approximation of a
24175 single precision floating point [reciprocal] square root. */
24177 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
24180 rtx x0
, e0
, e1
, e2
, e3
, three
, half
, zero
, mask
;
24182 x0
= gen_reg_rtx (mode
);
24183 e0
= gen_reg_rtx (mode
);
24184 e1
= gen_reg_rtx (mode
);
24185 e2
= gen_reg_rtx (mode
);
24186 e3
= gen_reg_rtx (mode
);
24188 three
= CONST_DOUBLE_FROM_REAL_VALUE (dconst3
, SFmode
);
24189 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, SFmode
);
24191 mask
= gen_reg_rtx (mode
);
24193 if (VECTOR_MODE_P (mode
))
24195 three
= ix86_build_const_vector (SFmode
, true, three
);
24196 half
= ix86_build_const_vector (SFmode
, true, half
);
24199 three
= force_reg (mode
, three
);
24200 half
= force_reg (mode
, half
);
24202 zero
= force_reg (mode
, CONST0_RTX(mode
));
24204 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24205 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
24207 /* Compare a to zero. */
24208 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
24209 gen_rtx_NE (mode
, a
, zero
)));
24211 /* x0 = 1./sqrt(a) estimate */
24212 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
24213 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
24215 /* Filter out infinity. */
24216 if (VECTOR_MODE_P (mode
))
24217 emit_insn (gen_rtx_SET (VOIDmode
, gen_lowpart (V4SFmode
, x0
),
24219 gen_lowpart (V4SFmode
, x0
),
24220 gen_lowpart (V4SFmode
, mask
))));
24222 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
24223 gen_rtx_AND (mode
, x0
, mask
)));
24226 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
24227 gen_rtx_MULT (mode
, x0
, a
)));
24229 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
24230 gen_rtx_MULT (mode
, e0
, x0
)));
24232 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
24233 gen_rtx_MINUS (mode
, three
, e1
)));
24236 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
24237 gen_rtx_MULT (mode
, half
, x0
)));
24240 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
24241 gen_rtx_MULT (mode
, half
, e0
)));
24242 /* ret = e2 * e3 */
24243 emit_insn (gen_rtx_SET (VOIDmode
, res
,
24244 gen_rtx_MULT (mode
, e2
, e3
)));
24247 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24249 static void ATTRIBUTE_UNUSED
24250 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
24253 /* With Binutils 2.15, the "@unwind" marker must be specified on
24254 every occurrence of the ".eh_frame" section, not just the first
24257 && strcmp (name
, ".eh_frame") == 0)
24259 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
24260 flags
& SECTION_WRITE
? "aw" : "a");
24263 default_elf_asm_named_section (name
, flags
, decl
);
24266 /* Return the mangling of TYPE if it is an extended fundamental type. */
24268 static const char *
24269 ix86_mangle_type (const_tree type
)
24271 type
= TYPE_MAIN_VARIANT (type
);
24273 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
24274 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
24277 switch (TYPE_MODE (type
))
24280 /* __float128 is "g". */
24283 /* "long double" or __float80 is "e". */
24290 /* For 32-bit code we can save PIC register setup by using
24291 __stack_chk_fail_local hidden function instead of calling
24292 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24293 register, so it is better to call __stack_chk_fail directly. */
24296 ix86_stack_protect_fail (void)
24298 return TARGET_64BIT
24299 ? default_external_stack_protect_fail ()
24300 : default_hidden_stack_protect_fail ();
24303 /* Select a format to encode pointers in exception handling data. CODE
24304 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24305 true if the symbol may be affected by dynamic relocations.
24307 ??? All x86 object file formats are capable of representing this.
24308 After all, the relocation needed is the same as for the call insn.
24309 Whether or not a particular assembler allows us to enter such, I
24310 guess we'll have to see. */
24312 asm_preferred_eh_data_format (int code
, int global
)
24316 int type
= DW_EH_PE_sdata8
;
24318 || ix86_cmodel
== CM_SMALL_PIC
24319 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
24320 type
= DW_EH_PE_sdata4
;
24321 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
24323 if (ix86_cmodel
== CM_SMALL
24324 || (ix86_cmodel
== CM_MEDIUM
&& code
))
24325 return DW_EH_PE_udata4
;
24326 return DW_EH_PE_absptr
;
24329 /* Expand copysign from SIGN to the positive value ABS_VALUE
24330 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24333 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
24335 enum machine_mode mode
= GET_MODE (sign
);
24336 rtx sgn
= gen_reg_rtx (mode
);
24337 if (mask
== NULL_RTX
)
24339 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
24340 if (!VECTOR_MODE_P (mode
))
24342 /* We need to generate a scalar mode mask in this case. */
24343 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
24344 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
24345 mask
= gen_reg_rtx (mode
);
24346 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
24350 mask
= gen_rtx_NOT (mode
, mask
);
24351 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
24352 gen_rtx_AND (mode
, mask
, sign
)));
24353 emit_insn (gen_rtx_SET (VOIDmode
, result
,
24354 gen_rtx_IOR (mode
, abs_value
, sgn
)));
24357 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24358 mask for masking out the sign-bit is stored in *SMASK, if that is
24361 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
24363 enum machine_mode mode
= GET_MODE (op0
);
24366 xa
= gen_reg_rtx (mode
);
24367 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
24368 if (!VECTOR_MODE_P (mode
))
24370 /* We need to generate a scalar mode mask in this case. */
24371 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
24372 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
24373 mask
= gen_reg_rtx (mode
);
24374 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
24376 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
24377 gen_rtx_AND (mode
, op0
, mask
)));
24385 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24386 swapping the operands if SWAP_OPERANDS is true. The expanded
24387 code is a forward jump to a newly created label in case the
24388 comparison is true. The generated label rtx is returned. */
24390 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
24391 bool swap_operands
)
24402 label
= gen_label_rtx ();
24403 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
24404 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24405 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
24406 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
24407 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
24408 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
24409 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
24410 JUMP_LABEL (tmp
) = label
;
24415 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24416 using comparison code CODE. Operands are swapped for the comparison if
24417 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24419 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
24420 bool swap_operands
)
24422 enum machine_mode mode
= GET_MODE (op0
);
24423 rtx mask
= gen_reg_rtx (mode
);
24432 if (mode
== DFmode
)
24433 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
24434 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
24436 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
24437 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
24442 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24443 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24445 ix86_gen_TWO52 (enum machine_mode mode
)
24447 REAL_VALUE_TYPE TWO52r
;
24450 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
24451 TWO52
= const_double_from_real_value (TWO52r
, mode
);
24452 TWO52
= force_reg (mode
, TWO52
);
24457 /* Expand SSE sequence for computing lround from OP1 storing
24460 ix86_expand_lround (rtx op0
, rtx op1
)
24462 /* C code for the stuff we're doing below:
24463 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24466 enum machine_mode mode
= GET_MODE (op1
);
24467 const struct real_format
*fmt
;
24468 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
24471 /* load nextafter (0.5, 0.0) */
24472 fmt
= REAL_MODE_FORMAT (mode
);
24473 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
24474 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
24476 /* adj = copysign (0.5, op1) */
24477 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
24478 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
24480 /* adj = op1 + adj */
24481 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
24483 /* op0 = (imode)adj */
24484 expand_fix (op0
, adj
, 0);
24487 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24490 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
24492 /* C code for the stuff we're doing below (for do_floor):
24494 xi -= (double)xi > op1 ? 1 : 0;
24497 enum machine_mode fmode
= GET_MODE (op1
);
24498 enum machine_mode imode
= GET_MODE (op0
);
24499 rtx ireg
, freg
, label
, tmp
;
24501 /* reg = (long)op1 */
24502 ireg
= gen_reg_rtx (imode
);
24503 expand_fix (ireg
, op1
, 0);
24505 /* freg = (double)reg */
24506 freg
= gen_reg_rtx (fmode
);
24507 expand_float (freg
, ireg
, 0);
24509 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24510 label
= ix86_expand_sse_compare_and_jump (UNLE
,
24511 freg
, op1
, !do_floor
);
24512 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
24513 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
24514 emit_move_insn (ireg
, tmp
);
24516 emit_label (label
);
24517 LABEL_NUSES (label
) = 1;
24519 emit_move_insn (op0
, ireg
);
24522 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24523 result in OPERAND0. */
24525 ix86_expand_rint (rtx operand0
, rtx operand1
)
24527 /* C code for the stuff we're doing below:
24528 xa = fabs (operand1);
24529 if (!isless (xa, 2**52))
24531 xa = xa + 2**52 - 2**52;
24532 return copysign (xa, operand1);
24534 enum machine_mode mode
= GET_MODE (operand0
);
24535 rtx res
, xa
, label
, TWO52
, mask
;
24537 res
= gen_reg_rtx (mode
);
24538 emit_move_insn (res
, operand1
);
24540 /* xa = abs (operand1) */
24541 xa
= ix86_expand_sse_fabs (res
, &mask
);
24543 /* if (!isless (xa, TWO52)) goto label; */
24544 TWO52
= ix86_gen_TWO52 (mode
);
24545 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24547 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24548 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
24550 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
24552 emit_label (label
);
24553 LABEL_NUSES (label
) = 1;
24555 emit_move_insn (operand0
, res
);
24558 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24561 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
24563 /* C code for the stuff we expand below.
24564 double xa = fabs (x), x2;
24565 if (!isless (xa, TWO52))
24567 xa = xa + TWO52 - TWO52;
24568 x2 = copysign (xa, x);
24577 enum machine_mode mode
= GET_MODE (operand0
);
24578 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
24580 TWO52
= ix86_gen_TWO52 (mode
);
24582 /* Temporary for holding the result, initialized to the input
24583 operand to ease control flow. */
24584 res
= gen_reg_rtx (mode
);
24585 emit_move_insn (res
, operand1
);
24587 /* xa = abs (operand1) */
24588 xa
= ix86_expand_sse_fabs (res
, &mask
);
24590 /* if (!isless (xa, TWO52)) goto label; */
24591 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24593 /* xa = xa + TWO52 - TWO52; */
24594 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24595 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
24597 /* xa = copysign (xa, operand1) */
24598 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
24600 /* generate 1.0 or -1.0 */
24601 one
= force_reg (mode
,
24602 const_double_from_real_value (do_floor
24603 ? dconst1
: dconstm1
, mode
));
24605 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24606 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
24607 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24608 gen_rtx_AND (mode
, one
, tmp
)));
24609 /* We always need to subtract here to preserve signed zero. */
24610 tmp
= expand_simple_binop (mode
, MINUS
,
24611 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24612 emit_move_insn (res
, tmp
);
24614 emit_label (label
);
24615 LABEL_NUSES (label
) = 1;
24617 emit_move_insn (operand0
, res
);
24620 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24623 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
24625 /* C code for the stuff we expand below.
24626 double xa = fabs (x), x2;
24627 if (!isless (xa, TWO52))
24629 x2 = (double)(long)x;
24636 if (HONOR_SIGNED_ZEROS (mode))
24637 return copysign (x2, x);
24640 enum machine_mode mode
= GET_MODE (operand0
);
24641 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
24643 TWO52
= ix86_gen_TWO52 (mode
);
24645 /* Temporary for holding the result, initialized to the input
24646 operand to ease control flow. */
24647 res
= gen_reg_rtx (mode
);
24648 emit_move_insn (res
, operand1
);
24650 /* xa = abs (operand1) */
24651 xa
= ix86_expand_sse_fabs (res
, &mask
);
24653 /* if (!isless (xa, TWO52)) goto label; */
24654 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24656 /* xa = (double)(long)x */
24657 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24658 expand_fix (xi
, res
, 0);
24659 expand_float (xa
, xi
, 0);
24662 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
24664 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24665 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
24666 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24667 gen_rtx_AND (mode
, one
, tmp
)));
24668 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
24669 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24670 emit_move_insn (res
, tmp
);
24672 if (HONOR_SIGNED_ZEROS (mode
))
24673 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
24675 emit_label (label
);
24676 LABEL_NUSES (label
) = 1;
24678 emit_move_insn (operand0
, res
);
24681 /* Expand SSE sequence for computing round from OPERAND1 storing
24682 into OPERAND0. Sequence that works without relying on DImode truncation
24683 via cvttsd2siq that is only available on 64bit targets. */
24685 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
24687 /* C code for the stuff we expand below.
24688 double xa = fabs (x), xa2, x2;
24689 if (!isless (xa, TWO52))
24691 Using the absolute value and copying back sign makes
24692 -0.0 -> -0.0 correct.
24693 xa2 = xa + TWO52 - TWO52;
24698 else if (dxa > 0.5)
24700 x2 = copysign (xa2, x);
24703 enum machine_mode mode
= GET_MODE (operand0
);
24704 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
24706 TWO52
= ix86_gen_TWO52 (mode
);
24708 /* Temporary for holding the result, initialized to the input
24709 operand to ease control flow. */
24710 res
= gen_reg_rtx (mode
);
24711 emit_move_insn (res
, operand1
);
24713 /* xa = abs (operand1) */
24714 xa
= ix86_expand_sse_fabs (res
, &mask
);
24716 /* if (!isless (xa, TWO52)) goto label; */
24717 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24719 /* xa2 = xa + TWO52 - TWO52; */
24720 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24721 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
24723 /* dxa = xa2 - xa; */
24724 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
24726 /* generate 0.5, 1.0 and -0.5 */
24727 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
24728 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
24729 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
24733 tmp
= gen_reg_rtx (mode
);
24734 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24735 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
24736 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24737 gen_rtx_AND (mode
, one
, tmp
)));
24738 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24739 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24740 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
24741 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
24742 gen_rtx_AND (mode
, one
, tmp
)));
24743 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
24745 /* res = copysign (xa2, operand1) */
24746 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
24748 emit_label (label
);
24749 LABEL_NUSES (label
) = 1;
24751 emit_move_insn (operand0
, res
);
24754 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24757 ix86_expand_trunc (rtx operand0
, rtx operand1
)
24759 /* C code for SSE variant we expand below.
24760 double xa = fabs (x), x2;
24761 if (!isless (xa, TWO52))
24763 x2 = (double)(long)x;
24764 if (HONOR_SIGNED_ZEROS (mode))
24765 return copysign (x2, x);
24768 enum machine_mode mode
= GET_MODE (operand0
);
24769 rtx xa
, xi
, TWO52
, label
, res
, mask
;
24771 TWO52
= ix86_gen_TWO52 (mode
);
24773 /* Temporary for holding the result, initialized to the input
24774 operand to ease control flow. */
24775 res
= gen_reg_rtx (mode
);
24776 emit_move_insn (res
, operand1
);
24778 /* xa = abs (operand1) */
24779 xa
= ix86_expand_sse_fabs (res
, &mask
);
24781 /* if (!isless (xa, TWO52)) goto label; */
24782 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24784 /* x = (double)(long)x */
24785 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24786 expand_fix (xi
, res
, 0);
24787 expand_float (res
, xi
, 0);
24789 if (HONOR_SIGNED_ZEROS (mode
))
24790 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
24792 emit_label (label
);
24793 LABEL_NUSES (label
) = 1;
24795 emit_move_insn (operand0
, res
);
24798 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24801 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
24803 enum machine_mode mode
= GET_MODE (operand0
);
24804 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
24806 /* C code for SSE variant we expand below.
24807 double xa = fabs (x), x2;
24808 if (!isless (xa, TWO52))
24810 xa2 = xa + TWO52 - TWO52;
24814 x2 = copysign (xa2, x);
24818 TWO52
= ix86_gen_TWO52 (mode
);
24820 /* Temporary for holding the result, initialized to the input
24821 operand to ease control flow. */
24822 res
= gen_reg_rtx (mode
);
24823 emit_move_insn (res
, operand1
);
24825 /* xa = abs (operand1) */
24826 xa
= ix86_expand_sse_fabs (res
, &smask
);
24828 /* if (!isless (xa, TWO52)) goto label; */
24829 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24831 /* res = xa + TWO52 - TWO52; */
24832 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
24833 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
24834 emit_move_insn (res
, tmp
);
24837 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
24839 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24840 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
24841 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
24842 gen_rtx_AND (mode
, mask
, one
)));
24843 tmp
= expand_simple_binop (mode
, MINUS
,
24844 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
24845 emit_move_insn (res
, tmp
);
24847 /* res = copysign (res, operand1) */
24848 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
24850 emit_label (label
);
24851 LABEL_NUSES (label
) = 1;
24853 emit_move_insn (operand0
, res
);
24856 /* Expand SSE sequence for computing round from OPERAND1 storing
24859 ix86_expand_round (rtx operand0
, rtx operand1
)
24861 /* C code for the stuff we're doing below:
24862 double xa = fabs (x);
24863 if (!isless (xa, TWO52))
24865 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24866 return copysign (xa, x);
24868 enum machine_mode mode
= GET_MODE (operand0
);
24869 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
24870 const struct real_format
*fmt
;
24871 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
24873 /* Temporary for holding the result, initialized to the input
24874 operand to ease control flow. */
24875 res
= gen_reg_rtx (mode
);
24876 emit_move_insn (res
, operand1
);
24878 TWO52
= ix86_gen_TWO52 (mode
);
24879 xa
= ix86_expand_sse_fabs (res
, &mask
);
24880 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
24882 /* load nextafter (0.5, 0.0) */
24883 fmt
= REAL_MODE_FORMAT (mode
);
24884 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
24885 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
24887 /* xa = xa + 0.5 */
24888 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
24889 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
24891 /* xa = (double)(int64_t)xa */
24892 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
24893 expand_fix (xi
, xa
, 0);
24894 expand_float (xa
, xi
, 0);
24896 /* res = copysign (xa, operand1) */
24897 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
24899 emit_label (label
);
24900 LABEL_NUSES (label
) = 1;
24902 emit_move_insn (operand0
, res
);
24906 /* Validate whether a SSE5 instruction is valid or not.
24907 OPERANDS is the array of operands.
24908 NUM is the number of operands.
24909 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24910 NUM_MEMORY is the maximum number of memory operands to accept. */
24912 ix86_sse5_valid_op_p (rtx operands
[], rtx insn
, int num
, bool uses_oc0
, int num_memory
)
24918 /* Count the number of memory arguments */
24921 for (i
= 0; i
< num
; i
++)
24923 enum machine_mode mode
= GET_MODE (operands
[i
]);
24924 if (register_operand (operands
[i
], mode
))
24927 else if (memory_operand (operands
[i
], mode
))
24929 mem_mask
|= (1 << i
);
24935 rtx pattern
= PATTERN (insn
);
24937 /* allow 0 for pcmov */
24938 if (GET_CODE (pattern
) != SET
24939 || GET_CODE (SET_SRC (pattern
)) != IF_THEN_ELSE
24941 || operands
[i
] != CONST0_RTX (mode
))
24946 /* If there were no memory operations, allow the insn */
24950 /* Do not allow the destination register to be a memory operand. */
24951 else if (mem_mask
& (1 << 0))
24954 /* If there are too many memory operations, disallow the instruction. While
24955 the hardware only allows 1 memory reference, before register allocation
24956 for some insns, we allow two memory operations sometimes in order to allow
24957 code like the following to be optimized:
24959 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24961 or similar cases that are vectorized into using the fmaddss
24963 else if (mem_count
> num_memory
)
24966 /* Don't allow more than one memory operation if not optimizing. */
24967 else if (mem_count
> 1 && !optimize
)
24970 else if (num
== 4 && mem_count
== 1)
24972 /* formats (destination is the first argument), example fmaddss:
24973 xmm1, xmm1, xmm2, xmm3/mem
24974 xmm1, xmm1, xmm2/mem, xmm3
24975 xmm1, xmm2, xmm3/mem, xmm1
24976 xmm1, xmm2/mem, xmm3, xmm1 */
24978 return ((mem_mask
== (1 << 1))
24979 || (mem_mask
== (1 << 2))
24980 || (mem_mask
== (1 << 3)));
24982 /* format, example pmacsdd:
24983 xmm1, xmm2, xmm3/mem, xmm1 */
24985 return (mem_mask
== (1 << 2));
24988 else if (num
== 4 && num_memory
== 2)
24990 /* If there are two memory operations, we can load one of the memory ops
24991 into the destination register. This is for optimizing the
24992 multiply/add ops, which the combiner has optimized both the multiply
24993 and the add insns to have a memory operation. We have to be careful
24994 that the destination doesn't overlap with the inputs. */
24995 rtx op0
= operands
[0];
24997 if (reg_mentioned_p (op0
, operands
[1])
24998 || reg_mentioned_p (op0
, operands
[2])
24999 || reg_mentioned_p (op0
, operands
[3]))
25002 /* formats (destination is the first argument), example fmaddss:
25003 xmm1, xmm1, xmm2, xmm3/mem
25004 xmm1, xmm1, xmm2/mem, xmm3
25005 xmm1, xmm2, xmm3/mem, xmm1
25006 xmm1, xmm2/mem, xmm3, xmm1
25008 For the oc0 case, we will load either operands[1] or operands[3] into
25009 operands[0], so any combination of 2 memory operands is ok. */
25013 /* format, example pmacsdd:
25014 xmm1, xmm2, xmm3/mem, xmm1
25016 For the integer multiply/add instructions be more restrictive and
25017 require operands[2] and operands[3] to be the memory operands. */
25019 return (mem_mask
== ((1 << 2) | (1 << 3)));
25022 else if (num
== 3 && num_memory
== 1)
25024 /* formats, example protb:
25025 xmm1, xmm2, xmm3/mem
25026 xmm1, xmm2/mem, xmm3 */
25028 return ((mem_mask
== (1 << 1)) || (mem_mask
== (1 << 2)));
25030 /* format, example comeq:
25031 xmm1, xmm2, xmm3/mem */
25033 return (mem_mask
== (1 << 2));
25037 gcc_unreachable ();
25043 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25044 hardware will allow by using the destination register to load one of the
25045 memory operations. Presently this is used by the multiply/add routines to
25046 allow 2 memory references. */
25049 ix86_expand_sse5_multiple_memory (rtx operands
[],
25051 enum machine_mode mode
)
25053 rtx op0
= operands
[0];
25055 || memory_operand (op0
, mode
)
25056 || reg_mentioned_p (op0
, operands
[1])
25057 || reg_mentioned_p (op0
, operands
[2])
25058 || reg_mentioned_p (op0
, operands
[3]))
25059 gcc_unreachable ();
25061 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25062 the destination register. */
25063 if (memory_operand (operands
[1], mode
))
25065 emit_move_insn (op0
, operands
[1]);
25068 else if (memory_operand (operands
[3], mode
))
25070 emit_move_insn (op0
, operands
[3]);
25074 gcc_unreachable ();
25080 /* Table of valid machine attributes. */
25081 static const struct attribute_spec ix86_attribute_table
[] =
25083 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25084 /* Stdcall attribute says callee is responsible for popping arguments
25085 if they are not variable. */
25086 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25087 /* Fastcall attribute says callee is responsible for popping arguments
25088 if they are not variable. */
25089 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25090 /* Cdecl attribute says the callee is a normal C declaration */
25091 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25092 /* Regparm attribute specifies how many integer arguments are to be
25093 passed in registers. */
25094 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
25095 /* Sseregparm attribute says we are using x86_64 calling conventions
25096 for FP arguments. */
25097 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
25098 /* force_align_arg_pointer says this function realigns the stack at entry. */
25099 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
25100 false, true, true, ix86_handle_cconv_attribute
},
25101 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25102 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
25103 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
25104 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
25106 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
25107 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
25108 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25109 SUBTARGET_ATTRIBUTE_TABLE
,
25111 { NULL
, 0, 0, false, false, false, NULL
}
25114 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25116 x86_builtin_vectorization_cost (bool runtime_test
)
25118 /* If the branch of the runtime test is taken - i.e. - the vectorized
25119 version is skipped - this incurs a misprediction cost (because the
25120 vectorized version is expected to be the fall-through). So we subtract
25121 the latency of a mispredicted branch from the costs that are incured
25122 when the vectorized version is executed.
25124 TODO: The values in individual target tables have to be tuned or new
25125 fields may be needed. For eg. on K8, the default branch path is the
25126 not-taken path. If the taken path is predicted correctly, the minimum
25127 penalty of going down the taken-path is 1 cycle. If the taken-path is
25128 not predicted correctly, then the minimum penalty is 10 cycles. */
25132 return (-(ix86_cost
->cond_taken_branch_cost
));
/* NOTE(review): this region wires up the i386 target hooks and then
   instantiates targetm.  The extraction that produced this text appears to
   have dropped several lines (notably the #else/#endif lines matching the
   visible #if/#ifndef directives, and the opening "(TARGET_DEFAULT"
   operand of TARGET_DEFAULT_TARGET_FLAGS) -- restore them from upstream
   i386.c before compiling.  The leading numbers on each line look like
   original-file line numbers fused in by the extraction, not code.  */
25138 /* Initialize the GCC target structure. */
25139 #undef TARGET_ATTRIBUTE_TABLE
25140 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25141 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25142 # undef TARGET_MERGE_DECL_ATTRIBUTES
25143 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
/* NOTE(review): the #endif matching the #if above appears to have been
   lost in extraction.  */
25146 #undef TARGET_COMP_TYPE_ATTRIBUTES
25147 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25149 #undef TARGET_INIT_BUILTINS
25150 #define TARGET_INIT_BUILTINS ix86_init_builtins
25151 #undef TARGET_EXPAND_BUILTIN
25152 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25154 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25155 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25156 ix86_builtin_vectorized_function
25158 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25159 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25161 #undef TARGET_BUILTIN_RECIPROCAL
25162 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25164 #undef TARGET_ASM_FUNCTION_EPILOGUE
25165 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25167 #undef TARGET_ENCODE_SECTION_INFO
25168 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25169 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
/* NOTE(review): an #else presumably separated the two definitions below,
   and an #endif closed the #ifndef -- both look lost in extraction.  */
25171 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25174 #undef TARGET_ASM_OPEN_PAREN
25175 #define TARGET_ASM_OPEN_PAREN ""
25176 #undef TARGET_ASM_CLOSE_PAREN
25177 #define TARGET_ASM_CLOSE_PAREN ""
25179 #undef TARGET_ASM_ALIGNED_HI_OP
25180 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25181 #undef TARGET_ASM_ALIGNED_SI_OP
25182 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25184 #undef TARGET_ASM_ALIGNED_DI_OP
25185 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25188 #undef TARGET_ASM_UNALIGNED_HI_OP
25189 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25190 #undef TARGET_ASM_UNALIGNED_SI_OP
25191 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25192 #undef TARGET_ASM_UNALIGNED_DI_OP
25193 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25195 #undef TARGET_SCHED_ADJUST_COST
25196 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25197 #undef TARGET_SCHED_ISSUE_RATE
25198 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25199 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25200 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25201 ia32_multipass_dfa_lookahead
25203 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25204 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25207 #undef TARGET_HAVE_TLS
25208 #define TARGET_HAVE_TLS true
25210 #undef TARGET_CANNOT_FORCE_CONST_MEM
25211 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25212 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25213 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25215 #undef TARGET_DELEGITIMIZE_ADDRESS
25216 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25218 #undef TARGET_MS_BITFIELD_LAYOUT_P
25219 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): the darwin_binds_local_p pair below is presumably guarded
   by a conditional (e.g. #if TARGET_MACHO) whose directives were lost in
   extraction -- confirm against upstream.  */
25222 #undef TARGET_BINDS_LOCAL_P
25223 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25225 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25226 #undef TARGET_BINDS_LOCAL_P
25227 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
/* NOTE(review): the #endif matching the #if above appears to have been
   lost in extraction.  */
25230 #undef TARGET_ASM_OUTPUT_MI_THUNK
25231 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25232 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25233 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25235 #undef TARGET_ASM_FILE_START
25236 #define TARGET_ASM_FILE_START x86_file_start
25238 #undef TARGET_DEFAULT_TARGET_FLAGS
25239 #define TARGET_DEFAULT_TARGET_FLAGS \
/* NOTE(review): the first operand line (likely "(TARGET_DEFAULT \")
   appears to have been lost in extraction.  */
25241 | TARGET_SUBTARGET_DEFAULT \
25242 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25244 #undef TARGET_HANDLE_OPTION
25245 #define TARGET_HANDLE_OPTION ix86_handle_option
25247 #undef TARGET_RTX_COSTS
25248 #define TARGET_RTX_COSTS ix86_rtx_costs
25249 #undef TARGET_ADDRESS_COST
25250 #define TARGET_ADDRESS_COST ix86_address_cost
25252 #undef TARGET_FIXED_CONDITION_CODE_REGS
25253 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25254 #undef TARGET_CC_MODES_COMPATIBLE
25255 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25257 #undef TARGET_MACHINE_DEPENDENT_REORG
25258 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25260 #undef TARGET_BUILD_BUILTIN_VA_LIST
25261 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25263 #undef TARGET_EXPAND_BUILTIN_VA_START
25264 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25266 #undef TARGET_MD_ASM_CLOBBERS
25267 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25269 #undef TARGET_PROMOTE_PROTOTYPES
25270 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25271 #undef TARGET_STRUCT_VALUE_RTX
25272 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25273 #undef TARGET_SETUP_INCOMING_VARARGS
25274 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25275 #undef TARGET_MUST_PASS_IN_STACK
25276 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25277 #undef TARGET_PASS_BY_REFERENCE
25278 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25279 #undef TARGET_INTERNAL_ARG_POINTER
25280 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25281 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25282 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25283 #undef TARGET_STRICT_ARGUMENT_NAMING
25284 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25286 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25287 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25289 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25290 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25292 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25293 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25295 #undef TARGET_C_MODE_FOR_SUFFIX
25296 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25299 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25300 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25303 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25304 #undef TARGET_INSERT_ATTRIBUTES
25305 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
/* NOTE(review): the #endif matching the #ifdef above appears to have been
   lost in extraction.  */
25308 #undef TARGET_MANGLE_TYPE
25309 #define TARGET_MANGLE_TYPE ix86_mangle_type
25311 #undef TARGET_STACK_PROTECT_FAIL
25312 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25314 #undef TARGET_FUNCTION_VALUE
25315 #define TARGET_FUNCTION_VALUE ix86_function_value
25317 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25318 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* The single definition of the target hook vector, built from all the
   TARGET_* macros configured above.  */
25320 struct gcc_target targetm
= TARGET_INITIALIZER
;
25322 #include "gt-i386.h"