/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "tree-gimple.h"
52 #include "tm-constrs.h"
55 static int x86_builtin_vectorization_cost (bool);
/* Default stack-probe limit; -1 disables the limit unless the target
   configuration overrides it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   The tables have five entries: QI, HI, SI, DI and "other" — the
   final ": 4" arm selects the "other" slot for any remaining mode.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so when tuning for size a cost of N insns is expressed as 2*N bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop descriptor for the unused 64-bit column of
   32-bit-only cost tables.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
76 struct processor_costs size_cost
= { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
129 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
130 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
131 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
145 /* Processor costs (relative to an add) */
147 struct processor_costs i386_cost
= { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
200 DUMMY_STRINGOP_ALGS
},
201 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
202 DUMMY_STRINGOP_ALGS
},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
217 struct processor_costs i486_cost
= { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
272 DUMMY_STRINGOP_ALGS
},
273 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
274 DUMMY_STRINGOP_ALGS
},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
289 struct processor_costs pentium_cost
= {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
342 DUMMY_STRINGOP_ALGS
},
343 {{libcall
, {{-1, rep_prefix_4_byte
}}},
344 DUMMY_STRINGOP_ALGS
},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
359 struct processor_costs pentiumpro_cost
= {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks inline loop is still a noticeable win, for bigger
413 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
414 more expensive startup time in CPU, but after 4K the difference is down in the noise.
416 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
417 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
418 DUMMY_STRINGOP_ALGS
},
419 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
420 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
421 DUMMY_STRINGOP_ALGS
},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
436 struct processor_costs geode_cost
= {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
490 DUMMY_STRINGOP_ALGS
},
491 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
492 DUMMY_STRINGOP_ALGS
},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
507 struct processor_costs k6_cost
= {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
563 DUMMY_STRINGOP_ALGS
},
564 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
565 DUMMY_STRINGOP_ALGS
},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
580 struct processor_costs athlon_cost
= {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with REP prefix (relative to loops)
633 compared to K8. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
636 DUMMY_STRINGOP_ALGS
},
637 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
638 DUMMY_STRINGOP_ALGS
},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
653 struct processor_costs k8_cost
= {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set number of simultaneous prefetches
699 to a large constant to reflect this (it probably is not a good idea not
700 to limit number of prefetches at all, as their execution also takes some
702 100, /* number of parallel prefetches */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has optimized REP instruction for medium sized blocks, but for very small
711 blocks it is better to use loop. For large blocks, libcall can do
712 nontemporary accesses and beat inline considerably. */
713 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
714 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
715 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
716 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
717 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
731 struct processor_costs amdfam10_cost
= {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
776 MOVD reg64, xmmreg Double FADD 3
778 MOVD reg32, xmmreg Double FADD 3
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set number of simultaneous prefetches
785 to a large constant to reflect this (it probably is not a good idea not
786 to limit number of prefetches at all, as their execution also takes some
788 100, /* number of parallel prefetches */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
798 very small blocks it is better to use loop. For large blocks, libcall can
799 do nontemporary accesses and beat inline considerably. */
800 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
801 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
802 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
803 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
804 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
819 struct processor_costs pentium4_cost
= {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
872 DUMMY_STRINGOP_ALGS
},
873 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
875 DUMMY_STRINGOP_ALGS
},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
890 struct processor_costs nocona_cost
= {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
943 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
944 {100000, unrolled_loop
}, {-1, libcall
}}}},
945 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
947 {libcall
, {{24, loop
}, {64, unrolled_loop
},
948 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
963 struct processor_costs core2_cost
= {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of loading integer registers */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1015 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1016 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1017 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1018 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1019 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1020 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1036 struct processor_costs generic64_cost
= {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration lea is 2 cycles and more. With
1039 this cost however our current implementation of synth_mult results in
1040 use of unnecessary temporary registers causing regression on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1086 is increased to perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS
,
1095 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1096 {DUMMY_STRINGOP_ALGS
,
1097 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1113 struct processor_costs generic32_cost
= {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1166 DUMMY_STRINGOP_ALGS
},
1167 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1168 DUMMY_STRINGOP_ALGS
},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect; points at one of the per-processor
   tables above.  Starts out as the Pentium costs.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_* value;
   these are OR-ed together to form the per-tuning masks used in the
   feature tables below.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling for Generic64 seems like good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro base chips. */
1215 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1219 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1224 /* X86_TUNE_USE_BIT_TEST */
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_GENERIC
,
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1234 on simulation result. But after P4 was made, no performance benefit
1235 was observed with branch hints. It also increases the code size.
1236 As a result, icc never generates branch hints. */
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1244 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1249 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on Generic32 compilation setting as well. However
1253 in current implementation the partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and is in conflict with partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2
| m_GENERIC
,
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386
| m_486
| m_K6_GEODE
,
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1270 /* X86_TUNE_USE_MOV0 */
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1285 /* X86_TUNE_READ_MODIFY */
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1290 | m_GENERIC
/* | m_PENT4 ? */,
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT
| m_486
| m_386
),
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386
| m_PENT4
| m_NOCONA
,
1298 /* X86_TUNE_QIMODE_MATH */
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls was more effective. */
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1318 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1325 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1330 | m_GENERIC
| m_GEODE
),
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here in between PPro/Pentium4 based chips that thread 128bit
1337 SSE registers as single units versus K8 based chips that divide SSE
1338 registers to two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 shows that disabling this option on P4 brings over 20% SPECfp regression,
1342 while enabling it on K8 brings roughly 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just lower part of scalar values in proper format leaving the
1352 upper part undefined. */
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1356 m_ATHLON_K8_AMDFAM10
,
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO
| m_PENT4
| m_NOCONA
,
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1370 /* X86_TUNE_SHIFT1 */
1373 /* X86_TUNE_USE_FFREEP */
1374 m_ATHLON_K8_AMDFAM10
,
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1389 /* X86_TUNE_USE_BT */
1390 m_ATHLON_K8_AMDFAM10
,
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4
| m_NOCONA
| m_GENERIC
),
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1408 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1417 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Preffer vector packed SSE conversion
1433 from integer to FP. */
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386
| m_486
| m_PENT
| m_K6
),
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Tuning mask of processors for which this file enables the
   accumulate-outgoing-args strategy.  NOTE(review): presumably gates the
   -maccumulate-outgoing-args default -- confirm at the use site.  */
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Tuning mask of processors on which the x87 "fancy math" builtins
   (sin, cos, sqrt, ...) are always usable.  NOTE(review): semantics
   inferred from the name -- confirm at the use site.  */
static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm explicitly selected for this compilation;
   no_stringop means no override is in effect.  NOTE(review): presumably
   set from a command-line option -- confirm where it is assigned.  */
static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively;
   the name tables themselves come from the target headers.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1477 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1479 /* ax, dx, cx, bx */
1480 AREG
, DREG
, CREG
, BREG
,
1481 /* si, di, bp, sp */
1482 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1484 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1485 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1491 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1494 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1497 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1498 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1499 /* SSE REX registers */
1500 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1504 /* The "default" register map used in 32bit mode. */
1506 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1517 static int const x86_64_int_parameter_registers
[6] =
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1523 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Hard register numbers used to return integer values in 64-bit mode.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1600 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1614 rtx ix86_compare_op0
= NULL_RTX
;
1615 rtx ix86_compare_op1
= NULL_RTX
;
1616 rtx ix86_compare_emitted
= NULL_RTX
;
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1621 /* Define the structure for the machine field in struct function. */
1623 struct stack_local_entry
GTY(())
1625 unsigned short mode
;
1628 struct stack_local_entry
*next
;
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1655 HOST_WIDE_INT frame
;
1657 int outgoing_arguments_size
;
1660 HOST_WIDE_INT to_allocate
;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset
;
1663 HOST_WIDE_INT hard_frame_pointer_offset
;
1664 HOST_WIDE_INT stack_pointer_offset
;
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov
;
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel
;
1674 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1676 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath
;
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune
;
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch
;
1687 /* true if sse prefetch instruction is not NOOP. */
1688 int x86_prefetch_sse
;
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm
;
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer
;
1695 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary
;
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost
;
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1706 int ix86_section_threshold
= 65536;
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix
[16];
1710 int internal_label_prefix_len
;
1712 /* Fence to use after loop using movnt. */
1715 /* Register class used for passing given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1718 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1725 X86_64_INTEGER_CLASS
,
1726 X86_64_INTEGERSI_CLASS
,
1733 X86_64_COMPLEX_X87_CLASS
,
/* Printable names for the x86-64 argument-passing register classes,
   indexed by enum x86_64_reg_class.  */
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
1742 #define MAX_CLASSES 4
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1746 static bool ext_80387_constants_init
= 0;
1749 static struct machine_function
* ix86_init_machine_status (void);
1750 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
1751 static int ix86_function_regparm (const_tree
, const_tree
);
1752 static void ix86_compute_frame_layout (struct ix86_frame
*);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags
= TARGET_64BIT_DEFAULT
| TARGET_SUBTARGET_ISA_DEFAULT
;
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit
;
/* Define a set of ISAs which aren't available for a given ISA.  MMX
   and SSE ISAs are handled separately.  Each *_UNSET mask chains to the
   next higher ISA's mask, so disabling an ISA also disables everything
   that depends on it.  */

#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A

#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.1 -msse4.2.  -mno-sse4 should be the same as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4 \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1797 /* Vectorization library interface and handlers. */
1798 tree (*ix86_veclib_handler
)(enum built_in_function
, tree
, tree
) = NULL
;
1799 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
1801 /* Implement TARGET_HANDLE_OPTION. */
1804 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1809 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX
;
1812 ix86_isa_flags
&= ~OPTION_MASK_ISA_MMX_UNSET
;
1813 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_UNSET
;
1818 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW
;
1821 ix86_isa_flags
&= ~OPTION_MASK_ISA_3DNOW_UNSET
;
1822 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_UNSET
;
1830 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE
;
1833 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE_UNSET
;
1834 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_UNSET
;
1839 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2
;
1842 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE2_UNSET
;
1843 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_UNSET
;
1848 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3
;
1851 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE3_UNSET
;
1852 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_UNSET
;
1857 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3
;
1860 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSSE3_UNSET
;
1861 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_UNSET
;
1866 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1
;
1869 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_1_UNSET
;
1870 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_UNSET
;
1875 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2
;
1878 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_2_UNSET
;
1879 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_UNSET
;
1884 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4
;
1885 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4
;
1889 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_UNSET
;
1890 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_UNSET
;
1894 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A
;
1897 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4A_UNSET
;
1898 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_UNSET
;
1907 /* Sometimes certain combinations of command options do not make
1908 sense on a particular target machine. You can define a macro
1909 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1910 defined, is executed once just after all the command options have
1913 Don't use this macro to turn on various extra optimizations for
1914 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1917 override_options (void)
1920 int ix86_tune_defaulted
= 0;
1921 int ix86_arch_specified
= 0;
1922 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1924 /* Comes from final.c -- no real reason to change it. */
1925 #define MAX_CODE_ALIGN 16
1929 const struct processor_costs
*cost
; /* Processor costs */
1930 const int align_loop
; /* Default alignments. */
1931 const int align_loop_max_skip
;
1932 const int align_jump
;
1933 const int align_jump_max_skip
;
1934 const int align_func
;
1936 const processor_target_table
[PROCESSOR_max
] =
1938 {&i386_cost
, 4, 3, 4, 3, 4},
1939 {&i486_cost
, 16, 15, 16, 15, 16},
1940 {&pentium_cost
, 16, 7, 16, 7, 16},
1941 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
1942 {&geode_cost
, 0, 0, 0, 0, 0},
1943 {&k6_cost
, 32, 7, 32, 7, 32},
1944 {&athlon_cost
, 16, 7, 16, 7, 16},
1945 {&pentium4_cost
, 0, 0, 0, 0, 0},
1946 {&k8_cost
, 16, 7, 16, 7, 16},
1947 {&nocona_cost
, 0, 0, 0, 0, 0},
1948 {&core2_cost
, 16, 10, 16, 10, 16},
1949 {&generic32_cost
, 16, 7, 16, 7, 16},
1950 {&generic64_cost
, 16, 10, 16, 10, 16},
1951 {&amdfam10_cost
, 32, 24, 32, 7, 32}
1954 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1961 PTA_PREFETCH_SSE
= 1 << 4,
1963 PTA_3DNOW_A
= 1 << 6,
1967 PTA_POPCNT
= 1 << 10,
1969 PTA_SSE4A
= 1 << 12,
1970 PTA_NO_SAHF
= 1 << 13,
1971 PTA_SSE4_1
= 1 << 14,
1972 PTA_SSE4_2
= 1 << 15
1977 const char *const name
; /* processor name or nickname. */
1978 const enum processor_type processor
;
1979 const unsigned /*enum pta_flags*/ flags
;
1981 const processor_alias_table
[] =
1983 {"i386", PROCESSOR_I386
, 0},
1984 {"i486", PROCESSOR_I486
, 0},
1985 {"i586", PROCESSOR_PENTIUM
, 0},
1986 {"pentium", PROCESSOR_PENTIUM
, 0},
1987 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1988 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1989 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1990 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1991 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1992 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1993 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1994 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1995 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1996 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1997 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
1998 {"pentium4", PROCESSOR_PENTIUM4
, PTA_MMX
|PTA_SSE
| PTA_SSE2
},
1999 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2000 {"prescott", PROCESSOR_NOCONA
, PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2001 {"nocona", PROCESSOR_NOCONA
, (PTA_64BIT
2002 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2003 | PTA_CX16
| PTA_NO_SAHF
)},
2004 {"core2", PROCESSOR_CORE2
, (PTA_64BIT
2005 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2008 {"geode", PROCESSOR_GEODE
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2009 |PTA_PREFETCH_SSE
)},
2010 {"k6", PROCESSOR_K6
, PTA_MMX
},
2011 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2012 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
2013 {"athlon", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2014 | PTA_PREFETCH_SSE
)},
2015 {"athlon-tbird", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2016 | PTA_PREFETCH_SSE
)},
2017 {"athlon-4", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2019 {"athlon-xp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2021 {"athlon-mp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2023 {"x86-64", PROCESSOR_K8
, (PTA_64BIT
2024 | PTA_MMX
| PTA_SSE
| PTA_SSE2
2026 {"k8", PROCESSOR_K8
, (PTA_64BIT
2027 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2028 | PTA_SSE
| PTA_SSE2
2030 {"k8-sse3", PROCESSOR_K8
, (PTA_64BIT
2031 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2032 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2034 {"opteron", PROCESSOR_K8
, (PTA_64BIT
2035 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2036 | PTA_SSE
| PTA_SSE2
2038 {"opteron-sse3", PROCESSOR_K8
, (PTA_64BIT
2039 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2040 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2042 {"athlon64", PROCESSOR_K8
, (PTA_64BIT
2043 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2044 | PTA_SSE
| PTA_SSE2
2046 {"athlon64-sse3", PROCESSOR_K8
, (PTA_64BIT
2047 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2048 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2050 {"athlon-fx", PROCESSOR_K8
, (PTA_64BIT
2051 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2052 | PTA_SSE
| PTA_SSE2
2054 {"amdfam10", PROCESSOR_AMDFAM10
, (PTA_64BIT
2055 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2056 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2058 | PTA_CX16
| PTA_ABM
)},
2059 {"barcelona", PROCESSOR_AMDFAM10
, (PTA_64BIT
2060 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
2061 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
2063 | PTA_CX16
| PTA_ABM
)},
2064 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
2065 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
2068 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
2070 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2071 SUBTARGET_OVERRIDE_OPTIONS
;
2074 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2075 SUBSUBTARGET_OVERRIDE_OPTIONS
;
2078 /* -fPIC is the default for x86_64. */
2079 if (TARGET_MACHO
&& TARGET_64BIT
)
2082 /* Set the default values for switches whose default depends on TARGET_64BIT
2083 in case they weren't overwritten by command line options. */
2086 /* Mach-O doesn't support omitting the frame pointer for now. */
2087 if (flag_omit_frame_pointer
== 2)
2088 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
2089 if (flag_asynchronous_unwind_tables
== 2)
2090 flag_asynchronous_unwind_tables
= 1;
2091 if (flag_pcc_struct_return
== 2)
2092 flag_pcc_struct_return
= 0;
2096 if (flag_omit_frame_pointer
== 2)
2097 flag_omit_frame_pointer
= 0;
2098 if (flag_asynchronous_unwind_tables
== 2)
2099 flag_asynchronous_unwind_tables
= 0;
2100 if (flag_pcc_struct_return
== 2)
2101 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2104 /* Need to check -mtune=generic first. */
2105 if (ix86_tune_string
)
2107 if (!strcmp (ix86_tune_string
, "generic")
2108 || !strcmp (ix86_tune_string
, "i686")
2109 /* As special support for cross compilers we read -mtune=native
2110 as -mtune=generic. With native compilers we won't see the
2111 -mtune=native, as it was changed by the driver. */
2112 || !strcmp (ix86_tune_string
, "native"))
2115 ix86_tune_string
= "generic64";
2117 ix86_tune_string
= "generic32";
2119 else if (!strncmp (ix86_tune_string
, "generic", 7))
2120 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2124 if (ix86_arch_string
)
2125 ix86_tune_string
= ix86_arch_string
;
2126 if (!ix86_tune_string
)
2128 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2129 ix86_tune_defaulted
= 1;
2132 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2133 need to use a sensible tune option. */
2134 if (!strcmp (ix86_tune_string
, "generic")
2135 || !strcmp (ix86_tune_string
, "x86-64")
2136 || !strcmp (ix86_tune_string
, "i686"))
2139 ix86_tune_string
= "generic64";
2141 ix86_tune_string
= "generic32";
2144 if (ix86_stringop_string
)
2146 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2147 stringop_alg
= rep_prefix_1_byte
;
2148 else if (!strcmp (ix86_stringop_string
, "libcall"))
2149 stringop_alg
= libcall
;
2150 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2151 stringop_alg
= rep_prefix_4_byte
;
2152 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2153 stringop_alg
= rep_prefix_8_byte
;
2154 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2155 stringop_alg
= loop_1_byte
;
2156 else if (!strcmp (ix86_stringop_string
, "loop"))
2157 stringop_alg
= loop
;
2158 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2159 stringop_alg
= unrolled_loop
;
2161 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2163 if (!strcmp (ix86_tune_string
, "x86-64"))
2164 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2165 "-mtune=generic instead as appropriate.");
2167 if (!ix86_arch_string
)
2168 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2170 ix86_arch_specified
= 1;
2172 if (!strcmp (ix86_arch_string
, "generic"))
2173 error ("generic CPU can be used only for -mtune= switch");
2174 if (!strncmp (ix86_arch_string
, "generic", 7))
2175 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2177 if (ix86_cmodel_string
!= 0)
2179 if (!strcmp (ix86_cmodel_string
, "small"))
2180 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2181 else if (!strcmp (ix86_cmodel_string
, "medium"))
2182 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2183 else if (!strcmp (ix86_cmodel_string
, "large"))
2184 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2186 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2187 else if (!strcmp (ix86_cmodel_string
, "32"))
2188 ix86_cmodel
= CM_32
;
2189 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2190 ix86_cmodel
= CM_KERNEL
;
2192 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2196 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2197 use of rip-relative addressing. This eliminates fixups that
2198 would otherwise be needed if this object is to be placed in a
2199 DLL, and is essentially just as efficient as direct addressing. */
2200 if (TARGET_64BIT_MS_ABI
)
2201 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
2202 else if (TARGET_64BIT
)
2203 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2205 ix86_cmodel
= CM_32
;
2207 if (ix86_asm_string
!= 0)
2210 && !strcmp (ix86_asm_string
, "intel"))
2211 ix86_asm_dialect
= ASM_INTEL
;
2212 else if (!strcmp (ix86_asm_string
, "att"))
2213 ix86_asm_dialect
= ASM_ATT
;
2215 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2217 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2218 error ("code model %qs not supported in the %s bit mode",
2219 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2220 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
2221 sorry ("%i-bit mode not compiled in",
2222 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
2224 for (i
= 0; i
< pta_size
; i
++)
2225 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2227 ix86_arch
= processor_alias_table
[i
].processor
;
2228 /* Default cpu tuning to the architecture. */
2229 ix86_tune
= ix86_arch
;
2231 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2232 error ("CPU you selected does not support x86-64 "
2235 if (processor_alias_table
[i
].flags
& PTA_MMX
2236 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
2237 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2238 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2239 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
2240 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
2241 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2242 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
2243 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
2244 if (processor_alias_table
[i
].flags
& PTA_SSE
2245 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
2246 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2247 if (processor_alias_table
[i
].flags
& PTA_SSE2
2248 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
2249 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2250 if (processor_alias_table
[i
].flags
& PTA_SSE3
2251 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
2252 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2253 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2254 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
2255 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2256 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
2257 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
2258 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2259 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
2260 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
2261 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
2262 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2263 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
2264 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2266 if (processor_alias_table
[i
].flags
& PTA_ABM
)
2268 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2269 x86_cmpxchg16b
= true;
2270 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
))
2272 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
2273 x86_prefetch_sse
= true;
2274 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2281 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2283 ix86_arch_mask
= 1u << ix86_arch
;
2284 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2285 ix86_arch_features
[i
] &= ix86_arch_mask
;
2287 for (i
= 0; i
< pta_size
; i
++)
2288 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2290 ix86_tune
= processor_alias_table
[i
].processor
;
2291 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2293 if (ix86_tune_defaulted
)
2295 ix86_tune_string
= "x86-64";
2296 for (i
= 0; i
< pta_size
; i
++)
2297 if (! strcmp (ix86_tune_string
,
2298 processor_alias_table
[i
].name
))
2300 ix86_tune
= processor_alias_table
[i
].processor
;
2303 error ("CPU you selected does not support x86-64 "
2306 /* Intel CPUs have always interpreted SSE prefetch instructions as
2307 NOPs; so, we can enable SSE prefetch instructions even when
2308 -mtune (rather than -march) points us to a processor that has them.
2309 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2310 higher processors. */
2312 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
2313 x86_prefetch_sse
= true;
2317 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2319 ix86_tune_mask
= 1u << ix86_tune
;
2320 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2321 ix86_tune_features
[i
] &= ix86_tune_mask
;
2324 ix86_cost
= &size_cost
;
2326 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2328 /* Arrange to set up i386_stack_locals for all functions. */
2329 init_machine_status
= ix86_init_machine_status
;
2331 /* Validate -mregparm= value. */
2332 if (ix86_regparm_string
)
2335 warning (0, "-mregparm is ignored in 64-bit mode");
2336 i
= atoi (ix86_regparm_string
);
2337 if (i
< 0 || i
> REGPARM_MAX
)
2338 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2343 ix86_regparm
= REGPARM_MAX
;
2345 /* If the user has provided any of the -malign-* options,
2346 warn and use that value only if -falign-* is not set.
2347 Remove this code in GCC 3.2 or later. */
2348 if (ix86_align_loops_string
)
2350 warning (0, "-malign-loops is obsolete, use -falign-loops");
2351 if (align_loops
== 0)
2353 i
= atoi (ix86_align_loops_string
);
2354 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2355 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2357 align_loops
= 1 << i
;
2361 if (ix86_align_jumps_string
)
2363 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2364 if (align_jumps
== 0)
2366 i
= atoi (ix86_align_jumps_string
);
2367 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2368 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2370 align_jumps
= 1 << i
;
2374 if (ix86_align_funcs_string
)
2376 warning (0, "-malign-functions is obsolete, use -falign-functions");
2377 if (align_functions
== 0)
2379 i
= atoi (ix86_align_funcs_string
);
2380 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2381 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2383 align_functions
= 1 << i
;
2387 /* Default align_* from the processor table. */
2388 if (align_loops
== 0)
2390 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2391 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2393 if (align_jumps
== 0)
2395 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2396 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2398 if (align_functions
== 0)
2400 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2403 /* Validate -mbranch-cost= value, or provide default. */
2404 ix86_branch_cost
= ix86_cost
->branch_cost
;
2405 if (ix86_branch_cost_string
)
2407 i
= atoi (ix86_branch_cost_string
);
2409 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2411 ix86_branch_cost
= i
;
2413 if (ix86_section_threshold_string
)
2415 i
= atoi (ix86_section_threshold_string
);
2417 error ("-mlarge-data-threshold=%d is negative", i
);
2419 ix86_section_threshold
= i
;
2422 if (ix86_tls_dialect_string
)
2424 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2425 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2426 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2427 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2428 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2429 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2431 error ("bad value (%s) for -mtls-dialect= switch",
2432 ix86_tls_dialect_string
);
2435 if (ix87_precision_string
)
2437 i
= atoi (ix87_precision_string
);
2438 if (i
!= 32 && i
!= 64 && i
!= 80)
2439 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2444 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
2446 /* Enable by default the SSE and MMX builtins. Do allow the user to
2447 explicitly disable any of these. In particular, disabling SSE and
2448 MMX for kernel code is extremely useful. */
2449 if (!ix86_arch_specified
)
2451 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
2452 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
2455 warning (0, "-mrtd is ignored in 64bit mode");
2459 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
2461 if (!ix86_arch_specified
)
2463 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
2465 /* i386 ABI does not specify red zone. It still makes sense to use it
2466 when programmer takes care to stack from being destroyed. */
2467 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2468 target_flags
|= MASK_NO_RED_ZONE
;
2471 /* Keep nonleaf frame pointers. */
2472 if (flag_omit_frame_pointer
)
2473 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2474 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2475 flag_omit_frame_pointer
= 1;
2477 /* If we're doing fast math, we don't care about comparison order
2478 wrt NaNs. This lets us use a shorter comparison sequence. */
2479 if (flag_finite_math_only
)
2480 target_flags
&= ~MASK_IEEE_FP
;
2482 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2483 since the insns won't need emulation. */
2484 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2485 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2487 /* Likewise, if the target doesn't have a 387, or we've specified
2488 software floating point, don't use 387 inline intrinsics. */
2490 target_flags
|= MASK_NO_FANCY_MATH_387
;
2492 /* Turn on SSE4.1 builtins for -msse4.2. */
2494 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2496 /* Turn on SSSE3 builtins for -msse4.1. */
2498 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2500 /* Turn on SSE3 builtins for -mssse3. */
2502 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2504 /* Turn on SSE3 builtins for -msse4a. */
2506 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2508 /* Turn on SSE2 builtins for -msse3. */
2510 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2512 /* Turn on SSE builtins for -msse2. */
2514 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2516 /* Turn on MMX builtins for -msse. */
2519 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
2520 x86_prefetch_sse
= true;
2523 /* Turn on MMX builtins for 3Dnow. */
2525 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2527 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2528 if (TARGET_SSE4_2
|| TARGET_ABM
)
2531 /* Validate -mpreferred-stack-boundary= value, or provide default.
2532 The default of 128 bits is for Pentium III's SSE __m128. We can't
2533 change it because of optimize_size. Otherwise, we can't mix object
2534 files compiled with -Os and -On. */
2535 ix86_preferred_stack_boundary
= 128;
2536 if (ix86_preferred_stack_boundary_string
)
2538 i
= atoi (ix86_preferred_stack_boundary_string
);
2539 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2540 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2541 TARGET_64BIT
? 4 : 2);
2543 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2546 /* Accept -msseregparm only if at least SSE support is enabled. */
2547 if (TARGET_SSEREGPARM
2549 error ("-msseregparm used without SSE enabled");
2551 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2552 if (ix86_fpmath_string
!= 0)
2554 if (! strcmp (ix86_fpmath_string
, "387"))
2555 ix86_fpmath
= FPMATH_387
;
2556 else if (! strcmp (ix86_fpmath_string
, "sse"))
2560 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2561 ix86_fpmath
= FPMATH_387
;
2564 ix86_fpmath
= FPMATH_SSE
;
2566 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2567 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2571 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2572 ix86_fpmath
= FPMATH_387
;
2574 else if (!TARGET_80387
)
2576 warning (0, "387 instruction set disabled, using SSE arithmetics");
2577 ix86_fpmath
= FPMATH_SSE
;
2580 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
2583 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2586 /* If the i387 is disabled, then do not return values in it. */
2588 target_flags
&= ~MASK_FLOAT_RETURNS
;
2590 /* Use external vectorized library in vectorizing intrinsics. */
2591 if (ix86_veclibabi_string
)
2593 if (strcmp (ix86_veclibabi_string
, "acml") == 0)
2594 ix86_veclib_handler
= ix86_veclibabi_acml
;
2596 error ("unknown vectorization library ABI type (%s) for "
2597 "-mveclibabi= switch", ix86_veclibabi_string
);
2600 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2601 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2603 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2605 /* ??? Unwind info is not correct around the CFG unless either a frame
2606 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2607 unwind info generation to be aware of the CFG and propagating states
2609 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2610 || flag_exceptions
|| flag_non_call_exceptions
)
2611 && flag_omit_frame_pointer
2612 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2614 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2615 warning (0, "unwind tables currently require either a frame pointer "
2616 "or -maccumulate-outgoing-args for correctness");
2617 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2620 /* For sane SSE instruction set generation we need fcomi instruction.
2621 It is safe to enable all CMOVE instructions. */
2625 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2628 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2629 p
= strchr (internal_label_prefix
, 'X');
2630 internal_label_prefix_len
= p
- internal_label_prefix
;
2634 /* When scheduling description is not available, disable scheduler pass
2635 so it won't slow down the compilation and make x87 code slower. */
2636 if (!TARGET_SCHEDULE
)
2637 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2639 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2640 set_param_value ("simultaneous-prefetches",
2641 ix86_cost
->simultaneous_prefetches
);
2642 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2643 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2644 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE
))
2645 set_param_value ("l1-cache-size", ix86_cost
->l1_cache_size
);
2646 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE
))
2647 set_param_value ("l2-cache-size", ix86_cost
->l2_cache_size
);
2650 /* Return true if this goes in large data/bss. */
2653 ix86_in_large_data_p (tree exp
)
2655 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2658 /* Functions are never large data. */
2659 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2662 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2664 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2665 if (strcmp (section
, ".ldata") == 0
2666 || strcmp (section
, ".lbss") == 0)
2672 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2674 /* If this is an incomplete type with size 0, then we can't put it
2675 in data because it might be too big when completed. */
2676 if (!size
|| size
> ix86_section_threshold
)
2683 /* Switch to the appropriate section for output of DECL.
2684 DECL is either a `VAR_DECL' node or a constant of some sort.
2685 RELOC indicates whether forming the initial value of DECL requires
2686 link-time relocations. */
2688 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2692 x86_64_elf_select_section (tree decl
, int reloc
,
2693 unsigned HOST_WIDE_INT align
)
2695 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2696 && ix86_in_large_data_p (decl
))
2698 const char *sname
= NULL
;
2699 unsigned int flags
= SECTION_WRITE
;
2700 switch (categorize_decl_for_section (decl
, reloc
))
2705 case SECCAT_DATA_REL
:
2706 sname
= ".ldata.rel";
2708 case SECCAT_DATA_REL_LOCAL
:
2709 sname
= ".ldata.rel.local";
2711 case SECCAT_DATA_REL_RO
:
2712 sname
= ".ldata.rel.ro";
2714 case SECCAT_DATA_REL_RO_LOCAL
:
2715 sname
= ".ldata.rel.ro.local";
2719 flags
|= SECTION_BSS
;
2722 case SECCAT_RODATA_MERGE_STR
:
2723 case SECCAT_RODATA_MERGE_STR_INIT
:
2724 case SECCAT_RODATA_MERGE_CONST
:
2728 case SECCAT_SRODATA
:
2735 /* We don't split these for medium model. Place them into
2736 default sections and hope for best. */
2741 /* We might get called with string constants, but get_named_section
2742 doesn't like them as they are not DECLs. Also, we need to set
2743 flags in that case. */
2745 return get_section (sname
, flags
, NULL
);
2746 return get_named_section (decl
, sname
, reloc
);
2749 return default_elf_select_section (decl
, reloc
, align
);
2752 /* Build up a unique section name, expressed as a
2753 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2754 RELOC indicates whether the initial value of EXP requires
2755 link-time relocations. */
2757 static void ATTRIBUTE_UNUSED
2758 x86_64_elf_unique_section (tree decl
, int reloc
)
2760 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2761 && ix86_in_large_data_p (decl
))
2763 const char *prefix
= NULL
;
2764 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2765 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2767 switch (categorize_decl_for_section (decl
, reloc
))
2770 case SECCAT_DATA_REL
:
2771 case SECCAT_DATA_REL_LOCAL
:
2772 case SECCAT_DATA_REL_RO
:
2773 case SECCAT_DATA_REL_RO_LOCAL
:
2774 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2777 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2780 case SECCAT_RODATA_MERGE_STR
:
2781 case SECCAT_RODATA_MERGE_STR_INIT
:
2782 case SECCAT_RODATA_MERGE_CONST
:
2783 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2785 case SECCAT_SRODATA
:
2792 /* We don't split these for medium model. Place them into
2793 default sections and hope for best. */
2801 plen
= strlen (prefix
);
2803 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2804 name
= targetm
.strip_name_encoding (name
);
2805 nlen
= strlen (name
);
2807 string
= (char *) alloca (nlen
+ plen
+ 1);
2808 memcpy (string
, prefix
, plen
);
2809 memcpy (string
+ plen
, name
, nlen
+ 1);
2811 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2815 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects past the section threshold go into .largecomm under the
     medium code models; everything else uses the plain COMMON op.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2840 /* Utility function for targets to use in implementing
2841 ASM_OUTPUT_ALIGNED_BSS. */
2844 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2845 const char *name
, unsigned HOST_WIDE_INT size
,
2848 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2849 && size
> (unsigned int)ix86_section_threshold
)
2850 switch_to_section (get_named_section (decl
, ".lbss", 0));
2852 switch_to_section (bss_section
);
2853 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2854 #ifdef ASM_DECLARE_OBJECT_NAME
2855 last_assemble_variable_decl
= decl
;
2856 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2858 /* Standard thing is just output label for the object. */
2859 ASM_OUTPUT_LABEL (file
, name
);
2860 #endif /* ASM_DECLARE_OBJECT_NAME */
2861 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2865 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2867 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2868 make the problem with not enough registers even worse. */
2869 #ifdef INSN_SCHEDULING
2871 flag_schedule_insns
= 0;
2875 /* The Darwin libraries never set errno, so we might as well
2876 avoid calling them when that's the only reason we would. */
2877 flag_errno_math
= 0;
2879 /* The default values of these switches depend on the TARGET_64BIT
2880 that is not known at this moment. Mark these values with 2 and
2881 let user the to override these. In case there is no command line option
2882 specifying them, we will set the defaults in override_options. */
2884 flag_omit_frame_pointer
= 2;
2885 flag_pcc_struct_return
= 2;
2886 flag_asynchronous_unwind_tables
= 2;
2887 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2888 SUBTARGET_OPTIMIZATION_OPTIONS
;
2892 /* Decide whether we can make a sibling call to a function. DECL is the
2893 declaration of the function being targeted by the call and EXP is the
2894 CALL_EXPR representing the call. */
2897 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2902 /* If we are generating position-independent code, we cannot sibcall
2903 optimize any indirect call, or a direct call to a global function,
2904 as the PLT requires %ebx be live. */
2905 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2912 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2913 if (POINTER_TYPE_P (func
))
2914 func
= TREE_TYPE (func
);
2917 /* Check that the return value locations are the same. Like
2918 if we are returning floats on the 80387 register stack, we cannot
2919 make a sibcall from a function that doesn't return a float to a
2920 function that does or, conversely, from a function that does return
2921 a float to a function that doesn't; the necessary stack adjustment
2922 would not be executed. This is also the place we notice
2923 differences in the return value ABI. Note that it is ok for one
2924 of the functions to have void return type as long as the return
2925 value of the other is passed in a register. */
2926 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2927 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2929 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2931 if (!rtx_equal_p (a
, b
))
2934 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2936 else if (!rtx_equal_p (a
, b
))
2939 /* If this call is indirect, we'll need to be able to use a call-clobbered
2940 register for the address of the target function. Make sure that all
2941 such registers are not used for passing parameters. */
2942 if (!decl
&& !TARGET_64BIT
)
2946 /* We're looking at the CALL_EXPR, we need the type of the function. */
2947 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2948 type
= TREE_TYPE (type
); /* pointer type */
2949 type
= TREE_TYPE (type
); /* function type */
2951 if (ix86_function_regparm (type
, NULL
) >= 3)
2953 /* ??? Need to count the actual number of registers to be used,
2954 not the possible number of registers. Fix later. */
2959 /* Dllimport'd functions are also called indirectly. */
2960 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2961 && decl
&& DECL_DLLIMPORT_P (decl
)
2962 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2965 /* If we forced aligned the stack, then sibcalling would unalign the
2966 stack, which may break the called function. */
2967 if (cfun
->machine
->force_align_arg_pointer
)
2970 /* Otherwise okay. That also includes certain types of indirect calls. */
2974 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2975 calling convention attributes;
2976 arguments as in struct attribute_spec.handler. */
2979 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2981 int flags ATTRIBUTE_UNUSED
,
2984 if (TREE_CODE (*node
) != FUNCTION_TYPE
2985 && TREE_CODE (*node
) != METHOD_TYPE
2986 && TREE_CODE (*node
) != FIELD_DECL
2987 && TREE_CODE (*node
) != TYPE_DECL
)
2989 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2990 IDENTIFIER_POINTER (name
));
2991 *no_add_attrs
= true;
2995 /* Can combine regparm with all attributes but fastcall. */
2996 if (is_attribute_p ("regparm", name
))
3000 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3002 error ("fastcall and regparm attributes are not compatible");
3005 cst
= TREE_VALUE (args
);
3006 if (TREE_CODE (cst
) != INTEGER_CST
)
3008 warning (OPT_Wattributes
,
3009 "%qs attribute requires an integer constant argument",
3010 IDENTIFIER_POINTER (name
));
3011 *no_add_attrs
= true;
3013 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
3015 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
3016 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
3017 *no_add_attrs
= true;
3021 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3022 TYPE_ATTRIBUTES (*node
))
3023 && compare_tree_int (cst
, REGPARM_MAX
-1))
3025 error ("%s functions limited to %d register parameters",
3026 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
3034 /* Do not warn when emulating the MS ABI. */
3035 if (!TARGET_64BIT_MS_ABI
)
3036 warning (OPT_Wattributes
, "%qs attribute ignored",
3037 IDENTIFIER_POINTER (name
));
3038 *no_add_attrs
= true;
3042 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3043 if (is_attribute_p ("fastcall", name
))
3045 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3047 error ("fastcall and cdecl attributes are not compatible");
3049 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3051 error ("fastcall and stdcall attributes are not compatible");
3053 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
3055 error ("fastcall and regparm attributes are not compatible");
3059 /* Can combine stdcall with fastcall (redundant), regparm and
3061 else if (is_attribute_p ("stdcall", name
))
3063 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
3065 error ("stdcall and cdecl attributes are not compatible");
3067 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3069 error ("stdcall and fastcall attributes are not compatible");
3073 /* Can combine cdecl with regparm and sseregparm. */
3074 else if (is_attribute_p ("cdecl", name
))
3076 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
3078 error ("stdcall and cdecl attributes are not compatible");
3080 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
3082 error ("fastcall and cdecl attributes are not compatible");
3086 /* Can combine sseregparm with all attributes. */
3091 /* Return 0 if the attributes for two types are incompatible, 1 if they
3092 are compatible, and 2 if they are nearly compatible (which causes a
3093 warning to be generated). */
3096 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
3098 /* Check for mismatch of non-default calling convention. */
3099 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
3101 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
3104 /* Check for mismatched fastcall/regparm types. */
3105 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
3106 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
3107 || (ix86_function_regparm (type1
, NULL
)
3108 != ix86_function_regparm (type2
, NULL
)))
3111 /* Check for mismatched sseregparm types. */
3112 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
3113 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
3116 /* Check for mismatched return types (cdecl vs stdcall). */
3117 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
3118 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
3124 /* Return the regparm value for a function with the indicated TYPE and DECL.
3125 DECL may be NULL when calling function indirectly
3126 or considering a libcall. */
3129 ix86_function_regparm (const_tree type
, const_tree decl
)
3132 int regparm
= ix86_regparm
;
3137 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
3139 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
3141 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
3144 /* Use register calling convention for local functions when possible. */
3145 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
3146 && flag_unit_at_a_time
&& !profile_flag
)
3148 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3149 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3152 int local_regparm
, globals
= 0, regno
;
3155 /* Make sure no regparm register is taken by a
3156 global register variable. */
3157 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3158 if (global_regs
[local_regparm
])
3161 /* We can't use regparm(3) for nested functions as these use
3162 static chain pointer in third argument. */
3163 if (local_regparm
== 3
3164 && (decl_function_context (decl
)
3165 || ix86_force_align_arg_pointer
)
3166 && !DECL_NO_STATIC_CHAIN (decl
))
3169 /* If the function realigns its stackpointer, the prologue will
3170 clobber %ecx. If we've already generated code for the callee,
3171 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3172 scanning the attributes for the self-realigning property. */
3173 f
= DECL_STRUCT_FUNCTION (decl
);
3174 if (local_regparm
== 3
3175 && (f
? !!f
->machine
->force_align_arg_pointer
3176 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
3177 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3180 /* Each global register variable increases register preassure,
3181 so the more global reg vars there are, the smaller regparm
3182 optimization use, unless requested by the user explicitly. */
3183 for (regno
= 0; regno
< 6; regno
++)
3184 if (global_regs
[regno
])
3187 = globals
< local_regparm
? local_regparm
- globals
: 0;
3189 if (local_regparm
> regparm
)
3190 regparm
= local_regparm
;
3197 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3198 DFmode (2) arguments in SSE registers for a function with the
3199 indicated TYPE and DECL. DECL may be NULL when calling function
3200 indirectly or considering a libcall. Otherwise return 0. */
3203 ix86_function_sseregparm (const_tree type
, const_tree decl
)
3205 gcc_assert (!TARGET_64BIT
);
3207 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3208 by the sseregparm attribute. */
3209 if (TARGET_SSEREGPARM
3210 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3215 error ("Calling %qD with attribute sseregparm without "
3216 "SSE/SSE2 enabled", decl
);
3218 error ("Calling %qT with attribute sseregparm without "
3219 "SSE/SSE2 enabled", type
);
3226 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3227 (and DFmode for SSE2) arguments in SSE registers. */
3228 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3230 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3231 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3233 return TARGET_SSE2
? 2 : 1;
3239 /* Return true if EAX is live at the start of the function. Used by
3240 ix86_expand_prologue to determine if we need special help before
3241 calling allocate_stack_worker. */
3244 ix86_eax_live_at_start_p (void)
3246 /* Cheat. Don't bother working forward from ix86_function_regparm
3247 to the function type to whether an actual argument is located in
3248 eax. Instead just look at cfg info, which is still close enough
3249 to correct at this point. This gives false positives for broken
3250 functions that might use uninitialized data that happens to be
3251 allocated in eax, but who cares? */
3252 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
3255 /* Return true if TYPE has a variable argument list. */
3258 type_has_variadic_args_p (tree type
)
3260 tree n
, t
= TYPE_ARG_TYPES (type
);
3265 while ((n
= TREE_CHAIN (t
)) != NULL
)
3268 return TREE_VALUE (t
) != void_type_node
;
3271 /* Value is the number of bytes of arguments automatically
3272 popped when returning from a subroutine call.
3273 FUNDECL is the declaration node of the function (as a tree),
3274 FUNTYPE is the data type of the function (as a tree),
3275 or for a library call it is an identifier node for the subroutine name.
3276 SIZE is the number of bytes of arguments passed on the stack.
3278 On the 80386, the RTD insn may be used to pop them if the number
3279 of args is fixed, but if the number is variable then the caller
3280 must pop them all. RTD can't be used for library calls now
3281 because the library is compiled with the Unix compiler.
3282 Use of RTD is a selectable option, since it is incompatible with
3283 standard Unix calling sequences. If the option is not selected,
3284 the caller must always pop the args.
3286 The attribute stdcall is equivalent to RTD on a per module basis. */
3289 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3293 /* None of the 64-bit ABIs pop arguments. */
3297 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3299 /* Cdecl functions override -mrtd, and never pop the stack. */
3300 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
3302 /* Stdcall and fastcall functions will pop the stack if not
3304 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3305 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3308 if (rtd
&& ! type_has_variadic_args_p (funtype
))
3312 /* Lose any fake structure return argument if it is passed on the stack. */
3313 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3314 && !KEEP_AGGREGATE_RETURN_POINTER
)
3316 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3318 return GET_MODE_SIZE (Pmode
);
3324 /* Argument support functions. */
3326 /* Return true when register may be used to pass function parameters. */
3328 ix86_function_arg_regno_p (int regno
)
3331 const int *parm_regs
;
3336 return (regno
< REGPARM_MAX
3337 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3339 return (regno
< REGPARM_MAX
3340 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3341 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3342 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3343 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3348 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3353 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3354 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3358 /* RAX is used as hidden argument to va_arg functions. */
3359 if (!TARGET_64BIT_MS_ABI
&& regno
== 0)
3362 if (TARGET_64BIT_MS_ABI
)
3363 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3365 parm_regs
= x86_64_int_parameter_registers
;
3366 for (i
= 0; i
< REGPARM_MAX
; i
++)
3367 if (regno
== parm_regs
[i
])
3372 /* Return if we do not know how to pass TYPE solely in registers. */
3375 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
3377 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3380 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3381 The layout_type routine is crafty and tries to trick us into passing
3382 currently unsupported vector types on the stack by using TImode. */
3383 return (!TARGET_64BIT
&& mode
== TImode
3384 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3387 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3388 for a call to a function whose data type is FNTYPE.
3389 For a library call, FNTYPE is 0. */
3392 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3393 tree fntype
, /* tree ptr for function decl */
3394 rtx libname
, /* SYMBOL_REF of library name or 0 */
3397 memset (cum
, 0, sizeof (*cum
));
3399 /* Set up the number of registers to use for passing arguments. */
3400 cum
->nregs
= ix86_regparm
;
3402 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3404 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3405 cum
->warn_sse
= true;
3406 cum
->warn_mmx
= true;
3407 cum
->maybe_vaarg
= (fntype
3408 ? (!TYPE_ARG_TYPES (fntype
)
3409 || type_has_variadic_args_p (fntype
))
3414 /* If there are variable arguments, then we won't pass anything
3415 in registers in 32-bit mode. */
3416 if (cum
->maybe_vaarg
)
3426 /* Use ecx and edx registers if function has fastcall attribute,
3427 else look for regparm information. */
3430 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3436 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3439 /* Set up the number of SSE registers used for passing SFmode
3440 and DFmode arguments. Warn for mismatching ABI. */
3441 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3445 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3446 But in the case of vector types, it is some vector mode.
3448 When we have only some of our vector isa extensions enabled, then there
3449 are some modes for which vector_mode_supported_p is false. For these
3450 modes, the generic vector support in gcc will choose some non-vector mode
3451 in order to implement the type. By computing the natural mode, we'll
3452 select the proper ABI location for the operand and not depend on whatever
3453 the middle-end decides to do with these vector types. */
3455 static enum machine_mode
3456 type_natural_mode (const_tree type
)
3458 enum machine_mode mode
= TYPE_MODE (type
);
3460 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3462 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3463 if ((size
== 8 || size
== 16)
3464 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3465 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3467 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3469 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3470 mode
= MIN_MODE_VECTOR_FLOAT
;
3472 mode
= MIN_MODE_VECTOR_INT
;
3474 /* Get the mode which has this inner mode and number of units. */
3475 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3476 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3477 && GET_MODE_INNER (mode
) == innermode
)
3487 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3488 this may not agree with the mode that the type system has chosen for the
3489 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3490 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3493 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3498 if (orig_mode
!= BLKmode
)
3499 tmp
= gen_rtx_REG (orig_mode
, regno
);
3502 tmp
= gen_rtx_REG (mode
, regno
);
3503 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3504 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3510 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3511 of this code is to classify each 8bytes of incoming argument by the register
3512 class and assign registers accordingly. */
3514 /* Return the union class of CLASS1 and CLASS2.
3515 See the x86-64 PS ABI for details. */
3517 static enum x86_64_reg_class
3518 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3520 /* Rule #1: If both classes are equal, this is the resulting class. */
3521 if (class1
== class2
)
3524 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3526 if (class1
== X86_64_NO_CLASS
)
3528 if (class2
== X86_64_NO_CLASS
)
3531 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3532 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3533 return X86_64_MEMORY_CLASS
;
3535 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3536 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3537 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3538 return X86_64_INTEGERSI_CLASS
;
3539 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3540 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3541 return X86_64_INTEGER_CLASS
;
3543 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3545 if (class1
== X86_64_X87_CLASS
3546 || class1
== X86_64_X87UP_CLASS
3547 || class1
== X86_64_COMPLEX_X87_CLASS
3548 || class2
== X86_64_X87_CLASS
3549 || class2
== X86_64_X87UP_CLASS
3550 || class2
== X86_64_COMPLEX_X87_CLASS
)
3551 return X86_64_MEMORY_CLASS
;
3553 /* Rule #6: Otherwise class SSE is used. */
3554 return X86_64_SSE_CLASS
;
3557 /* Classify the argument of type TYPE and mode MODE.
3558 CLASSES will be filled by the register class used to pass each word
3559 of the operand. The number of words is returned. In case the parameter
3560 should be passed in memory, 0 is returned. As a special case for zero
3561 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3563 BIT_OFFSET is used internally for handling records and specifies offset
3564 of the offset in bits modulo 256 to avoid overflow cases.
3566 See the x86-64 PS ABI for details.
/* NOTE(review): garbled extraction — brace/else/case-label lines are
   missing and original line numbers are fused into the text.  Comments
   below describe only the visible logic; confirm against a pristine
   copy of i386.c before relying on them.
   Visible purpose (matches the header comment above): classify an
   argument of MODE/TYPE into x86-64 psABI register classes, one class
   per 8-byte word in CLASSES; returns the word count.  */
3570 classify_argument (enum machine_mode mode
, const_tree type
,
3571 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
/* bytes = size of the argument; BLKmode sizes come from the tree.  */
3573 HOST_WIDE_INT bytes
=
3574 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
/* Number of 8-byte words the argument spans, adjusted by the bit
   offset within the first word.  */
3575 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3577 /* Variable sized entities are always passed/returned in memory. */
3581 if (mode
!= VOIDmode
3582 && targetm
.calls
.must_pass_in_stack (mode
, type
))
/* Aggregates: classify field-by-field and merge the per-word classes.  */
3585 if (type
&& AGGREGATE_TYPE_P (type
))
3589 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3591 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3595 for (i
= 0; i
< words
; i
++)
3596 classes
[i
] = X86_64_NO_CLASS
;
3598 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3599 signalize memory class, so handle it as special case. */
3602 classes
[0] = X86_64_NO_CLASS
;
3606 /* Classify each field of record and merge classes. */
3607 switch (TREE_CODE (type
))
3610 /* And now merge the fields of structure. */
3611 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3613 if (TREE_CODE (field
) == FIELD_DECL
)
/* Skip fields whose type failed to parse.  */
3617 if (TREE_TYPE (field
) == error_mark_node
)
3620 /* Bitfields are always classified as integer. Handle them
3621 early, since later code would consider them to be
3622 misaligned integers. */
3623 if (DECL_BIT_FIELD (field
))
3625 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3626 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3627 + tree_low_cst (DECL_SIZE (field
), 0)
3630 merge_classes (X86_64_INTEGER_CLASS
,
/* Non-bitfield: recursively classify the field at its bit position
   (mod 256 — see the BIT_OFFSET note in the header comment).  */
3635 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3636 TREE_TYPE (field
), subclasses
,
3637 (int_bit_position (field
)
3638 + bit_offset
) % 256);
3641 for (i
= 0; i
< num
; i
++)
3644 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3646 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3654 /* Arrays are handled as small records. */
3657 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3658 TREE_TYPE (type
), subclasses
, bit_offset
);
3662 /* The partial classes are now full classes. */
3663 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3664 subclasses
[0] = X86_64_SSE_CLASS
;
3665 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3666 subclasses
[0] = X86_64_INTEGER_CLASS
;
/* Replicate the element classification across all words.  */
3668 for (i
= 0; i
< words
; i
++)
3669 classes
[i
] = subclasses
[i
% num
];
3674 case QUAL_UNION_TYPE
:
3675 /* Unions are similar to RECORD_TYPE but offset is always 0.
3677 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3679 if (TREE_CODE (field
) == FIELD_DECL
)
3683 if (TREE_TYPE (field
) == error_mark_node
)
3686 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3687 TREE_TYPE (field
), subclasses
,
3691 for (i
= 0; i
< num
; i
++)
3692 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3701 /* Final merger cleanup. */
3702 for (i
= 0; i
< words
; i
++)
3704 /* If one class is MEMORY, everything should be passed in
3706 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3709 /* The X86_64_SSEUP_CLASS should be always preceded by
3710 X86_64_SSE_CLASS. */
3711 if (classes
[i
] == X86_64_SSEUP_CLASS
3712 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3713 classes
[i
] = X86_64_SSE_CLASS
;
3715 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3716 if (classes
[i
] == X86_64_X87UP_CLASS
3717 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3718 classes
[i
] = X86_64_SSE_CLASS
;
3723 /* Compute alignment needed. We align all types to natural boundaries with
3724 exception of XFmode that is aligned to 64bits. */
3725 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3727 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3730 mode_alignment
= 128;
3731 else if (mode
== XCmode
)
3732 mode_alignment
= 256;
3733 if (COMPLEX_MODE_P (mode
))
3734 mode_alignment
/= 2;
3735 /* Misaligned fields are always returned in memory. */
3736 if (bit_offset
% mode_alignment
)
3740 /* for V1xx modes, just use the base mode */
3741 if (VECTOR_MODE_P (mode
)
3742 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3743 mode
= GET_MODE_INNER (mode
);
3745 /* Classification of atomic types. */
/* NOTE(review): the case labels of this switch are missing from the
   extraction; the assignments below are the per-mode classifications.  */
3750 classes
[0] = X86_64_SSE_CLASS
;
3753 classes
[0] = X86_64_SSE_CLASS
;
3754 classes
[1] = X86_64_SSEUP_CLASS
;
3763 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3764 classes
[0] = X86_64_INTEGERSI_CLASS
;
3766 classes
[0] = X86_64_INTEGER_CLASS
;
3770 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3775 if (!(bit_offset
% 64))
3776 classes
[0] = X86_64_SSESF_CLASS
;
3778 classes
[0] = X86_64_SSE_CLASS
;
3781 classes
[0] = X86_64_SSEDF_CLASS
;
3784 classes
[0] = X86_64_X87_CLASS
;
3785 classes
[1] = X86_64_X87UP_CLASS
;
3788 classes
[0] = X86_64_SSE_CLASS
;
3789 classes
[1] = X86_64_SSEUP_CLASS
;
3792 classes
[0] = X86_64_SSE_CLASS
;
3795 classes
[0] = X86_64_SSEDF_CLASS
;
3796 classes
[1] = X86_64_SSEDF_CLASS
;
3799 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3802 /* This modes is larger than 16 bytes. */
3810 classes
[0] = X86_64_SSE_CLASS
;
3811 classes
[1] = X86_64_SSEUP_CLASS
;
3817 classes
[0] = X86_64_SSE_CLASS
;
3823 gcc_assert (VECTOR_MODE_P (mode
));
3828 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3830 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3831 classes
[0] = X86_64_INTEGERSI_CLASS
;
3833 classes
[0] = X86_64_INTEGER_CLASS
;
3834 classes
[1] = X86_64_INTEGER_CLASS
;
3835 return 1 + (bytes
> 8);
3839 /* Examine the argument and return set number of register required in each
3840 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): garbled extraction — the per-case register-count
   increments are missing between the case labels below.  Classifies
   MODE/TYPE and (presumably, per header comment) tallies the integer
   and SSE registers needed; 0 means "pass in memory".  */
3842 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
3843 int *int_nregs
, int *sse_nregs
)
3845 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3846 int n
= classify_argument (mode
, type
, regclass
, 0);
/* Walk the classified words and count registers per class.  */
3852 for (n
--; n
>= 0; n
--)
3853 switch (regclass
[n
])
3855 case X86_64_INTEGER_CLASS
:
3856 case X86_64_INTEGERSI_CLASS
:
3859 case X86_64_SSE_CLASS
:
3860 case X86_64_SSESF_CLASS
:
3861 case X86_64_SSEDF_CLASS
:
3864 case X86_64_NO_CLASS
:
3865 case X86_64_SSEUP_CLASS
:
3867 case X86_64_X87_CLASS
:
3868 case X86_64_X87UP_CLASS
:
3872 case X86_64_COMPLEX_X87_CLASS
:
/* x87 complex values can be returned (in st0/st1) but not passed.  */
3873 return in_return
? 2 : 0;
3874 case X86_64_MEMORY_CLASS
:
3880 /* Construct container for the argument used by GCC interface. See
3881 FUNCTION_ARG for the detailed description. */
/* NOTE(review): garbled extraction — braces, some returns and case
   labels are missing.  Builds the RTL (REG or PARALLEL) describing
   where an argument/return value of MODE/TYPE lives, per the visible
   classification; also emits the one-time SSE/x87-disabled errors.  */
3884 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3885 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
3886 const int *intreg
, int sse_regno
)
3888 /* The following variables hold the static issued_error state. */
3889 static bool issued_sse_arg_error
;
3890 static bool issued_sse_ret_error
;
3891 static bool issued_x87_ret_error
;
3893 enum machine_mode tmpmode
;
3895 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3896 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3900 int needed_sseregs
, needed_intregs
;
3901 rtx exp
[MAX_CLASSES
];
3904 n
= classify_argument (mode
, type
, regclass
, 0);
3907 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
/* Not enough free registers of the required kinds -> memory.  */
3910 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3913 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3914 some less clueful developer tries to use floating-point anyway. */
3915 if (needed_sseregs
&& !TARGET_SSE
)
3919 if (!issued_sse_ret_error
)
3921 error ("SSE register return with SSE disabled");
3922 issued_sse_ret_error
= true;
3925 else if (!issued_sse_arg_error
)
3927 error ("SSE register argument with SSE disabled");
3928 issued_sse_arg_error
= true;
3933 /* Likewise, error if the ABI requires us to return values in the
3934 x87 registers and the user specified -mno-80387. */
3935 if (!TARGET_80387
&& in_return
)
3936 for (i
= 0; i
< n
; i
++)
3937 if (regclass
[i
] == X86_64_X87_CLASS
3938 || regclass
[i
] == X86_64_X87UP_CLASS
3939 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
3941 if (!issued_x87_ret_error
)
3943 error ("x87 register return with x87 disabled");
3944 issued_x87_ret_error
= true;
3949 /* First construct simple cases. Avoid SCmode, since we want to use
3950 single register to pass this type. */
3951 if (n
== 1 && mode
!= SCmode
)
3952 switch (regclass
[0])
3954 case X86_64_INTEGER_CLASS
:
3955 case X86_64_INTEGERSI_CLASS
:
3956 return gen_rtx_REG (mode
, intreg
[0]);
3957 case X86_64_SSE_CLASS
:
3958 case X86_64_SSESF_CLASS
:
3959 case X86_64_SSEDF_CLASS
:
3960 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3961 case X86_64_X87_CLASS
:
3962 case X86_64_COMPLEX_X87_CLASS
:
3963 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3964 case X86_64_NO_CLASS
:
3965 /* Zero sized array, struct or class. */
/* Two-word special cases: whole value in a single wide register.  */
3970 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
3971 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
3972 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3975 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
3976 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3977 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
3978 && regclass
[1] == X86_64_INTEGER_CLASS
3979 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3980 && intreg
[0] + 1 == intreg
[1])
3981 return gen_rtx_REG (mode
, intreg
[0]);
3983 /* Otherwise figure out the entries of the PARALLEL. */
3984 for (i
= 0; i
< n
; i
++)
3986 switch (regclass
[i
])
3988 case X86_64_NO_CLASS
:
3990 case X86_64_INTEGER_CLASS
:
3991 case X86_64_INTEGERSI_CLASS
:
3992 /* Merge TImodes on aligned occasions here too. */
3993 if (i
* 8 + 8 > bytes
)
3994 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3995 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
3999 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4000 if (tmpmode
== BLKmode
)
4002 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4003 gen_rtx_REG (tmpmode
, *intreg
),
4007 case X86_64_SSESF_CLASS
:
4008 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4009 gen_rtx_REG (SFmode
,
4010 SSE_REGNO (sse_regno
)),
4014 case X86_64_SSEDF_CLASS
:
4015 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4016 gen_rtx_REG (DFmode
,
4017 SSE_REGNO (sse_regno
)),
4021 case X86_64_SSE_CLASS
:
4022 if (i
< n
- 1 && regclass
[i
+ 1] == X86_64_SSEUP_CLASS
)
4026 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
4027 gen_rtx_REG (tmpmode
,
4028 SSE_REGNO (sse_regno
)),
4030 if (tmpmode
== TImode
)
4039 /* Empty aligned struct, union or class. */
/* Assemble the PARALLEL from the collected EXPR_LIST entries.  */
4043 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
4044 for (i
= 0; i
< nexps
; i
++)
4045 XVECEXP (ret
, 0, i
) = exp
[i
];
4049 /* Update the data in CUM to advance over an argument of mode MODE
4050 and data type TYPE. (TYPE is null for libcalls where that information
4051 may not be available.) */
/* NOTE(review): garbled extraction — condition bodies and some guard
   statements are missing.  Advances CUM past one 32-bit-ABI argument:
   consumes general registers, or one SSE/MMX register for vector
   arguments when the float_in_sse/aggregate checks permit.  */
4054 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4055 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
/* Consume WORDS general registers.  */
4071 cum
->words
+= words
;
4072 cum
->nregs
-= words
;
4073 cum
->regno
+= words
;
4075 if (cum
->nregs
<= 0)
4083 if (cum
->float_in_sse
< 2)
4086 if (cum
->float_in_sse
< 1)
/* SSE path: only non-aggregates consume an SSE register here.  */
4097 if (!type
|| !AGGREGATE_TYPE_P (type
))
4099 cum
->sse_words
+= words
;
4100 cum
->sse_nregs
-= 1;
4101 cum
->sse_regno
+= 1;
4102 if (cum
->sse_nregs
<= 0)
/* MMX path, analogous to the SSE path above.  */
4114 if (!type
|| !AGGREGATE_TYPE_P (type
))
4116 cum
->mmx_words
+= words
;
4117 cum
->mmx_nregs
-= 1;
4118 cum
->mmx_regno
+= 1;
4119 if (cum
->mmx_nregs
<= 0)
/* NOTE(review): garbled extraction — brace and `else` lines missing.
   Advances CUM past one SysV x86-64 argument: if it fits in the
   remaining integer+SSE registers, consume them; otherwise account
   for it on the stack (words).  */
4130 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4131 tree type
, HOST_WIDE_INT words
)
4133 int int_nregs
, sse_nregs
;
/* Memory-classed arguments just advance the stack word counter.  */
4135 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
4136 cum
->words
+= words
;
4137 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
4139 cum
->nregs
-= int_nregs
;
4140 cum
->sse_nregs
-= sse_nregs
;
4141 cum
->regno
+= int_nregs
;
4142 cum
->sse_regno
+= sse_nregs
;
/* (final branch) registers exhausted: argument goes on the stack.  */
4145 cum
->words
+= words
;
/* NOTE(review): garbled extraction.  Win64 ABI advance: every slot is
   one word; only 1/2/4/8-byte values are passed directly (anything
   else should have been forced indirect upstream).  */
4149 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
4150 HOST_WIDE_INT words
)
4152 /* Otherwise, this should be passed indirect. */
4153 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
4155 cum
->words
+= words
;
/* NOTE(review): garbled extraction.  ABI dispatcher for argument
   advance: computes BYTES/WORDS, normalizes vector types to their
   natural mode, then delegates to the MS-64 / SysV-64 / 32-bit
   helpers above.  */
4164 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4165 tree type
, int named ATTRIBUTE_UNUSED
)
4167 HOST_WIDE_INT bytes
, words
;
4169 if (mode
== BLKmode
)
4170 bytes
= int_size_in_bytes (type
);
4172 bytes
= GET_MODE_SIZE (mode
);
4173 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
/* NOTE(review): the guard for this call (presumably a vector-type
   check, as in function_arg below) is missing from the extraction.  */
4176 mode
= type_natural_mode (type
);
4178 if (TARGET_64BIT_MS_ABI
)
4179 function_arg_advance_ms_64 (cum
, bytes
, words
);
4180 else if (TARGET_64BIT
)
4181 function_arg_advance_64 (cum
, mode
, type
, words
);
4183 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
4186 /* Define where to put the arguments to a function.
4187 Value is zero to push the argument on the stack,
4188 or a hard register in which to store the argument.
4190 MODE is the argument's machine mode.
4191 TYPE is the data type of the argument (as a tree).
4192 This is null for libcalls where that information may
4194 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4195 the preceding args and about the function being called.
4196 NAMED is nonzero if this argument is a named parameter
4197 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): garbled extraction — switch skeleton and several
   guards are missing.  Picks the register (or NULL->stack) for one
   32-bit-ABI argument: integer regs (with fastcall ECX/EDX handling),
   SSE regs for 16-byte vectors, MMX regs for 8-byte vectors; warns
   once when vector args arrive with SSE/MMX disabled.  */
4200 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4201 enum machine_mode orig_mode
, tree type
,
4202 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
/* One-shot warning latches (shared across calls).  */
4204 static bool warnedsse
, warnedmmx
;
4206 /* Avoid the AL settings for the Unix64 ABI. */
4207 if (mode
== VOIDmode
)
4223 if (words
<= cum
->nregs
)
4225 int regno
= cum
->regno
;
4227 /* Fastcall allocates the first two DWORD (SImode) or
4228 smaller arguments to ECX and EDX. */
4231 if (mode
== BLKmode
|| mode
== DImode
)
4234 /* ECX not EAX is the first allocated register. */
4238 return gen_rtx_REG (mode
, regno
);
4243 if (cum
->float_in_sse
< 2)
4246 if (cum
->float_in_sse
< 1)
4256 if (!type
|| !AGGREGATE_TYPE_P (type
))
4258 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4261 warning (0, "SSE vector argument without SSE enabled "
4265 return gen_reg_or_parallel (mode
, orig_mode
,
4266 cum
->sse_regno
+ FIRST_SSE_REG
);
4274 if (!type
|| !AGGREGATE_TYPE_P (type
))
4276 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4279 warning (0, "MMX vector argument without MMX enabled "
4283 return gen_reg_or_parallel (mode
, orig_mode
,
4284 cum
->mmx_regno
+ FIRST_MMX_REG
);
/* NOTE(review): garbled extraction.  SysV x86-64 argument location:
   a VOIDmode "argument" is the hidden AL count of SSE registers used
   by a varargs call; everything else goes through
   construct_container with the remaining int/SSE registers.  */
4293 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4294 enum machine_mode orig_mode
, tree type
)
4296 /* Handle a hidden AL argument containing number of registers
4297 for varargs x86-64 functions. */
4298 if (mode
== VOIDmode
)
4299 return GEN_INT (cum
->maybe_vaarg
4300 ? (cum
->sse_nregs
< 0
4305 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4307 &x86_64_int_parameter_registers
[cum
->regno
],
/* NOTE(review): garbled extraction.  Win64 argument location: slot N
   maps to the Nth MS-ABI integer register; SF/DF values use the
   parallel-numbered SSE register instead; unnamed float args are
   passed in BOTH (PARALLEL of SSE + integer reg) for varargs.  */
4312 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4313 enum machine_mode orig_mode
, int named
)
4317 /* Avoid the AL settings for the Unix64 ABI. */
4318 if (mode
== VOIDmode
)
4321 /* If we've run out of registers, it goes on the stack. */
4322 if (cum
->nregs
== 0)
4325 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
4327 /* Only floating point modes are passed in anything but integer regs. */
4328 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
4331 regno
= cum
->regno
+ FIRST_SSE_REG
;
4336 /* Unnamed floating parameters are passed in both the
4337 SSE and integer registers. */
4338 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
4339 t2
= gen_rtx_REG (mode
, regno
);
4340 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
4341 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
4342 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
4346 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
/* NOTE(review): garbled extraction.  Top-level FUNCTION_ARG hook:
   computes size in bytes/words, represents vector types by their
   natural mode, and dispatches to the MS-64 / SysV-64 / 32-bit
   helpers.  */
4350 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
4351 tree type
, int named
)
4353 enum machine_mode mode
= omode
;
4354 HOST_WIDE_INT bytes
, words
;
4356 if (mode
== BLKmode
)
4357 bytes
= int_size_in_bytes (type
);
4359 bytes
= GET_MODE_SIZE (mode
);
4360 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4362 /* To simplify the code below, represent vector types with a vector mode
4363 even if MMX/SSE are not active. */
4364 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4365 mode
= type_natural_mode (type
);
4367 if (TARGET_64BIT_MS_ABI
)
4368 return function_arg_ms_64 (cum
, mode
, omode
, named
);
4369 else if (TARGET_64BIT
)
4370 return function_arg_64 (cum
, mode
, omode
, type
);
4372 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
4375 /* A C expression that indicates when an argument must be passed by
4376 reference. If nonzero for an argument, a copy of that argument is
4377 made in memory and a pointer to the argument is passed instead of
4378 the argument itself. The pointer is passed in whatever way is
4379 appropriate for passing a pointer to that type. */
/* NOTE(review): garbled extraction — some returns/braces missing.
   Pass-by-reference predicate: under the MS 64-bit ABI arrays, odd-
   sized aggregates (not exactly 1/2/4/8 bytes) and >8-byte scalars go
   by reference; under SysV 64-bit, variable-sized types do.  */
4382 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4383 enum machine_mode mode ATTRIBUTE_UNUSED
,
4384 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4386 if (TARGET_64BIT_MS_ABI
)
4390 /* Arrays are passed by reference. */
4391 if (TREE_CODE (type
) == ARRAY_TYPE
)
4394 if (AGGREGATE_TYPE_P (type
))
4396 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4397 are passed by reference. */
4398 int el2
= exact_log2 (int_size_in_bytes (type
));
4399 return !(el2
>= 0 && el2
<= 3);
4403 /* __m128 is passed by reference. */
4404 /* ??? How to handle complex? For now treat them as structs,
4405 and pass them by reference if they're too large. */
4406 if (GET_MODE_SIZE (mode
) > 8)
/* SysV 64-bit: variable-sized types (size == -1) by reference.  */
4409 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
4415 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4416 ABI. Only called if TARGET_SSE. */
/* NOTE(review): garbled extraction — returns/braces missing.
   Recursive predicate: does TYPE contain (directly or inside an
   aggregate/array) a 128-bit-aligned SSE vector?  Used by the 32-bit
   argument-alignment code below.  */
4418 contains_128bit_aligned_vector_p (tree type
)
4420 enum machine_mode mode
= TYPE_MODE (type
);
4421 if (SSE_REG_MODE_P (mode
)
4422 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4424 if (TYPE_ALIGN (type
) < 128)
4427 if (AGGREGATE_TYPE_P (type
))
4429 /* Walk the aggregates recursively. */
4430 switch (TREE_CODE (type
))
4434 case QUAL_UNION_TYPE
:
4438 /* Walk all the structure fields. */
4439 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4441 if (TREE_CODE (field
) == FIELD_DECL
4442 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4449 /* Just for use if some languages passes arrays by value. */
4450 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4461 /* Gives the alignment boundary, in bits, of an argument with the
4462 specified mode and type. */
/* NOTE(review): garbled extraction — the 64-bit/SSE guards between
   the PARM_BOUNDARY fallbacks are missing.  Computes the stack
   alignment (in bits) for an argument of MODE/TYPE; i386 defaults to
   PARM_BOUNDARY except for 128-bit-aligned SSE data.  */
4465 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
/* Start from the natural alignment of the type or mode...  */
4469 align
= TYPE_ALIGN (type
);
4471 align
= GET_MODE_ALIGNMENT (mode
);
4472 if (align
< PARM_BOUNDARY
)
4473 align
= PARM_BOUNDARY
;
4476 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4477 make an exception for SSE modes since these require 128bit
4480 The handling here differs from field_alignment. ICC aligns MMX
4481 arguments to 4 byte boundaries, while structure fields are aligned
4482 to 8 byte boundaries. */
4484 align
= PARM_BOUNDARY
;
4487 if (!SSE_REG_MODE_P (mode
))
4488 align
= PARM_BOUNDARY
;
4492 if (!contains_128bit_aligned_vector_p (type
))
4493 align
= PARM_BOUNDARY
;
4501 /* Return true if N is a possible register number of function value. */
/* NOTE(review): garbled extraction — switch skeleton and other case
   labels missing.  True iff REGNO can hold a function return value;
   st(0) is excluded under the MS 64-bit ABI and otherwise gated on
   TARGET_FLOAT_RETURNS_IN_80387.  */
4504 ix86_function_value_regno_p (int regno
)
4511 case FIRST_FLOAT_REG
:
4512 if (TARGET_64BIT_MS_ABI
)
4514 return TARGET_FLOAT_RETURNS_IN_80387
;
/* NOTE(review): case label missing — presumably the MMX/SSE regs.  */
4520 if (TARGET_MACHO
|| TARGET_64BIT
)
4528 /* Define how to find the value returned by a function.
4529 VALTYPE is the data type of the value (as a tree).
4530 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4531 otherwise, FUNC is 0. */
/* NOTE(review): garbled extraction.  Picks the 32-bit-ABI return
   register: 8-byte vectors in %mm0, 16-byte vectors/TImode in %xmm0,
   x87 floats in %st(0), otherwise %eax; local/sseregparm functions
   may override SF/DF to %xmm0.  */
4534 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4535 const_tree fntype
, const_tree fn
)
4539 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4540 we normally prevent this case when mmx is not available. However
4541 some ABIs may require the result to be returned like DImode. */
4542 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4543 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4545 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4546 we prevent this case when sse is not available. However some ABIs
4547 may require the result to be returned like integer TImode. */
4548 else if (mode
== TImode
4549 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4550 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4552 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4553 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4554 regno
= FIRST_FLOAT_REG
;
4556 /* Most things go in %eax. */
4559 /* Override FP return register with %xmm0 for local functions when
4560 SSE math is enabled or for functions with sseregparm attribute. */
4561 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4563 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4564 if ((sse_level
>= 1 && mode
== SFmode
)
4565 || (sse_level
== 2 && mode
== DFmode
))
4566 regno
= FIRST_SSE_REG
;
4569 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): garbled extraction — the libcall mode dispatch
   (conditions selecting SSE vs x87 vs integer regs) is missing.
   SysV x86-64 return location: libcalls (no type) get a direct REG;
   typed values go through construct_container, with a dummy %rax REG
   for zero-sized structures.  */
4573 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4578 /* Handle libcalls, which don't provide a type node. */
4579 if (valtype
== NULL
)
4591 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4594 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4598 return gen_rtx_REG (mode
, 0);
4602 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4603 REGPARM_MAX
, SSE_REGPARM_MAX
,
4604 x86_64_int_return_registers
, 0);
4606 /* For zero sized structures, construct_container returns NULL, but we
4607 need to keep rest of compiler happy by returning meaningful value. */
4609 ret
= gen_rtx_REG (orig_mode
, 0);
/* NOTE(review): garbled extraction — the TARGET_SSE guard around the
   first branch is missing.  Win64 return location: SF/DF and 16-byte
   vectors in %xmm0, everything else in %rax (regno 0).  */
4615 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
4617 unsigned int regno
= 0;
4621 if (mode
== SFmode
|| mode
== DFmode
)
4622 regno
= FIRST_SSE_REG
;
4623 else if (VECTOR_MODE_P (mode
) || GET_MODE_SIZE (mode
) == 16)
4624 regno
= FIRST_SSE_REG
;
4627 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): garbled extraction — `else` lines missing.  Common
   worker: splits FNTYPE_OR_DECL into decl/type and dispatches to the
   per-ABI function_value_* helpers.  */
4631 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
4632 enum machine_mode orig_mode
, enum machine_mode mode
)
4634 const_tree fn
, fntype
;
4637 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4638 fn
= fntype_or_decl
;
4639 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4641 if (TARGET_64BIT_MS_ABI
)
4642 return function_value_ms_64 (orig_mode
, mode
);
4643 else if (TARGET_64BIT
)
4644 return function_value_64 (orig_mode
, mode
, valtype
);
4646 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
/* NOTE(review): garbled extraction.  TARGET_FUNCTION_VALUE hook:
   normalizes VALTYPE to its natural mode and delegates to
   ix86_function_value_1.  */
4650 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
4651 bool outgoing ATTRIBUTE_UNUSED
)
4653 enum machine_mode mode
, orig_mode
;
4655 orig_mode
= TYPE_MODE (valtype
);
4656 mode
= type_natural_mode (valtype
);
4657 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
/* NOTE(review): garbled extraction.  Libcall return location: no type
   node exists, so mode doubles as both original and natural mode.  */
4661 ix86_libcall_value (enum machine_mode mode
)
4663 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
, mode
);
4666 /* Return true iff type is returned in memory. */
/* NOTE(review): garbled extraction — several returns missing.
   32-bit ABI return-in-memory predicate: small MS-style aggregates
   fit in registers; vector returns depend on MMX/SSE availability.  */
4669 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
4673 if (mode
== BLKmode
)
4676 size
= int_size_in_bytes (type
);
4678 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4681 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4683 /* User-created vectors small enough to fit in EAX. */
4687 /* MMX/3dNow values are returned in MM0,
4688 except when it doesn't exits. */
4690 return (TARGET_MMX
? 0 : 1);
4692 /* SSE values are returned in XMM0, except when it doesn't exist. */
4694 return (TARGET_SSE
? 0 : 1);
/* SysV x86-64: a value is returned in memory exactly when
   examine_argument classifies it as not register-passable.  */
4709 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
4711 int needed_intregs
, needed_sseregs
;
4712 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
/* NOTE(review): garbled extraction — the return after the __m128
   check is missing.  Win64: 16-byte vectors return in %xmm0; other
   sizes must be exactly 1/2/4/8 bytes to avoid memory return.  */
4716 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
4718 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4720 /* __m128 and friends are returned in xmm0. */
4721 if (size
== 16 && VECTOR_MODE_P (mode
))
4724 /* Otherwise, the size must be exactly in [1248]. */
4725 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8);
/* RETURN_IN_MEMORY dispatcher: normalize TYPE to its natural mode,
   then route to the MS-64 / SysV-64 / 32-bit predicates above.  */
4729 ix86_return_in_memory (const_tree type
)
4731 const enum machine_mode mode
= type_natural_mode (type
);
4733 if (TARGET_64BIT_MS_ABI
)
4734 return return_in_memory_ms_64 (type
, mode
);
4735 else if (TARGET_64BIT
)
4736 return return_in_memory_64 (type
, mode
);
4738 return return_in_memory_32 (type
, mode
);
4741 /* Return false iff TYPE is returned in memory. This version is used
4742 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4743 but differs notably in that when MMX is available, 8-byte vectors
4744 are returned in memory, rather than in MMX registers. */
/* NOTE(review): garbled extraction — the returns of each branch are
   missing.  Solaris 10 variant (see header comment above): like the
   generic predicate, but 8-byte vectors go to memory when MMX is
   available.  */
4747 ix86_sol10_return_in_memory (const_tree type
)
4750 enum machine_mode mode
= type_natural_mode (type
);
4753 return return_in_memory_64 (type
, mode
);
4755 if (mode
== BLKmode
)
4758 size
= int_size_in_bytes (type
);
4760 if (VECTOR_MODE_P (mode
))
4762 /* Return in memory only if MMX registers *are* available. This
4763 seems backwards, but it is consistent with the existing
4770 else if (mode
== TImode
)
4772 else if (mode
== XFmode
)
4778 /* When returning SSE vector types, we have a choice of either
4779 (1) being abi incompatible with a -march switch, or
4780 (2) generating an error.
4781 Given no good solution, I think the safest thing is one warning.
4782 The user won't be able to use -Werror, but....
4784 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4785 called in response to actually generating a caller or callee that
4786 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4787 via aggregate_value_p for general type probing from tree-ssa. */
/* NOTE(review): garbled extraction — warning-latch assignments and
   the final return are missing.  STRUCT_VALUE_RTX hook; on 32-bit it
   also emits the one-shot "vector return without SSE/MMX enabled"
   warnings (rationale in the long comment above).  */
4790 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4792 static bool warnedsse
, warnedmmx
;
4794 if (!TARGET_64BIT
&& type
)
4796 /* Look at the return type of the function, not the function type. */
4797 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4799 if (!TARGET_SSE
&& !warnedsse
)
4802 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4805 warning (0, "SSE vector return without SSE enabled "
4810 if (!TARGET_MMX
&& !warnedmmx
)
4812 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4815 warning (0, "MMX vector return without MMX enabled "
4825 /* Create the va_list data type. */
/* NOTE(review): garbled extraction — the pointer-type arguments of
   the f_ovf/f_sav build_decl calls are missing.  Builds the SysV
   x86-64 va_list record {gp_offset, fp_offset, overflow_arg_area,
   reg_save_area} (an array of one element); 32-bit and Win64 use a
   plain char pointer.  */
4828 ix86_build_builtin_va_list (void)
4830 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4832 /* For i386 we use plain pointer to argument area. */
4833 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4834 return build_pointer_type (char_type_node
);
4836 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4837 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4839 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4840 unsigned_type_node
);
4841 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4842 unsigned_type_node
);
4843 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4845 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
/* Record the counter fields so the va_list optimizations can find
   them (cfun->va_list_gpr_size / va_list_fpr_size users).  */
4848 va_list_gpr_counter_field
= f_gpr
;
4849 va_list_fpr_counter_field
= f_fpr
;
4851 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4852 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4853 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4854 DECL_FIELD_CONTEXT (f_sav
) = record
;
4856 TREE_CHAIN (record
) = type_decl
;
4857 TYPE_NAME (record
) = type_decl
;
4858 TYPE_FIELDS (record
) = f_gpr
;
4859 TREE_CHAIN (f_gpr
) = f_fpr
;
4860 TREE_CHAIN (f_fpr
) = f_ovf
;
4861 TREE_CHAIN (f_ovf
) = f_sav
;
4863 layout_type (record
);
4865 /* The correct type is an array type of one element. */
4866 return build_array_type (record
, build_index_type (size_zero_node
));
4869 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): garbled extraction — declarations, braces and the
   multiplier constant of the MULT are missing.  SysV x86-64 varargs
   prologue: spill unconsumed integer parameter registers into the
   register save area, then emit the computed-jump SSE register save
   sequence (sse_prologue_save) keyed off %al.  */
4872 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
/* Nothing to do if va_list never reads the register counters.  */
4882 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4885 /* Indicate to allocate space on the stack for varargs save area. */
4886 ix86_save_varrargs_registers
= 1;
4887 /* We need 16-byte stack alignment to save SSE registers. If user
4888 asked for lower preferred_stack_boundary, lets just hope that he knows
4889 what he is doing and won't varargs SSE values.
4891 We also may end up assuming that only 64bit values are stored in SSE
4892 register let some floating point program work. */
4893 if (ix86_preferred_stack_boundary
>= 128)
4894 cfun
->stack_alignment_needed
= 128;
4896 save_area
= frame_pointer_rtx
;
4897 set
= get_varargs_alias_set ();
/* Spill each unconsumed GP parameter register to its save slot.  */
4899 for (i
= cum
->regno
;
4901 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4904 mem
= gen_rtx_MEM (Pmode
,
4905 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4906 MEM_NOTRAP_P (mem
) = 1;
4907 set_mem_alias_set (mem
, set
);
4908 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4909 x86_64_int_parameter_registers
[i
]));
4912 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4914 /* Now emit code to save SSE registers. The AX parameter contains number
4915 of SSE parameter registers used to call this function. We use
4916 sse_prologue_save insn template that produces computed jump across
4917 SSE saves. We need some preparation work to get this working. */
4919 label
= gen_label_rtx ();
4920 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4922 /* Compute address to jump to :
4923 label - 5*eax + nnamed_sse_arguments*5 */
4924 tmp_reg
= gen_reg_rtx (Pmode
);
4925 nsse_reg
= gen_reg_rtx (Pmode
);
4926 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4927 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4928 gen_rtx_MULT (Pmode
, nsse_reg
,
4933 gen_rtx_CONST (DImode
,
4934 gen_rtx_PLUS (DImode
,
4936 GEN_INT (cum
->sse_regno
* 4))));
4938 emit_move_insn (nsse_reg
, label_ref
);
4939 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4941 /* Compute address of memory block we save into. We always use pointer
4942 pointing 127 bytes after first byte to store - this is needed to keep
4943 instruction size limited by 4 bytes. */
4944 tmp_reg
= gen_reg_rtx (Pmode
);
4945 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4946 plus_constant (save_area
,
4947 8 * REGPARM_MAX
+ 127)));
4948 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4949 MEM_NOTRAP_P (mem
) = 1;
4950 set_mem_alias_set (mem
, set
);
4951 set_mem_align (mem
, BITS_PER_WORD
);
4953 /* And finally do the dirty job! */
4954 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4955 GEN_INT (cum
->sse_regno
), label
));
/* NOTE(review): garbled extraction — loop-local declarations missing.
   Win64 varargs prologue: spill every remaining MS-ABI integer
   parameter register to its home slot above the incoming args.  */
4960 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4962 alias_set_type set
= get_varargs_alias_set ();
4965 for (i
= cum
->regno
; i
< REGPARM_MAX
; i
++)
4969 mem
= gen_rtx_MEM (Pmode
,
4970 plus_constant (virtual_incoming_args_rtx
,
4971 i
* UNITS_PER_WORD
));
4972 MEM_NOTRAP_P (mem
) = 1;
4973 set_mem_alias_set (mem
, set
);
4975 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4976 emit_move_insn (mem
, reg
);
/* NOTE(review): garbled extraction — the TARGET_64BIT early-out and
   some guards are missing.  TARGET_SETUP_INCOMING_VARARGS hook:
   advances past the last named argument (stdarg only) and dispatches
   to the per-ABI register-save emitters above.  */
4981 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4982 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4985 CUMULATIVE_ARGS next_cum
;
4989 /* This argument doesn't appear to be used anymore. Which is good,
4990 because the old code here didn't suppress rtl generation. */
4991 gcc_assert (!no_rtl
);
4996 fntype
= TREE_TYPE (current_function_decl
);
/* stdarg_p: prototype ends with named args (not a K&R va_dcl).  */
4997 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4998 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4999 != void_type_node
));
5001 /* For varargs, we do not want to skip the dummy va_dcl argument.
5002 For stdargs, we do want to skip the last named argument. */
5005 function_arg_advance (&next_cum
, mode
, type
, 1);
5007 if (TARGET_64BIT_MS_ABI
)
5008 setup_incoming_varargs_ms_64 (&next_cum
);
5010 setup_incoming_varargs_64 (&next_cum
);
5013 /* Implement va_start. */
/* NOTE(review): garbled extraction — declarations (type), returns and
   braces are missing.  Expands va_start for the SysV x86-64 va_list:
   initializes gp_offset/fp_offset from the registers already consumed
   by named args, points overflow_arg_area past the named stack words,
   and points reg_save_area at the register save block.  */
5016 ix86_va_start (tree valist
, rtx nextarg
)
5018 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
5019 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
5020 tree gpr
, fpr
, ovf
, sav
, t
;
5023 /* Only 64bit target needs something special. */
5024 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
5026 std_expand_builtin_va_start (valist
, nextarg
);
/* Field decls of the va_list record, in declaration order (see
   ix86_build_builtin_va_list above).  */
5030 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
5031 f_fpr
= TREE_CHAIN (f_gpr
);
5032 f_ovf
= TREE_CHAIN (f_fpr
);
5033 f_sav
= TREE_CHAIN (f_ovf
);
5035 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
5036 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
5037 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
5038 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
5039 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
5041 /* Count number of gp and fp argument registers used. */
5042 words
= current_function_args_info
.words
;
5043 n_gpr
= current_function_args_info
.regno
;
5044 n_fpr
= current_function_args_info
.sse_regno
;
5046 if (cfun
->va_list_gpr_size
)
/* gp_offset = n_gpr * 8 (bytes into the save area).  */
5048 type
= TREE_TYPE (gpr
);
5049 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
5050 build_int_cst (type
, n_gpr
* 8));
5051 TREE_SIDE_EFFECTS (t
) = 1;
5052 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5055 if (cfun
->va_list_fpr_size
)
/* fp_offset = n_fpr * 16 + 8*REGPARM_MAX (SSE slots follow the GP
   slots in the save area).  */
5057 type
= TREE_TYPE (fpr
);
5058 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
5059 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
5060 TREE_SIDE_EFFECTS (t
) = 1;
5061 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5064 /* Find the overflow area. */
5065 type
= TREE_TYPE (ovf
);
5066 t
= make_tree (type
, virtual_incoming_args_rtx
);
5068 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
5069 size_int (words
* UNITS_PER_WORD
));
5070 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
5071 TREE_SIDE_EFFECTS (t
) = 1;
5072 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5074 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
5076 /* Find the register save area.
5077 Prologue of the function save it right above stack frame. */
5078 type
= TREE_TYPE (sav
);
5079 t
= make_tree (type
, frame_pointer_rtx
);
5080 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
5081 TREE_SIDE_EFFECTS (t
) = 1;
5082 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5086 /* Implement va_arg. */
5089 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
5091 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
5092 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
5093 tree gpr
, fpr
, ovf
, sav
, t
;
5095 tree lab_false
, lab_over
= NULL_TREE
;
5100 enum machine_mode nat_mode
;
5102 /* Only 64bit target needs something special. */
5103 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
5104 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
5106 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
5107 f_fpr
= TREE_CHAIN (f_gpr
);
5108 f_ovf
= TREE_CHAIN (f_fpr
);
5109 f_sav
= TREE_CHAIN (f_ovf
);
5111 valist
= build_va_arg_indirect_ref (valist
);
5112 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
5113 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
5114 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
5115 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
5117 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
5119 type
= build_pointer_type (type
);
5120 size
= int_size_in_bytes (type
);
5121 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5123 nat_mode
= type_natural_mode (type
);
5124 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
5125 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
5127 /* Pull the value out of the saved registers. */
5129 addr
= create_tmp_var (ptr_type_node
, "addr");
5130 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
5134 int needed_intregs
, needed_sseregs
;
5136 tree int_addr
, sse_addr
;
5138 lab_false
= create_artificial_label ();
5139 lab_over
= create_artificial_label ();
5141 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
5143 need_temp
= (!REG_P (container
)
5144 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
5145 || TYPE_ALIGN (type
) > 128));
5147 /* In case we are passing structure, verify that it is consecutive block
5148 on the register save area. If not we need to do moves. */
5149 if (!need_temp
&& !REG_P (container
))
5151 /* Verify that all registers are strictly consecutive */
5152 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
5156 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
5158 rtx slot
= XVECEXP (container
, 0, i
);
5159 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
5160 || INTVAL (XEXP (slot
, 1)) != i
* 16)
5168 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
5170 rtx slot
= XVECEXP (container
, 0, i
);
5171 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
5172 || INTVAL (XEXP (slot
, 1)) != i
* 8)
5184 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
5185 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
5186 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
5187 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
5190 /* First ensure that we fit completely in registers. */
5193 t
= build_int_cst (TREE_TYPE (gpr
),
5194 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
5195 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
5196 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5197 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5198 gimplify_and_add (t
, pre_p
);
5202 t
= build_int_cst (TREE_TYPE (fpr
),
5203 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
5205 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
5206 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5207 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5208 gimplify_and_add (t
, pre_p
);
5211 /* Compute index to start of area used for integer regs. */
5214 /* int_addr = gpr + sav; */
5215 t
= fold_convert (sizetype
, gpr
);
5216 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5217 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
5218 gimplify_and_add (t
, pre_p
);
5222 /* sse_addr = fpr + sav; */
5223 t
= fold_convert (sizetype
, fpr
);
5224 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5225 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
5226 gimplify_and_add (t
, pre_p
);
5231 tree temp
= create_tmp_var (type
, "va_arg_tmp");
5234 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
5235 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5236 gimplify_and_add (t
, pre_p
);
5238 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
5240 rtx slot
= XVECEXP (container
, 0, i
);
5241 rtx reg
= XEXP (slot
, 0);
5242 enum machine_mode mode
= GET_MODE (reg
);
5243 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
5244 tree addr_type
= build_pointer_type (piece_type
);
5247 tree dest_addr
, dest
;
5249 if (SSE_REGNO_P (REGNO (reg
)))
5251 src_addr
= sse_addr
;
5252 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
5256 src_addr
= int_addr
;
5257 src_offset
= REGNO (reg
) * 8;
5259 src_addr
= fold_convert (addr_type
, src_addr
);
5260 src_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, src_addr
,
5261 size_int (src_offset
));
5262 src
= build_va_arg_indirect_ref (src_addr
);
5264 dest_addr
= fold_convert (addr_type
, addr
);
5265 dest_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, dest_addr
,
5266 size_int (INTVAL (XEXP (slot
, 1))));
5267 dest
= build_va_arg_indirect_ref (dest_addr
);
5269 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
5270 gimplify_and_add (t
, pre_p
);
5276 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
5277 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
5278 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
5279 gimplify_and_add (t
, pre_p
);
5283 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
5284 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
5285 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
5286 gimplify_and_add (t
, pre_p
);
5289 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
5290 gimplify_and_add (t
, pre_p
);
5292 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
5293 append_to_statement_list (t
, pre_p
);
5296 /* ... otherwise out of the overflow area. */
5298 /* Care for on-stack alignment if needed. */
5299 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
5300 || integer_zerop (TYPE_SIZE (type
)))
5304 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
5305 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
5306 size_int (align
- 1));
5307 t
= fold_convert (sizetype
, t
);
5308 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5310 t
= fold_convert (TREE_TYPE (ovf
), t
);
5312 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
5314 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5315 gimplify_and_add (t2
, pre_p
);
5317 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
,
5318 size_int (rsize
* UNITS_PER_WORD
));
5319 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
5320 gimplify_and_add (t
, pre_p
);
5324 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
5325 append_to_statement_list (t
, pre_p
);
5328 ptrtype
= build_pointer_type (type
);
5329 addr
= fold_convert (ptrtype
, addr
);
5332 addr
= build_va_arg_indirect_ref (addr
);
5333 return build_va_arg_indirect_ref (addr
);
5336 /* Return nonzero if OPNUM's MEM should be matched
5337 in movabs* patterns. */
5340 ix86_check_movabs (rtx insn
, int opnum
)
5344 set
= PATTERN (insn
);
5345 if (GET_CODE (set
) == PARALLEL
)
5346 set
= XVECEXP (set
, 0, 0);
5347 gcc_assert (GET_CODE (set
) == SET
);
5348 mem
= XEXP (set
, opnum
);
5349 while (GET_CODE (mem
) == SUBREG
)
5350 mem
= SUBREG_REG (mem
);
5351 gcc_assert (MEM_P (mem
));
5352 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5355 /* Initialize the table of extra 80387 mathematical constants. */
5358 init_ext_80387_constants (void)
5360 static const char * cst
[5] =
5362 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5363 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5364 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5365 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5366 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5370 for (i
= 0; i
< 5; i
++)
5372 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5373 /* Ensure each constant is rounded to XFmode precision. */
5374 real_convert (&ext_80387_constants_table
[i
],
5375 XFmode
, &ext_80387_constants_table
[i
]);
5378 ext_80387_constants_init
= 1;
5381 /* Return true if the constant is something that can be loaded with
5382 a special instruction. */
5385 standard_80387_constant_p (rtx x
)
5387 enum machine_mode mode
= GET_MODE (x
);
5391 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
5394 if (x
== CONST0_RTX (mode
))
5396 if (x
== CONST1_RTX (mode
))
5399 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5401 /* For XFmode constants, try to find a special 80387 instruction when
5402 optimizing for size or on those CPUs that benefit from them. */
5404 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5408 if (! ext_80387_constants_init
)
5409 init_ext_80387_constants ();
5411 for (i
= 0; i
< 5; i
++)
5412 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5416 /* Load of the constant -0.0 or -1.0 will be split as
5417 fldz;fchs or fld1;fchs sequence. */
5418 if (real_isnegzero (&r
))
5420 if (real_identical (&r
, &dconstm1
))
5426 /* Return the opcode of the special instruction to be used to load
5430 standard_80387_constant_opcode (rtx x
)
5432 switch (standard_80387_constant_p (x
))
5456 /* Return the CONST_DOUBLE representing the 80387 constant that is
5457 loaded by the specified special instruction. The argument IDX
5458 matches the return value from standard_80387_constant_p. */
5461 standard_80387_constant_rtx (int idx
)
5465 if (! ext_80387_constants_init
)
5466 init_ext_80387_constants ();
5482 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5486 /* Return 1 if mode is a valid mode for sse. */
5488 standard_sse_mode_p (enum machine_mode mode
)
5505 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5508 standard_sse_constant_p (rtx x
)
5510 enum machine_mode mode
= GET_MODE (x
);
5512 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5514 if (vector_all_ones_operand (x
, mode
)
5515 && standard_sse_mode_p (mode
))
5516 return TARGET_SSE2
? 2 : -1;
5521 /* Return the opcode of the special instruction to be used to load
5525 standard_sse_constant_opcode (rtx insn
, rtx x
)
5527 switch (standard_sse_constant_p (x
))
5530 if (get_attr_mode (insn
) == MODE_V4SF
)
5531 return "xorps\t%0, %0";
5532 else if (get_attr_mode (insn
) == MODE_V2DF
)
5533 return "xorpd\t%0, %0";
5535 return "pxor\t%0, %0";
5537 return "pcmpeqd\t%0, %0";
5542 /* Returns 1 if OP contains a symbol reference */
5545 symbolic_reference_mentioned_p (rtx op
)
5550 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5553 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5554 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5560 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5561 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5565 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5572 /* Return 1 if it is appropriate to emit `ret' instructions in the
5573 body of a function. Do this only if the epilogue is simple, needing a
5574 couple of insns. Prior to reloading, we can't tell how many registers
5575 must be saved, so return 0 then. Return 0 if there is no frame
5576 marker to de-allocate. */
5579 ix86_can_use_return_insn_p (void)
5581 struct ix86_frame frame
;
5583 if (! reload_completed
|| frame_pointer_needed
)
5586 /* Don't allow more than 32 pop, since that's all we can do
5587 with one instruction. */
5588 if (current_function_pops_args
5589 && current_function_args_size
>= 32768)
5592 ix86_compute_frame_layout (&frame
);
5593 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5596 /* Value should be nonzero if functions must have frame pointers.
5597 Zero means the frame pointer need not be set up (and parms may
5598 be accessed via the stack pointer) in functions that seem suitable. */
5601 ix86_frame_pointer_required (void)
5603 /* If we accessed previous frames, then the generated code expects
5604 to be able to access the saved ebp value in our frame. */
5605 if (cfun
->machine
->accesses_prev_frame
)
5608 /* Several x86 os'es need a frame pointer for other reasons,
5609 usually pertaining to setjmp. */
5610 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5613 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5614 the frame pointer by default. Turn it back on now if we've not
5615 got a leaf function. */
5616 if (TARGET_OMIT_LEAF_FRAME_POINTER
5617 && (!current_function_is_leaf
5618 || ix86_current_function_calls_tls_descriptor
))
5621 if (current_function_profile
)
5627 /* Record that the current function accesses previous call frames. */
5630 ix86_setup_frame_addresses (void)
5632 cfun
->machine
->accesses_prev_frame
= 1;
5635 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5636 # define USE_HIDDEN_LINKONCE 1
5638 # define USE_HIDDEN_LINKONCE 0
5641 static int pic_labels_used
;
5643 /* Fills in the label name that should be used for a pc thunk for
5644 the given register. */
5647 get_pc_thunk_name (char name
[32], unsigned int regno
)
5649 gcc_assert (!TARGET_64BIT
);
5651 if (USE_HIDDEN_LINKONCE
)
5652 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5654 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5658 /* This function generates code for -fpic that loads %ebx with
5659 the return address of the caller and then returns. */
5662 ix86_file_end (void)
5667 for (regno
= 0; regno
< 8; ++regno
)
5671 if (! ((pic_labels_used
>> regno
) & 1))
5674 get_pc_thunk_name (name
, regno
);
5679 switch_to_section (darwin_sections
[text_coal_section
]);
5680 fputs ("\t.weak_definition\t", asm_out_file
);
5681 assemble_name (asm_out_file
, name
);
5682 fputs ("\n\t.private_extern\t", asm_out_file
);
5683 assemble_name (asm_out_file
, name
);
5684 fputs ("\n", asm_out_file
);
5685 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5689 if (USE_HIDDEN_LINKONCE
)
5693 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5695 TREE_PUBLIC (decl
) = 1;
5696 TREE_STATIC (decl
) = 1;
5697 DECL_ONE_ONLY (decl
) = 1;
5699 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5700 switch_to_section (get_named_section (decl
, NULL
, 0));
5702 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5703 fputs ("\t.hidden\t", asm_out_file
);
5704 assemble_name (asm_out_file
, name
);
5705 fputc ('\n', asm_out_file
);
5706 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5710 switch_to_section (text_section
);
5711 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5714 xops
[0] = gen_rtx_REG (SImode
, regno
);
5715 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5716 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5717 output_asm_insn ("ret", xops
);
5720 if (NEED_INDICATE_EXEC_STACK
)
5721 file_end_indicate_exec_stack ();
5724 /* Emit code for the SET_GOT patterns. */
5727 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5733 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5735 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5736 xops
[2] = gen_rtx_MEM (Pmode
,
5737 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5738 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5740 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5741 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5742 an unadorned address. */
5743 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5744 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5745 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5749 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5751 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5753 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5756 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5758 output_asm_insn ("call\t%a2", xops
);
5761 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5762 is what will be referenced by the Mach-O PIC subsystem. */
5764 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5767 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5768 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5771 output_asm_insn ("pop{l}\t%0", xops
);
5776 get_pc_thunk_name (name
, REGNO (dest
));
5777 pic_labels_used
|= 1 << REGNO (dest
);
5779 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5780 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5781 output_asm_insn ("call\t%X2", xops
);
5782 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5783 is what will be referenced by the Mach-O PIC subsystem. */
5786 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5788 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5789 CODE_LABEL_NUMBER (label
));
5796 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5797 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5799 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5804 /* Generate an "push" pattern for input ARG. */
5809 return gen_rtx_SET (VOIDmode
,
5811 gen_rtx_PRE_DEC (Pmode
,
5812 stack_pointer_rtx
)),
5816 /* Return >= 0 if there is an unused call-clobbered register available
5817 for the entire function. */
5820 ix86_select_alt_pic_regnum (void)
5822 if (current_function_is_leaf
&& !current_function_profile
5823 && !ix86_current_function_calls_tls_descriptor
)
5826 for (i
= 2; i
>= 0; --i
)
5827 if (!df_regs_ever_live_p (i
))
5831 return INVALID_REGNUM
;
5834 /* Return 1 if we need to save REGNO. */
5836 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5838 if (pic_offset_table_rtx
5839 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5840 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
5841 || current_function_profile
5842 || current_function_calls_eh_return
5843 || current_function_uses_const_pool
))
5845 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5850 if (current_function_calls_eh_return
&& maybe_eh_return
)
5855 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5856 if (test
== INVALID_REGNUM
)
5863 if (cfun
->machine
->force_align_arg_pointer
5864 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5867 return (df_regs_ever_live_p (regno
)
5868 && !call_used_regs
[regno
]
5869 && !fixed_regs
[regno
]
5870 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5873 /* Return number of registers to be saved on the stack. */
5876 ix86_nsaved_regs (void)
5881 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5882 if (ix86_save_reg (regno
, true))
5887 /* Return the offset between two registers, one to be eliminated, and the other
5888 its replacement, at the start of a routine. */
5891 ix86_initial_elimination_offset (int from
, int to
)
5893 struct ix86_frame frame
;
5894 ix86_compute_frame_layout (&frame
);
5896 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5897 return frame
.hard_frame_pointer_offset
;
5898 else if (from
== FRAME_POINTER_REGNUM
5899 && to
== HARD_FRAME_POINTER_REGNUM
)
5900 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5903 gcc_assert (to
== STACK_POINTER_REGNUM
);
5905 if (from
== ARG_POINTER_REGNUM
)
5906 return frame
.stack_pointer_offset
;
5908 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5909 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5913 /* Fill structure ix86_frame about frame of currently computed function. */
5916 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5918 HOST_WIDE_INT total_size
;
5919 unsigned int stack_alignment_needed
;
5920 HOST_WIDE_INT offset
;
5921 unsigned int preferred_alignment
;
5922 HOST_WIDE_INT size
= get_frame_size ();
5924 frame
->nregs
= ix86_nsaved_regs ();
5927 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5928 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5930 /* During reload iteration the amount of registers saved can change.
5931 Recompute the value as needed. Do not recompute when amount of registers
5932 didn't change as reload does multiple calls to the function and does not
5933 expect the decision to change within single iteration. */
5935 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5937 int count
= frame
->nregs
;
5939 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5940 /* The fast prologue uses move instead of push to save registers. This
5941 is significantly longer, but also executes faster as modern hardware
5942 can execute the moves in parallel, but can't do that for push/pop.
5944 Be careful about choosing what prologue to emit: When function takes
5945 many instructions to execute we may use slow version as well as in
5946 case function is known to be outside hot spot (this is known with
5947 feedback only). Weight the size of function by number of registers
5948 to save as it is cheap to use one or two push instructions but very
5949 slow to use many of them. */
5951 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5952 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5953 || (flag_branch_probabilities
5954 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5955 cfun
->machine
->use_fast_prologue_epilogue
= false;
5957 cfun
->machine
->use_fast_prologue_epilogue
5958 = !expensive_function_p (count
);
5960 if (TARGET_PROLOGUE_USING_MOVE
5961 && cfun
->machine
->use_fast_prologue_epilogue
)
5962 frame
->save_regs_using_mov
= true;
5964 frame
->save_regs_using_mov
= false;
5967 /* Skip return address and saved base pointer. */
5968 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5970 frame
->hard_frame_pointer_offset
= offset
;
5972 /* Do some sanity checking of stack_alignment_needed and
5973 preferred_alignment, since i386 port is the only using those features
5974 that may break easily. */
5976 gcc_assert (!size
|| stack_alignment_needed
);
5977 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5978 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5979 gcc_assert (stack_alignment_needed
5980 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5982 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5983 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5985 /* Register save area */
5986 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5989 if (ix86_save_varrargs_registers
)
5991 offset
+= X86_64_VARARGS_SIZE
;
5992 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5995 frame
->va_arg_size
= 0;
5997 /* Align start of frame for local function. */
5998 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5999 & -stack_alignment_needed
) - offset
;
6001 offset
+= frame
->padding1
;
6003 /* Frame pointer points here. */
6004 frame
->frame_pointer_offset
= offset
;
6008 /* Add outgoing arguments area. Can be skipped if we eliminated
6009 all the function calls as dead code.
6010 Skipping is however impossible when function calls alloca. Alloca
6011 expander assumes that last current_function_outgoing_args_size
6012 of stack frame are unused. */
6013 if (ACCUMULATE_OUTGOING_ARGS
6014 && (!current_function_is_leaf
|| current_function_calls_alloca
6015 || ix86_current_function_calls_tls_descriptor
))
6017 offset
+= current_function_outgoing_args_size
;
6018 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
6021 frame
->outgoing_arguments_size
= 0;
6023 /* Align stack boundary. Only needed if we're calling another function
6025 if (!current_function_is_leaf
|| current_function_calls_alloca
6026 || ix86_current_function_calls_tls_descriptor
)
6027 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
6028 & -preferred_alignment
) - offset
;
6030 frame
->padding2
= 0;
6032 offset
+= frame
->padding2
;
6034 /* We've reached end of stack frame. */
6035 frame
->stack_pointer_offset
= offset
;
6037 /* Size prologue needs to allocate. */
6038 frame
->to_allocate
=
6039 (size
+ frame
->padding1
+ frame
->padding2
6040 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
6042 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
6043 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
6044 frame
->save_regs_using_mov
= false;
6046 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
6047 && current_function_is_leaf
6048 && !ix86_current_function_calls_tls_descriptor
)
6050 frame
->red_zone_size
= frame
->to_allocate
;
6051 if (frame
->save_regs_using_mov
)
6052 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
6053 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
6054 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
6057 frame
->red_zone_size
= 0;
6058 frame
->to_allocate
-= frame
->red_zone_size
;
6059 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
6061 fprintf (stderr
, "\n");
6062 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
6063 fprintf (stderr
, "size: %ld\n", (long)size
);
6064 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
6065 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
6066 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
6067 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
6068 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
6069 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
6070 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
6071 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
6072 (long)frame
->hard_frame_pointer_offset
);
6073 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
6074 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
6075 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
6076 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
6080 /* Emit code to save registers in the prologue. */
6083 ix86_emit_save_regs (void)
6088 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
6089 if (ix86_save_reg (regno
, true))
6091 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
6092 RTX_FRAME_RELATED_P (insn
) = 1;
6096 /* Emit code to save registers using MOV insns. First register
6097 is restored from POINTER + OFFSET. */
6099 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
6104 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6105 if (ix86_save_reg (regno
, true))
6107 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
6109 gen_rtx_REG (Pmode
, regno
));
6110 RTX_FRAME_RELATED_P (insn
) = 1;
6111 offset
+= UNITS_PER_WORD
;
6115 /* Expand prologue or epilogue stack adjustment.
6116 The pattern exist to put a dependency on all ebp-based memory accesses.
6117 STYLE should be negative if instructions should be marked as frame related,
6118 zero if %r11 register is live and cannot be freely used and positive
6122 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
6127 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
6128 else if (x86_64_immediate_operand (offset
, DImode
))
6129 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
6133 /* r11 is used by indirect sibcall return as well, set before the
6134 epilogue and used after the epilogue. ATM indirect sibcall
6135 shouldn't be used together with huge frame sizes in one
6136 function because of the frame_size check in sibcall.c. */
6138 r11
= gen_rtx_REG (DImode
, R11_REG
);
6139 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
6141 RTX_FRAME_RELATED_P (insn
) = 1;
6142 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
6146 RTX_FRAME_RELATED_P (insn
) = 1;
6149 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6152 ix86_internal_arg_pointer (void)
6154 bool has_force_align_arg_pointer
=
6155 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
6156 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
6157 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6158 && DECL_NAME (current_function_decl
)
6159 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
6160 && DECL_FILE_SCOPE_P (current_function_decl
))
6161 || ix86_force_align_arg_pointer
6162 || has_force_align_arg_pointer
)
6164 /* Nested functions can't realign the stack due to a register
6166 if (DECL_CONTEXT (current_function_decl
)
6167 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
6169 if (ix86_force_align_arg_pointer
)
6170 warning (0, "-mstackrealign ignored for nested functions");
6171 if (has_force_align_arg_pointer
)
6172 error ("%s not supported for nested functions",
6173 ix86_force_align_arg_pointer_string
);
6174 return virtual_incoming_args_rtx
;
6176 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
6177 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
6180 return virtual_incoming_args_rtx
;
6183 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6184 This is called from dwarf2out.c to emit call frame instructions
6185 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6187 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
6189 rtx unspec
= SET_SRC (pattern
);
6190 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
6194 case UNSPEC_REG_SAVE
:
6195 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
6196 SET_DEST (pattern
));
6198 case UNSPEC_DEF_CFA
:
6199 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
6200 INTVAL (XVECEXP (unspec
, 0, 0)));
6207 /* Expand the prologue into a bunch of separate insns. */
6210 ix86_expand_prologue (void)
6214 struct ix86_frame frame
;
6215 HOST_WIDE_INT allocate
;
6217 ix86_compute_frame_layout (&frame
);
6219 if (cfun
->machine
->force_align_arg_pointer
)
6223 /* Grab the argument pointer. */
6224 x
= plus_constant (stack_pointer_rtx
, 4);
6225 y
= cfun
->machine
->force_align_arg_pointer
;
6226 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
6227 RTX_FRAME_RELATED_P (insn
) = 1;
6229 /* The unwind info consists of two parts: install the fafp as the cfa,
6230 and record the fafp as the "save register" of the stack pointer.
6231 The later is there in order that the unwinder can see where it
6232 should restore the stack pointer across the and insn. */
6233 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
6234 x
= gen_rtx_SET (VOIDmode
, y
, x
);
6235 RTX_FRAME_RELATED_P (x
) = 1;
6236 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
6238 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
6239 RTX_FRAME_RELATED_P (y
) = 1;
6240 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
6241 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6242 REG_NOTES (insn
) = x
;
6244 /* Align the stack. */
6245 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
6248 /* And here we cheat like madmen with the unwind info. We force the
6249 cfa register back to sp+4, which is exactly what it was at the
6250 start of the function. Re-pushing the return address results in
6251 the return at the same spot relative to the cfa, and thus is
6252 correct wrt the unwind info. */
6253 x
= cfun
->machine
->force_align_arg_pointer
;
6254 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
6255 insn
= emit_insn (gen_push (x
));
6256 RTX_FRAME_RELATED_P (insn
) = 1;
6259 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
6260 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
6261 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6262 REG_NOTES (insn
) = x
;
6265 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6266 slower on all targets. Also sdb doesn't like it. */
6268 if (frame_pointer_needed
)
6270 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
6271 RTX_FRAME_RELATED_P (insn
) = 1;
6273 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
6274 RTX_FRAME_RELATED_P (insn
) = 1;
6277 allocate
= frame
.to_allocate
;
6279 if (!frame
.save_regs_using_mov
)
6280 ix86_emit_save_regs ();
6282 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
6284 /* When using red zone we may start register saving before allocating
6285 the stack frame saving one cycle of the prologue. */
6286 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
6287 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
6288 : stack_pointer_rtx
,
6289 -frame
.nregs
* UNITS_PER_WORD
);
6293 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
6294 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6295 GEN_INT (-allocate
), -1);
6298 /* Only valid for Win32. */
6299 rtx eax
= gen_rtx_REG (Pmode
, 0);
6303 gcc_assert (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
);
6305 if (TARGET_64BIT_MS_ABI
)
6308 eax_live
= ix86_eax_live_at_start_p ();
6312 emit_insn (gen_push (eax
));
6313 allocate
-= UNITS_PER_WORD
;
6316 emit_move_insn (eax
, GEN_INT (allocate
));
6319 insn
= gen_allocate_stack_worker_64 (eax
);
6321 insn
= gen_allocate_stack_worker_32 (eax
);
6322 insn
= emit_insn (insn
);
6323 RTX_FRAME_RELATED_P (insn
) = 1;
6324 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
6325 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
6326 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
6327 t
, REG_NOTES (insn
));
6331 if (frame_pointer_needed
)
6332 t
= plus_constant (hard_frame_pointer_rtx
,
6335 - frame
.nregs
* UNITS_PER_WORD
);
6337 t
= plus_constant (stack_pointer_rtx
, allocate
);
6338 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
6342 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6344 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6345 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6347 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6348 -frame
.nregs
* UNITS_PER_WORD
);
6351 pic_reg_used
= false;
6352 if (pic_offset_table_rtx
6353 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
6354 || current_function_profile
))
6356 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6358 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6359 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
6361 pic_reg_used
= true;
6368 if (ix86_cmodel
== CM_LARGE_PIC
)
6370 rtx tmp_reg
= gen_rtx_REG (DImode
,
6371 FIRST_REX_INT_REG
+ 3 /* R11 */);
6372 rtx label
= gen_label_rtx ();
6374 LABEL_PRESERVE_P (label
) = 1;
6375 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6376 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6377 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6378 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6379 pic_offset_table_rtx
, tmp_reg
));
6382 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6385 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6388 /* Prevent function calls from be scheduled before the call to mcount.
6389 In the pic_reg_used case, make sure that the got load isn't deleted. */
6390 if (current_function_profile
)
6393 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
6394 emit_insn (gen_blockage ());
6398 /* Emit code to restore saved registers using MOV insns. First register
6399 is restored from POINTER + OFFSET. */
/* NOTE(review): hard-wrapped, truncated listing — the declarations of
   `regno' and `r11', the enclosing braces, and (presumably) an
   `offset = 0;' after rebasing onto r11 were dropped by the extraction
   (numbering jumps 6419 -> 6422); verify against the upstream file.  */
/* Emit MOV insns that restore every call-saved register recorded by
   ix86_save_reg, loading the first one from POINTER + OFFSET and each
   subsequent one from the following word (see comment above).  */
6401 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6402 int maybe_eh_return
)
/* Memory base from which the saved registers are reloaded.  */
6405 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
/* Walk all hard registers; restore exactly those that were saved
   (MAYBE_EH_RETURN selects the eh_return variant of the save set).  */
6407 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6408 if (ix86_save_reg (regno
, maybe_eh_return
))
6410 /* Ensure that adjust_address won't be forced to produce pointer
6411 out of range allowed by x86-64 instruction set. */
6412 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
/* OFFSET does not fit a signed 32-bit displacement: materialize
   POINTER + OFFSET in r11 and address relative to that instead.  */
6416 r11
= gen_rtx_REG (DImode
, R11_REG
);
6417 emit_move_insn (r11
, GEN_INT (offset
));
6418 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6419 base_address
= gen_rtx_MEM (Pmode
, r11
);
/* Reload the register from BASE_ADDRESS + OFFSET, then step past the
   word-sized slot just consumed.  */
6422 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6423 adjust_address (base_address
, Pmode
, offset
));
6424 offset
+= UNITS_PER_WORD
;
6428 /* Restore function stack, frame, and registers. */
6431 ix86_expand_epilogue (int style
)
6434 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6435 struct ix86_frame frame
;
6436 HOST_WIDE_INT offset
;
6438 ix86_compute_frame_layout (&frame
);
6440 /* Calculate start of saved registers relative to ebp. Special care
6441 must be taken for the normal return case of a function using
6442 eh_return: the eax and edx registers are marked as saved, but not
6443 restored along this path. */
6444 offset
= frame
.nregs
;
6445 if (current_function_calls_eh_return
&& style
!= 2)
6447 offset
*= -UNITS_PER_WORD
;
6449 /* If we're only restoring one register and sp is not valid then
6450 using a move instruction to restore the register since it's
6451 less work than reloading sp and popping the register.
6453 The default code result in stack adjustment using add/lea instruction,
6454 while this code results in LEAVE instruction (or discrete equivalent),
6455 so it is profitable in some other cases as well. Especially when there
6456 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6457 and there is exactly one register to pop. This heuristic may need some
6458 tuning in future. */
6459 if ((!sp_valid
&& frame
.nregs
<= 1)
6460 || (TARGET_EPILOGUE_USING_MOVE
6461 && cfun
->machine
->use_fast_prologue_epilogue
6462 && (frame
.nregs
> 1 || frame
.to_allocate
))
6463 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6464 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6465 && cfun
->machine
->use_fast_prologue_epilogue
6466 && frame
.nregs
== 1)
6467 || current_function_calls_eh_return
)
6469 /* Restore registers. We can use ebp or esp to address the memory
6470 locations. If both are available, default to ebp, since offsets
6471 are known to be small. Only exception is esp pointing directly to the
6472 end of block of saved registers, where we may simplify addressing
6475 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6476 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6477 frame
.to_allocate
, style
== 2);
6479 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6480 offset
, style
== 2);
6482 /* eh_return epilogues need %ecx added to the stack pointer. */
6485 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6487 if (frame_pointer_needed
)
6489 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6490 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6491 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6493 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6494 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6496 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6501 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6502 tmp
= plus_constant (tmp
, (frame
.to_allocate
6503 + frame
.nregs
* UNITS_PER_WORD
));
6504 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6507 else if (!frame_pointer_needed
)
6508 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6509 GEN_INT (frame
.to_allocate
6510 + frame
.nregs
* UNITS_PER_WORD
),
6512 /* If not an i386, mov & pop is faster than "leave". */
6513 else if (TARGET_USE_LEAVE
|| optimize_size
6514 || !cfun
->machine
->use_fast_prologue_epilogue
)
6515 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6518 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6519 hard_frame_pointer_rtx
,
6522 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6524 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6529 /* First step is to deallocate the stack frame so that we can
6530 pop the registers. */
6533 gcc_assert (frame_pointer_needed
);
6534 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6535 hard_frame_pointer_rtx
,
6536 GEN_INT (offset
), style
);
6538 else if (frame
.to_allocate
)
6539 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6540 GEN_INT (frame
.to_allocate
), style
);
6542 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6543 if (ix86_save_reg (regno
, false))
6546 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6548 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6550 if (frame_pointer_needed
)
6552 /* Leave results in shorter dependency chains on CPUs that are
6553 able to grok it fast. */
6554 if (TARGET_USE_LEAVE
)
6555 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6556 else if (TARGET_64BIT
)
6557 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6559 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6563 if (cfun
->machine
->force_align_arg_pointer
)
6565 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6566 cfun
->machine
->force_align_arg_pointer
,
6570 /* Sibcall epilogues don't want a return instruction. */
6574 if (current_function_pops_args
&& current_function_args_size
)
6576 rtx popc
= GEN_INT (current_function_pops_args
);
6578 /* i386 can only pop 64K bytes. If asked to pop more, pop
6579 return address, do explicit add, and jump indirectly to the
6582 if (current_function_pops_args
>= 65536)
6584 rtx ecx
= gen_rtx_REG (SImode
, 2);
6586 /* There is no "pascal" calling convention in any 64bit ABI. */
6587 gcc_assert (!TARGET_64BIT
);
6589 emit_insn (gen_popsi1 (ecx
));
6590 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6591 emit_jump_insn (gen_return_indirect_internal (ecx
));
6594 emit_jump_insn (gen_return_pop_internal (popc
));
6597 emit_jump_insn (gen_return_internal ());
6600 /* Reset from the function's potential modifications. */
/* NOTE(review): truncated listing — the loop/condition headers around
   the NOTE-walking code (original lines 6611-6619) are missing, so the
   Mach-O portion below is only partially visible.  */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: undo per-function state changes
   (see "Reset from the function's potential modifications" above).  */
6603 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6604 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
/* The prologue may have renumbered the PIC register to an alternate
   hard register (see ix86_select_alt_pic_regnum use in the prologue);
   restore the canonical REAL_PIC_OFFSET_TABLE_REGNUM here.  */
6606 if (pic_offset_table_rtx
)
6607 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
)
;
6609 /* Mach-O doesn't support labels at the end of objects, so if
6610 it looks like we might want one, insert a NOP. */
/* Scan backwards over trailing notes (loop header missing from this
   listing); if a deleted label would land at the end of the object,
   pad with a nop so the label has an instruction to attach to.  */
6612 rtx insn
= get_last_insn ();
6615 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
6616 insn
= PREV_INSN (insn
);
6620 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
6621 fputs ("\tnop\n", file
);
6627 /* Extract the parts of an RTL expression that is a valid memory address
6628 for an instruction. Return 0 if the structure of the address is
6629 grossly off. Return -1 if the address contains ASHIFT, so it is not
6630 strictly valid, but still used for computing length of lea instruction. */
6633 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6635 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6636 rtx base_reg
, index_reg
;
6637 HOST_WIDE_INT scale
= 1;
6638 rtx scale_rtx
= NULL_RTX
;
6640 enum ix86_address_seg seg
= SEG_DEFAULT
;
6642 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6644 else if (GET_CODE (addr
) == PLUS
)
6654 addends
[n
++] = XEXP (op
, 1);
6657 while (GET_CODE (op
) == PLUS
);
6662 for (i
= n
; i
>= 0; --i
)
6665 switch (GET_CODE (op
))
6670 index
= XEXP (op
, 0);
6671 scale_rtx
= XEXP (op
, 1);
6675 if (XINT (op
, 1) == UNSPEC_TP
6676 && TARGET_TLS_DIRECT_SEG_REFS
6677 && seg
== SEG_DEFAULT
)
6678 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6707 else if (GET_CODE (addr
) == MULT
)
6709 index
= XEXP (addr
, 0); /* index*scale */
6710 scale_rtx
= XEXP (addr
, 1);
6712 else if (GET_CODE (addr
) == ASHIFT
)
6716 /* We're called for lea too, which implements ashift on occasion. */
6717 index
= XEXP (addr
, 0);
6718 tmp
= XEXP (addr
, 1);
6719 if (!CONST_INT_P (tmp
))
6721 scale
= INTVAL (tmp
);
6722 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6728 disp
= addr
; /* displacement */
6730 /* Extract the integral value of scale. */
6733 if (!CONST_INT_P (scale_rtx
))
6735 scale
= INTVAL (scale_rtx
);
6738 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6739 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6741 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6742 if (base_reg
&& index_reg
&& scale
== 1
6743 && (index_reg
== arg_pointer_rtx
6744 || index_reg
== frame_pointer_rtx
6745 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6748 tmp
= base
, base
= index
, index
= tmp
;
6749 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6752 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6753 if ((base_reg
== hard_frame_pointer_rtx
6754 || base_reg
== frame_pointer_rtx
6755 || base_reg
== arg_pointer_rtx
) && !disp
)
6758 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6759 Avoid this by transforming to [%esi+0]. */
6760 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6761 && base_reg
&& !index_reg
&& !disp
6763 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6766 /* Special case: encode reg+reg instead of reg*2. */
6767 if (!base
&& index
&& scale
&& scale
== 2)
6768 base
= index
, base_reg
= index_reg
, scale
= 1;
6770 /* Special case: scaling cannot be encoded without base or displacement. */
6771 if (!base
&& !disp
&& index
&& scale
!= 1)
6783 /* Return cost of the memory address x.
6784 For i386, it is better to use a complex address than let gcc copy
6785 the address into a reg and make a new pseudo. But not if the address
6786 requires to two regs - that would mean more pseudos with longer
6789 ix86_address_cost (rtx x
)
6791 struct ix86_address parts
;
6793 int ok
= ix86_decompose_address (x
, &parts
);
6797 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6798 parts
.base
= SUBREG_REG (parts
.base
);
6799 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6800 parts
.index
= SUBREG_REG (parts
.index
);
6802 /* Attempt to minimize number of registers in the address. */
6804 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6806 && (!REG_P (parts
.index
)
6807 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6811 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6813 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6814 && parts
.base
!= parts
.index
)
6817 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6818 since it's predecode logic can't detect the length of instructions
6819 and it degenerates to vector decoded. Increase cost of such
6820 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6821 to split such addresses or even refuse such addresses at all.
6823 Following addressing modes are affected:
6828 The first and last case may be avoidable by explicitly coding the zero in
6829 memory address, but I don't have AMD-K6 machine handy to check this
6833 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6834 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6835 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6841 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6842 this is used for to form addresses to local data when -fPIC is in
/* NOTE(review): truncated listing — the enclosing braces and the
   return statements are missing; the predicate's result values are not
   visible here.  */
/* Recognize the Mach-O local-data PIC form described in the comment
   above: ({LABEL|SYMBOL}_REF - SYMBOL_REF) where the subtrahend is the
   function's pic base symbol, literally named "<pic base>".  */
6846 darwin_local_data_pic (rtx disp
)
/* Shape check: a MINUS whose first operand is a label or symbol...  */
6848 if (GET_CODE (disp
) == MINUS
)
6850 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6851 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
/* ...and whose second operand is a symbol...  */
6852 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
/* ...whose assembler name is exactly the Mach-O pic base marker.  */
6854 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6855 if (! strcmp (sym_name
, "<pic base>"))
6863 /* Determine if a given RTX is a valid constant. We already know this
6864 satisfies CONSTANT_P. */
6867 legitimate_constant_p (rtx x
)
6869 switch (GET_CODE (x
))
6874 if (GET_CODE (x
) == PLUS
)
6876 if (!CONST_INT_P (XEXP (x
, 1)))
6881 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6884 /* Only some unspecs are valid as "constants". */
6885 if (GET_CODE (x
) == UNSPEC
)
6886 switch (XINT (x
, 1))
6891 return TARGET_64BIT
;
6894 x
= XVECEXP (x
, 0, 0);
6895 return (GET_CODE (x
) == SYMBOL_REF
6896 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6898 x
= XVECEXP (x
, 0, 0);
6899 return (GET_CODE (x
) == SYMBOL_REF
6900 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6905 /* We must have drilled down to a symbol. */
6906 if (GET_CODE (x
) == LABEL_REF
)
6908 if (GET_CODE (x
) != SYMBOL_REF
)
6913 /* TLS symbols are never valid. */
6914 if (SYMBOL_REF_TLS_MODEL (x
))
6917 /* DLLIMPORT symbols are never valid. */
6918 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6919 && SYMBOL_REF_DLLIMPORT_P (x
))
6924 if (GET_MODE (x
) == TImode
6925 && x
!= CONST0_RTX (TImode
)
6931 if (x
== CONST0_RTX (GET_MODE (x
)))
6939 /* Otherwise we handle everything else in the move patterns. */
6943 /* Determine if it's legal to put X into the constant pool. This
6944 is not possible for the address of thread-local symbols, which
6945 is checked above. */
/* NOTE(review): truncated listing — the case labels of the switch
   (the integral-constant / vector cases the comment refers to) and
   their returns are missing.  */
/* TARGET_CANNOT_FORCE_CONST_MEM hook (see comment above): X may not be
   spilled to the constant pool when it is not a legitimate constant
   (e.g. the TLS symbols rejected by legitimate_constant_p).  */
6948 ix86_cannot_force_const_mem (rtx x
)
6950 /* We can always put integral constants and vectors in memory. */
6951 switch (GET_CODE (x
))
/* Default case: forceable iff legitimate_constant_p accepts it.  */
6961 return !legitimate_constant_p (x
);
6964 /* Determine if a given RTX is a valid constant address. */
/* A constant address must both satisfy CONSTANT_P and be accepted by
   the strict form of legitimate_address_p (third argument 1 = strict,
   per that function's `strict' parameter).  */
6967 constant_address_p (rtx x
)
6969 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6972 /* Nonzero if the constant value X is a legitimate general operand
6973 when generating PIC code. It is given that flag_pic is on and
6974 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): truncated listing — the outer switch's case labels,
   the inner switch's case labels (the specific UNSPEC_* TLS codes),
   and several returns are missing; annotations below cover only the
   visible fragments.  */
/* Predicate described in the comment above: is constant X a legitimate
   general operand under -fpic?  */
6977 legitimate_pic_operand_p (rtx x
)
6981 switch (GET_CODE (x
))
/* (Presumably the CONST case.)  Strip the CONST wrapper, and a
   trailing "+ const_int" if present, to reach the core expression.  */
6984 inner
= XEXP (x
, 0);
6985 if (GET_CODE (inner
) == PLUS
6986 && CONST_INT_P (XEXP (inner
, 1)))
6987 inner
= XEXP (inner
, 0);
6989 /* Only some unspecs are valid as "constants". */
6990 if (GET_CODE (inner
) == UNSPEC
)
6991 switch (XINT (inner
, 1))
/* One unspec kind (case label missing) is valid only in 64-bit
   mode...  */
6996 return TARGET_64BIT
;
/* ...another (case label missing) is valid only when it wraps a
   local-exec TLS symbol.  */
6998 x
= XVECEXP (inner
, 0, 0);
6999 return (GET_CODE (x
) == SYMBOL_REF
7000 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
/* Everything else defers to the PIC displacement predicate.  */
7008 return legitimate_pic_address_disp_p (x
);
7015 /* Determine if a given CONST RTX is a valid memory displacement
7019 legitimate_pic_address_disp_p (rtx disp
)
7023 /* In 64bit mode we can allow direct addresses of symbols and labels
7024 when they are not dynamic symbols. */
7027 rtx op0
= disp
, op1
;
7029 switch (GET_CODE (disp
))
7035 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
7037 op0
= XEXP (XEXP (disp
, 0), 0);
7038 op1
= XEXP (XEXP (disp
, 0), 1);
7039 if (!CONST_INT_P (op1
)
7040 || INTVAL (op1
) >= 16*1024*1024
7041 || INTVAL (op1
) < -16*1024*1024)
7043 if (GET_CODE (op0
) == LABEL_REF
)
7045 if (GET_CODE (op0
) != SYMBOL_REF
)
7050 /* TLS references should always be enclosed in UNSPEC. */
7051 if (SYMBOL_REF_TLS_MODEL (op0
))
7053 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
7054 && ix86_cmodel
!= CM_LARGE_PIC
)
7062 if (GET_CODE (disp
) != CONST
)
7064 disp
= XEXP (disp
, 0);
7068 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7069 of GOT tables. We should not need these anyway. */
7070 if (GET_CODE (disp
) != UNSPEC
7071 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
7072 && XINT (disp
, 1) != UNSPEC_GOTOFF
7073 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
7076 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
7077 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
7083 if (GET_CODE (disp
) == PLUS
)
7085 if (!CONST_INT_P (XEXP (disp
, 1)))
7087 disp
= XEXP (disp
, 0);
7091 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
7094 if (GET_CODE (disp
) != UNSPEC
)
7097 switch (XINT (disp
, 1))
7102 /* We need to check for both symbols and labels because VxWorks loads
7103 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7105 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7106 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
7108 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7109 While ABI specify also 32bit relocation but we don't produce it in
7110 small PIC model at all. */
7111 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
7112 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
7114 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
7116 case UNSPEC_GOTTPOFF
:
7117 case UNSPEC_GOTNTPOFF
:
7118 case UNSPEC_INDNTPOFF
:
7121 disp
= XVECEXP (disp
, 0, 0);
7122 return (GET_CODE (disp
) == SYMBOL_REF
7123 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
7125 disp
= XVECEXP (disp
, 0, 0);
7126 return (GET_CODE (disp
) == SYMBOL_REF
7127 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
7129 disp
= XVECEXP (disp
, 0, 0);
7130 return (GET_CODE (disp
) == SYMBOL_REF
7131 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
7137 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7138 memory address for an instruction. The MODE argument is the machine mode
7139 for the MEM expression that wants to use this address.
7141 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7142 convert common non-canonical forms to canonical form so that they will
7146 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
7147 rtx addr
, int strict
)
7149 struct ix86_address parts
;
7150 rtx base
, index
, disp
;
7151 HOST_WIDE_INT scale
;
7152 const char *reason
= NULL
;
7153 rtx reason_rtx
= NULL_RTX
;
7155 if (ix86_decompose_address (addr
, &parts
) <= 0)
7157 reason
= "decomposition failed";
7162 index
= parts
.index
;
7164 scale
= parts
.scale
;
7166 /* Validate base register.
7168 Don't allow SUBREG's that span more than a word here. It can lead to spill
7169 failures when the base is one word out of a two word structure, which is
7170 represented internally as a DImode int. */
7179 else if (GET_CODE (base
) == SUBREG
7180 && REG_P (SUBREG_REG (base
))
7181 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
7183 reg
= SUBREG_REG (base
);
7186 reason
= "base is not a register";
7190 if (GET_MODE (base
) != Pmode
)
7192 reason
= "base is not in Pmode";
7196 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
7197 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
7199 reason
= "base is not valid";
7204 /* Validate index register.
7206 Don't allow SUBREG's that span more than a word here -- same as above. */
7215 else if (GET_CODE (index
) == SUBREG
7216 && REG_P (SUBREG_REG (index
))
7217 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
7219 reg
= SUBREG_REG (index
);
7222 reason
= "index is not a register";
7226 if (GET_MODE (index
) != Pmode
)
7228 reason
= "index is not in Pmode";
7232 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
7233 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
7235 reason
= "index is not valid";
7240 /* Validate scale factor. */
7243 reason_rtx
= GEN_INT (scale
);
7246 reason
= "scale without index";
7250 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
7252 reason
= "scale is not a valid multiplier";
7257 /* Validate displacement. */
7262 if (GET_CODE (disp
) == CONST
7263 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
7264 switch (XINT (XEXP (disp
, 0), 1))
7266 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7267 used. While ABI specify also 32bit relocations, we don't produce
7268 them at all and use IP relative instead. */
7271 gcc_assert (flag_pic
);
7273 goto is_legitimate_pic
;
7274 reason
= "64bit address unspec";
7277 case UNSPEC_GOTPCREL
:
7278 gcc_assert (flag_pic
);
7279 goto is_legitimate_pic
;
7281 case UNSPEC_GOTTPOFF
:
7282 case UNSPEC_GOTNTPOFF
:
7283 case UNSPEC_INDNTPOFF
:
7289 reason
= "invalid address unspec";
7293 else if (SYMBOLIC_CONST (disp
)
7297 && MACHOPIC_INDIRECT
7298 && !machopic_operand_p (disp
)
7304 if (TARGET_64BIT
&& (index
|| base
))
7306 /* foo@dtpoff(%rX) is ok. */
7307 if (GET_CODE (disp
) != CONST
7308 || GET_CODE (XEXP (disp
, 0)) != PLUS
7309 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7310 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7311 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7312 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7314 reason
= "non-constant pic memory reference";
7318 else if (! legitimate_pic_address_disp_p (disp
))
7320 reason
= "displacement is an invalid pic construct";
7324 /* This code used to verify that a symbolic pic displacement
7325 includes the pic_offset_table_rtx register.
7327 While this is good idea, unfortunately these constructs may
7328 be created by "adds using lea" optimization for incorrect
7337 This code is nonsensical, but results in addressing
7338 GOT table with pic_offset_table_rtx base. We can't
7339 just refuse it easily, since it gets matched by
7340 "addsi3" pattern, that later gets split to lea in the
7341 case output register differs from input. While this
7342 can be handled by separate addsi pattern for this case
7343 that never results in lea, this seems to be easier and
7344 correct fix for crash to disable this test. */
7346 else if (GET_CODE (disp
) != LABEL_REF
7347 && !CONST_INT_P (disp
)
7348 && (GET_CODE (disp
) != CONST
7349 || !legitimate_constant_p (disp
))
7350 && (GET_CODE (disp
) != SYMBOL_REF
7351 || !legitimate_constant_p (disp
)))
7353 reason
= "displacement is not constant";
7356 else if (TARGET_64BIT
7357 && !x86_64_immediate_operand (disp
, VOIDmode
))
7359 reason
= "displacement is out of range";
7364 /* Everything looks valid. */
7371 /* Return a unique alias set for the GOT. */
/* NOTE(review): truncated listing — the guard around the allocation
   (presumably `if (set == -1)') and the `return set;' are missing.  */
/* Return the unique alias set used for GOT references (see comment
   above); SET is allocated lazily on first use and cached in a
   function-local static.  */
7373 static alias_set_type
7374 ix86_GOT_alias_set (void)
7376 static alias_set_type set
= -1;
7378 set
= new_alias_set ();
7382 /* Return a legitimate reference for ORIG (an address) using the
7383 register REG. If REG is 0, a new pseudo is generated.
7385 There are two types of references that must be handled:
7387 1. Global data references must load the address from the GOT, via
7388 the PIC reg. An insn is emitted to do this load, and the reg is
7391 2. Static data references, constant pool addresses, and code labels
7392 compute the address as an offset from the GOT, whose base is in
7393 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7394 differentiate them from global data objects. The returned
7395 address is the PIC reg + an unspec constant.
7397 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7398 reg also appears in the address. */
7401 legitimize_pic_address (rtx orig
, rtx reg
)
7408 if (TARGET_MACHO
&& !TARGET_64BIT
)
7411 reg
= gen_reg_rtx (Pmode
);
7412 /* Use the generic Mach-O PIC machinery. */
7413 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7417 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7419 else if (TARGET_64BIT
7420 && ix86_cmodel
!= CM_SMALL_PIC
7421 && gotoff_operand (addr
, Pmode
))
7424 /* This symbol may be referenced via a displacement from the PIC
7425 base address (@GOTOFF). */
7427 if (reload_in_progress
)
7428 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7429 if (GET_CODE (addr
) == CONST
)
7430 addr
= XEXP (addr
, 0);
7431 if (GET_CODE (addr
) == PLUS
)
7433 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7435 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7438 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7439 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7441 tmpreg
= gen_reg_rtx (Pmode
);
7444 emit_move_insn (tmpreg
, new_rtx
);
7448 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7449 tmpreg
, 1, OPTAB_DIRECT
);
7452 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7454 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7456 /* This symbol may be referenced via a displacement from the PIC
7457 base address (@GOTOFF). */
7459 if (reload_in_progress
)
7460 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7461 if (GET_CODE (addr
) == CONST
)
7462 addr
= XEXP (addr
, 0);
7463 if (GET_CODE (addr
) == PLUS
)
7465 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7467 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7470 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7471 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7472 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7476 emit_move_insn (reg
, new_rtx
);
7480 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7481 /* We can't use @GOTOFF for text labels on VxWorks;
7482 see gotoff_operand. */
7483 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7485 /* Given that we've already handled dllimport variables separately
7486 in legitimize_address, and all other variables should satisfy
7487 legitimate_pic_address_disp_p, we should never arrive here. */
7488 gcc_assert (!TARGET_64BIT_MS_ABI
);
7490 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7492 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7493 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7494 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7495 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7498 reg
= gen_reg_rtx (Pmode
);
7499 /* Use directly gen_movsi, otherwise the address is loaded
7500 into register for CSE. We don't want to CSE this addresses,
7501 instead we CSE addresses from the GOT table, so skip this. */
7502 emit_insn (gen_movsi (reg
, new_rtx
));
7507 /* This symbol must be referenced via a load from the
7508 Global Offset Table (@GOT). */
7510 if (reload_in_progress
)
7511 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7512 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7513 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7515 new_rtx
= force_reg (Pmode
, new_rtx
);
7516 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7517 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7518 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7521 reg
= gen_reg_rtx (Pmode
);
7522 emit_move_insn (reg
, new_rtx
);
7528 if (CONST_INT_P (addr
)
7529 && !x86_64_immediate_operand (addr
, VOIDmode
))
7533 emit_move_insn (reg
, addr
);
7537 new_rtx
= force_reg (Pmode
, addr
);
7539 else if (GET_CODE (addr
) == CONST
)
7541 addr
= XEXP (addr
, 0);
7543 /* We must match stuff we generate before. Assume the only
7544 unspecs that can get here are ours. Not that we could do
7545 anything with them anyway.... */
7546 if (GET_CODE (addr
) == UNSPEC
7547 || (GET_CODE (addr
) == PLUS
7548 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7550 gcc_assert (GET_CODE (addr
) == PLUS
);
7552 if (GET_CODE (addr
) == PLUS
)
7554 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7556 /* Check first to see if this is a constant offset from a @GOTOFF
7557 symbol reference. */
7558 if (gotoff_operand (op0
, Pmode
)
7559 && CONST_INT_P (op1
))
7563 if (reload_in_progress
)
7564 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7565 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7567 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
7568 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7569 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7573 emit_move_insn (reg
, new_rtx
);
7579 if (INTVAL (op1
) < -16*1024*1024
7580 || INTVAL (op1
) >= 16*1024*1024)
7582 if (!x86_64_immediate_operand (op1
, Pmode
))
7583 op1
= force_reg (Pmode
, op1
);
7584 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7590 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7591 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
7592 base
== reg
? NULL_RTX
: reg
);
7594 if (CONST_INT_P (new_rtx
))
7595 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
7598 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
7600 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
7601 new_rtx
= XEXP (new_rtx
, 1);
7603 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
7611 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7614 get_thread_pointer (int to_reg
)
7618 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7622 reg
= gen_reg_rtx (Pmode
);
7623 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7624 insn
= emit_insn (insn
);
7629 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7630 false if we expect this to be used for a memory address and true if
7631 we expect to load the address into a register. */
7634 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7636 rtx dest
, base
, off
, pic
, tp
;
7641 case TLS_MODEL_GLOBAL_DYNAMIC
:
7642 dest
= gen_reg_rtx (Pmode
);
7643 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7645 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7647 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7650 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7651 insns
= get_insns ();
7654 CONST_OR_PURE_CALL_P (insns
) = 1;
7655 emit_libcall_block (insns
, dest
, rax
, x
);
7657 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7658 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7660 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7662 if (TARGET_GNU2_TLS
)
7664 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7666 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7670 case TLS_MODEL_LOCAL_DYNAMIC
:
7671 base
= gen_reg_rtx (Pmode
);
7672 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7674 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7676 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7679 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7680 insns
= get_insns ();
7683 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7684 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7685 CONST_OR_PURE_CALL_P (insns
) = 1;
7686 emit_libcall_block (insns
, base
, rax
, note
);
7688 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7689 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7691 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7693 if (TARGET_GNU2_TLS
)
7695 rtx x
= ix86_tls_module_base ();
7697 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7698 gen_rtx_MINUS (Pmode
, x
, tp
));
7701 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7702 off
= gen_rtx_CONST (Pmode
, off
);
7704 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7706 if (TARGET_GNU2_TLS
)
7708 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7710 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7715 case TLS_MODEL_INITIAL_EXEC
:
7719 type
= UNSPEC_GOTNTPOFF
;
7723 if (reload_in_progress
)
7724 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7725 pic
= pic_offset_table_rtx
;
7726 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7728 else if (!TARGET_ANY_GNU_TLS
)
7730 pic
= gen_reg_rtx (Pmode
);
7731 emit_insn (gen_set_got (pic
));
7732 type
= UNSPEC_GOTTPOFF
;
7737 type
= UNSPEC_INDNTPOFF
;
7740 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7741 off
= gen_rtx_CONST (Pmode
, off
);
7743 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7744 off
= gen_const_mem (Pmode
, off
);
7745 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7747 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7749 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7750 off
= force_reg (Pmode
, off
);
7751 return gen_rtx_PLUS (Pmode
, base
, off
);
7755 base
= get_thread_pointer (true);
7756 dest
= gen_reg_rtx (Pmode
);
7757 emit_insn (gen_subsi3 (dest
, base
, off
));
7761 case TLS_MODEL_LOCAL_EXEC
:
7762 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7763 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7764 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7765 off
= gen_rtx_CONST (Pmode
, off
);
7767 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7769 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7770 return gen_rtx_PLUS (Pmode
, base
, off
);
7774 base
= get_thread_pointer (true);
7775 dest
= gen_reg_rtx (Pmode
);
7776 emit_insn (gen_subsi3 (dest
, base
, off
));
7787 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7790 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7791 htab_t dllimport_map
;
7794 get_dllimport_decl (tree decl
)
7796 struct tree_map
*h
, in
;
7800 size_t namelen
, prefixlen
;
7806 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7808 in
.hash
= htab_hash_pointer (decl
);
7809 in
.base
.from
= decl
;
7810 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7811 h
= (struct tree_map
*) *loc
;
7815 *loc
= h
= GGC_NEW (struct tree_map
);
7817 h
->base
.from
= decl
;
7818 h
->to
= to
= build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7819 DECL_ARTIFICIAL (to
) = 1;
7820 DECL_IGNORED_P (to
) = 1;
7821 DECL_EXTERNAL (to
) = 1;
7822 TREE_READONLY (to
) = 1;
7824 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7825 name
= targetm
.strip_name_encoding (name
);
7826 prefix
= name
[0] == FASTCALL_PREFIX
? "*__imp_": "*__imp__";
7827 namelen
= strlen (name
);
7828 prefixlen
= strlen (prefix
);
7829 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
7830 memcpy (imp_name
, prefix
, prefixlen
);
7831 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
7833 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
7834 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
7835 SET_SYMBOL_REF_DECL (rtl
, to
);
7836 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7838 rtl
= gen_const_mem (Pmode
, rtl
);
7839 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7841 SET_DECL_RTL (to
, rtl
);
7846 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7847 true if we require the result be a register. */
7850 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
7855 gcc_assert (SYMBOL_REF_DECL (symbol
));
7856 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
7858 x
= DECL_RTL (imp_decl
);
7860 x
= force_reg (Pmode
, x
);
7864 /* Try machine-dependent ways of modifying an illegitimate address
7865 to be legitimate. If we find one, return the new, valid address.
7866 This macro is used in only one place: `memory_address' in explow.c.
7868 OLDX is the address as it was before break_out_memory_refs was called.
7869 In some cases it is useful to look at this to decide what needs to be done.
7871 MODE and WIN are passed so that this macro can use
7872 GO_IF_LEGITIMATE_ADDRESS.
7874 It is always safe for this macro to do nothing. It exists to recognize
7875 opportunities to optimize the output.
7877 For the 80386, we handle X+REG by loading X into a register R and
7878 using R+REG. R will go in a general reg and indexing will be used.
7879 However, if REG is a broken-out memory address or multiplication,
7880 nothing needs to be done because REG can certainly go in a general reg.
7882 When -fpic is used, special handling is needed for symbolic references.
7883 See comments by legitimize_pic_address in i386.c for details. */
7886 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7891 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7893 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
7894 if (GET_CODE (x
) == CONST
7895 && GET_CODE (XEXP (x
, 0)) == PLUS
7896 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7897 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7899 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
7900 (enum tls_model
) log
, false);
7901 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7904 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7906 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
7907 return legitimize_dllimport_symbol (x
, true);
7908 if (GET_CODE (x
) == CONST
7909 && GET_CODE (XEXP (x
, 0)) == PLUS
7910 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7911 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7913 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7914 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7918 if (flag_pic
&& SYMBOLIC_CONST (x
))
7919 return legitimize_pic_address (x
, 0);
7921 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7922 if (GET_CODE (x
) == ASHIFT
7923 && CONST_INT_P (XEXP (x
, 1))
7924 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7927 log
= INTVAL (XEXP (x
, 1));
7928 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7929 GEN_INT (1 << log
));
7932 if (GET_CODE (x
) == PLUS
)
7934 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7936 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7937 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7938 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7941 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7942 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7943 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7944 GEN_INT (1 << log
));
7947 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7948 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7949 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7952 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7953 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7954 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7955 GEN_INT (1 << log
));
7958 /* Put multiply first if it isn't already. */
7959 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7961 rtx tmp
= XEXP (x
, 0);
7962 XEXP (x
, 0) = XEXP (x
, 1);
7967 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7968 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7969 created by virtual register instantiation, register elimination, and
7970 similar optimizations. */
7971 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7974 x
= gen_rtx_PLUS (Pmode
,
7975 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7976 XEXP (XEXP (x
, 1), 0)),
7977 XEXP (XEXP (x
, 1), 1));
7981 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7982 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7983 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7984 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7985 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7986 && CONSTANT_P (XEXP (x
, 1)))
7989 rtx other
= NULL_RTX
;
7991 if (CONST_INT_P (XEXP (x
, 1)))
7993 constant
= XEXP (x
, 1);
7994 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7996 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7998 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7999 other
= XEXP (x
, 1);
8007 x
= gen_rtx_PLUS (Pmode
,
8008 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
8009 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
8010 plus_constant (other
, INTVAL (constant
)));
8014 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
8017 if (GET_CODE (XEXP (x
, 0)) == MULT
)
8020 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
8023 if (GET_CODE (XEXP (x
, 1)) == MULT
)
8026 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
8030 && REG_P (XEXP (x
, 1))
8031 && REG_P (XEXP (x
, 0)))
8034 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
8037 x
= legitimize_pic_address (x
, 0);
8040 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
8043 if (REG_P (XEXP (x
, 0)))
8045 rtx temp
= gen_reg_rtx (Pmode
);
8046 rtx val
= force_operand (XEXP (x
, 1), temp
);
8048 emit_move_insn (temp
, val
);
8054 else if (REG_P (XEXP (x
, 1)))
8056 rtx temp
= gen_reg_rtx (Pmode
);
8057 rtx val
= force_operand (XEXP (x
, 0), temp
);
8059 emit_move_insn (temp
, val
);
8069 /* Print an integer constant expression in assembler syntax. Addition
8070 and subtraction are the only arithmetic that may appear in these
8071 expressions. FILE is the stdio stream to write to, X is the rtx, and
8072 CODE is the operand print code from the output string. */
8075 output_pic_addr_const (FILE *file
, rtx x
, int code
)
8079 switch (GET_CODE (x
))
8082 gcc_assert (flag_pic
);
8087 if (! TARGET_MACHO
|| TARGET_64BIT
)
8088 output_addr_const (file
, x
);
8091 const char *name
= XSTR (x
, 0);
8093 /* Mark the decl as referenced so that cgraph will
8094 output the function. */
8095 if (SYMBOL_REF_DECL (x
))
8096 mark_decl_referenced (SYMBOL_REF_DECL (x
));
8099 if (MACHOPIC_INDIRECT
8100 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
8101 name
= machopic_indirection_name (x
, /*stub_p=*/true);
8103 assemble_name (file
, name
);
8105 if (!TARGET_MACHO
&& !TARGET_64BIT_MS_ABI
8106 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
8107 fputs ("@PLT", file
);
8114 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
8115 assemble_name (asm_out_file
, buf
);
8119 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8123 /* This used to output parentheses around the expression,
8124 but that does not work on the 386 (either ATT or BSD assembler). */
8125 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8129 if (GET_MODE (x
) == VOIDmode
)
8131 /* We can use %d if the number is <32 bits and positive. */
8132 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
8133 fprintf (file
, "0x%lx%08lx",
8134 (unsigned long) CONST_DOUBLE_HIGH (x
),
8135 (unsigned long) CONST_DOUBLE_LOW (x
));
8137 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
8140 /* We can't handle floating point constants;
8141 PRINT_OPERAND must handle them. */
8142 output_operand_lossage ("floating constant misused");
8146 /* Some assemblers need integer constants to appear first. */
8147 if (CONST_INT_P (XEXP (x
, 0)))
8149 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8151 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8155 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
8156 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8158 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8164 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
8165 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8167 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8169 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
8173 gcc_assert (XVECLEN (x
, 0) == 1);
8174 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
8175 switch (XINT (x
, 1))
8178 fputs ("@GOT", file
);
8181 fputs ("@GOTOFF", file
);
8184 fputs ("@PLTOFF", file
);
8186 case UNSPEC_GOTPCREL
:
8187 fputs ("@GOTPCREL(%rip)", file
);
8189 case UNSPEC_GOTTPOFF
:
8190 /* FIXME: This might be @TPOFF in Sun ld too. */
8191 fputs ("@GOTTPOFF", file
);
8194 fputs ("@TPOFF", file
);
8198 fputs ("@TPOFF", file
);
8200 fputs ("@NTPOFF", file
);
8203 fputs ("@DTPOFF", file
);
8205 case UNSPEC_GOTNTPOFF
:
8207 fputs ("@GOTTPOFF(%rip)", file
);
8209 fputs ("@GOTNTPOFF", file
);
8211 case UNSPEC_INDNTPOFF
:
8212 fputs ("@INDNTPOFF", file
);
8215 output_operand_lossage ("invalid UNSPEC as operand");
8221 output_operand_lossage ("invalid expression as operand");
8225 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8226 We need to emit DTP-relative relocations. */
8228 static void ATTRIBUTE_UNUSED
8229 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8231 fputs (ASM_LONG
, file
);
8232 output_addr_const (file
, x
);
8233 fputs ("@DTPOFF", file
);
8239 fputs (", 0", file
);
8246 /* In the name of slightly smaller debug output, and to cater to
8247 general assembler lossage, recognize PIC+GOTOFF and turn it back
8248 into a direct symbol reference.
8250 On Darwin, this is necessary to avoid a crash, because Darwin
8251 has a different PIC label for each routine but the DWARF debugging
8252 information is not associated with any particular routine, so it's
8253 necessary to remove references to the PIC label from RTL stored by
8254 the DWARF output code. */
8257 ix86_delegitimize_address (rtx orig_x
)
8260 /* reg_addend is NULL or a multiple of some register. */
8261 rtx reg_addend
= NULL_RTX
;
8262 /* const_addend is NULL or a const_int. */
8263 rtx const_addend
= NULL_RTX
;
8264 /* This is the result, or NULL. */
8265 rtx result
= NULL_RTX
;
8272 if (GET_CODE (x
) != CONST
8273 || GET_CODE (XEXP (x
, 0)) != UNSPEC
8274 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
8277 return XVECEXP (XEXP (x
, 0), 0, 0);
8280 if (GET_CODE (x
) != PLUS
8281 || GET_CODE (XEXP (x
, 1)) != CONST
)
8284 if (REG_P (XEXP (x
, 0))
8285 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8286 /* %ebx + GOT/GOTOFF */
8288 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
8290 /* %ebx + %reg * scale + GOT/GOTOFF */
8291 reg_addend
= XEXP (x
, 0);
8292 if (REG_P (XEXP (reg_addend
, 0))
8293 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8294 reg_addend
= XEXP (reg_addend
, 1);
8295 else if (REG_P (XEXP (reg_addend
, 1))
8296 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
8297 reg_addend
= XEXP (reg_addend
, 0);
8300 if (!REG_P (reg_addend
)
8301 && GET_CODE (reg_addend
) != MULT
8302 && GET_CODE (reg_addend
) != ASHIFT
)
8308 x
= XEXP (XEXP (x
, 1), 0);
8309 if (GET_CODE (x
) == PLUS
8310 && CONST_INT_P (XEXP (x
, 1)))
8312 const_addend
= XEXP (x
, 1);
8316 if (GET_CODE (x
) == UNSPEC
8317 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
8318 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
8319 result
= XVECEXP (x
, 0, 0);
8321 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
8323 result
= XEXP (x
, 0);
8329 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
8331 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
8335 /* If X is a machine specific address (i.e. a symbol or label being
8336 referenced as a displacement from the GOT implemented using an
8337 UNSPEC), then return the base term. Otherwise return X. */
8340 ix86_find_base_term (rtx x
)
8346 if (GET_CODE (x
) != CONST
)
8349 if (GET_CODE (term
) == PLUS
8350 && (CONST_INT_P (XEXP (term
, 1))
8351 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8352 term
= XEXP (term
, 0);
8353 if (GET_CODE (term
) != UNSPEC
8354 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8357 term
= XVECEXP (term
, 0, 0);
8359 if (GET_CODE (term
) != SYMBOL_REF
8360 && GET_CODE (term
) != LABEL_REF
)
8366 term
= ix86_delegitimize_address (x
);
8368 if (GET_CODE (term
) != SYMBOL_REF
8369 && GET_CODE (term
) != LABEL_REF
)
8376 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8381 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
8383 enum rtx_code second_code
, bypass_code
;
8384 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8385 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
8386 code
= ix86_fp_compare_code_to_integer (code
);
8390 code
= reverse_condition (code
);
8441 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
8445 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8446 Those same assemblers have the same but opposite lossage on cmov. */
8448 suffix
= fp
? "nbe" : "a";
8449 else if (mode
== CCCmode
)
8472 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8494 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8495 suffix
= fp
? "nb" : "ae";
8498 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8505 else if (mode
== CCCmode
)
8506 suffix
= fp
? "nb" : "ae";
8511 suffix
= fp
? "u" : "p";
8514 suffix
= fp
? "nu" : "np";
8519 fputs (suffix
, file
);
8522 /* Print the name of register X to FILE based on its machine mode and number.
8523 If CODE is 'w', pretend the mode is HImode.
8524 If CODE is 'b', pretend the mode is QImode.
8525 If CODE is 'k', pretend the mode is SImode.
8526 If CODE is 'q', pretend the mode is DImode.
8527 If CODE is 'h', pretend the reg is the 'high' byte register.
8528 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8531 print_reg (rtx x
, int code
, FILE *file
)
8533 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8534 && REGNO (x
) != FRAME_POINTER_REGNUM
8535 && REGNO (x
) != FLAGS_REG
8536 && REGNO (x
) != FPSR_REG
8537 && REGNO (x
) != FPCR_REG
);
8539 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8542 if (code
== 'w' || MMX_REG_P (x
))
8544 else if (code
== 'b')
8546 else if (code
== 'k')
8548 else if (code
== 'q')
8550 else if (code
== 'y')
8552 else if (code
== 'h')
8555 code
= GET_MODE_SIZE (GET_MODE (x
));
8557 /* Irritatingly, AMD extended registers use different naming convention
8558 from the normal registers. */
8559 if (REX_INT_REG_P (x
))
8561 gcc_assert (TARGET_64BIT
);
8565 error ("extended registers have no high halves");
8568 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8571 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8574 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8577 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8580 error ("unsupported operand size for extended register");
8588 if (STACK_TOP_P (x
))
8590 fputs ("st(0)", file
);
8597 if (! ANY_FP_REG_P (x
))
8598 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8603 fputs (hi_reg_name
[REGNO (x
)], file
);
8606 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8608 fputs (qi_reg_name
[REGNO (x
)], file
);
8611 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8613 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8620 /* Locate some local-dynamic symbol still in use by this function
8621 so that we can print its name in some tls_local_dynamic_base
8625 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8629 if (GET_CODE (x
) == SYMBOL_REF
8630 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8632 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8640 get_some_local_dynamic_name (void)
8644 if (cfun
->machine
->some_ld_name
)
8645 return cfun
->machine
->some_ld_name
;
8647 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8649 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8650 return cfun
->machine
->some_ld_name
;
8656 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8657 C -- print opcode suffix for set/cmov insn.
8658 c -- like C, but print reversed condition
8659 F,f -- likewise, but for floating-point.
8660 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8662 R -- print the prefix for register names.
8663 z -- print the opcode suffix for the size of the current operand.
8664 * -- print a star (in certain assembler syntax)
8665 A -- print an absolute memory reference.
8666 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8667 s -- print a shift double count, followed by the assemblers argument
8669 b -- print the QImode name of the register for the indicated operand.
8670 %b0 would print %al if operands[0] is reg 0.
8671 w -- likewise, print the HImode name of the register.
8672 k -- likewise, print the SImode name of the register.
8673 q -- likewise, print the DImode name of the register.
8674 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8675 y -- print "st(0)" instead of "st" as a register.
8676 D -- print condition for SSE cmp instruction.
8677 P -- if PIC, print an @PLT suffix.
8678 X -- don't print any sort of PIC '@' suffix for a symbol.
8679 & -- print some in-use local-dynamic symbol name.
8680 H -- print a memory address offset by 8; used for sse high-parts
8681 + -- print a branch hint as 'cs' or 'ds' prefix
8682 ; -- print a semicolon (after prefixes due to bug in older gas).
8686 print_operand (FILE *file
, rtx x
, int code
)
8693 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8698 assemble_name (file
, get_some_local_dynamic_name ());
8702 switch (ASSEMBLER_DIALECT
)
8709 /* Intel syntax. For absolute addresses, registers should not
8710 be surrounded by braces. */
8714 PRINT_OPERAND (file
, x
, 0);
8724 PRINT_OPERAND (file
, x
, 0);
8729 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8734 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8739 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8744 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8749 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8754 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8759 /* 387 opcodes don't get size suffixes if the operands are
8761 if (STACK_REG_P (x
))
8764 /* Likewise if using Intel opcodes. */
8765 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8768 /* This is the size of op from size of operand. */
8769 switch (GET_MODE_SIZE (GET_MODE (x
)))
8778 #ifdef HAVE_GAS_FILDS_FISTS
8788 if (GET_MODE (x
) == SFmode
)
8803 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8805 #ifdef GAS_MNEMONICS
8831 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8833 PRINT_OPERAND (file
, x
, 0);
8839 /* Little bit of braindamage here. The SSE compare instructions
8840 does use completely different names for the comparisons that the
8841 fp conditional moves. */
8842 switch (GET_CODE (x
))
8857 fputs ("unord", file
);
8861 fputs ("neq", file
);
8865 fputs ("nlt", file
);
8869 fputs ("nle", file
);
8872 fputs ("ord", file
);
8879 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8880 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8882 switch (GET_MODE (x
))
8884 case HImode
: putc ('w', file
); break;
8886 case SFmode
: putc ('l', file
); break;
8888 case DFmode
: putc ('q', file
); break;
8889 default: gcc_unreachable ();
8896 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8899 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8900 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8903 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8906 /* Like above, but reverse condition */
8908 /* Check to see if argument to %c is really a constant
8909 and not a condition code which needs to be reversed. */
8910 if (!COMPARISON_P (x
))
8912 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8915 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8918 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8919 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8922 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8926 /* It doesn't actually matter what mode we use here, as we're
8927 only going to use this for printing. */
8928 x
= adjust_address_nv (x
, DImode
, 8);
8935 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8938 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8941 int pred_val
= INTVAL (XEXP (x
, 0));
8943 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8944 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8946 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8947 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8949 /* Emit hints only in the case default branch prediction
8950 heuristics would fail. */
8951 if (taken
!= cputaken
)
8953 /* We use 3e (DS) prefix for taken branches and
8954 2e (CS) prefix for not taken branches. */
8956 fputs ("ds ; ", file
);
8958 fputs ("cs ; ", file
);
8967 fputs (" ; ", file
);
8974 output_operand_lossage ("invalid operand code '%c'", code
);
8979 print_reg (x
, code
, file
);
8983 /* No `byte ptr' prefix for call instructions. */
8984 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8987 switch (GET_MODE_SIZE (GET_MODE (x
)))
8989 case 1: size
= "BYTE"; break;
8990 case 2: size
= "WORD"; break;
8991 case 4: size
= "DWORD"; break;
8992 case 8: size
= "QWORD"; break;
8993 case 12: size
= "XWORD"; break;
8994 case 16: size
= "XMMWORD"; break;
8999 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9002 else if (code
== 'w')
9004 else if (code
== 'k')
9008 fputs (" PTR ", file
);
9012 /* Avoid (%rip) for call operands. */
9013 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
9014 && !CONST_INT_P (x
))
9015 output_addr_const (file
, x
);
9016 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
9017 output_operand_lossage ("invalid constraints for operand");
9022 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
9027 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9028 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
9030 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9032 fprintf (file
, "0x%08lx", l
);
9035 /* These float cases don't actually occur as immediate operands. */
9036 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
9040 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9041 fprintf (file
, "%s", dstr
);
9044 else if (GET_CODE (x
) == CONST_DOUBLE
9045 && GET_MODE (x
) == XFmode
)
9049 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
9050 fprintf (file
, "%s", dstr
);
9055 /* We have patterns that allow zero sets of memory, for instance.
9056 In 64-bit mode, we should probably support all 8-byte vectors,
9057 since we can in fact encode that into an immediate. */
9058 if (GET_CODE (x
) == CONST_VECTOR
)
9060 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
9066 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
9068 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9071 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
9072 || GET_CODE (x
) == LABEL_REF
)
9074 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9077 fputs ("OFFSET FLAT:", file
);
9080 if (CONST_INT_P (x
))
9081 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
9083 output_pic_addr_const (file
, x
, code
);
9085 output_addr_const (file
, x
);
9089 /* Print a memory operand whose address is ADDR. */
9092 print_operand_address (FILE *file
, rtx addr
)
9094 struct ix86_address parts
;
9095 rtx base
, index
, disp
;
9097 int ok
= ix86_decompose_address (addr
, &parts
);
9102 index
= parts
.index
;
9104 scale
= parts
.scale
;
9112 if (USER_LABEL_PREFIX
[0] == 0)
9114 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
9120 if (!base
&& !index
)
9122 /* Displacement only requires special attention. */
9124 if (CONST_INT_P (disp
))
9126 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
9128 if (USER_LABEL_PREFIX
[0] == 0)
9130 fputs ("ds:", file
);
9132 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
9135 output_pic_addr_const (file
, disp
, 0);
9137 output_addr_const (file
, disp
);
9139 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9142 if (GET_CODE (disp
) == CONST
9143 && GET_CODE (XEXP (disp
, 0)) == PLUS
9144 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9145 disp
= XEXP (XEXP (disp
, 0), 0);
9146 if (GET_CODE (disp
) == LABEL_REF
9147 || (GET_CODE (disp
) == SYMBOL_REF
9148 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
9149 fputs ("(%rip)", file
);
9154 if (ASSEMBLER_DIALECT
== ASM_ATT
)
9159 output_pic_addr_const (file
, disp
, 0);
9160 else if (GET_CODE (disp
) == LABEL_REF
)
9161 output_asm_label (disp
);
9163 output_addr_const (file
, disp
);
9168 print_reg (base
, 0, file
);
9172 print_reg (index
, 0, file
);
9174 fprintf (file
, ",%d", scale
);
9180 rtx offset
= NULL_RTX
;
9184 /* Pull out the offset of a symbol; print any symbol itself. */
9185 if (GET_CODE (disp
) == CONST
9186 && GET_CODE (XEXP (disp
, 0)) == PLUS
9187 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9189 offset
= XEXP (XEXP (disp
, 0), 1);
9190 disp
= gen_rtx_CONST (VOIDmode
,
9191 XEXP (XEXP (disp
, 0), 0));
9195 output_pic_addr_const (file
, disp
, 0);
9196 else if (GET_CODE (disp
) == LABEL_REF
)
9197 output_asm_label (disp
);
9198 else if (CONST_INT_P (disp
))
9201 output_addr_const (file
, disp
);
9207 print_reg (base
, 0, file
);
9210 if (INTVAL (offset
) >= 0)
9212 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9216 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9223 print_reg (index
, 0, file
);
9225 fprintf (file
, "*%d", scale
);
9233 output_addr_const_extra (FILE *file
, rtx x
)
9237 if (GET_CODE (x
) != UNSPEC
)
9240 op
= XVECEXP (x
, 0, 0);
9241 switch (XINT (x
, 1))
9243 case UNSPEC_GOTTPOFF
:
9244 output_addr_const (file
, op
);
9245 /* FIXME: This might be @TPOFF in Sun ld. */
9246 fputs ("@GOTTPOFF", file
);
9249 output_addr_const (file
, op
);
9250 fputs ("@TPOFF", file
);
9253 output_addr_const (file
, op
);
9255 fputs ("@TPOFF", file
);
9257 fputs ("@NTPOFF", file
);
9260 output_addr_const (file
, op
);
9261 fputs ("@DTPOFF", file
);
9263 case UNSPEC_GOTNTPOFF
:
9264 output_addr_const (file
, op
);
9266 fputs ("@GOTTPOFF(%rip)", file
);
9268 fputs ("@GOTNTPOFF", file
);
9270 case UNSPEC_INDNTPOFF
:
9271 output_addr_const (file
, op
);
9272 fputs ("@INDNTPOFF", file
);
9282 /* Split one or more DImode RTL references into pairs of SImode
9283 references. The RTL can be REG, offsettable MEM, integer constant, or
9284 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9285 split and "num" is its length. lo_half and hi_half are output arrays
9286 that parallel "operands". */
9289 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9293 rtx op
= operands
[num
];
9295 /* simplify_subreg refuse to split volatile memory addresses,
9296 but we still have to handle it. */
9299 lo_half
[num
] = adjust_address (op
, SImode
, 0);
9300 hi_half
[num
] = adjust_address (op
, SImode
, 4);
9304 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
9305 GET_MODE (op
) == VOIDmode
9306 ? DImode
: GET_MODE (op
), 0);
9307 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
9308 GET_MODE (op
) == VOIDmode
9309 ? DImode
: GET_MODE (op
), 4);
9313 /* Split one or more TImode RTL references into pairs of DImode
9314 references. The RTL can be REG, offsettable MEM, integer constant, or
9315 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9316 split and "num" is its length. lo_half and hi_half are output arrays
9317 that parallel "operands". */
9320 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9324 rtx op
= operands
[num
];
9326 /* simplify_subreg refuse to split volatile memory addresses, but we
9327 still have to handle it. */
9330 lo_half
[num
] = adjust_address (op
, DImode
, 0);
9331 hi_half
[num
] = adjust_address (op
, DImode
, 8);
9335 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
9336 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
9341 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9342 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9343 is the expression of the binary operation. The output may either be
9344 emitted here, or returned to the caller, like all output_* functions.
9346 There is no guarantee that the operands are the same mode, as they
9347 might be within FLOAT or FLOAT_EXTEND expressions. */
9349 #ifndef SYSV386_COMPAT
9350 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9351 wants to fix the assemblers because that causes incompatibility
9352 with gcc. No-one wants to fix gcc because that causes
9353 incompatibility with assemblers... You can use the option of
9354 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9355 #define SYSV386_COMPAT 1
9359 output_387_binary_op (rtx insn
, rtx
*operands
)
9361 static char buf
[30];
9364 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
9366 #ifdef ENABLE_CHECKING
9367 /* Even if we do not want to check the inputs, this documents input
9368 constraints. Which helps in understanding the following code. */
9369 if (STACK_REG_P (operands
[0])
9370 && ((REG_P (operands
[1])
9371 && REGNO (operands
[0]) == REGNO (operands
[1])
9372 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
9373 || (REG_P (operands
[2])
9374 && REGNO (operands
[0]) == REGNO (operands
[2])
9375 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
9376 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
9379 gcc_assert (is_sse
);
9382 switch (GET_CODE (operands
[3]))
9385 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9386 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9394 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9395 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9403 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9404 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9412 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9413 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9427 if (GET_MODE (operands
[0]) == SFmode
)
9428 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9430 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9435 switch (GET_CODE (operands
[3]))
9439 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9441 rtx temp
= operands
[2];
9442 operands
[2] = operands
[1];
9446 /* know operands[0] == operands[1]. */
9448 if (MEM_P (operands
[2]))
9454 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9456 if (STACK_TOP_P (operands
[0]))
9457 /* How is it that we are storing to a dead operand[2]?
9458 Well, presumably operands[1] is dead too. We can't
9459 store the result to st(0) as st(0) gets popped on this
9460 instruction. Instead store to operands[2] (which I
9461 think has to be st(1)). st(1) will be popped later.
9462 gcc <= 2.8.1 didn't have this check and generated
9463 assembly code that the Unixware assembler rejected. */
9464 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9466 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9470 if (STACK_TOP_P (operands
[0]))
9471 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9473 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9478 if (MEM_P (operands
[1]))
9484 if (MEM_P (operands
[2]))
9490 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9493 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9494 derived assemblers, confusingly reverse the direction of
9495 the operation for fsub{r} and fdiv{r} when the
9496 destination register is not st(0). The Intel assembler
9497 doesn't have this brain damage. Read !SYSV386_COMPAT to
9498 figure out what the hardware really does. */
9499 if (STACK_TOP_P (operands
[0]))
9500 p
= "{p\t%0, %2|rp\t%2, %0}";
9502 p
= "{rp\t%2, %0|p\t%0, %2}";
9504 if (STACK_TOP_P (operands
[0]))
9505 /* As above for fmul/fadd, we can't store to st(0). */
9506 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9508 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9513 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9516 if (STACK_TOP_P (operands
[0]))
9517 p
= "{rp\t%0, %1|p\t%1, %0}";
9519 p
= "{p\t%1, %0|rp\t%0, %1}";
9521 if (STACK_TOP_P (operands
[0]))
9522 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9524 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9529 if (STACK_TOP_P (operands
[0]))
9531 if (STACK_TOP_P (operands
[1]))
9532 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9534 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9537 else if (STACK_TOP_P (operands
[1]))
9540 p
= "{\t%1, %0|r\t%0, %1}";
9542 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9548 p
= "{r\t%2, %0|\t%0, %2}";
9550 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9563 /* Return needed mode for entity in optimize_mode_switching pass. */
9566 ix86_mode_needed (int entity
, rtx insn
)
9568 enum attr_i387_cw mode
;
9570 /* The mode UNINITIALIZED is used to store control word after a
9571 function call or ASM pattern. The mode ANY specify that function
9572 has no requirements on the control word and make no changes in the
9573 bits we are interested in. */
9576 || (NONJUMP_INSN_P (insn
)
9577 && (asm_noperands (PATTERN (insn
)) >= 0
9578 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9579 return I387_CW_UNINITIALIZED
;
9581 if (recog_memoized (insn
) < 0)
9584 mode
= get_attr_i387_cw (insn
);
9589 if (mode
== I387_CW_TRUNC
)
9594 if (mode
== I387_CW_FLOOR
)
9599 if (mode
== I387_CW_CEIL
)
9604 if (mode
== I387_CW_MASK_PM
)
9615 /* Output code to initialize control word copies used by trunc?f?i and
9616 rounding patterns. CURRENT_MODE is set to current control word,
9617 while NEW_MODE is set to new control word. */
9620 emit_i387_cw_initialization (int mode
)
9622 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9625 enum ix86_stack_slot slot
;
9627 rtx reg
= gen_reg_rtx (HImode
);
9629 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9630 emit_move_insn (reg
, copy_rtx (stored_mode
));
9632 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9637 /* round toward zero (truncate) */
9638 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9639 slot
= SLOT_CW_TRUNC
;
9643 /* round down toward -oo */
9644 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9645 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9646 slot
= SLOT_CW_FLOOR
;
9650 /* round up toward +oo */
9651 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9652 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9653 slot
= SLOT_CW_CEIL
;
9656 case I387_CW_MASK_PM
:
9657 /* mask precision exception for nearbyint() */
9658 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9659 slot
= SLOT_CW_MASK_PM
;
9671 /* round toward zero (truncate) */
9672 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9673 slot
= SLOT_CW_TRUNC
;
9677 /* round down toward -oo */
9678 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9679 slot
= SLOT_CW_FLOOR
;
9683 /* round up toward +oo */
9684 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9685 slot
= SLOT_CW_CEIL
;
9688 case I387_CW_MASK_PM
:
9689 /* mask precision exception for nearbyint() */
9690 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9691 slot
= SLOT_CW_MASK_PM
;
9699 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9701 new_mode
= assign_386_stack_local (HImode
, slot
);
9702 emit_move_insn (new_mode
, reg
);
9705 /* Output code for INSN to convert a float to a signed int. OPERANDS
9706 are the insn operands. The output may be [HSD]Imode and the input
9707 operand may be [SDX]Fmode. */
9710 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9712 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9713 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9714 int round_mode
= get_attr_i387_cw (insn
);
9716 /* Jump through a hoop or two for DImode, since the hardware has no
9717 non-popping instruction. We used to do this a different way, but
9718 that was somewhat fragile and broke with post-reload splitters. */
9719 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9720 output_asm_insn ("fld\t%y1", operands
);
9722 gcc_assert (STACK_TOP_P (operands
[1]));
9723 gcc_assert (MEM_P (operands
[0]));
9724 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9727 output_asm_insn ("fisttp%z0\t%0", operands
);
9730 if (round_mode
!= I387_CW_ANY
)
9731 output_asm_insn ("fldcw\t%3", operands
);
9732 if (stack_top_dies
|| dimode_p
)
9733 output_asm_insn ("fistp%z0\t%0", operands
);
9735 output_asm_insn ("fist%z0\t%0", operands
);
9736 if (round_mode
!= I387_CW_ANY
)
9737 output_asm_insn ("fldcw\t%2", operands
);
9743 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9744 have the values zero or one, indicates the ffreep insn's operand
9745 from the OPERANDS array. */
9748 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9750 if (TARGET_USE_FFREEP
)
9751 #if HAVE_AS_IX86_FFREEP
9752 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9755 static char retval
[] = ".word\t0xc_df";
9756 int regno
= REGNO (operands
[opno
]);
9758 gcc_assert (FP_REGNO_P (regno
));
9760 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9765 return opno
? "fstp\t%y1" : "fstp\t%y0";
9769 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9770 should be used. UNORDERED_P is true when fucom should be used. */
9773 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9776 rtx cmp_op0
, cmp_op1
;
9777 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9781 cmp_op0
= operands
[0];
9782 cmp_op1
= operands
[1];
9786 cmp_op0
= operands
[1];
9787 cmp_op1
= operands
[2];
9792 if (GET_MODE (operands
[0]) == SFmode
)
9794 return "ucomiss\t{%1, %0|%0, %1}";
9796 return "comiss\t{%1, %0|%0, %1}";
9799 return "ucomisd\t{%1, %0|%0, %1}";
9801 return "comisd\t{%1, %0|%0, %1}";
9804 gcc_assert (STACK_TOP_P (cmp_op0
));
9806 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9808 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9812 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9813 return output_387_ffreep (operands
, 1);
9816 return "ftst\n\tfnstsw\t%0";
9819 if (STACK_REG_P (cmp_op1
)
9821 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9822 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9824 /* If both the top of the 387 stack dies, and the other operand
9825 is also a stack register that dies, then this must be a
9826 `fcompp' float compare */
9830 /* There is no double popping fcomi variant. Fortunately,
9831 eflags is immune from the fstp's cc clobbering. */
9833 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9835 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9836 return output_387_ffreep (operands
, 0);
9841 return "fucompp\n\tfnstsw\t%0";
9843 return "fcompp\n\tfnstsw\t%0";
9848 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9850 static const char * const alt
[16] =
9852 "fcom%z2\t%y2\n\tfnstsw\t%0",
9853 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9854 "fucom%z2\t%y2\n\tfnstsw\t%0",
9855 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9857 "ficom%z2\t%y2\n\tfnstsw\t%0",
9858 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9862 "fcomi\t{%y1, %0|%0, %y1}",
9863 "fcomip\t{%y1, %0|%0, %y1}",
9864 "fucomi\t{%y1, %0|%0, %y1}",
9865 "fucomip\t{%y1, %0|%0, %y1}",
9876 mask
= eflags_p
<< 3;
9877 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9878 mask
|= unordered_p
<< 1;
9879 mask
|= stack_top_dies
;
9881 gcc_assert (mask
< 16);
9890 ix86_output_addr_vec_elt (FILE *file
, int value
)
9892 const char *directive
= ASM_LONG
;
9896 directive
= ASM_QUAD
;
9898 gcc_assert (!TARGET_64BIT
);
9901 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9905 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9907 const char *directive
= ASM_LONG
;
9910 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9911 directive
= ASM_QUAD
;
9913 gcc_assert (!TARGET_64BIT
);
9915 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9916 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9917 fprintf (file
, "%s%s%d-%s%d\n",
9918 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9919 else if (HAVE_AS_GOTOFF_IN_DATA
)
9920 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9922 else if (TARGET_MACHO
)
9924 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9925 machopic_output_function_base_name (file
);
9926 fprintf(file
, "\n");
9930 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9931 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9934 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9938 ix86_expand_clear (rtx dest
)
9942 /* We play register width games, which are only valid after reload. */
9943 gcc_assert (reload_completed
);
9945 /* Avoid HImode and its attendant prefix byte. */
9946 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9947 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9948 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9950 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9951 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9953 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9954 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9960 /* X is an unchanging MEM. If it is a constant pool reference, return
9961 the constant pool rtx, else NULL. */
9964 maybe_get_pool_constant (rtx x
)
9966 x
= ix86_delegitimize_address (XEXP (x
, 0));
9968 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9969 return get_pool_constant (x
);
9975 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9977 int strict
= (reload_in_progress
|| reload_completed
);
9979 enum tls_model model
;
9984 if (GET_CODE (op1
) == SYMBOL_REF
)
9986 model
= SYMBOL_REF_TLS_MODEL (op1
);
9989 op1
= legitimize_tls_address (op1
, model
, true);
9990 op1
= force_operand (op1
, op0
);
9994 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9995 && SYMBOL_REF_DLLIMPORT_P (op1
))
9996 op1
= legitimize_dllimport_symbol (op1
, false);
9998 else if (GET_CODE (op1
) == CONST
9999 && GET_CODE (XEXP (op1
, 0)) == PLUS
10000 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
10002 rtx addend
= XEXP (XEXP (op1
, 0), 1);
10003 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
10006 model
= SYMBOL_REF_TLS_MODEL (symbol
);
10008 tmp
= legitimize_tls_address (symbol
, model
, true);
10009 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10010 && SYMBOL_REF_DLLIMPORT_P (symbol
))
10011 tmp
= legitimize_dllimport_symbol (symbol
, true);
10015 tmp
= force_operand (tmp
, NULL
);
10016 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
10017 op0
, 1, OPTAB_DIRECT
);
10023 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
10025 if (TARGET_MACHO
&& !TARGET_64BIT
)
10030 rtx temp
= ((reload_in_progress
10031 || ((op0
&& REG_P (op0
))
10033 ? op0
: gen_reg_rtx (Pmode
));
10034 op1
= machopic_indirect_data_reference (op1
, temp
);
10035 op1
= machopic_legitimize_pic_address (op1
, mode
,
10036 temp
== op1
? 0 : temp
);
10038 else if (MACHOPIC_INDIRECT
)
10039 op1
= machopic_indirect_data_reference (op1
, 0);
10047 op1
= force_reg (Pmode
, op1
);
10048 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
10050 rtx reg
= !can_create_pseudo_p () ? op0
: NULL_RTX
;
10051 op1
= legitimize_pic_address (op1
, reg
);
10060 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
10061 || !push_operand (op0
, mode
))
10063 op1
= force_reg (mode
, op1
);
10065 if (push_operand (op0
, mode
)
10066 && ! general_no_elim_operand (op1
, mode
))
10067 op1
= copy_to_mode_reg (mode
, op1
);
10069 /* Force large constants in 64bit compilation into register
10070 to get them CSEed. */
10071 if (TARGET_64BIT
&& mode
== DImode
10072 && immediate_operand (op1
, mode
)
10073 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
10074 && !register_operand (op0
, mode
)
10075 && optimize
&& !reload_completed
&& !reload_in_progress
)
10076 op1
= copy_to_mode_reg (mode
, op1
);
10078 if (FLOAT_MODE_P (mode
))
10080 /* If we are loading a floating point constant to a register,
10081 force the value to memory now, since we'll get better code
10082 out the back end. */
10086 else if (GET_CODE (op1
) == CONST_DOUBLE
)
10088 op1
= validize_mem (force_const_mem (mode
, op1
));
10089 if (!register_operand (op0
, mode
))
10091 rtx temp
= gen_reg_rtx (mode
);
10092 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
10093 emit_move_insn (op0
, temp
);
10100 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
10104 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
10106 rtx op0
= operands
[0], op1
= operands
[1];
10107 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
10109 /* Force constants other than zero into memory. We do not know how
10110 the instructions used to build constants modify the upper 64 bits
10111 of the register, once we have that information we may be able
10112 to handle some of them more efficiently. */
10113 if ((reload_in_progress
| reload_completed
) == 0
10114 && register_operand (op0
, mode
)
10115 && (CONSTANT_P (op1
)
10116 || (GET_CODE (op1
) == SUBREG
10117 && CONSTANT_P (SUBREG_REG (op1
))))
10118 && standard_sse_constant_p (op1
) <= 0)
10119 op1
= validize_mem (force_const_mem (mode
, op1
));
10121 /* TDmode values are passed as TImode on the stack. Timode values
10122 are moved via xmm registers, and moving them to stack can result in
10123 unaligned memory access. Use ix86_expand_vector_move_misalign()
10124 if memory operand is not aligned correctly. */
10125 if (can_create_pseudo_p ()
10126 && (mode
== TImode
) && !TARGET_64BIT
10127 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
10128 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
10132 /* ix86_expand_vector_move_misalign() does not like constants ... */
10133 if (CONSTANT_P (op1
)
10134 || (GET_CODE (op1
) == SUBREG
10135 && CONSTANT_P (SUBREG_REG (op1
))))
10136 op1
= validize_mem (force_const_mem (mode
, op1
));
10138 /* ... nor both arguments in memory. */
10139 if (!register_operand (op0
, mode
)
10140 && !register_operand (op1
, mode
))
10141 op1
= force_reg (mode
, op1
);
10143 tmp
[0] = op0
; tmp
[1] = op1
;
10144 ix86_expand_vector_move_misalign (mode
, tmp
);
10148 /* Make operand1 a register if it isn't already. */
10149 if (can_create_pseudo_p ()
10150 && !register_operand (op0
, mode
)
10151 && !register_operand (op1
, mode
))
10153 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
10157 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
10160 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10161 straight to ix86_expand_vector_move. */
10162 /* Code generation for scalar reg-reg moves of single and double precision data:
10163 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10167 if (x86_sse_partial_reg_dependency == true)
10172 Code generation for scalar loads of double precision data:
10173 if (x86_sse_split_regs == true)
10174 movlpd mem, reg (gas syntax)
10178 Code generation for unaligned packed loads of single precision data
10179 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10180 if (x86_sse_unaligned_move_optimal)
10183 if (x86_sse_partial_reg_dependency == true)
10195 Code generation for unaligned packed loads of double precision data
10196 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10197 if (x86_sse_unaligned_move_optimal)
10200 if (x86_sse_split_regs == true)
10213 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
10222 /* If we're optimizing for size, movups is the smallest. */
10225 op0
= gen_lowpart (V4SFmode
, op0
);
10226 op1
= gen_lowpart (V4SFmode
, op1
);
10227 emit_insn (gen_sse_movups (op0
, op1
));
10231 /* ??? If we have typed data, then it would appear that using
10232 movdqu is the only way to get unaligned data loaded with
10234 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10236 op0
= gen_lowpart (V16QImode
, op0
);
10237 op1
= gen_lowpart (V16QImode
, op1
);
10238 emit_insn (gen_sse2_movdqu (op0
, op1
));
10242 if (TARGET_SSE2
&& mode
== V2DFmode
)
10246 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10248 op0
= gen_lowpart (V2DFmode
, op0
);
10249 op1
= gen_lowpart (V2DFmode
, op1
);
10250 emit_insn (gen_sse2_movupd (op0
, op1
));
10254 /* When SSE registers are split into halves, we can avoid
10255 writing to the top half twice. */
10256 if (TARGET_SSE_SPLIT_REGS
)
10258 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10263 /* ??? Not sure about the best option for the Intel chips.
10264 The following would seem to satisfy; the register is
10265 entirely cleared, breaking the dependency chain. We
10266 then store to the upper half, with a dependency depth
10267 of one. A rumor has it that Intel recommends two movsd
10268 followed by an unpacklpd, but this is unconfirmed. And
10269 given that the dependency depth of the unpacklpd would
10270 still be one, I'm not sure why this would be better. */
10271 zero
= CONST0_RTX (V2DFmode
);
10274 m
= adjust_address (op1
, DFmode
, 0);
10275 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
10276 m
= adjust_address (op1
, DFmode
, 8);
10277 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
10281 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10283 op0
= gen_lowpart (V4SFmode
, op0
);
10284 op1
= gen_lowpart (V4SFmode
, op1
);
10285 emit_insn (gen_sse_movups (op0
, op1
));
10289 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
10290 emit_move_insn (op0
, CONST0_RTX (mode
));
10292 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10294 if (mode
!= V4SFmode
)
10295 op0
= gen_lowpart (V4SFmode
, op0
);
10296 m
= adjust_address (op1
, V2SFmode
, 0);
10297 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
10298 m
= adjust_address (op1
, V2SFmode
, 8);
10299 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
10302 else if (MEM_P (op0
))
10304 /* If we're optimizing for size, movups is the smallest. */
10307 op0
= gen_lowpart (V4SFmode
, op0
);
10308 op1
= gen_lowpart (V4SFmode
, op1
);
10309 emit_insn (gen_sse_movups (op0
, op1
));
10313 /* ??? Similar to above, only less clear because of quote
10314 typeless stores unquote. */
10315 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
10316 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10318 op0
= gen_lowpart (V16QImode
, op0
);
10319 op1
= gen_lowpart (V16QImode
, op1
);
10320 emit_insn (gen_sse2_movdqu (op0
, op1
));
10324 if (TARGET_SSE2
&& mode
== V2DFmode
)
10326 m
= adjust_address (op0
, DFmode
, 0);
10327 emit_insn (gen_sse2_storelpd (m
, op1
));
10328 m
= adjust_address (op0
, DFmode
, 8);
10329 emit_insn (gen_sse2_storehpd (m
, op1
));
10333 if (mode
!= V4SFmode
)
10334 op1
= gen_lowpart (V4SFmode
, op1
);
10335 m
= adjust_address (op0
, V2SFmode
, 0);
10336 emit_insn (gen_sse_storelps (m
, op1
));
10337 m
= adjust_address (op0
, V2SFmode
, 8);
10338 emit_insn (gen_sse_storehps (m
, op1
));
10342 gcc_unreachable ();
10345 /* Expand a push in MODE. This is some mode for which we do not support
10346 proper push instructions, at least from the registers that we expect
10347 the value to live in. */
10350 ix86_expand_push (enum machine_mode mode
, rtx x
)
10354 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
10355 GEN_INT (-GET_MODE_SIZE (mode
)),
10356 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
10357 if (tmp
!= stack_pointer_rtx
)
10358 emit_move_insn (stack_pointer_rtx
, tmp
);
10360 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
10361 emit_move_insn (tmp
, x
);
10364 /* Helper function of ix86_fixup_binary_operands to canonicalize
10365 operand order. Returns true if the operands should be swapped. */
10368 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
10371 rtx dst
= operands
[0];
10372 rtx src1
= operands
[1];
10373 rtx src2
= operands
[2];
10375 /* If the operation is not commutative, we can't do anything. */
10376 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
10379 /* Highest priority is that src1 should match dst. */
10380 if (rtx_equal_p (dst
, src1
))
10382 if (rtx_equal_p (dst
, src2
))
10385 /* Next highest priority is that immediate constants come second. */
10386 if (immediate_operand (src2
, mode
))
10388 if (immediate_operand (src1
, mode
))
10391 /* Lowest priority is that memory references should come second. */
10401 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10402 destination to use for the operation. If different from the true
10403 destination in operands[0], a copy operation will be required. */
10406 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
10409 rtx dst
= operands
[0];
10410 rtx src1
= operands
[1];
10411 rtx src2
= operands
[2];
10413 /* Canonicalize operand order. */
10414 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10421 /* Both source operands cannot be in memory. */
10422 if (MEM_P (src1
) && MEM_P (src2
))
10424 /* Optimization: Only read from memory once. */
10425 if (rtx_equal_p (src1
, src2
))
10427 src2
= force_reg (mode
, src2
);
10431 src2
= force_reg (mode
, src2
);
10434 /* If the destination is memory, and we do not have matching source
10435 operands, do things in registers. */
10436 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10437 dst
= gen_reg_rtx (mode
);
10439 /* Source 1 cannot be a constant. */
10440 if (CONSTANT_P (src1
))
10441 src1
= force_reg (mode
, src1
);
10443 /* Source 1 cannot be a non-matching memory. */
10444 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10445 src1
= force_reg (mode
, src1
);
10447 operands
[1] = src1
;
10448 operands
[2] = src2
;
10452 /* Similarly, but assume that the destination has already been
10453 set up properly. */
10456 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10457 enum machine_mode mode
, rtx operands
[])
10459 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10460 gcc_assert (dst
== operands
[0]);
10463 /* Attempt to expand a binary operator. Make the expansion closer to the
10464 actual machine, then just general_operand, which will allow 3 separate
10465 memory references (one output, two input) in a single insn. */
10468 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10471 rtx src1
, src2
, dst
, op
, clob
;
10473 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10474 src1
= operands
[1];
10475 src2
= operands
[2];
10477 /* Emit the instruction. */
10479 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10480 if (reload_in_progress
)
10482 /* Reload doesn't know about the flags register, and doesn't know that
10483 it doesn't want to clobber it. We can only do this with PLUS. */
10484 gcc_assert (code
== PLUS
);
10489 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10490 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10493 /* Fix up the destination if needed. */
10494 if (dst
!= operands
[0])
10495 emit_move_insn (operands
[0], dst
);
10498 /* Return TRUE or FALSE depending on whether the binary operator meets the
10499 appropriate constraints. */
10502 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10505 rtx dst
= operands
[0];
10506 rtx src1
= operands
[1];
10507 rtx src2
= operands
[2];
10509 /* Both source operands cannot be in memory. */
10510 if (MEM_P (src1
) && MEM_P (src2
))
10513 /* Canonicalize operand order for commutative operators. */
10514 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10521 /* If the destination is memory, we must have a matching source operand. */
10522 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10525 /* Source 1 cannot be a constant. */
10526 if (CONSTANT_P (src1
))
10529 /* Source 1 cannot be a non-matching memory. */
10530 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10536 /* Attempt to expand a unary operator. Make the expansion closer to the
10537 actual machine, then just general_operand, which will allow 2 separate
10538 memory references (one output, one input) in a single insn. */
10541 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10544 int matching_memory
;
10545 rtx src
, dst
, op
, clob
;
10550 /* If the destination is memory, and we do not have matching source
10551 operands, do things in registers. */
10552 matching_memory
= 0;
10555 if (rtx_equal_p (dst
, src
))
10556 matching_memory
= 1;
10558 dst
= gen_reg_rtx (mode
);
10561 /* When source operand is memory, destination must match. */
10562 if (MEM_P (src
) && !matching_memory
)
10563 src
= force_reg (mode
, src
);
10565 /* Emit the instruction. */
10567 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10568 if (reload_in_progress
|| code
== NOT
)
10570 /* Reload doesn't know about the flags register, and doesn't know that
10571 it doesn't want to clobber it. */
10572 gcc_assert (code
== NOT
);
10577 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10578 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10581 /* Fix up the destination if needed. */
10582 if (dst
!= operands
[0])
10583 emit_move_insn (operands
[0], dst
);
10586 /* Return TRUE or FALSE depending on whether the unary operator meets the
10587 appropriate constraints. */
10590 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10591 enum machine_mode mode ATTRIBUTE_UNUSED
,
10592 rtx operands
[2] ATTRIBUTE_UNUSED
)
10594 /* If one of operands is memory, source and destination must match. */
10595 if ((MEM_P (operands
[0])
10596 || MEM_P (operands
[1]))
10597 && ! rtx_equal_p (operands
[0], operands
[1]))
10602 /* Post-reload splitter for converting an SF or DFmode value in an
10603 SSE register into an unsigned SImode. */
10606 ix86_split_convert_uns_si_sse (rtx operands
[])
10608 enum machine_mode vecmode
;
10609 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10611 large
= operands
[1];
10612 zero_or_two31
= operands
[2];
10613 input
= operands
[3];
10614 two31
= operands
[4];
10615 vecmode
= GET_MODE (large
);
10616 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10618 /* Load up the value into the low element. We must ensure that the other
10619 elements are valid floats -- zero is the easiest such value. */
10622 if (vecmode
== V4SFmode
)
10623 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10625 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10629 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10630 emit_move_insn (value
, CONST0_RTX (vecmode
));
10631 if (vecmode
== V4SFmode
)
10632 emit_insn (gen_sse_movss (value
, value
, input
));
10634 emit_insn (gen_sse2_movsd (value
, value
, input
));
10637 emit_move_insn (large
, two31
);
10638 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10640 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10641 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10643 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10644 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10646 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10647 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10649 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10650 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10652 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10653 if (vecmode
== V4SFmode
)
10654 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10656 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10659 emit_insn (gen_xorv4si3 (value
, value
, large
));
10662 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10663 Expects the 64-bit DImode to be supplied in a pair of integral
10664 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10665 -mfpmath=sse, !optimize_size only. */
10668 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10670 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10671 rtx int_xmm
, fp_xmm
;
10672 rtx biases
, exponents
;
10675 int_xmm
= gen_reg_rtx (V4SImode
);
10676 if (TARGET_INTER_UNIT_MOVES
)
10677 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10678 else if (TARGET_SSE_SPLIT_REGS
)
10680 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10681 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10685 x
= gen_reg_rtx (V2DImode
);
10686 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10687 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10690 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10691 gen_rtvec (4, GEN_INT (0x43300000UL
),
10692 GEN_INT (0x45300000UL
),
10693 const0_rtx
, const0_rtx
));
10694 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10696 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10697 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10699 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10700 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10701 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10702 (0x1.0p84 + double(fp_value_hi_xmm)).
10703 Note these exponents differ by 32. */
10705 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10707 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10708 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10709 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10710 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10711 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10712 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10713 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10714 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10715 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10717 /* Add the upper and lower DFmode values together. */
10719 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10722 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10723 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10724 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10727 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10730 /* Convert an unsigned SImode value into a DFmode. Only currently used
10731 for SSE, but applicable anywhere. */
10734 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10736 REAL_VALUE_TYPE TWO31r
;
10739 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10740 NULL
, 1, OPTAB_DIRECT
);
10742 fp
= gen_reg_rtx (DFmode
);
10743 emit_insn (gen_floatsidf2 (fp
, x
));
10745 real_ldexp (&TWO31r
, &dconst1
, 31);
10746 x
= const_double_from_real_value (TWO31r
, DFmode
);
10748 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10750 emit_move_insn (target
, x
);
10753 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10754 32-bit mode; otherwise we have a direct convert instruction. */
10757 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10759 REAL_VALUE_TYPE TWO32r
;
10760 rtx fp_lo
, fp_hi
, x
;
10762 fp_lo
= gen_reg_rtx (DFmode
);
10763 fp_hi
= gen_reg_rtx (DFmode
);
10765 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10767 real_ldexp (&TWO32r
, &dconst1
, 32);
10768 x
= const_double_from_real_value (TWO32r
, DFmode
);
10769 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10771 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10773 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10776 emit_move_insn (target
, x
);
10779 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10780 For x86_32, -mfpmath=sse, !optimize_size only. */
10782 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10784 REAL_VALUE_TYPE ONE16r
;
10785 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10787 real_ldexp (&ONE16r
, &dconst1
, 16);
10788 x
= const_double_from_real_value (ONE16r
, SFmode
);
10789 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10790 NULL
, 0, OPTAB_DIRECT
);
10791 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10792 NULL
, 0, OPTAB_DIRECT
);
10793 fp_hi
= gen_reg_rtx (SFmode
);
10794 fp_lo
= gen_reg_rtx (SFmode
);
10795 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10796 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10797 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10799 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10801 if (!rtx_equal_p (target
, fp_hi
))
10802 emit_move_insn (target
, fp_hi
);
10805 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10806 then replicate the value for all elements of the vector
10810 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10817 v
= gen_rtvec (4, value
, value
, value
, value
);
10818 return gen_rtx_CONST_VECTOR (V4SImode
, v
);
10822 v
= gen_rtvec (2, value
, value
);
10823 return gen_rtx_CONST_VECTOR (V2DImode
, v
);
10827 v
= gen_rtvec (4, value
, value
, value
, value
);
10829 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10830 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10831 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10835 v
= gen_rtvec (2, value
, value
);
10837 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10838 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10841 gcc_unreachable ();
10845 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10846 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10847 for an SSE register. If VECT is true, then replicate the mask for
10848 all elements of the vector register. If INVERT is true, then create
10849 a mask excluding the sign bit. */
10852 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10854 enum machine_mode vec_mode
, imode
;
10855 HOST_WIDE_INT hi
, lo
;
10860 /* Find the sign bit, sign extended to 2*HWI. */
10866 vec_mode
= (mode
== SImode
) ? V4SImode
: V4SFmode
;
10867 lo
= 0x80000000, hi
= lo
< 0;
10873 vec_mode
= (mode
== DImode
) ? V2DImode
: V2DFmode
;
10874 if (HOST_BITS_PER_WIDE_INT
>= 64)
10875 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10877 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10883 vec_mode
= VOIDmode
;
10884 gcc_assert (HOST_BITS_PER_WIDE_INT
>= 64);
10885 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
10889 gcc_unreachable ();
10893 lo
= ~lo
, hi
= ~hi
;
10895 /* Force this value into the low part of a fp vector constant. */
10896 mask
= immed_double_const (lo
, hi
, imode
);
10897 mask
= gen_lowpart (mode
, mask
);
10899 if (vec_mode
== VOIDmode
)
10900 return force_reg (mode
, mask
);
10902 v
= ix86_build_const_vector (mode
, vect
, mask
);
10903 return force_reg (vec_mode
, v
);
10906 /* Generate code for floating point ABS or NEG. */
10909 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10912 rtx mask
, set
, use
, clob
, dst
, src
;
10913 bool matching_memory
;
10914 bool use_sse
= false;
10915 bool vector_mode
= VECTOR_MODE_P (mode
);
10916 enum machine_mode elt_mode
= mode
;
10920 elt_mode
= GET_MODE_INNER (mode
);
10923 else if (mode
== TFmode
)
10925 else if (TARGET_SSE_MATH
)
10926 use_sse
= SSE_FLOAT_MODE_P (mode
);
10928 /* NEG and ABS performed with SSE use bitwise mask operations.
10929 Create the appropriate mask now. */
10931 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10938 /* If the destination is memory, and we don't have matching source
10939 operands or we're using the x87, do things in registers. */
10940 matching_memory
= false;
10943 if (use_sse
&& rtx_equal_p (dst
, src
))
10944 matching_memory
= true;
10946 dst
= gen_reg_rtx (mode
);
10948 if (MEM_P (src
) && !matching_memory
)
10949 src
= force_reg (mode
, src
);
10953 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10954 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10959 set
= gen_rtx_fmt_e (code
, mode
, src
);
10960 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10963 use
= gen_rtx_USE (VOIDmode
, mask
);
10964 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10965 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10966 gen_rtvec (3, set
, use
, clob
)));
10972 if (dst
!= operands
[0])
10973 emit_move_insn (operands
[0], dst
);
10976 /* Expand a copysign operation. Special case operand 0 being a constant. */
10979 ix86_expand_copysign (rtx operands
[])
10981 enum machine_mode mode
, vmode
;
10982 rtx dest
, op0
, op1
, mask
, nmask
;
10984 dest
= operands
[0];
10988 mode
= GET_MODE (dest
);
10989 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10991 if (GET_CODE (op0
) == CONST_DOUBLE
)
10993 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
10995 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10996 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10998 if (mode
== SFmode
|| mode
== DFmode
)
11000 if (op0
== CONST0_RTX (mode
))
11001 op0
= CONST0_RTX (vmode
);
11006 if (mode
== SFmode
)
11007 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
11008 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
11010 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
11011 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
11015 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11017 if (mode
== SFmode
)
11018 copysign_insn
= gen_copysignsf3_const
;
11019 else if (mode
== DFmode
)
11020 copysign_insn
= gen_copysigndf3_const
;
11022 copysign_insn
= gen_copysigntf3_const
;
11024 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
11028 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
11030 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
11031 mask
= ix86_build_signbit_mask (mode
, 0, 0);
11033 if (mode
== SFmode
)
11034 copysign_insn
= gen_copysignsf3_var
;
11035 else if (mode
== DFmode
)
11036 copysign_insn
= gen_copysigndf3_var
;
11038 copysign_insn
= gen_copysigntf3_var
;
11040 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
11044 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11045 be a constant, and so has already been expanded into a vector constant. */
11048 ix86_split_copysign_const (rtx operands
[])
11050 enum machine_mode mode
, vmode
;
11051 rtx dest
, op0
, op1
, mask
, x
;
11053 dest
= operands
[0];
11056 mask
= operands
[3];
11058 mode
= GET_MODE (dest
);
11059 vmode
= GET_MODE (mask
);
11061 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
11062 x
= gen_rtx_AND (vmode
, dest
, mask
);
11063 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11065 if (op0
!= CONST0_RTX (vmode
))
11067 x
= gen_rtx_IOR (vmode
, dest
, op0
);
11068 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11072 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11073 so we have to do two masks. */
11076 ix86_split_copysign_var (rtx operands
[])
11078 enum machine_mode mode
, vmode
;
11079 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
11081 dest
= operands
[0];
11082 scratch
= operands
[1];
11085 nmask
= operands
[4];
11086 mask
= operands
[5];
11088 mode
= GET_MODE (dest
);
11089 vmode
= GET_MODE (mask
);
11091 if (rtx_equal_p (op0
, op1
))
11093 /* Shouldn't happen often (it's useless, obviously), but when it does
11094 we'd generate incorrect code if we continue below. */
11095 emit_move_insn (dest
, op0
);
11099 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
11101 gcc_assert (REGNO (op1
) == REGNO (scratch
));
11103 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11104 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11107 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11108 x
= gen_rtx_NOT (vmode
, dest
);
11109 x
= gen_rtx_AND (vmode
, x
, op0
);
11110 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11114 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
11116 x
= gen_rtx_AND (vmode
, scratch
, mask
);
11118 else /* alternative 2,4 */
11120 gcc_assert (REGNO (mask
) == REGNO (scratch
));
11121 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
11122 x
= gen_rtx_AND (vmode
, scratch
, op1
);
11124 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
11126 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
11128 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11129 x
= gen_rtx_AND (vmode
, dest
, nmask
);
11131 else /* alternative 3,4 */
11133 gcc_assert (REGNO (nmask
) == REGNO (dest
));
11135 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
11136 x
= gen_rtx_AND (vmode
, dest
, op0
);
11138 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11141 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
11142 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11145 /* Return TRUE or FALSE depending on whether the first SET in INSN
11146 has source and destination with matching CC modes, and that the
11147 CC mode is at least as constrained as REQ_MODE. */
11150 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
11153 enum machine_mode set_mode
;
11155 set
= PATTERN (insn
);
11156 if (GET_CODE (set
) == PARALLEL
)
11157 set
= XVECEXP (set
, 0, 0);
11158 gcc_assert (GET_CODE (set
) == SET
);
11159 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
11161 set_mode
= GET_MODE (SET_DEST (set
));
11165 if (req_mode
!= CCNOmode
11166 && (req_mode
!= CCmode
11167 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
11171 if (req_mode
== CCGCmode
)
11175 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
11179 if (req_mode
== CCZmode
)
11186 gcc_unreachable ();
11189 return (GET_MODE (SET_SRC (set
)) == set_mode
);
11192 /* Generate insn patterns to do an integer compare of OPERANDS. */
11195 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
11197 enum machine_mode cmpmode
;
11200 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
11201 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
11203 /* This is very simple, but making the interface the same as in the
11204 FP case makes the rest of the code easier. */
11205 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
11206 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
11208 /* Return the test that should be put into the flags user, i.e.
11209 the bcc, scc, or cmov instruction. */
11210 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
11213 /* Figure out whether to use ordered or unordered fp comparisons.
11214 Return the appropriate mode to use. */
11217 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
11219 /* ??? In order to make all comparisons reversible, we do all comparisons
11220 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11221 all forms trapping and nontrapping comparisons, we can make inequality
11222 comparisons trapping again, since it results in better code when using
11223 FCOM based compares. */
11224 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
11228 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
11230 enum machine_mode mode
= GET_MODE (op0
);
11232 if (SCALAR_FLOAT_MODE_P (mode
))
11234 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11235 return ix86_fp_compare_mode (code
);
11240 /* Only zero flag is needed. */
11241 case EQ
: /* ZF=0 */
11242 case NE
: /* ZF!=0 */
11244 /* Codes needing carry flag. */
11245 case GEU
: /* CF=0 */
11246 case LTU
: /* CF=1 */
11247 /* Detect overflow checks. They need just the carry flag. */
11248 if (GET_CODE (op0
) == PLUS
11249 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11253 case GTU
: /* CF=0 & ZF=0 */
11254 case LEU
: /* CF=1 | ZF=1 */
11255 /* Detect overflow checks. They need just the carry flag. */
11256 if (GET_CODE (op0
) == MINUS
11257 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11261 /* Codes possibly doable only with sign flag when
11262 comparing against zero. */
11263 case GE
: /* SF=OF or SF=0 */
11264 case LT
: /* SF<>OF or SF=1 */
11265 if (op1
== const0_rtx
)
11268 /* For other cases Carry flag is not required. */
11270 /* Codes doable only with sign flag when comparing
11271 against zero, but we miss jump instruction for it
11272 so we need to use relational tests against overflow
11273 that thus needs to be zero. */
11274 case GT
: /* ZF=0 & SF=OF */
11275 case LE
: /* ZF=1 | SF<>OF */
11276 if (op1
== const0_rtx
)
11280 /* strcmp pattern do (use flags) and combine may ask us for proper
11285 gcc_unreachable ();
11289 /* Return the fixed registers used for condition codes. */
11292 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
11299 /* If two condition code modes are compatible, return a condition code
11300 mode which is compatible with both. Otherwise, return
11303 static enum machine_mode
11304 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
11309 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
11312 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
11313 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
11319 gcc_unreachable ();
11349 /* These are only compatible with themselves, which we already
11355 /* Split comparison code CODE into comparisons we can do using branch
11356 instructions. BYPASS_CODE is comparison code for branch that will
11357 branch around FIRST_CODE and SECOND_CODE. If some of branches
11358 is not required, set value to UNKNOWN.
11359 We never require more than two branches. */
11362 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
11363 enum rtx_code
*first_code
,
11364 enum rtx_code
*second_code
)
11366 *first_code
= code
;
11367 *bypass_code
= UNKNOWN
;
11368 *second_code
= UNKNOWN
;
11370 /* The fcomi comparison sets flags as follows:
11380 case GT
: /* GTU - CF=0 & ZF=0 */
11381 case GE
: /* GEU - CF=0 */
11382 case ORDERED
: /* PF=0 */
11383 case UNORDERED
: /* PF=1 */
11384 case UNEQ
: /* EQ - ZF=1 */
11385 case UNLT
: /* LTU - CF=1 */
11386 case UNLE
: /* LEU - CF=1 | ZF=1 */
11387 case LTGT
: /* EQ - ZF=0 */
11389 case LT
: /* LTU - CF=1 - fails on unordered */
11390 *first_code
= UNLT
;
11391 *bypass_code
= UNORDERED
;
11393 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
11394 *first_code
= UNLE
;
11395 *bypass_code
= UNORDERED
;
11397 case EQ
: /* EQ - ZF=1 - fails on unordered */
11398 *first_code
= UNEQ
;
11399 *bypass_code
= UNORDERED
;
11401 case NE
: /* NE - ZF=0 - fails on unordered */
11402 *first_code
= LTGT
;
11403 *second_code
= UNORDERED
;
11405 case UNGE
: /* GEU - CF=0 - fails on unordered */
11407 *second_code
= UNORDERED
;
11409 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
11411 *second_code
= UNORDERED
;
11414 gcc_unreachable ();
11416 if (!TARGET_IEEE_FP
)
11418 *second_code
= UNKNOWN
;
11419 *bypass_code
= UNKNOWN
;
11423 /* Return cost of comparison done fcom + arithmetics operations on AX.
11424 All following functions do use number of instructions as a cost metrics.
11425 In future this should be tweaked to compute bytes for optimize_size and
11426 take into account performance of various instructions on various CPUs. */
11428 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
11430 if (!TARGET_IEEE_FP
)
11432 /* The cost of code output by ix86_expand_fp_compare. */
11456 gcc_unreachable ();
11460 /* Return cost of comparison done using fcomi operation.
11461 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11463 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
11465 enum rtx_code bypass_code
, first_code
, second_code
;
11466 /* Return arbitrarily high cost when instruction is not supported - this
11467 prevents gcc from using it. */
11470 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11471 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
11474 /* Return cost of comparison done using sahf operation.
11475 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11477 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
11479 enum rtx_code bypass_code
, first_code
, second_code
;
11480 /* Return arbitrarily high cost when instruction is not preferred - this
11481 avoids gcc from using it. */
11482 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11484 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11485 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11488 /* Compute cost of the comparison done using any method.
11489 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11491 ix86_fp_comparison_cost (enum rtx_code code
)
11493 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11496 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11497 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11499 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11500 if (min
> sahf_cost
)
11502 if (min
> fcomi_cost
)
11507 /* Return true if we should use an FCOMI instruction for this
11511 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11513 enum rtx_code swapped_code
= swap_condition (code
);
11515 return ((ix86_fp_comparison_cost (code
)
11516 == ix86_fp_comparison_fcomi_cost (code
))
11517 || (ix86_fp_comparison_cost (swapped_code
)
11518 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11521 /* Swap, force into registers, or otherwise massage the two operands
11522 to a fp comparison. The operands are updated in place; the new
11523 comparison code is returned. */
11525 static enum rtx_code
11526 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11528 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11529 rtx op0
= *pop0
, op1
= *pop1
;
11530 enum machine_mode op_mode
= GET_MODE (op0
);
11531 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11533 /* All of the unordered compare instructions only work on registers.
11534 The same is true of the fcomi compare instructions. The XFmode
11535 compare instructions require registers except when comparing
11536 against zero or when converting operand 1 from fixed point to
11540 && (fpcmp_mode
== CCFPUmode
11541 || (op_mode
== XFmode
11542 && ! (standard_80387_constant_p (op0
) == 1
11543 || standard_80387_constant_p (op1
) == 1)
11544 && GET_CODE (op1
) != FLOAT
)
11545 || ix86_use_fcomi_compare (code
)))
11547 op0
= force_reg (op_mode
, op0
);
11548 op1
= force_reg (op_mode
, op1
);
11552 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11553 things around if they appear profitable, otherwise force op0
11554 into a register. */
11556 if (standard_80387_constant_p (op0
) == 0
11558 && ! (standard_80387_constant_p (op1
) == 0
11562 tmp
= op0
, op0
= op1
, op1
= tmp
;
11563 code
= swap_condition (code
);
11567 op0
= force_reg (op_mode
, op0
);
11569 if (CONSTANT_P (op1
))
11571 int tmp
= standard_80387_constant_p (op1
);
11573 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11577 op1
= force_reg (op_mode
, op1
);
11580 op1
= force_reg (op_mode
, op1
);
11584 /* Try to rearrange the comparison to make it cheaper. */
11585 if (ix86_fp_comparison_cost (code
)
11586 > ix86_fp_comparison_cost (swap_condition (code
))
11587 && (REG_P (op1
) || can_create_pseudo_p ()))
11590 tmp
= op0
, op0
= op1
, op1
= tmp
;
11591 code
= swap_condition (code
);
11593 op0
= force_reg (op_mode
, op0
);
11601 /* Convert comparison codes we use to represent FP comparison to integer
11602 code that will result in proper branch. Return UNKNOWN if no such code
11606 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11635 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11638 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11639 rtx
*second_test
, rtx
*bypass_test
)
11641 enum machine_mode fpcmp_mode
, intcmp_mode
;
11643 int cost
= ix86_fp_comparison_cost (code
);
11644 enum rtx_code bypass_code
, first_code
, second_code
;
11646 fpcmp_mode
= ix86_fp_compare_mode (code
);
11647 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11650 *second_test
= NULL_RTX
;
11652 *bypass_test
= NULL_RTX
;
11654 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11656 /* Do fcomi/sahf based test when profitable. */
11657 if (ix86_fp_comparison_arithmetics_cost (code
) > cost
11658 && (bypass_code
== UNKNOWN
|| bypass_test
)
11659 && (second_code
== UNKNOWN
|| second_test
))
11661 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11662 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11668 gcc_assert (TARGET_SAHF
);
11671 scratch
= gen_reg_rtx (HImode
);
11672 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11674 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
11677 /* The FP codes work out to act like unsigned. */
11678 intcmp_mode
= fpcmp_mode
;
11680 if (bypass_code
!= UNKNOWN
)
11681 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11682 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11684 if (second_code
!= UNKNOWN
)
11685 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11686 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11691 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11692 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11693 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11695 scratch
= gen_reg_rtx (HImode
);
11696 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11698 /* In the unordered case, we have to check C2 for NaN's, which
11699 doesn't happen to work out to anything nice combination-wise.
11700 So do some bit twiddling on the value we've got in AH to come
11701 up with an appropriate set of condition codes. */
11703 intcmp_mode
= CCNOmode
;
11708 if (code
== GT
|| !TARGET_IEEE_FP
)
11710 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11715 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11716 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11717 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11718 intcmp_mode
= CCmode
;
11724 if (code
== LT
&& TARGET_IEEE_FP
)
11726 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11727 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11728 intcmp_mode
= CCmode
;
11733 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11739 if (code
== GE
|| !TARGET_IEEE_FP
)
11741 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11746 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11747 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11754 if (code
== LE
&& TARGET_IEEE_FP
)
11756 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11757 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11758 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11759 intcmp_mode
= CCmode
;
11764 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11770 if (code
== EQ
&& TARGET_IEEE_FP
)
11772 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11773 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11774 intcmp_mode
= CCmode
;
11779 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11786 if (code
== NE
&& TARGET_IEEE_FP
)
11788 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11789 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11795 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11801 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11805 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11810 gcc_unreachable ();
11814 /* Return the test that should be put into the flags user, i.e.
11815 the bcc, scc, or cmov instruction. */
11816 return gen_rtx_fmt_ee (code
, VOIDmode
,
11817 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11822 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11825 op0
= ix86_compare_op0
;
11826 op1
= ix86_compare_op1
;
11829 *second_test
= NULL_RTX
;
11831 *bypass_test
= NULL_RTX
;
11833 if (ix86_compare_emitted
)
11835 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11836 ix86_compare_emitted
= NULL_RTX
;
11838 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11840 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11841 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11842 second_test
, bypass_test
);
11845 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11850 /* Return true if the CODE will result in nontrivial jump sequence. */
11852 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11854 enum rtx_code bypass_code
, first_code
, second_code
;
11857 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11858 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11862 ix86_expand_branch (enum rtx_code code
, rtx label
)
11866 /* If we have emitted a compare insn, go straight to simple.
11867 ix86_expand_compare won't emit anything if ix86_compare_emitted
11869 if (ix86_compare_emitted
)
11872 switch (GET_MODE (ix86_compare_op0
))
11878 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11879 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11880 gen_rtx_LABEL_REF (VOIDmode
, label
),
11882 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11891 enum rtx_code bypass_code
, first_code
, second_code
;
11893 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11894 &ix86_compare_op1
);
11896 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11898 /* Check whether we will use the natural sequence with one jump. If
11899 so, we can expand jump early. Otherwise delay expansion by
11900 creating compound insn to not confuse optimizers. */
11901 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
)
11903 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11904 gen_rtx_LABEL_REF (VOIDmode
, label
),
11905 pc_rtx
, NULL_RTX
, NULL_RTX
);
11909 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11910 ix86_compare_op0
, ix86_compare_op1
);
11911 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11912 gen_rtx_LABEL_REF (VOIDmode
, label
),
11914 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11916 use_fcomi
= ix86_use_fcomi_compare (code
);
11917 vec
= rtvec_alloc (3 + !use_fcomi
);
11918 RTVEC_ELT (vec
, 0) = tmp
;
11920 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FPSR_REG
));
11922 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FLAGS_REG
));
11925 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11927 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11936 /* Expand DImode branch into multiple compare+branch. */
11938 rtx lo
[2], hi
[2], label2
;
11939 enum rtx_code code1
, code2
, code3
;
11940 enum machine_mode submode
;
11942 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11944 tmp
= ix86_compare_op0
;
11945 ix86_compare_op0
= ix86_compare_op1
;
11946 ix86_compare_op1
= tmp
;
11947 code
= swap_condition (code
);
11949 if (GET_MODE (ix86_compare_op0
) == DImode
)
11951 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11952 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11957 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11958 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11962 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11963 avoid two branches. This costs one extra insn, so disable when
11964 optimizing for size. */
11966 if ((code
== EQ
|| code
== NE
)
11968 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11973 if (hi
[1] != const0_rtx
)
11974 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11975 NULL_RTX
, 0, OPTAB_WIDEN
);
11978 if (lo
[1] != const0_rtx
)
11979 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11980 NULL_RTX
, 0, OPTAB_WIDEN
);
11982 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11983 NULL_RTX
, 0, OPTAB_WIDEN
);
11985 ix86_compare_op0
= tmp
;
11986 ix86_compare_op1
= const0_rtx
;
11987 ix86_expand_branch (code
, label
);
11991 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11992 op1 is a constant and the low word is zero, then we can just
11993 examine the high word. */
11995 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11998 case LT
: case LTU
: case GE
: case GEU
:
11999 ix86_compare_op0
= hi
[0];
12000 ix86_compare_op1
= hi
[1];
12001 ix86_expand_branch (code
, label
);
12007 /* Otherwise, we need two or three jumps. */
12009 label2
= gen_label_rtx ();
12012 code2
= swap_condition (code
);
12013 code3
= unsigned_condition (code
);
12017 case LT
: case GT
: case LTU
: case GTU
:
12020 case LE
: code1
= LT
; code2
= GT
; break;
12021 case GE
: code1
= GT
; code2
= LT
; break;
12022 case LEU
: code1
= LTU
; code2
= GTU
; break;
12023 case GEU
: code1
= GTU
; code2
= LTU
; break;
12025 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
12026 case NE
: code2
= UNKNOWN
; break;
12029 gcc_unreachable ();
12034 * if (hi(a) < hi(b)) goto true;
12035 * if (hi(a) > hi(b)) goto false;
12036 * if (lo(a) < lo(b)) goto true;
12040 ix86_compare_op0
= hi
[0];
12041 ix86_compare_op1
= hi
[1];
12043 if (code1
!= UNKNOWN
)
12044 ix86_expand_branch (code1
, label
);
12045 if (code2
!= UNKNOWN
)
12046 ix86_expand_branch (code2
, label2
);
12048 ix86_compare_op0
= lo
[0];
12049 ix86_compare_op1
= lo
[1];
12050 ix86_expand_branch (code3
, label
);
12052 if (code2
!= UNKNOWN
)
12053 emit_label (label2
);
12058 gcc_unreachable ();
12062 /* Split branch based on floating point condition. */
12064 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
12065 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
12067 rtx second
, bypass
;
12068 rtx label
= NULL_RTX
;
12070 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
12073 if (target2
!= pc_rtx
)
12076 code
= reverse_condition_maybe_unordered (code
);
12081 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
12082 tmp
, &second
, &bypass
);
12084 /* Remove pushed operand from stack. */
12086 ix86_free_from_memory (GET_MODE (pushed
));
12088 if (split_branch_probability
>= 0)
12090 /* Distribute the probabilities across the jumps.
12091 Assume the BYPASS and SECOND to be always test
12093 probability
= split_branch_probability
;
12095 /* Value of 1 is low enough to make no need for probability
12096 to be updated. Later we may run some experiments and see
12097 if unordered values are more frequent in practice. */
12099 bypass_probability
= 1;
12101 second_probability
= 1;
12103 if (bypass
!= NULL_RTX
)
12105 label
= gen_label_rtx ();
12106 i
= emit_jump_insn (gen_rtx_SET
12108 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12110 gen_rtx_LABEL_REF (VOIDmode
,
12113 if (bypass_probability
>= 0)
12115 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12116 GEN_INT (bypass_probability
),
12119 i
= emit_jump_insn (gen_rtx_SET
12121 gen_rtx_IF_THEN_ELSE (VOIDmode
,
12122 condition
, target1
, target2
)));
12123 if (probability
>= 0)
12125 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12126 GEN_INT (probability
),
12128 if (second
!= NULL_RTX
)
12130 i
= emit_jump_insn (gen_rtx_SET
12132 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
12134 if (second_probability
>= 0)
12136 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
12137 GEN_INT (second_probability
),
12140 if (label
!= NULL_RTX
)
12141 emit_label (label
);
12145 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
12147 rtx ret
, tmp
, tmpreg
, equiv
;
12148 rtx second_test
, bypass_test
;
12150 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
12151 return 0; /* FAIL */
12153 gcc_assert (GET_MODE (dest
) == QImode
);
12155 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12156 PUT_MODE (ret
, QImode
);
12161 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
12162 if (bypass_test
|| second_test
)
12164 rtx test
= second_test
;
12166 rtx tmp2
= gen_reg_rtx (QImode
);
12169 gcc_assert (!second_test
);
12170 test
= bypass_test
;
12172 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
12174 PUT_MODE (test
, QImode
);
12175 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
12178 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
12180 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
12183 /* Attach a REG_EQUAL note describing the comparison result. */
12184 if (ix86_compare_op0
&& ix86_compare_op1
)
12186 equiv
= simplify_gen_relational (code
, QImode
,
12187 GET_MODE (ix86_compare_op0
),
12188 ix86_compare_op0
, ix86_compare_op1
);
12189 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
12192 return 1; /* DONE */
12195 /* Expand comparison setting or clearing carry flag. Return true when
12196 successful and set pop for the operation. */
12198 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
12200 enum machine_mode mode
=
12201 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
12203 /* Do not handle DImode compares that go through special path. */
12204 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
12207 if (SCALAR_FLOAT_MODE_P (mode
))
12209 rtx second_test
= NULL
, bypass_test
= NULL
;
12210 rtx compare_op
, compare_seq
;
12212 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
12214 /* Shortcut: following common codes never translate
12215 into carry flag compares. */
12216 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
12217 || code
== ORDERED
|| code
== UNORDERED
)
12220 /* These comparisons require zero flag; swap operands so they won't. */
12221 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
12222 && !TARGET_IEEE_FP
)
12227 code
= swap_condition (code
);
12230 /* Try to expand the comparison and verify that we end up with
12231 carry flag based comparison. This fails to be true only when
12232 we decide to expand comparison using arithmetic that is not
12233 too common scenario. */
12235 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
12236 &second_test
, &bypass_test
);
12237 compare_seq
= get_insns ();
12240 if (second_test
|| bypass_test
)
12243 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12244 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12245 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
12247 code
= GET_CODE (compare_op
);
12249 if (code
!= LTU
&& code
!= GEU
)
12252 emit_insn (compare_seq
);
12257 if (!INTEGRAL_MODE_P (mode
))
12266 /* Convert a==0 into (unsigned)a<1. */
12269 if (op1
!= const0_rtx
)
12272 code
= (code
== EQ
? LTU
: GEU
);
12275 /* Convert a>b into b<a or a>=b-1. */
12278 if (CONST_INT_P (op1
))
12280 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
12281 /* Bail out on overflow. We still can swap operands but that
12282 would force loading of the constant into register. */
12283 if (op1
== const0_rtx
12284 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
12286 code
= (code
== GTU
? GEU
: LTU
);
12293 code
= (code
== GTU
? LTU
: GEU
);
12297 /* Convert a>=0 into (unsigned)a<0x80000000. */
12300 if (mode
== DImode
|| op1
!= const0_rtx
)
12302 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12303 code
= (code
== LT
? GEU
: LTU
);
12307 if (mode
== DImode
|| op1
!= constm1_rtx
)
12309 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12310 code
= (code
== LE
? GEU
: LTU
);
12316 /* Swapping operands may cause constant to appear as first operand. */
12317 if (!nonimmediate_operand (op0
, VOIDmode
))
12319 if (!can_create_pseudo_p ())
12321 op0
= force_reg (mode
, op0
);
12323 ix86_compare_op0
= op0
;
12324 ix86_compare_op1
= op1
;
12325 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
12326 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
12331 ix86_expand_int_movcc (rtx operands
[])
12333 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
12334 rtx compare_seq
, compare_op
;
12335 rtx second_test
, bypass_test
;
12336 enum machine_mode mode
= GET_MODE (operands
[0]);
12337 bool sign_bit_compare_p
= false;;
12340 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12341 compare_seq
= get_insns ();
12344 compare_code
= GET_CODE (compare_op
);
12346 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
12347 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
12348 sign_bit_compare_p
= true;
12350 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12351 HImode insns, we'd be swallowed in word prefix ops. */
12353 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
12354 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
12355 && CONST_INT_P (operands
[2])
12356 && CONST_INT_P (operands
[3]))
12358 rtx out
= operands
[0];
12359 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
12360 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
12361 HOST_WIDE_INT diff
;
12364 /* Sign bit compares are better done using shifts than we do by using
12366 if (sign_bit_compare_p
12367 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12368 ix86_compare_op1
, &compare_op
))
12370 /* Detect overlap between destination and compare sources. */
12373 if (!sign_bit_compare_p
)
12375 bool fpcmp
= false;
12377 compare_code
= GET_CODE (compare_op
);
12379 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12380 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12383 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
12386 /* To simplify rest of code, restrict to the GEU case. */
12387 if (compare_code
== LTU
)
12389 HOST_WIDE_INT tmp
= ct
;
12392 compare_code
= reverse_condition (compare_code
);
12393 code
= reverse_condition (code
);
12398 PUT_CODE (compare_op
,
12399 reverse_condition_maybe_unordered
12400 (GET_CODE (compare_op
)));
12402 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12406 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
12407 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
12408 tmp
= gen_reg_rtx (mode
);
12410 if (mode
== DImode
)
12411 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
12413 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
12417 if (code
== GT
|| code
== GE
)
12418 code
= reverse_condition (code
);
12421 HOST_WIDE_INT tmp
= ct
;
12426 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
12427 ix86_compare_op1
, VOIDmode
, 0, -1);
12440 tmp
= expand_simple_binop (mode
, PLUS
,
12442 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12453 tmp
= expand_simple_binop (mode
, IOR
,
12455 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12457 else if (diff
== -1 && ct
)
12467 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12469 tmp
= expand_simple_binop (mode
, PLUS
,
12470 copy_rtx (tmp
), GEN_INT (cf
),
12471 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12479 * andl cf - ct, dest
12489 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12492 tmp
= expand_simple_binop (mode
, AND
,
12494 gen_int_mode (cf
- ct
, mode
),
12495 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12497 tmp
= expand_simple_binop (mode
, PLUS
,
12498 copy_rtx (tmp
), GEN_INT (ct
),
12499 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12502 if (!rtx_equal_p (tmp
, out
))
12503 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
12505 return 1; /* DONE */
12510 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12513 tmp
= ct
, ct
= cf
, cf
= tmp
;
12516 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12518 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12520 /* We may be reversing unordered compare to normal compare, that
12521 is not valid in general (we may convert non-trapping condition
12522 to trapping one), however on i386 we currently emit all
12523 comparisons unordered. */
12524 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12525 code
= reverse_condition_maybe_unordered (code
);
12529 compare_code
= reverse_condition (compare_code
);
12530 code
= reverse_condition (code
);
12534 compare_code
= UNKNOWN
;
12535 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12536 && CONST_INT_P (ix86_compare_op1
))
12538 if (ix86_compare_op1
== const0_rtx
12539 && (code
== LT
|| code
== GE
))
12540 compare_code
= code
;
12541 else if (ix86_compare_op1
== constm1_rtx
)
12545 else if (code
== GT
)
12550 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12551 if (compare_code
!= UNKNOWN
12552 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12553 && (cf
== -1 || ct
== -1))
12555 /* If lea code below could be used, only optimize
12556 if it results in a 2 insn sequence. */
12558 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12559 || diff
== 3 || diff
== 5 || diff
== 9)
12560 || (compare_code
== LT
&& ct
== -1)
12561 || (compare_code
== GE
&& cf
== -1))
12564 * notl op1 (if necessary)
12572 code
= reverse_condition (code
);
12575 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12576 ix86_compare_op1
, VOIDmode
, 0, -1);
12578 out
= expand_simple_binop (mode
, IOR
,
12580 out
, 1, OPTAB_DIRECT
);
12581 if (out
!= operands
[0])
12582 emit_move_insn (operands
[0], out
);
12584 return 1; /* DONE */
12589 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12590 || diff
== 3 || diff
== 5 || diff
== 9)
12591 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12593 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12599 * lea cf(dest*(ct-cf)),dest
12603 * This also catches the degenerate setcc-only case.
12609 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12610 ix86_compare_op1
, VOIDmode
, 0, 1);
12613 /* On x86_64 the lea instruction operates on Pmode, so we need
12614 to get arithmetics done in proper mode to match. */
12616 tmp
= copy_rtx (out
);
12620 out1
= copy_rtx (out
);
12621 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12625 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12631 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12634 if (!rtx_equal_p (tmp
, out
))
12637 out
= force_operand (tmp
, copy_rtx (out
));
12639 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12641 if (!rtx_equal_p (out
, operands
[0]))
12642 emit_move_insn (operands
[0], copy_rtx (out
));
12644 return 1; /* DONE */
12648 * General case: Jumpful:
12649 * xorl dest,dest cmpl op1, op2
12650 * cmpl op1, op2 movl ct, dest
12651 * setcc dest jcc 1f
12652 * decl dest movl cf, dest
12653 * andl (cf-ct),dest 1:
12656 * Size 20. Size 14.
12658 * This is reasonably steep, but branch mispredict costs are
12659 * high on modern cpus, so consider failing only if optimizing
12663 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12664 && BRANCH_COST
>= 2)
12668 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12673 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12675 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12677 /* We may be reversing unordered compare to normal compare,
12678 that is not valid in general (we may convert non-trapping
12679 condition to trapping one), however on i386 we currently
12680 emit all comparisons unordered. */
12681 code
= reverse_condition_maybe_unordered (code
);
12685 code
= reverse_condition (code
);
12686 if (compare_code
!= UNKNOWN
)
12687 compare_code
= reverse_condition (compare_code
);
12691 if (compare_code
!= UNKNOWN
)
12693 /* notl op1 (if needed)
12698 For x < 0 (resp. x <= -1) there will be no notl,
12699 so if possible swap the constants to get rid of the
12701 True/false will be -1/0 while code below (store flag
12702 followed by decrement) is 0/-1, so the constants need
12703 to be exchanged once more. */
12705 if (compare_code
== GE
|| !cf
)
12707 code
= reverse_condition (code
);
12712 HOST_WIDE_INT tmp
= cf
;
12717 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12718 ix86_compare_op1
, VOIDmode
, 0, -1);
12722 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12723 ix86_compare_op1
, VOIDmode
, 0, 1);
12725 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12726 copy_rtx (out
), 1, OPTAB_DIRECT
);
12729 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12730 gen_int_mode (cf
- ct
, mode
),
12731 copy_rtx (out
), 1, OPTAB_DIRECT
);
12733 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12734 copy_rtx (out
), 1, OPTAB_DIRECT
);
12735 if (!rtx_equal_p (out
, operands
[0]))
12736 emit_move_insn (operands
[0], copy_rtx (out
));
12738 return 1; /* DONE */
12742 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12744 /* Try a few things more with specific constants and a variable. */
12747 rtx var
, orig_out
, out
, tmp
;
12749 if (BRANCH_COST
<= 2)
12750 return 0; /* FAIL */
12752 /* If one of the two operands is an interesting constant, load a
12753 constant with the above and mask it in with a logical operation. */
12755 if (CONST_INT_P (operands
[2]))
12758 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12759 operands
[3] = constm1_rtx
, op
= and_optab
;
12760 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12761 operands
[3] = const0_rtx
, op
= ior_optab
;
12763 return 0; /* FAIL */
12765 else if (CONST_INT_P (operands
[3]))
12768 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12769 operands
[2] = constm1_rtx
, op
= and_optab
;
12770 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12771 operands
[2] = const0_rtx
, op
= ior_optab
;
12773 return 0; /* FAIL */
12776 return 0; /* FAIL */
12778 orig_out
= operands
[0];
12779 tmp
= gen_reg_rtx (mode
);
12782 /* Recurse to get the constant loaded. */
12783 if (ix86_expand_int_movcc (operands
) == 0)
12784 return 0; /* FAIL */
12786 /* Mask in the interesting variable. */
12787 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12789 if (!rtx_equal_p (out
, orig_out
))
12790 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12792 return 1; /* DONE */
12796 * For comparison with above,
12806 if (! nonimmediate_operand (operands
[2], mode
))
12807 operands
[2] = force_reg (mode
, operands
[2]);
12808 if (! nonimmediate_operand (operands
[3], mode
))
12809 operands
[3] = force_reg (mode
, operands
[3]);
12811 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12813 rtx tmp
= gen_reg_rtx (mode
);
12814 emit_move_insn (tmp
, operands
[3]);
12817 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12819 rtx tmp
= gen_reg_rtx (mode
);
12820 emit_move_insn (tmp
, operands
[2]);
12824 if (! register_operand (operands
[2], VOIDmode
)
12826 || ! register_operand (operands
[3], VOIDmode
)))
12827 operands
[2] = force_reg (mode
, operands
[2]);
12830 && ! register_operand (operands
[3], VOIDmode
))
12831 operands
[3] = force_reg (mode
, operands
[3]);
12833 emit_insn (compare_seq
);
12834 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12835 gen_rtx_IF_THEN_ELSE (mode
,
12836 compare_op
, operands
[2],
12839 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12840 gen_rtx_IF_THEN_ELSE (mode
,
12842 copy_rtx (operands
[3]),
12843 copy_rtx (operands
[0]))));
12845 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12846 gen_rtx_IF_THEN_ELSE (mode
,
12848 copy_rtx (operands
[2]),
12849 copy_rtx (operands
[0]))));
12851 return 1; /* DONE */
12854 /* Swap, force into registers, or otherwise massage the two operands
12855 to an sse comparison with a mask result. Thus we differ a bit from
12856 ix86_prepare_fp_compare_args which expects to produce a flags result.
12858 The DEST operand exists to help determine whether to commute commutative
12859 operators. The POP0/POP1 operands are updated in place. The new
12860 comparison code is returned, or UNKNOWN if not implementable. */
12862 static enum rtx_code
12863 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12864 rtx
*pop0
, rtx
*pop1
)
12872 /* We have no LTGT as an operator. We could implement it with
12873 NE & ORDERED, but this requires an extra temporary. It's
12874 not clear that it's worth it. */
12881 /* These are supported directly. */
12888 /* For commutative operators, try to canonicalize the destination
12889 operand to be first in the comparison - this helps reload to
12890 avoid extra moves. */
12891 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12899 /* These are not supported directly. Swap the comparison operands
12900 to transform into something that is supported. */
12904 code
= swap_condition (code
);
12908 gcc_unreachable ();
12914 /* Detect conditional moves that exactly match min/max operational
12915 semantics. Note that this is IEEE safe, as long as we don't
12916 interchange the operands.
12918 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12919 and TRUE if the operation is successful and instructions are emitted. */
12922 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12923 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12925 enum machine_mode mode
;
12931 else if (code
== UNGE
)
12934 if_true
= if_false
;
12940 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12942 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12947 mode
= GET_MODE (dest
);
12949 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12950 but MODE may be a vector mode and thus not appropriate. */
12951 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12953 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12956 if_true
= force_reg (mode
, if_true
);
12957 v
= gen_rtvec (2, if_true
, if_false
);
12958 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12962 code
= is_min
? SMIN
: SMAX
;
12963 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12966 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12970 /* Expand an sse vector comparison. Return the register with the result. */
12973 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12974 rtx op_true
, rtx op_false
)
12976 enum machine_mode mode
= GET_MODE (dest
);
12979 cmp_op0
= force_reg (mode
, cmp_op0
);
12980 if (!nonimmediate_operand (cmp_op1
, mode
))
12981 cmp_op1
= force_reg (mode
, cmp_op1
);
12984 || reg_overlap_mentioned_p (dest
, op_true
)
12985 || reg_overlap_mentioned_p (dest
, op_false
))
12986 dest
= gen_reg_rtx (mode
);
12988 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12989 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12994 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12995 operations. This is used for both scalar and vector conditional moves. */
12998 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
13000 enum machine_mode mode
= GET_MODE (dest
);
13003 if (op_false
== CONST0_RTX (mode
))
13005 op_true
= force_reg (mode
, op_true
);
13006 x
= gen_rtx_AND (mode
, cmp
, op_true
);
13007 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13009 else if (op_true
== CONST0_RTX (mode
))
13011 op_false
= force_reg (mode
, op_false
);
13012 x
= gen_rtx_NOT (mode
, cmp
);
13013 x
= gen_rtx_AND (mode
, x
, op_false
);
13014 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13018 op_true
= force_reg (mode
, op_true
);
13019 op_false
= force_reg (mode
, op_false
);
13021 t2
= gen_reg_rtx (mode
);
13023 t3
= gen_reg_rtx (mode
);
13027 x
= gen_rtx_AND (mode
, op_true
, cmp
);
13028 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
13030 x
= gen_rtx_NOT (mode
, cmp
);
13031 x
= gen_rtx_AND (mode
, x
, op_false
);
13032 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
13034 x
= gen_rtx_IOR (mode
, t3
, t2
);
13035 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
13039 /* Expand a floating-point conditional move. Return true if successful. */
13042 ix86_expand_fp_movcc (rtx operands
[])
13044 enum machine_mode mode
= GET_MODE (operands
[0]);
13045 enum rtx_code code
= GET_CODE (operands
[1]);
13046 rtx tmp
, compare_op
, second_test
, bypass_test
;
13048 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
13050 enum machine_mode cmode
;
13052 /* Since we've no cmove for sse registers, don't force bad register
13053 allocation just to gain access to it. Deny movcc when the
13054 comparison mode doesn't match the move mode. */
13055 cmode
= GET_MODE (ix86_compare_op0
);
13056 if (cmode
== VOIDmode
)
13057 cmode
= GET_MODE (ix86_compare_op1
);
13061 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13063 &ix86_compare_op1
);
13064 if (code
== UNKNOWN
)
13067 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
13068 ix86_compare_op1
, operands
[2],
13072 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
13073 ix86_compare_op1
, operands
[2], operands
[3]);
13074 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
13078 /* The floating point conditional move instructions don't directly
13079 support conditions resulting from a signed integer comparison. */
13081 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13083 /* The floating point conditional move instructions don't directly
13084 support signed integer comparisons. */
13086 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
13088 gcc_assert (!second_test
&& !bypass_test
);
13089 tmp
= gen_reg_rtx (QImode
);
13090 ix86_expand_setcc (code
, tmp
);
13092 ix86_compare_op0
= tmp
;
13093 ix86_compare_op1
= const0_rtx
;
13094 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
13096 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
13098 tmp
= gen_reg_rtx (mode
);
13099 emit_move_insn (tmp
, operands
[3]);
13102 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
13104 tmp
= gen_reg_rtx (mode
);
13105 emit_move_insn (tmp
, operands
[2]);
13109 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13110 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
13111 operands
[2], operands
[3])));
13113 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13114 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
13115 operands
[3], operands
[0])));
13117 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
13118 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
13119 operands
[2], operands
[0])));
13124 /* Expand a floating-point vector conditional move; a vcond operation
13125 rather than a movcc operation. */
13128 ix86_expand_fp_vcond (rtx operands
[])
13130 enum rtx_code code
= GET_CODE (operands
[3]);
13133 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
13134 &operands
[4], &operands
[5]);
13135 if (code
== UNKNOWN
)
13138 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
13139 operands
[5], operands
[1], operands
[2]))
13142 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
13143 operands
[1], operands
[2]);
13144 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
13148 /* Expand a signed/unsigned integral vector conditional move. */
13151 ix86_expand_int_vcond (rtx operands
[])
13153 enum machine_mode mode
= GET_MODE (operands
[0]);
13154 enum rtx_code code
= GET_CODE (operands
[3]);
13155 bool negate
= false;
13158 cop0
= operands
[4];
13159 cop1
= operands
[5];
13161 /* Canonicalize the comparison to EQ, GT, GTU. */
13172 code
= reverse_condition (code
);
13178 code
= reverse_condition (code
);
13184 code
= swap_condition (code
);
13185 x
= cop0
, cop0
= cop1
, cop1
= x
;
13189 gcc_unreachable ();
13192 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13193 if (mode
== V2DImode
)
13198 /* SSE4.1 supports EQ. */
13199 if (!TARGET_SSE4_1
)
13205 /* SSE4.2 supports GT/GTU. */
13206 if (!TARGET_SSE4_2
)
13211 gcc_unreachable ();
13215 /* Unsigned parallel compare is not supported by the hardware. Play some
13216 tricks to turn this into a signed comparison against 0. */
13219 cop0
= force_reg (mode
, cop0
);
13228 /* Perform a parallel modulo subtraction. */
13229 t1
= gen_reg_rtx (mode
);
13230 emit_insn ((mode
== V4SImode
13232 : gen_subv2di3
) (t1
, cop0
, cop1
));
13234 /* Extract the original sign bit of op0. */
13235 mask
= ix86_build_signbit_mask (GET_MODE_INNER (mode
),
13237 t2
= gen_reg_rtx (mode
);
13238 emit_insn ((mode
== V4SImode
13240 : gen_andv2di3
) (t2
, cop0
, mask
));
13242 /* XOR it back into the result of the subtraction. This results
13243 in the sign bit set iff we saw unsigned underflow. */
13244 x
= gen_reg_rtx (mode
);
13245 emit_insn ((mode
== V4SImode
13247 : gen_xorv2di3
) (x
, t1
, t2
));
13255 /* Perform a parallel unsigned saturating subtraction. */
13256 x
= gen_reg_rtx (mode
);
13257 emit_insn (gen_rtx_SET (VOIDmode
, x
,
13258 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
13265 gcc_unreachable ();
13269 cop1
= CONST0_RTX (mode
);
13272 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
13273 operands
[1+negate
], operands
[2-negate
]);
13275 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
13276 operands
[2-negate
]);
13280 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13281 true if we should do zero extension, else sign extension. HIGH_P is
13282 true if we want the N/2 high elements, else the low elements. */
13285 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13287 enum machine_mode imode
= GET_MODE (operands
[1]);
13288 rtx (*unpack
)(rtx
, rtx
, rtx
);
13295 unpack
= gen_vec_interleave_highv16qi
;
13297 unpack
= gen_vec_interleave_lowv16qi
;
13301 unpack
= gen_vec_interleave_highv8hi
;
13303 unpack
= gen_vec_interleave_lowv8hi
;
13307 unpack
= gen_vec_interleave_highv4si
;
13309 unpack
= gen_vec_interleave_lowv4si
;
13312 gcc_unreachable ();
13315 dest
= gen_lowpart (imode
, operands
[0]);
13318 se
= force_reg (imode
, CONST0_RTX (imode
));
13320 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
13321 operands
[1], pc_rtx
, pc_rtx
);
13323 emit_insn (unpack (dest
, operands
[1], se
));
13326 /* This function performs the same task as ix86_expand_sse_unpack,
13327 but with SSE4.1 instructions. */
13330 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13332 enum machine_mode imode
= GET_MODE (operands
[1]);
13333 rtx (*unpack
)(rtx
, rtx
);
13340 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
13342 unpack
= gen_sse4_1_extendv8qiv8hi2
;
13346 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
13348 unpack
= gen_sse4_1_extendv4hiv4si2
;
13352 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
13354 unpack
= gen_sse4_1_extendv2siv2di2
;
13357 gcc_unreachable ();
13360 dest
= operands
[0];
13363 /* Shift higher 8 bytes to lower 8 bytes. */
13364 src
= gen_reg_rtx (imode
);
13365 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode
, src
),
13366 gen_lowpart (TImode
, operands
[1]),
13372 emit_insn (unpack (dest
, src
));
13375 /* Expand conditional increment or decrement using adb/sbb instructions.
13376 The default case using setcc followed by the conditional move can be
13377 done by generic code. */
13379 ix86_expand_int_addcc (rtx operands
[])
13381 enum rtx_code code
= GET_CODE (operands
[1]);
13383 rtx val
= const0_rtx
;
13384 bool fpcmp
= false;
13385 enum machine_mode mode
= GET_MODE (operands
[0]);
13387 if (operands
[3] != const1_rtx
13388 && operands
[3] != constm1_rtx
)
13390 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
13391 ix86_compare_op1
, &compare_op
))
13393 code
= GET_CODE (compare_op
);
13395 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
13396 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
13399 code
= ix86_fp_compare_code_to_integer (code
);
13406 PUT_CODE (compare_op
,
13407 reverse_condition_maybe_unordered
13408 (GET_CODE (compare_op
)));
13410 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
13412 PUT_MODE (compare_op
, mode
);
13414 /* Construct either adc or sbb insn. */
13415 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
13417 switch (GET_MODE (operands
[0]))
13420 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13423 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13426 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13429 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13432 gcc_unreachable ();
13437 switch (GET_MODE (operands
[0]))
13440 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13443 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13446 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13449 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13452 gcc_unreachable ();
13455 return 1; /* DONE */
13459 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13460 works for floating pointer parameters and nonoffsetable memories.
13461 For pushes, it returns just stack offsets; the values will be saved
13462 in the right order. Maximally three parts are generated. */
13465 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
13470 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
13472 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
13474 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
13475 gcc_assert (size
>= 2 && size
<= 3);
13477 /* Optimize constant pool reference to immediates. This is used by fp
13478 moves, that force all constants to memory to allow combining. */
13479 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
13481 rtx tmp
= maybe_get_pool_constant (operand
);
13486 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
13488 /* The only non-offsetable memories we handle are pushes. */
13489 int ok
= push_operand (operand
, VOIDmode
);
13493 operand
= copy_rtx (operand
);
13494 PUT_MODE (operand
, Pmode
);
13495 parts
[0] = parts
[1] = parts
[2] = operand
;
13499 if (GET_CODE (operand
) == CONST_VECTOR
)
13501 enum machine_mode imode
= int_mode_for_mode (mode
);
13502 /* Caution: if we looked through a constant pool memory above,
13503 the operand may actually have a different mode now. That's
13504 ok, since we want to pun this all the way back to an integer. */
13505 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
13506 gcc_assert (operand
!= NULL
);
13512 if (mode
== DImode
)
13513 split_di (&operand
, 1, &parts
[0], &parts
[1]);
13516 if (REG_P (operand
))
13518 gcc_assert (reload_completed
);
13519 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
13520 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
13522 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
13524 else if (offsettable_memref_p (operand
))
13526 operand
= adjust_address (operand
, SImode
, 0);
13527 parts
[0] = operand
;
13528 parts
[1] = adjust_address (operand
, SImode
, 4);
13530 parts
[2] = adjust_address (operand
, SImode
, 8);
13532 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13537 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13541 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
13542 parts
[2] = gen_int_mode (l
[2], SImode
);
13545 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
13548 gcc_unreachable ();
13550 parts
[1] = gen_int_mode (l
[1], SImode
);
13551 parts
[0] = gen_int_mode (l
[0], SImode
);
13554 gcc_unreachable ();
13559 if (mode
== TImode
)
13560 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
13561 if (mode
== XFmode
|| mode
== TFmode
)
13563 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
13564 if (REG_P (operand
))
13566 gcc_assert (reload_completed
);
13567 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
13568 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
13570 else if (offsettable_memref_p (operand
))
13572 operand
= adjust_address (operand
, DImode
, 0);
13573 parts
[0] = operand
;
13574 parts
[1] = adjust_address (operand
, upper_mode
, 8);
13576 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13581 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13582 real_to_target (l
, &r
, mode
);
13584 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13585 if (HOST_BITS_PER_WIDE_INT
>= 64)
13588 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13589 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13592 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13594 if (upper_mode
== SImode
)
13595 parts
[1] = gen_int_mode (l
[2], SImode
);
13596 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13599 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13600 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13603 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13606 gcc_unreachable ();
13613 /* Emit insns to perform a move or push of DI, DF, and XF values.
13614 Return false when normal moves are needed; true when all required
13615 insns have been emitted. Operands 2-4 contain the input values
13616 int the correct order; operands 5-7 contain the output values. */
13619 ix86_split_long_move (rtx operands
[])
13624 int collisions
= 0;
13625 enum machine_mode mode
= GET_MODE (operands
[0]);
13627 /* The DFmode expanders may ask us to move double.
13628 For 64bit target this is single move. By hiding the fact
13629 here we simplify i386.md splitters. */
13630 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13632 /* Optimize constant pool reference to immediates. This is used by
13633 fp moves, that force all constants to memory to allow combining. */
13635 if (MEM_P (operands
[1])
13636 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13637 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13638 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13639 if (push_operand (operands
[0], VOIDmode
))
13641 operands
[0] = copy_rtx (operands
[0]);
13642 PUT_MODE (operands
[0], Pmode
);
13645 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13646 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13647 emit_move_insn (operands
[0], operands
[1]);
13651 /* The only non-offsettable memory we handle is push. */
13652 if (push_operand (operands
[0], VOIDmode
))
13655 gcc_assert (!MEM_P (operands
[0])
13656 || offsettable_memref_p (operands
[0]));
13658 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13659 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13661 /* When emitting push, take care for source operands on the stack. */
13662 if (push
&& MEM_P (operands
[1])
13663 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13666 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13667 XEXP (part
[1][2], 0));
13668 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13669 XEXP (part
[1][1], 0));
13672 /* We need to do copy in the right order in case an address register
13673 of the source overlaps the destination. */
13674 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13676 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13678 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13681 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13684 /* Collision in the middle part can be handled by reordering. */
13685 if (collisions
== 1 && nparts
== 3
13686 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13689 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13690 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13693 /* If there are more collisions, we can't handle it by reordering.
13694 Do an lea to the last part and use only one colliding move. */
13695 else if (collisions
> 1)
13701 base
= part
[0][nparts
- 1];
13703 /* Handle the case when the last part isn't valid for lea.
13704 Happens in 64-bit mode storing the 12-byte XFmode. */
13705 if (GET_MODE (base
) != Pmode
)
13706 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13708 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13709 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13710 part
[1][1] = replace_equiv_address (part
[1][1],
13711 plus_constant (base
, UNITS_PER_WORD
));
13713 part
[1][2] = replace_equiv_address (part
[1][2],
13714 plus_constant (base
, 8));
13724 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13725 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13726 emit_move_insn (part
[0][2], part
[1][2]);
13731 /* In 64bit mode we don't have 32bit push available. In case this is
13732 register, it is OK - we will just use larger counterpart. We also
13733 retype memory - these comes from attempt to avoid REX prefix on
13734 moving of second half of TFmode value. */
13735 if (GET_MODE (part
[1][1]) == SImode
)
13737 switch (GET_CODE (part
[1][1]))
13740 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13744 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13748 gcc_unreachable ();
13751 if (GET_MODE (part
[1][0]) == SImode
)
13752 part
[1][0] = part
[1][1];
13755 emit_move_insn (part
[0][1], part
[1][1]);
13756 emit_move_insn (part
[0][0], part
[1][0]);
13760 /* Choose correct order to not overwrite the source before it is copied. */
13761 if ((REG_P (part
[0][0])
13762 && REG_P (part
[1][1])
13763 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13765 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13767 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13771 operands
[2] = part
[0][2];
13772 operands
[3] = part
[0][1];
13773 operands
[4] = part
[0][0];
13774 operands
[5] = part
[1][2];
13775 operands
[6] = part
[1][1];
13776 operands
[7] = part
[1][0];
13780 operands
[2] = part
[0][1];
13781 operands
[3] = part
[0][0];
13782 operands
[5] = part
[1][1];
13783 operands
[6] = part
[1][0];
13790 operands
[2] = part
[0][0];
13791 operands
[3] = part
[0][1];
13792 operands
[4] = part
[0][2];
13793 operands
[5] = part
[1][0];
13794 operands
[6] = part
[1][1];
13795 operands
[7] = part
[1][2];
13799 operands
[2] = part
[0][0];
13800 operands
[3] = part
[0][1];
13801 operands
[5] = part
[1][0];
13802 operands
[6] = part
[1][1];
13806 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13809 if (CONST_INT_P (operands
[5])
13810 && operands
[5] != const0_rtx
13811 && REG_P (operands
[2]))
13813 if (CONST_INT_P (operands
[6])
13814 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13815 operands
[6] = operands
[2];
13818 && CONST_INT_P (operands
[7])
13819 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13820 operands
[7] = operands
[2];
13824 && CONST_INT_P (operands
[6])
13825 && operands
[6] != const0_rtx
13826 && REG_P (operands
[3])
13827 && CONST_INT_P (operands
[7])
13828 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13829 operands
[7] = operands
[3];
13832 emit_move_insn (operands
[2], operands
[5]);
13833 emit_move_insn (operands
[3], operands
[6]);
13835 emit_move_insn (operands
[4], operands
[7]);
13840 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13841 left shift by a constant, either using a single shift or
13842 a sequence of add instructions. */
13845 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13849 emit_insn ((mode
== DImode
13851 : gen_adddi3
) (operand
, operand
, operand
));
13853 else if (!optimize_size
13854 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13857 for (i
=0; i
<count
; i
++)
13859 emit_insn ((mode
== DImode
13861 : gen_adddi3
) (operand
, operand
, operand
));
13865 emit_insn ((mode
== DImode
13867 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13871 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13873 rtx low
[2], high
[2];
13875 const int single_width
= mode
== DImode
? 32 : 64;
13877 if (CONST_INT_P (operands
[2]))
13879 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13880 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13882 if (count
>= single_width
)
13884 emit_move_insn (high
[0], low
[1]);
13885 emit_move_insn (low
[0], const0_rtx
);
13887 if (count
> single_width
)
13888 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13892 if (!rtx_equal_p (operands
[0], operands
[1]))
13893 emit_move_insn (operands
[0], operands
[1]);
13894 emit_insn ((mode
== DImode
13896 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13897 ix86_expand_ashl_const (low
[0], count
, mode
);
13902 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13904 if (operands
[1] == const1_rtx
)
13906 /* Assuming we've chosen a QImode capable registers, then 1 << N
13907 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13908 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13910 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13912 ix86_expand_clear (low
[0]);
13913 ix86_expand_clear (high
[0]);
13914 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13916 d
= gen_lowpart (QImode
, low
[0]);
13917 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13918 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13919 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13921 d
= gen_lowpart (QImode
, high
[0]);
13922 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13923 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13924 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13927 /* Otherwise, we can get the same results by manually performing
13928 a bit extract operation on bit 5/6, and then performing the two
13929 shifts. The two methods of getting 0/1 into low/high are exactly
13930 the same size. Avoiding the shift in the bit extract case helps
13931 pentium4 a bit; no one else seems to care much either way. */
13936 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13937 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13939 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13940 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13942 emit_insn ((mode
== DImode
13944 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13945 emit_insn ((mode
== DImode
13947 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13948 emit_move_insn (low
[0], high
[0]);
13949 emit_insn ((mode
== DImode
13951 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13954 emit_insn ((mode
== DImode
13956 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13957 emit_insn ((mode
== DImode
13959 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13963 if (operands
[1] == constm1_rtx
)
13965 /* For -1 << N, we can avoid the shld instruction, because we
13966 know that we're shifting 0...31/63 ones into a -1. */
13967 emit_move_insn (low
[0], constm1_rtx
);
13969 emit_move_insn (high
[0], low
[0]);
13971 emit_move_insn (high
[0], constm1_rtx
);
13975 if (!rtx_equal_p (operands
[0], operands
[1]))
13976 emit_move_insn (operands
[0], operands
[1]);
13978 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13979 emit_insn ((mode
== DImode
13981 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13984 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13986 if (TARGET_CMOVE
&& scratch
)
13988 ix86_expand_clear (scratch
);
13989 emit_insn ((mode
== DImode
13990 ? gen_x86_shift_adj_1
13991 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13994 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13998 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14000 rtx low
[2], high
[2];
14002 const int single_width
= mode
== DImode
? 32 : 64;
14004 if (CONST_INT_P (operands
[2]))
14006 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14007 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14009 if (count
== single_width
* 2 - 1)
14011 emit_move_insn (high
[0], high
[1]);
14012 emit_insn ((mode
== DImode
14014 : gen_ashrdi3
) (high
[0], high
[0],
14015 GEN_INT (single_width
- 1)));
14016 emit_move_insn (low
[0], high
[0]);
14019 else if (count
>= single_width
)
14021 emit_move_insn (low
[0], high
[1]);
14022 emit_move_insn (high
[0], low
[0]);
14023 emit_insn ((mode
== DImode
14025 : gen_ashrdi3
) (high
[0], high
[0],
14026 GEN_INT (single_width
- 1)));
14027 if (count
> single_width
)
14028 emit_insn ((mode
== DImode
14030 : gen_ashrdi3
) (low
[0], low
[0],
14031 GEN_INT (count
- single_width
)));
14035 if (!rtx_equal_p (operands
[0], operands
[1]))
14036 emit_move_insn (operands
[0], operands
[1]);
14037 emit_insn ((mode
== DImode
14039 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14040 emit_insn ((mode
== DImode
14042 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14047 if (!rtx_equal_p (operands
[0], operands
[1]))
14048 emit_move_insn (operands
[0], operands
[1]);
14050 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14052 emit_insn ((mode
== DImode
14054 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14055 emit_insn ((mode
== DImode
14057 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
14059 if (TARGET_CMOVE
&& scratch
)
14061 emit_move_insn (scratch
, high
[0]);
14062 emit_insn ((mode
== DImode
14064 : gen_ashrdi3
) (scratch
, scratch
,
14065 GEN_INT (single_width
- 1)));
14066 emit_insn ((mode
== DImode
14067 ? gen_x86_shift_adj_1
14068 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14072 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
14077 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
14079 rtx low
[2], high
[2];
14081 const int single_width
= mode
== DImode
? 32 : 64;
14083 if (CONST_INT_P (operands
[2]))
14085 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
14086 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
14088 if (count
>= single_width
)
14090 emit_move_insn (low
[0], high
[1]);
14091 ix86_expand_clear (high
[0]);
14093 if (count
> single_width
)
14094 emit_insn ((mode
== DImode
14096 : gen_lshrdi3
) (low
[0], low
[0],
14097 GEN_INT (count
- single_width
)));
14101 if (!rtx_equal_p (operands
[0], operands
[1]))
14102 emit_move_insn (operands
[0], operands
[1]);
14103 emit_insn ((mode
== DImode
14105 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
14106 emit_insn ((mode
== DImode
14108 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
14113 if (!rtx_equal_p (operands
[0], operands
[1]))
14114 emit_move_insn (operands
[0], operands
[1]);
14116 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
14118 emit_insn ((mode
== DImode
14120 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
14121 emit_insn ((mode
== DImode
14123 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
14125 /* Heh. By reversing the arguments, we can reuse this pattern. */
14126 if (TARGET_CMOVE
&& scratch
)
14128 ix86_expand_clear (scratch
);
14129 emit_insn ((mode
== DImode
14130 ? gen_x86_shift_adj_1
14131 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
14135 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
14139 /* Predict just emitted jump instruction to be taken with probability PROB. */
14141 predict_jump (int prob
)
14143 rtx insn
= get_last_insn ();
14144 gcc_assert (JUMP_P (insn
));
14146 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
14151 /* Helper function for the string operations below. Dest VARIABLE whether
14152 it is aligned to VALUE bytes. If true, jump to the label. */
14154 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
14156 rtx label
= gen_label_rtx ();
14157 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
14158 if (GET_MODE (variable
) == DImode
)
14159 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
14161 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
14162 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
14165 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
14167 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14171 /* Adjust COUNTER by the VALUE. */
14173 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
14175 if (GET_MODE (countreg
) == DImode
)
14176 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
14178 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
14181 /* Zero extend possibly SImode EXP to Pmode register. */
14183 ix86_zero_extend_to_Pmode (rtx exp
)
14186 if (GET_MODE (exp
) == VOIDmode
)
14187 return force_reg (Pmode
, exp
);
14188 if (GET_MODE (exp
) == Pmode
)
14189 return copy_to_mode_reg (Pmode
, exp
);
14190 r
= gen_reg_rtx (Pmode
);
14191 emit_insn (gen_zero_extendsidi2 (r
, exp
));
14195 /* Divide COUNTREG by SCALE. */
14197 scale_counter (rtx countreg
, int scale
)
14200 rtx piece_size_mask
;
14204 if (CONST_INT_P (countreg
))
14205 return GEN_INT (INTVAL (countreg
) / scale
);
14206 gcc_assert (REG_P (countreg
));
14208 piece_size_mask
= GEN_INT (scale
- 1);
14209 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
14210 GEN_INT (exact_log2 (scale
)),
14211 NULL
, 1, OPTAB_DIRECT
);
14215 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14216 DImode for constant loop counts. */
14218 static enum machine_mode
14219 counter_mode (rtx count_exp
)
14221 if (GET_MODE (count_exp
) != VOIDmode
)
14222 return GET_MODE (count_exp
);
14223 if (GET_CODE (count_exp
) != CONST_INT
)
14225 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
14230 /* When SRCPTR is non-NULL, output simple loop to move memory
14231 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14232 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14233 equivalent loop to set memory by VALUE (supposed to be in MODE).
14235 The size is rounded down to whole number of chunk size moved at once.
14236 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14240 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
14241 rtx destptr
, rtx srcptr
, rtx value
,
14242 rtx count
, enum machine_mode mode
, int unroll
,
14245 rtx out_label
, top_label
, iter
, tmp
;
14246 enum machine_mode iter_mode
= counter_mode (count
);
14247 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
14248 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
14254 top_label
= gen_label_rtx ();
14255 out_label
= gen_label_rtx ();
14256 iter
= gen_reg_rtx (iter_mode
);
14258 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
14259 NULL
, 1, OPTAB_DIRECT
);
14260 /* Those two should combine. */
14261 if (piece_size
== const1_rtx
)
14263 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
14265 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
14267 emit_move_insn (iter
, const0_rtx
);
14269 emit_label (top_label
);
14271 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
14272 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
14273 destmem
= change_address (destmem
, mode
, x_addr
);
14277 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
14278 srcmem
= change_address (srcmem
, mode
, y_addr
);
14280 /* When unrolling for chips that reorder memory reads and writes,
14281 we can save registers by using single temporary.
14282 Also using 4 temporaries is overkill in 32bit mode. */
14283 if (!TARGET_64BIT
&& 0)
14285 for (i
= 0; i
< unroll
; i
++)
14290 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14292 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14294 emit_move_insn (destmem
, srcmem
);
14300 gcc_assert (unroll
<= 4);
14301 for (i
= 0; i
< unroll
; i
++)
14303 tmpreg
[i
] = gen_reg_rtx (mode
);
14307 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14309 emit_move_insn (tmpreg
[i
], srcmem
);
14311 for (i
= 0; i
< unroll
; i
++)
14316 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14318 emit_move_insn (destmem
, tmpreg
[i
]);
14323 for (i
= 0; i
< unroll
; i
++)
14327 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14328 emit_move_insn (destmem
, value
);
14331 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
14332 true, OPTAB_LIB_WIDEN
);
14334 emit_move_insn (iter
, tmp
);
14336 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
14338 if (expected_size
!= -1)
14340 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
14341 if (expected_size
== 0)
14343 else if (expected_size
> REG_BR_PROB_BASE
)
14344 predict_jump (REG_BR_PROB_BASE
- 1);
14346 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
14349 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
14350 iter
= ix86_zero_extend_to_Pmode (iter
);
14351 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
14352 true, OPTAB_LIB_WIDEN
);
14353 if (tmp
!= destptr
)
14354 emit_move_insn (destptr
, tmp
);
14357 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
14358 true, OPTAB_LIB_WIDEN
);
14360 emit_move_insn (srcptr
, tmp
);
14362 emit_label (out_label
);
14365 /* Output "rep; mov" instruction.
14366 Arguments have same meaning as for previous function */
14368 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
14369 rtx destptr
, rtx srcptr
,
14371 enum machine_mode mode
)
14377 /* If the size is known, it is shorter to use rep movs. */
14378 if (mode
== QImode
&& CONST_INT_P (count
)
14379 && !(INTVAL (count
) & 3))
14382 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14383 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14384 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
14385 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
14386 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14387 if (mode
!= QImode
)
14389 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14390 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14391 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14392 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14393 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14394 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
14398 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14399 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
14401 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
14405 /* Output "rep; stos" instruction.
14406 Arguments have same meaning as for previous function */
14408 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
14410 enum machine_mode mode
)
14415 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14416 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14417 value
= force_reg (mode
, gen_lowpart (mode
, value
));
14418 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14419 if (mode
!= QImode
)
14421 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14422 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14423 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14426 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14427 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
14431 emit_strmov (rtx destmem
, rtx srcmem
,
14432 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
14434 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
14435 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
14436 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14439 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14441 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
14442 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
14445 if (CONST_INT_P (count
))
14447 HOST_WIDE_INT countval
= INTVAL (count
);
14450 if ((countval
& 0x10) && max_size
> 16)
14454 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14455 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
14458 gcc_unreachable ();
14461 if ((countval
& 0x08) && max_size
> 8)
14464 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14467 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14468 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
14472 if ((countval
& 0x04) && max_size
> 4)
14474 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14477 if ((countval
& 0x02) && max_size
> 2)
14479 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
14482 if ((countval
& 0x01) && max_size
> 1)
14484 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
14491 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
14492 count
, 1, OPTAB_DIRECT
);
14493 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
14494 count
, QImode
, 1, 4);
14498 /* When there are stringops, we can cheaply increase dest and src pointers.
14499 Otherwise we save code size by maintaining offset (zero is readily
14500 available from preceding rep operation) and using x86 addressing modes.
14502 if (TARGET_SINGLE_STRINGOP
)
14506 rtx label
= ix86_expand_aligntest (count
, 4, true);
14507 src
= change_address (srcmem
, SImode
, srcptr
);
14508 dest
= change_address (destmem
, SImode
, destptr
);
14509 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14510 emit_label (label
);
14511 LABEL_NUSES (label
) = 1;
14515 rtx label
= ix86_expand_aligntest (count
, 2, true);
14516 src
= change_address (srcmem
, HImode
, srcptr
);
14517 dest
= change_address (destmem
, HImode
, destptr
);
14518 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14519 emit_label (label
);
14520 LABEL_NUSES (label
) = 1;
14524 rtx label
= ix86_expand_aligntest (count
, 1, true);
14525 src
= change_address (srcmem
, QImode
, srcptr
);
14526 dest
= change_address (destmem
, QImode
, destptr
);
14527 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14528 emit_label (label
);
14529 LABEL_NUSES (label
) = 1;
14534 rtx offset
= force_reg (Pmode
, const0_rtx
);
14539 rtx label
= ix86_expand_aligntest (count
, 4, true);
14540 src
= change_address (srcmem
, SImode
, srcptr
);
14541 dest
= change_address (destmem
, SImode
, destptr
);
14542 emit_move_insn (dest
, src
);
14543 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
14544 true, OPTAB_LIB_WIDEN
);
14546 emit_move_insn (offset
, tmp
);
14547 emit_label (label
);
14548 LABEL_NUSES (label
) = 1;
14552 rtx label
= ix86_expand_aligntest (count
, 2, true);
14553 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14554 src
= change_address (srcmem
, HImode
, tmp
);
14555 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14556 dest
= change_address (destmem
, HImode
, tmp
);
14557 emit_move_insn (dest
, src
);
14558 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
14559 true, OPTAB_LIB_WIDEN
);
14561 emit_move_insn (offset
, tmp
);
14562 emit_label (label
);
14563 LABEL_NUSES (label
) = 1;
14567 rtx label
= ix86_expand_aligntest (count
, 1, true);
14568 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14569 src
= change_address (srcmem
, QImode
, tmp
);
14570 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14571 dest
= change_address (destmem
, QImode
, tmp
);
14572 emit_move_insn (dest
, src
);
14573 emit_label (label
);
14574 LABEL_NUSES (label
) = 1;
14579 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14581 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
14582 rtx count
, int max_size
)
14585 expand_simple_binop (counter_mode (count
), AND
, count
,
14586 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14587 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14588 gen_lowpart (QImode
, value
), count
, QImode
,
14592 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14594 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
14598 if (CONST_INT_P (count
))
14600 HOST_WIDE_INT countval
= INTVAL (count
);
14603 if ((countval
& 0x10) && max_size
> 16)
14607 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14608 emit_insn (gen_strset (destptr
, dest
, value
));
14609 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14610 emit_insn (gen_strset (destptr
, dest
, value
));
14613 gcc_unreachable ();
14616 if ((countval
& 0x08) && max_size
> 8)
14620 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14621 emit_insn (gen_strset (destptr
, dest
, value
));
14625 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14626 emit_insn (gen_strset (destptr
, dest
, value
));
14627 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14628 emit_insn (gen_strset (destptr
, dest
, value
));
14632 if ((countval
& 0x04) && max_size
> 4)
14634 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14635 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14638 if ((countval
& 0x02) && max_size
> 2)
14640 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14641 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14644 if ((countval
& 0x01) && max_size
> 1)
14646 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14647 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14654 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
14659 rtx label
= ix86_expand_aligntest (count
, 16, true);
14662 dest
= change_address (destmem
, DImode
, destptr
);
14663 emit_insn (gen_strset (destptr
, dest
, value
));
14664 emit_insn (gen_strset (destptr
, dest
, value
));
14668 dest
= change_address (destmem
, SImode
, destptr
);
14669 emit_insn (gen_strset (destptr
, dest
, value
));
14670 emit_insn (gen_strset (destptr
, dest
, value
));
14671 emit_insn (gen_strset (destptr
, dest
, value
));
14672 emit_insn (gen_strset (destptr
, dest
, value
));
14674 emit_label (label
);
14675 LABEL_NUSES (label
) = 1;
14679 rtx label
= ix86_expand_aligntest (count
, 8, true);
14682 dest
= change_address (destmem
, DImode
, destptr
);
14683 emit_insn (gen_strset (destptr
, dest
, value
));
14687 dest
= change_address (destmem
, SImode
, destptr
);
14688 emit_insn (gen_strset (destptr
, dest
, value
));
14689 emit_insn (gen_strset (destptr
, dest
, value
));
14691 emit_label (label
);
14692 LABEL_NUSES (label
) = 1;
14696 rtx label
= ix86_expand_aligntest (count
, 4, true);
14697 dest
= change_address (destmem
, SImode
, destptr
);
14698 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14699 emit_label (label
);
14700 LABEL_NUSES (label
) = 1;
14704 rtx label
= ix86_expand_aligntest (count
, 2, true);
14705 dest
= change_address (destmem
, HImode
, destptr
);
14706 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14707 emit_label (label
);
14708 LABEL_NUSES (label
) = 1;
14712 rtx label
= ix86_expand_aligntest (count
, 1, true);
14713 dest
= change_address (destmem
, QImode
, destptr
);
14714 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14715 emit_label (label
);
14716 LABEL_NUSES (label
) = 1;
14720 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14721 DESIRED_ALIGNMENT. */
14723 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14724 rtx destptr
, rtx srcptr
, rtx count
,
14725 int align
, int desired_alignment
)
14727 if (align
<= 1 && desired_alignment
> 1)
14729 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14730 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14731 destmem
= change_address (destmem
, QImode
, destptr
);
14732 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14733 ix86_adjust_counter (count
, 1);
14734 emit_label (label
);
14735 LABEL_NUSES (label
) = 1;
14737 if (align
<= 2 && desired_alignment
> 2)
14739 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14740 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14741 destmem
= change_address (destmem
, HImode
, destptr
);
14742 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14743 ix86_adjust_counter (count
, 2);
14744 emit_label (label
);
14745 LABEL_NUSES (label
) = 1;
14747 if (align
<= 4 && desired_alignment
> 4)
14749 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14750 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14751 destmem
= change_address (destmem
, SImode
, destptr
);
14752 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14753 ix86_adjust_counter (count
, 4);
14754 emit_label (label
);
14755 LABEL_NUSES (label
) = 1;
14757 gcc_assert (desired_alignment
<= 8);
14760 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14761 DESIRED_ALIGNMENT. */
14763 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14764 int align
, int desired_alignment
)
14766 if (align
<= 1 && desired_alignment
> 1)
14768 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14769 destmem
= change_address (destmem
, QImode
, destptr
);
14770 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14771 ix86_adjust_counter (count
, 1);
14772 emit_label (label
);
14773 LABEL_NUSES (label
) = 1;
14775 if (align
<= 2 && desired_alignment
> 2)
14777 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14778 destmem
= change_address (destmem
, HImode
, destptr
);
14779 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14780 ix86_adjust_counter (count
, 2);
14781 emit_label (label
);
14782 LABEL_NUSES (label
) = 1;
14784 if (align
<= 4 && desired_alignment
> 4)
14786 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14787 destmem
= change_address (destmem
, SImode
, destptr
);
14788 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14789 ix86_adjust_counter (count
, 4);
14790 emit_label (label
);
14791 LABEL_NUSES (label
) = 1;
14793 gcc_assert (desired_alignment
<= 8);
14796 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14797 static enum stringop_alg
14798 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14799 int *dynamic_check
)
14801 const struct stringop_algs
* algs
;
14803 *dynamic_check
= -1;
14805 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14807 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14808 if (stringop_alg
!= no_stringop
)
14809 return stringop_alg
;
14810 /* rep; movq or rep; movl is the smallest variant. */
14811 else if (optimize_size
)
14813 if (!count
|| (count
& 3))
14814 return rep_prefix_1_byte
;
14816 return rep_prefix_4_byte
;
14818 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14820 else if (expected_size
!= -1 && expected_size
< 4)
14821 return loop_1_byte
;
14822 else if (expected_size
!= -1)
14825 enum stringop_alg alg
= libcall
;
14826 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14828 gcc_assert (algs
->size
[i
].max
);
14829 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14831 if (algs
->size
[i
].alg
!= libcall
)
14832 alg
= algs
->size
[i
].alg
;
14833 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14834 last non-libcall inline algorithm. */
14835 if (TARGET_INLINE_ALL_STRINGOPS
)
14837 /* When the current size is best to be copied by a libcall,
14838 but we are still forced to inline, run the heuristic bellow
14839 that will pick code for medium sized blocks. */
14840 if (alg
!= libcall
)
14845 return algs
->size
[i
].alg
;
14848 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14850 /* When asked to inline the call anyway, try to pick meaningful choice.
14851 We look for maximal size of block that is faster to copy by hand and
14852 take blocks of at most of that size guessing that average size will
14853 be roughly half of the block.
14855 If this turns out to be bad, we might simply specify the preferred
14856 choice in ix86_costs. */
14857 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14858 && algs
->unknown_size
== libcall
)
14861 enum stringop_alg alg
;
14864 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14865 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14866 max
= algs
->size
[i
].max
;
14869 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14870 gcc_assert (*dynamic_check
== -1);
14871 gcc_assert (alg
!= libcall
);
14872 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14873 *dynamic_check
= max
;
14876 return algs
->unknown_size
;
14879 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14880 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14882 decide_alignment (int align
,
14883 enum stringop_alg alg
,
14886 int desired_align
= 0;
14890 gcc_unreachable ();
14892 case unrolled_loop
:
14893 desired_align
= GET_MODE_SIZE (Pmode
);
14895 case rep_prefix_8_byte
:
14898 case rep_prefix_4_byte
:
14899 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14900 copying whole cacheline at once. */
14901 if (TARGET_PENTIUMPRO
)
14906 case rep_prefix_1_byte
:
14907 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14908 copying whole cacheline at once. */
14909 if (TARGET_PENTIUMPRO
)
14923 if (desired_align
< align
)
14924 desired_align
= align
;
14925 if (expected_size
!= -1 && expected_size
< 4)
14926 desired_align
= align
;
14927 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  E.g. 0 -> 1, 1 -> 2,
   7 -> 8, 8 -> 16.  VAL is assumed non-negative and small enough that the
   result does not overflow int.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
)
14940 /* Expand string move (memcpy) operation. Use i386 string operations when
14941 profitable. expand_clrmem contains similar code. The code depends upon
14942 architecture, block size and alignment, but always has the same
14945 1) Prologue guard: Conditional that jumps up to epilogues for small
14946 blocks that can be handled by epilogue alone. This is faster but
14947 also needed for correctness, since prologue assume the block is larger
14948 than the desired alignment.
14950 Optional dynamic check for size and libcall for large
14951 blocks is emitted here too, with -minline-stringops-dynamically.
14953 2) Prologue: copy first few bytes in order to get destination aligned
14954 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14955 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14956 We emit either a jump tree on power of two sized blocks, or a byte loop.
14958 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14959 with specified algorithm.
14961 4) Epilogue: code copying tail of the block that is too small to be
14962 handled by main body (or up to size guarded by prologue guard). */
14965 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14966 rtx expected_align_exp
, rtx expected_size_exp
)
14972 rtx jump_around_label
= NULL
;
14973 HOST_WIDE_INT align
= 1;
14974 unsigned HOST_WIDE_INT count
= 0;
14975 HOST_WIDE_INT expected_size
= -1;
14976 int size_needed
= 0, epilogue_size_needed
;
14977 int desired_align
= 0;
14978 enum stringop_alg alg
;
14981 if (CONST_INT_P (align_exp
))
14982 align
= INTVAL (align_exp
);
14983 /* i386 can do misaligned access on reasonably increased cost. */
14984 if (CONST_INT_P (expected_align_exp
)
14985 && INTVAL (expected_align_exp
) > align
)
14986 align
= INTVAL (expected_align_exp
);
14987 if (CONST_INT_P (count_exp
))
14988 count
= expected_size
= INTVAL (count_exp
);
14989 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14990 expected_size
= INTVAL (expected_size_exp
);
14992 /* Step 0: Decide on preferred algorithm, desired alignment and
14993 size of chunks to be copied by main loop. */
14995 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14996 desired_align
= decide_alignment (align
, alg
, expected_size
);
14998 if (!TARGET_ALIGN_STRINGOPS
)
14999 align
= desired_align
;
15001 if (alg
== libcall
)
15003 gcc_assert (alg
!= no_stringop
);
15005 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
15006 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15007 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
15012 gcc_unreachable ();
15014 size_needed
= GET_MODE_SIZE (Pmode
);
15016 case unrolled_loop
:
15017 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
15019 case rep_prefix_8_byte
:
15022 case rep_prefix_4_byte
:
15025 case rep_prefix_1_byte
:
15031 epilogue_size_needed
= size_needed
;
15033 /* Step 1: Prologue guard. */
15035 /* Alignment code needs count to be in register. */
15036 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15038 enum machine_mode mode
= SImode
;
15039 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15041 count_exp
= force_reg (mode
, count_exp
);
15043 gcc_assert (desired_align
>= 1 && align
>= 1);
15045 /* Ensure that alignment prologue won't copy past end of block. */
15046 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15048 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15049 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15050 Make sure it is power of 2. */
15051 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15053 label
= gen_label_rtx ();
15054 emit_cmp_and_jump_insns (count_exp
,
15055 GEN_INT (epilogue_size_needed
),
15056 LTU
, 0, counter_mode (count_exp
), 1, label
);
15057 if (GET_CODE (count_exp
) == CONST_INT
)
15059 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
15060 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15062 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15064 /* Emit code to decide on runtime whether library call or inline should be
15066 if (dynamic_check
!= -1)
15068 rtx hot_label
= gen_label_rtx ();
15069 jump_around_label
= gen_label_rtx ();
15070 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15071 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
15072 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15073 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
15074 emit_jump (jump_around_label
);
15075 emit_label (hot_label
);
15078 /* Step 2: Alignment prologue. */
15080 if (desired_align
> align
)
15082 /* Except for the first move in epilogue, we no longer know
15083 constant offset in aliasing info. It don't seems to worth
15084 the pain to maintain it for the first move, so throw away
15086 src
= change_address (src
, BLKmode
, srcreg
);
15087 dst
= change_address (dst
, BLKmode
, destreg
);
15088 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
15091 if (label
&& size_needed
== 1)
15093 emit_label (label
);
15094 LABEL_NUSES (label
) = 1;
15098 /* Step 3: Main loop. */
15104 gcc_unreachable ();
15106 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15107 count_exp
, QImode
, 1, expected_size
);
15110 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15111 count_exp
, Pmode
, 1, expected_size
);
15113 case unrolled_loop
:
15114 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15115 registers for 4 temporaries anyway. */
15116 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
15117 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
15120 case rep_prefix_8_byte
:
15121 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15124 case rep_prefix_4_byte
:
15125 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15128 case rep_prefix_1_byte
:
15129 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
15133 /* Adjust properly the offset of src and dest memory for aliasing. */
15134 if (CONST_INT_P (count_exp
))
15136 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
15137 (count
/ size_needed
) * size_needed
);
15138 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15139 (count
/ size_needed
) * size_needed
);
15143 src
= change_address (src
, BLKmode
, srcreg
);
15144 dst
= change_address (dst
, BLKmode
, destreg
);
15147 /* Step 4: Epilogue to copy the remaining bytes. */
15151 /* When the main loop is done, COUNT_EXP might hold original count,
15152 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15153 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15154 bytes. Compensate if needed. */
15156 if (size_needed
< epilogue_size_needed
)
15159 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15160 GEN_INT (size_needed
- 1), count_exp
, 1,
15162 if (tmp
!= count_exp
)
15163 emit_move_insn (count_exp
, tmp
);
15165 emit_label (label
);
15166 LABEL_NUSES (label
) = 1;
15169 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15170 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
15171 epilogue_size_needed
);
15172 if (jump_around_label
)
15173 emit_label (jump_around_label
);
15177 /* Helper function for memcpy. For QImode value 0xXY produce
15178 0xXYXYXYXY of wide specified by MODE. This is essentially
15179 a * 0x10101010, but we can do slightly better than
15180 synth_mult by unwinding the sequence by hand on CPUs with
15183 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
15185 enum machine_mode valmode
= GET_MODE (val
);
15187 int nops
= mode
== DImode
? 3 : 2;
15189 gcc_assert (mode
== SImode
|| mode
== DImode
);
15190 if (val
== const0_rtx
)
15191 return copy_to_mode_reg (mode
, const0_rtx
);
15192 if (CONST_INT_P (val
))
15194 HOST_WIDE_INT v
= INTVAL (val
) & 255;
15198 if (mode
== DImode
)
15199 v
|= (v
<< 16) << 16;
15200 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
15203 if (valmode
== VOIDmode
)
15205 if (valmode
!= QImode
)
15206 val
= gen_lowpart (QImode
, val
);
15207 if (mode
== QImode
)
15209 if (!TARGET_PARTIAL_REG_STALL
)
15211 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
15212 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
15213 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
15214 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
15216 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15217 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
15218 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
15223 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15225 if (!TARGET_PARTIAL_REG_STALL
)
15226 if (mode
== SImode
)
15227 emit_insn (gen_movsi_insv_1 (reg
, reg
));
15229 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
15232 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
15233 NULL
, 1, OPTAB_DIRECT
);
15235 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15237 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
15238 NULL
, 1, OPTAB_DIRECT
);
15239 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15240 if (mode
== SImode
)
15242 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
15243 NULL
, 1, OPTAB_DIRECT
);
15244 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15249 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15250 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15251 alignment from ALIGN to DESIRED_ALIGN. */
15253 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
15258 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
15259 promoted_val
= promote_duplicated_reg (DImode
, val
);
15260 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
15261 promoted_val
= promote_duplicated_reg (SImode
, val
);
15262 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
15263 promoted_val
= promote_duplicated_reg (HImode
, val
);
15265 promoted_val
= val
;
15267 return promoted_val
;
15270 /* Expand string clear operation (bzero). Use i386 string operations when
15271 profitable. See expand_movmem comment for explanation of individual
15272 steps performed. */
15274 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
15275 rtx expected_align_exp
, rtx expected_size_exp
)
15280 rtx jump_around_label
= NULL
;
15281 HOST_WIDE_INT align
= 1;
15282 unsigned HOST_WIDE_INT count
= 0;
15283 HOST_WIDE_INT expected_size
= -1;
15284 int size_needed
= 0, epilogue_size_needed
;
15285 int desired_align
= 0;
15286 enum stringop_alg alg
;
15287 rtx promoted_val
= NULL
;
15288 bool force_loopy_epilogue
= false;
15291 if (CONST_INT_P (align_exp
))
15292 align
= INTVAL (align_exp
);
15293 /* i386 can do misaligned access on reasonably increased cost. */
15294 if (CONST_INT_P (expected_align_exp
)
15295 && INTVAL (expected_align_exp
) > align
)
15296 align
= INTVAL (expected_align_exp
);
15297 if (CONST_INT_P (count_exp
))
15298 count
= expected_size
= INTVAL (count_exp
);
15299 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15300 expected_size
= INTVAL (expected_size_exp
);
15302 /* Step 0: Decide on preferred algorithm, desired alignment and
15303 size of chunks to be copied by main loop. */
15305 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
15306 desired_align
= decide_alignment (align
, alg
, expected_size
);
15308 if (!TARGET_ALIGN_STRINGOPS
)
15309 align
= desired_align
;
15311 if (alg
== libcall
)
15313 gcc_assert (alg
!= no_stringop
);
15315 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
15316 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15321 gcc_unreachable ();
15323 size_needed
= GET_MODE_SIZE (Pmode
);
15325 case unrolled_loop
:
15326 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
15328 case rep_prefix_8_byte
:
15331 case rep_prefix_4_byte
:
15334 case rep_prefix_1_byte
:
15339 epilogue_size_needed
= size_needed
;
15341 /* Step 1: Prologue guard. */
15343 /* Alignment code needs count to be in register. */
15344 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15346 enum machine_mode mode
= SImode
;
15347 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15349 count_exp
= force_reg (mode
, count_exp
);
15351 /* Do the cheap promotion to allow better CSE across the
15352 main loop and epilogue (ie one load of the big constant in the
15353 front of all code. */
15354 if (CONST_INT_P (val_exp
))
15355 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15356 desired_align
, align
);
15357 /* Ensure that alignment prologue won't copy past end of block. */
15358 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15360 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15361 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15362 Make sure it is power of 2. */
15363 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15365 /* To improve performance of small blocks, we jump around the VAL
15366 promoting mode. This mean that if the promoted VAL is not constant,
15367 we might not use it in the epilogue and have to use byte
15369 if (epilogue_size_needed
> 2 && !promoted_val
)
15370 force_loopy_epilogue
= true;
15371 label
= gen_label_rtx ();
15372 emit_cmp_and_jump_insns (count_exp
,
15373 GEN_INT (epilogue_size_needed
),
15374 LTU
, 0, counter_mode (count_exp
), 1, label
);
15375 if (GET_CODE (count_exp
) == CONST_INT
)
15377 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
15378 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15380 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15382 if (dynamic_check
!= -1)
15384 rtx hot_label
= gen_label_rtx ();
15385 jump_around_label
= gen_label_rtx ();
15386 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15387 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
15388 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15389 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
15390 emit_jump (jump_around_label
);
15391 emit_label (hot_label
);
15394 /* Step 2: Alignment prologue. */
15396 /* Do the expensive promotion once we branched off the small blocks. */
15398 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15399 desired_align
, align
);
15400 gcc_assert (desired_align
>= 1 && align
>= 1);
15402 if (desired_align
> align
)
15404 /* Except for the first move in epilogue, we no longer know
15405 constant offset in aliasing info. It don't seems to worth
15406 the pain to maintain it for the first move, so throw away
15408 dst
= change_address (dst
, BLKmode
, destreg
);
15409 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
15412 if (label
&& size_needed
== 1)
15414 emit_label (label
);
15415 LABEL_NUSES (label
) = 1;
15419 /* Step 3: Main loop. */
15425 gcc_unreachable ();
15427 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15428 count_exp
, QImode
, 1, expected_size
);
15431 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15432 count_exp
, Pmode
, 1, expected_size
);
15434 case unrolled_loop
:
15435 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15436 count_exp
, Pmode
, 4, expected_size
);
15438 case rep_prefix_8_byte
:
15439 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15442 case rep_prefix_4_byte
:
15443 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15446 case rep_prefix_1_byte
:
15447 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15451 /* Adjust properly the offset of src and dest memory for aliasing. */
15452 if (CONST_INT_P (count_exp
))
15453 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15454 (count
/ size_needed
) * size_needed
);
15456 dst
= change_address (dst
, BLKmode
, destreg
);
15458 /* Step 4: Epilogue to copy the remaining bytes. */
15462 /* When the main loop is done, COUNT_EXP might hold original count,
15463 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15464 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15465 bytes. Compensate if needed. */
15467 if (size_needed
< desired_align
- align
)
15470 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15471 GEN_INT (size_needed
- 1), count_exp
, 1,
15473 size_needed
= desired_align
- align
+ 1;
15474 if (tmp
!= count_exp
)
15475 emit_move_insn (count_exp
, tmp
);
15477 emit_label (label
);
15478 LABEL_NUSES (label
) = 1;
15480 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15482 if (force_loopy_epilogue
)
15483 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
15486 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
15489 if (jump_around_label
)
15490 emit_label (jump_around_label
);
15494 /* Expand the appropriate insns for doing strlen if not just doing
15497 out = result, initialized with the start address
15498 align_rtx = alignment of the address.
15499 scratch = scratch register, initialized with the startaddress when
15500 not aligned, otherwise undefined
15502 This is just the body. It needs the initializations mentioned above and
15503 some address computing at the end. These things are done in i386.md. */
15506 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
15510 rtx align_2_label
= NULL_RTX
;
15511 rtx align_3_label
= NULL_RTX
;
15512 rtx align_4_label
= gen_label_rtx ();
15513 rtx end_0_label
= gen_label_rtx ();
15515 rtx tmpreg
= gen_reg_rtx (SImode
);
15516 rtx scratch
= gen_reg_rtx (SImode
);
15520 if (CONST_INT_P (align_rtx
))
15521 align
= INTVAL (align_rtx
);
15523 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15525 /* Is there a known alignment and is it less than 4? */
15528 rtx scratch1
= gen_reg_rtx (Pmode
);
15529 emit_move_insn (scratch1
, out
);
15530 /* Is there a known alignment and is it not 2? */
15533 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
15534 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
15536 /* Leave just the 3 lower bits. */
15537 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
15538 NULL_RTX
, 0, OPTAB_WIDEN
);
15540 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15541 Pmode
, 1, align_4_label
);
15542 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
15543 Pmode
, 1, align_2_label
);
15544 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
15545 Pmode
, 1, align_3_label
);
15549 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15550 check if is aligned to 4 - byte. */
15552 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
15553 NULL_RTX
, 0, OPTAB_WIDEN
);
15555 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15556 Pmode
, 1, align_4_label
);
15559 mem
= change_address (src
, QImode
, out
);
15561 /* Now compare the bytes. */
15563 /* Compare the first n unaligned byte on a byte per byte basis. */
15564 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
15565 QImode
, 1, end_0_label
);
15567 /* Increment the address. */
15569 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15571 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15573 /* Not needed with an alignment of 2 */
15576 emit_label (align_2_label
);
15578 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15582 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15584 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15586 emit_label (align_3_label
);
15589 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15593 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15595 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15598 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15599 align this loop. It gives only huge programs, but does not help to
15601 emit_label (align_4_label
);
15603 mem
= change_address (src
, SImode
, out
);
15604 emit_move_insn (scratch
, mem
);
15606 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15608 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
15610 /* This formula yields a nonzero result iff one of the bytes is zero.
15611 This saves three branches inside loop and many cycles. */
15613 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15614 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15615 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15616 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15617 gen_int_mode (0x80808080, SImode
)));
15618 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15623 rtx reg
= gen_reg_rtx (SImode
);
15624 rtx reg2
= gen_reg_rtx (Pmode
);
15625 emit_move_insn (reg
, tmpreg
);
15626 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15628 /* If zero is not in the first two bytes, move two bytes forward. */
15629 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15630 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15631 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15632 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15633 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15636 /* Emit lea manually to avoid clobbering of flags. */
15637 emit_insn (gen_rtx_SET (SImode
, reg2
,
15638 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15640 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15641 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15642 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15643 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15650 rtx end_2_label
= gen_label_rtx ();
15651 /* Is zero in the first two bytes? */
15653 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15654 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15655 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15656 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15657 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15659 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15660 JUMP_LABEL (tmp
) = end_2_label
;
15662 /* Not in the first two. Move two bytes forward. */
15663 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15665 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15667 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15669 emit_label (end_2_label
);
15673 /* Avoid branch in fixing the byte. */
15674 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15675 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15676 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, FLAGS_REG
), const0_rtx
);
15678 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15680 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15682 emit_label (end_0_label
);
15685 /* Expand strlen. */
15688 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
15690 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
15692 /* The generic case of strlen expander is long. Avoid it's
15693 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
15695 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15696 && !TARGET_INLINE_ALL_STRINGOPS
15698 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
15701 addr
= force_reg (Pmode
, XEXP (src
, 0));
15702 scratch1
= gen_reg_rtx (Pmode
);
15704 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15707 /* Well it seems that some optimizer does not combine a call like
15708 foo(strlen(bar), strlen(bar));
15709 when the move and the subtraction is done here. It does calculate
15710 the length just once when these instructions are done inside of
15711 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15712 often used and I use one fewer register for the lifetime of
15713 output_strlen_unroll() this is better. */
15715 emit_move_insn (out
, addr
);
15717 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
15719 /* strlensi_unroll_1 returns the address of the zero at the end of
15720 the string, like memchr(), so compute the length by subtracting
15721 the start address. */
15723 emit_insn (gen_subdi3 (out
, out
, addr
));
15725 emit_insn (gen_subsi3 (out
, out
, addr
));
15730 scratch2
= gen_reg_rtx (Pmode
);
15731 scratch3
= gen_reg_rtx (Pmode
);
15732 scratch4
= force_reg (Pmode
, constm1_rtx
);
15734 emit_move_insn (scratch3
, addr
);
15735 eoschar
= force_reg (QImode
, eoschar
);
15737 src
= replace_equiv_address_nv (src
, scratch3
);
15739 /* If .md starts supporting :P, this can be done in .md. */
15740 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
15741 scratch4
), UNSPEC_SCAS
);
15742 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
15745 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
15746 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
15750 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
15751 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
15757 /* For given symbol (function) construct code to compute address of it's PLT
15758 entry in large x86-64 PIC model. */
15760 construct_plt_address (rtx symbol
)
15762 rtx tmp
= gen_reg_rtx (Pmode
);
15763 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15765 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15766 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15768 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15769 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15774 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15775 rtx callarg2 ATTRIBUTE_UNUSED
,
15776 rtx pop
, int sibcall
)
15778 rtx use
= NULL
, call
;
15780 if (pop
== const0_rtx
)
15782 gcc_assert (!TARGET_64BIT
|| !pop
);
15784 if (TARGET_MACHO
&& !TARGET_64BIT
)
15787 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15788 fnaddr
= machopic_indirect_call_target (fnaddr
);
15793 /* Static functions and indirect calls don't need the pic register. */
15794 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15795 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15796 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15797 use_reg (&use
, pic_offset_table_rtx
);
15800 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15802 rtx al
= gen_rtx_REG (QImode
, 0);
15803 emit_move_insn (al
, callarg2
);
15804 use_reg (&use
, al
);
15807 if (ix86_cmodel
== CM_LARGE_PIC
15808 && GET_CODE (fnaddr
) == MEM
15809 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15810 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15811 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15812 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15814 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15815 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15817 if (sibcall
&& TARGET_64BIT
15818 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15821 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15822 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15823 emit_move_insn (fnaddr
, addr
);
15824 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15827 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15829 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15832 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15833 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15834 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15837 call
= emit_call_insn (call
);
15839 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15843 /* Clear stack slot assignments remembered from previous functions.
15844 This is called from INIT_EXPANDERS once before RTL is emitted for each
15847 static struct machine_function
*
15848 ix86_init_machine_status (void)
15850 struct machine_function
*f
;
15852 f
= GGC_CNEW (struct machine_function
);
15853 f
->use_fast_prologue_epilogue_nregs
= -1;
15854 f
->tls_descriptor_call_expanded_p
= 0;
15859 /* Return a MEM corresponding to a stack slot with mode MODE.
15860 Allocate a new slot if necessary.
15862 The RTL for a function can have several slots available: N is
15863 which slot to use. */
15866 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15868 struct stack_local_entry
*s
;
15870 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15872 /* Virtual slot is valid only before vregs are instantiated. */
15873 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
15875 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15876 if (s
->mode
== mode
&& s
->n
== n
)
15877 return copy_rtx (s
->rtl
);
15879 s
= (struct stack_local_entry
*)
15880 ggc_alloc (sizeof (struct stack_local_entry
));
15883 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15885 s
->next
= ix86_stack_locals
;
15886 ix86_stack_locals
= s
;
15890 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15892 static GTY(()) rtx ix86_tls_symbol
;
15894 ix86_tls_get_addr (void)
15897 if (!ix86_tls_symbol
)
15899 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15900 (TARGET_ANY_GNU_TLS
15902 ? "___tls_get_addr"
15903 : "__tls_get_addr");
15906 return ix86_tls_symbol
;
15909 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15911 static GTY(()) rtx ix86_tls_module_base_symbol
;
15913 ix86_tls_module_base (void)
15916 if (!ix86_tls_module_base_symbol
)
15918 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15919 "_TLS_MODULE_BASE_");
15920 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15921 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15924 return ix86_tls_module_base_symbol
;
15927 /* Calculate the length of the memory address in the instruction
15928 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15931 memory_address_length (rtx addr
)
15933 struct ix86_address parts
;
15934 rtx base
, index
, disp
;
15938 if (GET_CODE (addr
) == PRE_DEC
15939 || GET_CODE (addr
) == POST_INC
15940 || GET_CODE (addr
) == PRE_MODIFY
15941 || GET_CODE (addr
) == POST_MODIFY
)
15944 ok
= ix86_decompose_address (addr
, &parts
);
15947 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15948 parts
.base
= SUBREG_REG (parts
.base
);
15949 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15950 parts
.index
= SUBREG_REG (parts
.index
);
15953 index
= parts
.index
;
15958 - esp as the base always wants an index,
15959 - ebp as the base always wants a displacement. */
15961 /* Register Indirect. */
15962 if (base
&& !index
&& !disp
)
15964 /* esp (for its index) and ebp (for its displacement) need
15965 the two-byte modrm form. */
15966 if (addr
== stack_pointer_rtx
15967 || addr
== arg_pointer_rtx
15968 || addr
== frame_pointer_rtx
15969 || addr
== hard_frame_pointer_rtx
)
15973 /* Direct Addressing. */
15974 else if (disp
&& !base
&& !index
)
15979 /* Find the length of the displacement constant. */
15982 if (base
&& satisfies_constraint_K (disp
))
15987 /* ebp always wants a displacement. */
15988 else if (base
== hard_frame_pointer_rtx
)
15991 /* An index requires the two-byte modrm form.... */
15993 /* ...like esp, which always wants an index. */
15994 || base
== stack_pointer_rtx
15995 || base
== arg_pointer_rtx
15996 || base
== frame_pointer_rtx
)
16003 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16004 is set, expect that insn have 8bit immediate alternative. */
16006 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
16010 extract_insn_cached (insn
);
16011 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16012 if (CONSTANT_P (recog_data
.operand
[i
]))
16015 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
16019 switch (get_attr_mode (insn
))
16030 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16035 fatal_insn ("unknown insn mode", insn
);
16041 /* Compute default value for "length_address" attribute. */
16043 ix86_attr_length_address_default (rtx insn
)
16047 if (get_attr_type (insn
) == TYPE_LEA
)
16049 rtx set
= PATTERN (insn
);
16051 if (GET_CODE (set
) == PARALLEL
)
16052 set
= XVECEXP (set
, 0, 0);
16054 gcc_assert (GET_CODE (set
) == SET
);
16056 return memory_address_length (SET_SRC (set
));
16059 extract_insn_cached (insn
);
16060 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16061 if (MEM_P (recog_data
.operand
[i
]))
16063 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
16069 /* Return the maximum number of instructions a cpu can issue. */
16072 ix86_issue_rate (void)
16076 case PROCESSOR_PENTIUM
:
16080 case PROCESSOR_PENTIUMPRO
:
16081 case PROCESSOR_PENTIUM4
:
16082 case PROCESSOR_ATHLON
:
16084 case PROCESSOR_AMDFAM10
:
16085 case PROCESSOR_NOCONA
:
16086 case PROCESSOR_GENERIC32
:
16087 case PROCESSOR_GENERIC64
:
16090 case PROCESSOR_CORE2
:
16098 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16099 by DEP_INSN and nothing set by DEP_INSN. */
16102 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16106 /* Simplify the test for uninteresting insns. */
16107 if (insn_type
!= TYPE_SETCC
16108 && insn_type
!= TYPE_ICMOV
16109 && insn_type
!= TYPE_FCMOV
16110 && insn_type
!= TYPE_IBR
)
16113 if ((set
= single_set (dep_insn
)) != 0)
16115 set
= SET_DEST (set
);
16118 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
16119 && XVECLEN (PATTERN (dep_insn
), 0) == 2
16120 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
16121 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
16123 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16124 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
16129 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
16132 /* This test is true if the dependent insn reads the flags but
16133 not any other potentially set register. */
16134 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
16137 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
16143 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16144 address with operands set by DEP_INSN. */
16147 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
16151 if (insn_type
== TYPE_LEA
16154 addr
= PATTERN (insn
);
16156 if (GET_CODE (addr
) == PARALLEL
)
16157 addr
= XVECEXP (addr
, 0, 0);
16159 gcc_assert (GET_CODE (addr
) == SET
);
16161 addr
= SET_SRC (addr
);
16166 extract_insn_cached (insn
);
16167 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16168 if (MEM_P (recog_data
.operand
[i
]))
16170 addr
= XEXP (recog_data
.operand
[i
], 0);
16177 return modified_in_p (addr
, dep_insn
);
16181 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
16183 enum attr_type insn_type
, dep_insn_type
;
16184 enum attr_memory memory
;
16186 int dep_insn_code_number
;
16188 /* Anti and output dependencies have zero cost on all CPUs. */
16189 if (REG_NOTE_KIND (link
) != 0)
16192 dep_insn_code_number
= recog_memoized (dep_insn
);
16194 /* If we can't recognize the insns, we can't really do anything. */
16195 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
16198 insn_type
= get_attr_type (insn
);
16199 dep_insn_type
= get_attr_type (dep_insn
);
16203 case PROCESSOR_PENTIUM
:
16204 /* Address Generation Interlock adds a cycle of latency. */
16205 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16208 /* ??? Compares pair with jump/setcc. */
16209 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
16212 /* Floating point stores require value to be ready one cycle earlier. */
16213 if (insn_type
== TYPE_FMOV
16214 && get_attr_memory (insn
) == MEMORY_STORE
16215 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16219 case PROCESSOR_PENTIUMPRO
:
16220 memory
= get_attr_memory (insn
);
16222 /* INT->FP conversion is expensive. */
16223 if (get_attr_fp_int_src (dep_insn
))
16226 /* There is one cycle extra latency between an FP op and a store. */
16227 if (insn_type
== TYPE_FMOV
16228 && (set
= single_set (dep_insn
)) != NULL_RTX
16229 && (set2
= single_set (insn
)) != NULL_RTX
16230 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
16231 && MEM_P (SET_DEST (set2
)))
16234 /* Show ability of reorder buffer to hide latency of load by executing
16235 in parallel with previous instruction in case
16236 previous instruction is not needed to compute the address. */
16237 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16238 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16240 /* Claim moves to take one cycle, as core can issue one load
16241 at time and the next load can start cycle later. */
16242 if (dep_insn_type
== TYPE_IMOV
16243 || dep_insn_type
== TYPE_FMOV
)
16251 memory
= get_attr_memory (insn
);
16253 /* The esp dependency is resolved before the instruction is really
16255 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
16256 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
16259 /* INT->FP conversion is expensive. */
16260 if (get_attr_fp_int_src (dep_insn
))
16263 /* Show ability of reorder buffer to hide latency of load by executing
16264 in parallel with previous instruction in case
16265 previous instruction is not needed to compute the address. */
16266 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16267 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16269 /* Claim moves to take one cycle, as core can issue one load
16270 at time and the next load can start cycle later. */
16271 if (dep_insn_type
== TYPE_IMOV
16272 || dep_insn_type
== TYPE_FMOV
)
16281 case PROCESSOR_ATHLON
:
16283 case PROCESSOR_AMDFAM10
:
16284 case PROCESSOR_GENERIC32
:
16285 case PROCESSOR_GENERIC64
:
16286 memory
= get_attr_memory (insn
);
16288 /* Show ability of reorder buffer to hide latency of load by executing
16289 in parallel with previous instruction in case
16290 previous instruction is not needed to compute the address. */
16291 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16292 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16294 enum attr_unit unit
= get_attr_unit (insn
);
16297 /* Because of the difference between the length of integer and
16298 floating unit pipeline preparation stages, the memory operands
16299 for floating point are cheaper.
16301 ??? For Athlon it the difference is most probably 2. */
16302 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
16305 loadcost
= TARGET_ATHLON
? 2 : 0;
16307 if (cost
>= loadcost
)
16320 /* How many alternative schedules to try. This should be as wide as the
16321 scheduling freedom in the DFA, but no wider. Making this value too
16322 large results extra work for the scheduler. */
16325 ia32_multipass_dfa_lookahead (void)
16327 if (ix86_tune
== PROCESSOR_PENTIUM
)
16330 if (ix86_tune
== PROCESSOR_PENTIUMPRO
16331 || ix86_tune
== PROCESSOR_K6
)
16339 /* Compute the alignment given to a constant that is being placed in memory.
16340 EXP is the constant and ALIGN is the alignment that the object would
16342 The value of this function is used instead of that alignment to align
16346 ix86_constant_alignment (tree exp
, int align
)
16348 if (TREE_CODE (exp
) == REAL_CST
)
16350 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
16352 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
16355 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16356 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16357 return BITS_PER_WORD
;
16362 /* Compute the alignment for a static variable.
16363 TYPE is the data type, and ALIGN is the alignment that
16364 the object would ordinarily have. The value of this function is used
16365 instead of that alignment to align the object. */
16368 ix86_data_alignment (tree type
, int align
)
16370 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
16372 if (AGGREGATE_TYPE_P (type
)
16373 && TYPE_SIZE (type
)
16374 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16375 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
16376 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
16377 && align
< max_align
)
16380 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16381 to 16byte boundary. */
16384 if (AGGREGATE_TYPE_P (type
)
16385 && TYPE_SIZE (type
)
16386 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16387 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
16388 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16392 if (TREE_CODE (type
) == ARRAY_TYPE
)
16394 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16396 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16399 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16402 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16404 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16407 else if ((TREE_CODE (type
) == RECORD_TYPE
16408 || TREE_CODE (type
) == UNION_TYPE
16409 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16410 && TYPE_FIELDS (type
))
16412 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16414 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16417 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16418 || TREE_CODE (type
) == INTEGER_TYPE
)
16420 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16422 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16429 /* Compute the alignment for a local variable.
16430 TYPE is the data type, and ALIGN is the alignment that
16431 the object would ordinarily have. The value of this macro is used
16432 instead of that alignment to align the object. */
16435 ix86_local_alignment (tree type
, int align
)
16437 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16438 to 16byte boundary. */
16441 if (AGGREGATE_TYPE_P (type
)
16442 && TYPE_SIZE (type
)
16443 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16444 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
16445 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
16448 if (TREE_CODE (type
) == ARRAY_TYPE
)
16450 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16452 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16455 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16457 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16459 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16462 else if ((TREE_CODE (type
) == RECORD_TYPE
16463 || TREE_CODE (type
) == UNION_TYPE
16464 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16465 && TYPE_FIELDS (type
))
16467 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16469 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16472 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16473 || TREE_CODE (type
) == INTEGER_TYPE
)
16476 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16478 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16484 /* Emit RTL insns to initialize the variable parts of a trampoline.
16485 FNADDR is an RTX for the address of the function's pure code.
16486 CXT is an RTX for the static chain value for the function. */
16488 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
16492 /* Compute offset from the end of the jmp to the target function. */
16493 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16494 plus_constant (tramp
, 10),
16495 NULL_RTX
, 1, OPTAB_DIRECT
);
16496 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
16497 gen_int_mode (0xb9, QImode
));
16498 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
16499 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
16500 gen_int_mode (0xe9, QImode
));
16501 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
16506 /* Try to load address using shorter movl instead of movabs.
16507 We may want to support movq for kernel mode, but kernel does not use
16508 trampolines at the moment. */
16509 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16511 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
16512 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16513 gen_int_mode (0xbb41, HImode
));
16514 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
16515 gen_lowpart (SImode
, fnaddr
));
16520 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16521 gen_int_mode (0xbb49, HImode
));
16522 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16526 /* Load static chain using movabs to r10. */
16527 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16528 gen_int_mode (0xba49, HImode
));
16529 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16532 /* Jump to the r11 */
16533 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16534 gen_int_mode (0xff49, HImode
));
16535 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
16536 gen_int_mode (0xe3, QImode
));
16538 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
16541 #ifdef ENABLE_EXECUTE_STACK
16542 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16543 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
16547 /* Codes for all the SSE/MMX builtins. */
16550 IX86_BUILTIN_ADDPS
,
16551 IX86_BUILTIN_ADDSS
,
16552 IX86_BUILTIN_DIVPS
,
16553 IX86_BUILTIN_DIVSS
,
16554 IX86_BUILTIN_MULPS
,
16555 IX86_BUILTIN_MULSS
,
16556 IX86_BUILTIN_SUBPS
,
16557 IX86_BUILTIN_SUBSS
,
16559 IX86_BUILTIN_CMPEQPS
,
16560 IX86_BUILTIN_CMPLTPS
,
16561 IX86_BUILTIN_CMPLEPS
,
16562 IX86_BUILTIN_CMPGTPS
,
16563 IX86_BUILTIN_CMPGEPS
,
16564 IX86_BUILTIN_CMPNEQPS
,
16565 IX86_BUILTIN_CMPNLTPS
,
16566 IX86_BUILTIN_CMPNLEPS
,
16567 IX86_BUILTIN_CMPNGTPS
,
16568 IX86_BUILTIN_CMPNGEPS
,
16569 IX86_BUILTIN_CMPORDPS
,
16570 IX86_BUILTIN_CMPUNORDPS
,
16571 IX86_BUILTIN_CMPEQSS
,
16572 IX86_BUILTIN_CMPLTSS
,
16573 IX86_BUILTIN_CMPLESS
,
16574 IX86_BUILTIN_CMPNEQSS
,
16575 IX86_BUILTIN_CMPNLTSS
,
16576 IX86_BUILTIN_CMPNLESS
,
16577 IX86_BUILTIN_CMPNGTSS
,
16578 IX86_BUILTIN_CMPNGESS
,
16579 IX86_BUILTIN_CMPORDSS
,
16580 IX86_BUILTIN_CMPUNORDSS
,
16582 IX86_BUILTIN_COMIEQSS
,
16583 IX86_BUILTIN_COMILTSS
,
16584 IX86_BUILTIN_COMILESS
,
16585 IX86_BUILTIN_COMIGTSS
,
16586 IX86_BUILTIN_COMIGESS
,
16587 IX86_BUILTIN_COMINEQSS
,
16588 IX86_BUILTIN_UCOMIEQSS
,
16589 IX86_BUILTIN_UCOMILTSS
,
16590 IX86_BUILTIN_UCOMILESS
,
16591 IX86_BUILTIN_UCOMIGTSS
,
16592 IX86_BUILTIN_UCOMIGESS
,
16593 IX86_BUILTIN_UCOMINEQSS
,
16595 IX86_BUILTIN_CVTPI2PS
,
16596 IX86_BUILTIN_CVTPS2PI
,
16597 IX86_BUILTIN_CVTSI2SS
,
16598 IX86_BUILTIN_CVTSI642SS
,
16599 IX86_BUILTIN_CVTSS2SI
,
16600 IX86_BUILTIN_CVTSS2SI64
,
16601 IX86_BUILTIN_CVTTPS2PI
,
16602 IX86_BUILTIN_CVTTSS2SI
,
16603 IX86_BUILTIN_CVTTSS2SI64
,
16605 IX86_BUILTIN_MAXPS
,
16606 IX86_BUILTIN_MAXSS
,
16607 IX86_BUILTIN_MINPS
,
16608 IX86_BUILTIN_MINSS
,
16610 IX86_BUILTIN_LOADUPS
,
16611 IX86_BUILTIN_STOREUPS
,
16612 IX86_BUILTIN_MOVSS
,
16614 IX86_BUILTIN_MOVHLPS
,
16615 IX86_BUILTIN_MOVLHPS
,
16616 IX86_BUILTIN_LOADHPS
,
16617 IX86_BUILTIN_LOADLPS
,
16618 IX86_BUILTIN_STOREHPS
,
16619 IX86_BUILTIN_STORELPS
,
16621 IX86_BUILTIN_MASKMOVQ
,
16622 IX86_BUILTIN_MOVMSKPS
,
16623 IX86_BUILTIN_PMOVMSKB
,
16625 IX86_BUILTIN_MOVNTPS
,
16626 IX86_BUILTIN_MOVNTQ
,
16628 IX86_BUILTIN_LOADDQU
,
16629 IX86_BUILTIN_STOREDQU
,
16631 IX86_BUILTIN_PACKSSWB
,
16632 IX86_BUILTIN_PACKSSDW
,
16633 IX86_BUILTIN_PACKUSWB
,
16635 IX86_BUILTIN_PADDB
,
16636 IX86_BUILTIN_PADDW
,
16637 IX86_BUILTIN_PADDD
,
16638 IX86_BUILTIN_PADDQ
,
16639 IX86_BUILTIN_PADDSB
,
16640 IX86_BUILTIN_PADDSW
,
16641 IX86_BUILTIN_PADDUSB
,
16642 IX86_BUILTIN_PADDUSW
,
16643 IX86_BUILTIN_PSUBB
,
16644 IX86_BUILTIN_PSUBW
,
16645 IX86_BUILTIN_PSUBD
,
16646 IX86_BUILTIN_PSUBQ
,
16647 IX86_BUILTIN_PSUBSB
,
16648 IX86_BUILTIN_PSUBSW
,
16649 IX86_BUILTIN_PSUBUSB
,
16650 IX86_BUILTIN_PSUBUSW
,
16653 IX86_BUILTIN_PANDN
,
16657 IX86_BUILTIN_PAVGB
,
16658 IX86_BUILTIN_PAVGW
,
16660 IX86_BUILTIN_PCMPEQB
,
16661 IX86_BUILTIN_PCMPEQW
,
16662 IX86_BUILTIN_PCMPEQD
,
16663 IX86_BUILTIN_PCMPGTB
,
16664 IX86_BUILTIN_PCMPGTW
,
16665 IX86_BUILTIN_PCMPGTD
,
16667 IX86_BUILTIN_PMADDWD
,
16669 IX86_BUILTIN_PMAXSW
,
16670 IX86_BUILTIN_PMAXUB
,
16671 IX86_BUILTIN_PMINSW
,
16672 IX86_BUILTIN_PMINUB
,
16674 IX86_BUILTIN_PMULHUW
,
16675 IX86_BUILTIN_PMULHW
,
16676 IX86_BUILTIN_PMULLW
,
16678 IX86_BUILTIN_PSADBW
,
16679 IX86_BUILTIN_PSHUFW
,
16681 IX86_BUILTIN_PSLLW
,
16682 IX86_BUILTIN_PSLLD
,
16683 IX86_BUILTIN_PSLLQ
,
16684 IX86_BUILTIN_PSRAW
,
16685 IX86_BUILTIN_PSRAD
,
16686 IX86_BUILTIN_PSRLW
,
16687 IX86_BUILTIN_PSRLD
,
16688 IX86_BUILTIN_PSRLQ
,
16689 IX86_BUILTIN_PSLLWI
,
16690 IX86_BUILTIN_PSLLDI
,
16691 IX86_BUILTIN_PSLLQI
,
16692 IX86_BUILTIN_PSRAWI
,
16693 IX86_BUILTIN_PSRADI
,
16694 IX86_BUILTIN_PSRLWI
,
16695 IX86_BUILTIN_PSRLDI
,
16696 IX86_BUILTIN_PSRLQI
,
16698 IX86_BUILTIN_PUNPCKHBW
,
16699 IX86_BUILTIN_PUNPCKHWD
,
16700 IX86_BUILTIN_PUNPCKHDQ
,
16701 IX86_BUILTIN_PUNPCKLBW
,
16702 IX86_BUILTIN_PUNPCKLWD
,
16703 IX86_BUILTIN_PUNPCKLDQ
,
16705 IX86_BUILTIN_SHUFPS
,
16707 IX86_BUILTIN_RCPPS
,
16708 IX86_BUILTIN_RCPSS
,
16709 IX86_BUILTIN_RSQRTPS
,
16710 IX86_BUILTIN_RSQRTSS
,
16711 IX86_BUILTIN_RSQRTF
,
16712 IX86_BUILTIN_SQRTPS
,
16713 IX86_BUILTIN_SQRTSS
,
16715 IX86_BUILTIN_UNPCKHPS
,
16716 IX86_BUILTIN_UNPCKLPS
,
16718 IX86_BUILTIN_ANDPS
,
16719 IX86_BUILTIN_ANDNPS
,
16721 IX86_BUILTIN_XORPS
,
16724 IX86_BUILTIN_LDMXCSR
,
16725 IX86_BUILTIN_STMXCSR
,
16726 IX86_BUILTIN_SFENCE
,
16728 /* 3DNow! Original */
16729 IX86_BUILTIN_FEMMS
,
16730 IX86_BUILTIN_PAVGUSB
,
16731 IX86_BUILTIN_PF2ID
,
16732 IX86_BUILTIN_PFACC
,
16733 IX86_BUILTIN_PFADD
,
16734 IX86_BUILTIN_PFCMPEQ
,
16735 IX86_BUILTIN_PFCMPGE
,
16736 IX86_BUILTIN_PFCMPGT
,
16737 IX86_BUILTIN_PFMAX
,
16738 IX86_BUILTIN_PFMIN
,
16739 IX86_BUILTIN_PFMUL
,
16740 IX86_BUILTIN_PFRCP
,
16741 IX86_BUILTIN_PFRCPIT1
,
16742 IX86_BUILTIN_PFRCPIT2
,
16743 IX86_BUILTIN_PFRSQIT1
,
16744 IX86_BUILTIN_PFRSQRT
,
16745 IX86_BUILTIN_PFSUB
,
16746 IX86_BUILTIN_PFSUBR
,
16747 IX86_BUILTIN_PI2FD
,
16748 IX86_BUILTIN_PMULHRW
,
16750 /* 3DNow! Athlon Extensions */
16751 IX86_BUILTIN_PF2IW
,
16752 IX86_BUILTIN_PFNACC
,
16753 IX86_BUILTIN_PFPNACC
,
16754 IX86_BUILTIN_PI2FW
,
16755 IX86_BUILTIN_PSWAPDSI
,
16756 IX86_BUILTIN_PSWAPDSF
,
16759 IX86_BUILTIN_ADDPD
,
16760 IX86_BUILTIN_ADDSD
,
16761 IX86_BUILTIN_DIVPD
,
16762 IX86_BUILTIN_DIVSD
,
16763 IX86_BUILTIN_MULPD
,
16764 IX86_BUILTIN_MULSD
,
16765 IX86_BUILTIN_SUBPD
,
16766 IX86_BUILTIN_SUBSD
,
16768 IX86_BUILTIN_CMPEQPD
,
16769 IX86_BUILTIN_CMPLTPD
,
16770 IX86_BUILTIN_CMPLEPD
,
16771 IX86_BUILTIN_CMPGTPD
,
16772 IX86_BUILTIN_CMPGEPD
,
16773 IX86_BUILTIN_CMPNEQPD
,
16774 IX86_BUILTIN_CMPNLTPD
,
16775 IX86_BUILTIN_CMPNLEPD
,
16776 IX86_BUILTIN_CMPNGTPD
,
16777 IX86_BUILTIN_CMPNGEPD
,
16778 IX86_BUILTIN_CMPORDPD
,
16779 IX86_BUILTIN_CMPUNORDPD
,
16780 IX86_BUILTIN_CMPEQSD
,
16781 IX86_BUILTIN_CMPLTSD
,
16782 IX86_BUILTIN_CMPLESD
,
16783 IX86_BUILTIN_CMPNEQSD
,
16784 IX86_BUILTIN_CMPNLTSD
,
16785 IX86_BUILTIN_CMPNLESD
,
16786 IX86_BUILTIN_CMPORDSD
,
16787 IX86_BUILTIN_CMPUNORDSD
,
16789 IX86_BUILTIN_COMIEQSD
,
16790 IX86_BUILTIN_COMILTSD
,
16791 IX86_BUILTIN_COMILESD
,
16792 IX86_BUILTIN_COMIGTSD
,
16793 IX86_BUILTIN_COMIGESD
,
16794 IX86_BUILTIN_COMINEQSD
,
16795 IX86_BUILTIN_UCOMIEQSD
,
16796 IX86_BUILTIN_UCOMILTSD
,
16797 IX86_BUILTIN_UCOMILESD
,
16798 IX86_BUILTIN_UCOMIGTSD
,
16799 IX86_BUILTIN_UCOMIGESD
,
16800 IX86_BUILTIN_UCOMINEQSD
,
16802 IX86_BUILTIN_MAXPD
,
16803 IX86_BUILTIN_MAXSD
,
16804 IX86_BUILTIN_MINPD
,
16805 IX86_BUILTIN_MINSD
,
16807 IX86_BUILTIN_ANDPD
,
16808 IX86_BUILTIN_ANDNPD
,
16810 IX86_BUILTIN_XORPD
,
16812 IX86_BUILTIN_SQRTPD
,
16813 IX86_BUILTIN_SQRTSD
,
16815 IX86_BUILTIN_UNPCKHPD
,
16816 IX86_BUILTIN_UNPCKLPD
,
16818 IX86_BUILTIN_SHUFPD
,
16820 IX86_BUILTIN_LOADUPD
,
16821 IX86_BUILTIN_STOREUPD
,
16822 IX86_BUILTIN_MOVSD
,
16824 IX86_BUILTIN_LOADHPD
,
16825 IX86_BUILTIN_LOADLPD
,
16827 IX86_BUILTIN_CVTDQ2PD
,
16828 IX86_BUILTIN_CVTDQ2PS
,
16830 IX86_BUILTIN_CVTPD2DQ
,
16831 IX86_BUILTIN_CVTPD2PI
,
16832 IX86_BUILTIN_CVTPD2PS
,
16833 IX86_BUILTIN_CVTTPD2DQ
,
16834 IX86_BUILTIN_CVTTPD2PI
,
16836 IX86_BUILTIN_CVTPI2PD
,
16837 IX86_BUILTIN_CVTSI2SD
,
16838 IX86_BUILTIN_CVTSI642SD
,
16840 IX86_BUILTIN_CVTSD2SI
,
16841 IX86_BUILTIN_CVTSD2SI64
,
16842 IX86_BUILTIN_CVTSD2SS
,
16843 IX86_BUILTIN_CVTSS2SD
,
16844 IX86_BUILTIN_CVTTSD2SI
,
16845 IX86_BUILTIN_CVTTSD2SI64
,
16847 IX86_BUILTIN_CVTPS2DQ
,
16848 IX86_BUILTIN_CVTPS2PD
,
16849 IX86_BUILTIN_CVTTPS2DQ
,
16851 IX86_BUILTIN_MOVNTI
,
16852 IX86_BUILTIN_MOVNTPD
,
16853 IX86_BUILTIN_MOVNTDQ
,
16856 IX86_BUILTIN_MASKMOVDQU
,
16857 IX86_BUILTIN_MOVMSKPD
,
16858 IX86_BUILTIN_PMOVMSKB128
,
16860 IX86_BUILTIN_PACKSSWB128
,
16861 IX86_BUILTIN_PACKSSDW128
,
16862 IX86_BUILTIN_PACKUSWB128
,
16864 IX86_BUILTIN_PADDB128
,
16865 IX86_BUILTIN_PADDW128
,
16866 IX86_BUILTIN_PADDD128
,
16867 IX86_BUILTIN_PADDQ128
,
16868 IX86_BUILTIN_PADDSB128
,
16869 IX86_BUILTIN_PADDSW128
,
16870 IX86_BUILTIN_PADDUSB128
,
16871 IX86_BUILTIN_PADDUSW128
,
16872 IX86_BUILTIN_PSUBB128
,
16873 IX86_BUILTIN_PSUBW128
,
16874 IX86_BUILTIN_PSUBD128
,
16875 IX86_BUILTIN_PSUBQ128
,
16876 IX86_BUILTIN_PSUBSB128
,
16877 IX86_BUILTIN_PSUBSW128
,
16878 IX86_BUILTIN_PSUBUSB128
,
16879 IX86_BUILTIN_PSUBUSW128
,
16881 IX86_BUILTIN_PAND128
,
16882 IX86_BUILTIN_PANDN128
,
16883 IX86_BUILTIN_POR128
,
16884 IX86_BUILTIN_PXOR128
,
16886 IX86_BUILTIN_PAVGB128
,
16887 IX86_BUILTIN_PAVGW128
,
16889 IX86_BUILTIN_PCMPEQB128
,
16890 IX86_BUILTIN_PCMPEQW128
,
16891 IX86_BUILTIN_PCMPEQD128
,
16892 IX86_BUILTIN_PCMPGTB128
,
16893 IX86_BUILTIN_PCMPGTW128
,
16894 IX86_BUILTIN_PCMPGTD128
,
16896 IX86_BUILTIN_PMADDWD128
,
16898 IX86_BUILTIN_PMAXSW128
,
16899 IX86_BUILTIN_PMAXUB128
,
16900 IX86_BUILTIN_PMINSW128
,
16901 IX86_BUILTIN_PMINUB128
,
16903 IX86_BUILTIN_PMULUDQ
,
16904 IX86_BUILTIN_PMULUDQ128
,
16905 IX86_BUILTIN_PMULHUW128
,
16906 IX86_BUILTIN_PMULHW128
,
16907 IX86_BUILTIN_PMULLW128
,
16909 IX86_BUILTIN_PSADBW128
,
16910 IX86_BUILTIN_PSHUFHW
,
16911 IX86_BUILTIN_PSHUFLW
,
16912 IX86_BUILTIN_PSHUFD
,
16914 IX86_BUILTIN_PSLLDQI128
,
16915 IX86_BUILTIN_PSLLWI128
,
16916 IX86_BUILTIN_PSLLDI128
,
16917 IX86_BUILTIN_PSLLQI128
,
16918 IX86_BUILTIN_PSRAWI128
,
16919 IX86_BUILTIN_PSRADI128
,
16920 IX86_BUILTIN_PSRLDQI128
,
16921 IX86_BUILTIN_PSRLWI128
,
16922 IX86_BUILTIN_PSRLDI128
,
16923 IX86_BUILTIN_PSRLQI128
,
16925 IX86_BUILTIN_PSLLDQ128
,
16926 IX86_BUILTIN_PSLLW128
,
16927 IX86_BUILTIN_PSLLD128
,
16928 IX86_BUILTIN_PSLLQ128
,
16929 IX86_BUILTIN_PSRAW128
,
16930 IX86_BUILTIN_PSRAD128
,
16931 IX86_BUILTIN_PSRLW128
,
16932 IX86_BUILTIN_PSRLD128
,
16933 IX86_BUILTIN_PSRLQ128
,
16935 IX86_BUILTIN_PUNPCKHBW128
,
16936 IX86_BUILTIN_PUNPCKHWD128
,
16937 IX86_BUILTIN_PUNPCKHDQ128
,
16938 IX86_BUILTIN_PUNPCKHQDQ128
,
16939 IX86_BUILTIN_PUNPCKLBW128
,
16940 IX86_BUILTIN_PUNPCKLWD128
,
16941 IX86_BUILTIN_PUNPCKLDQ128
,
16942 IX86_BUILTIN_PUNPCKLQDQ128
,
16944 IX86_BUILTIN_CLFLUSH
,
16945 IX86_BUILTIN_MFENCE
,
16946 IX86_BUILTIN_LFENCE
,
16948 /* Prescott New Instructions. */
16949 IX86_BUILTIN_ADDSUBPS
,
16950 IX86_BUILTIN_HADDPS
,
16951 IX86_BUILTIN_HSUBPS
,
16952 IX86_BUILTIN_MOVSHDUP
,
16953 IX86_BUILTIN_MOVSLDUP
,
16954 IX86_BUILTIN_ADDSUBPD
,
16955 IX86_BUILTIN_HADDPD
,
16956 IX86_BUILTIN_HSUBPD
,
16957 IX86_BUILTIN_LDDQU
,
16959 IX86_BUILTIN_MONITOR
,
16960 IX86_BUILTIN_MWAIT
,
16963 IX86_BUILTIN_PHADDW
,
16964 IX86_BUILTIN_PHADDD
,
16965 IX86_BUILTIN_PHADDSW
,
16966 IX86_BUILTIN_PHSUBW
,
16967 IX86_BUILTIN_PHSUBD
,
16968 IX86_BUILTIN_PHSUBSW
,
16969 IX86_BUILTIN_PMADDUBSW
,
16970 IX86_BUILTIN_PMULHRSW
,
16971 IX86_BUILTIN_PSHUFB
,
16972 IX86_BUILTIN_PSIGNB
,
16973 IX86_BUILTIN_PSIGNW
,
16974 IX86_BUILTIN_PSIGND
,
16975 IX86_BUILTIN_PALIGNR
,
16976 IX86_BUILTIN_PABSB
,
16977 IX86_BUILTIN_PABSW
,
16978 IX86_BUILTIN_PABSD
,
16980 IX86_BUILTIN_PHADDW128
,
16981 IX86_BUILTIN_PHADDD128
,
16982 IX86_BUILTIN_PHADDSW128
,
16983 IX86_BUILTIN_PHSUBW128
,
16984 IX86_BUILTIN_PHSUBD128
,
16985 IX86_BUILTIN_PHSUBSW128
,
16986 IX86_BUILTIN_PMADDUBSW128
,
16987 IX86_BUILTIN_PMULHRSW128
,
16988 IX86_BUILTIN_PSHUFB128
,
16989 IX86_BUILTIN_PSIGNB128
,
16990 IX86_BUILTIN_PSIGNW128
,
16991 IX86_BUILTIN_PSIGND128
,
16992 IX86_BUILTIN_PALIGNR128
,
16993 IX86_BUILTIN_PABSB128
,
16994 IX86_BUILTIN_PABSW128
,
16995 IX86_BUILTIN_PABSD128
,
16997 /* AMDFAM10 - SSE4A New Instructions. */
16998 IX86_BUILTIN_MOVNTSD
,
16999 IX86_BUILTIN_MOVNTSS
,
17000 IX86_BUILTIN_EXTRQI
,
17001 IX86_BUILTIN_EXTRQ
,
17002 IX86_BUILTIN_INSERTQI
,
17003 IX86_BUILTIN_INSERTQ
,
17006 IX86_BUILTIN_BLENDPD
,
17007 IX86_BUILTIN_BLENDPS
,
17008 IX86_BUILTIN_BLENDVPD
,
17009 IX86_BUILTIN_BLENDVPS
,
17010 IX86_BUILTIN_PBLENDVB128
,
17011 IX86_BUILTIN_PBLENDW128
,
17016 IX86_BUILTIN_INSERTPS128
,
17018 IX86_BUILTIN_MOVNTDQA
,
17019 IX86_BUILTIN_MPSADBW128
,
17020 IX86_BUILTIN_PACKUSDW128
,
17021 IX86_BUILTIN_PCMPEQQ
,
17022 IX86_BUILTIN_PHMINPOSUW128
,
17024 IX86_BUILTIN_PMAXSB128
,
17025 IX86_BUILTIN_PMAXSD128
,
17026 IX86_BUILTIN_PMAXUD128
,
17027 IX86_BUILTIN_PMAXUW128
,
17029 IX86_BUILTIN_PMINSB128
,
17030 IX86_BUILTIN_PMINSD128
,
17031 IX86_BUILTIN_PMINUD128
,
17032 IX86_BUILTIN_PMINUW128
,
17034 IX86_BUILTIN_PMOVSXBW128
,
17035 IX86_BUILTIN_PMOVSXBD128
,
17036 IX86_BUILTIN_PMOVSXBQ128
,
17037 IX86_BUILTIN_PMOVSXWD128
,
17038 IX86_BUILTIN_PMOVSXWQ128
,
17039 IX86_BUILTIN_PMOVSXDQ128
,
17041 IX86_BUILTIN_PMOVZXBW128
,
17042 IX86_BUILTIN_PMOVZXBD128
,
17043 IX86_BUILTIN_PMOVZXBQ128
,
17044 IX86_BUILTIN_PMOVZXWD128
,
17045 IX86_BUILTIN_PMOVZXWQ128
,
17046 IX86_BUILTIN_PMOVZXDQ128
,
17048 IX86_BUILTIN_PMULDQ128
,
17049 IX86_BUILTIN_PMULLD128
,
17051 IX86_BUILTIN_ROUNDPD
,
17052 IX86_BUILTIN_ROUNDPS
,
17053 IX86_BUILTIN_ROUNDSD
,
17054 IX86_BUILTIN_ROUNDSS
,
17056 IX86_BUILTIN_PTESTZ
,
17057 IX86_BUILTIN_PTESTC
,
17058 IX86_BUILTIN_PTESTNZC
,
17060 IX86_BUILTIN_VEC_INIT_V2SI
,
17061 IX86_BUILTIN_VEC_INIT_V4HI
,
17062 IX86_BUILTIN_VEC_INIT_V8QI
,
17063 IX86_BUILTIN_VEC_EXT_V2DF
,
17064 IX86_BUILTIN_VEC_EXT_V2DI
,
17065 IX86_BUILTIN_VEC_EXT_V4SF
,
17066 IX86_BUILTIN_VEC_EXT_V4SI
,
17067 IX86_BUILTIN_VEC_EXT_V8HI
,
17068 IX86_BUILTIN_VEC_EXT_V2SI
,
17069 IX86_BUILTIN_VEC_EXT_V4HI
,
17070 IX86_BUILTIN_VEC_EXT_V16QI
,
17071 IX86_BUILTIN_VEC_SET_V2DI
,
17072 IX86_BUILTIN_VEC_SET_V4SF
,
17073 IX86_BUILTIN_VEC_SET_V4SI
,
17074 IX86_BUILTIN_VEC_SET_V8HI
,
17075 IX86_BUILTIN_VEC_SET_V4HI
,
17076 IX86_BUILTIN_VEC_SET_V16QI
,
17078 IX86_BUILTIN_VEC_PACK_SFIX
,
17081 IX86_BUILTIN_CRC32QI
,
17082 IX86_BUILTIN_CRC32HI
,
17083 IX86_BUILTIN_CRC32SI
,
17084 IX86_BUILTIN_CRC32DI
,
17086 IX86_BUILTIN_PCMPESTRI128
,
17087 IX86_BUILTIN_PCMPESTRM128
,
17088 IX86_BUILTIN_PCMPESTRA128
,
17089 IX86_BUILTIN_PCMPESTRC128
,
17090 IX86_BUILTIN_PCMPESTRO128
,
17091 IX86_BUILTIN_PCMPESTRS128
,
17092 IX86_BUILTIN_PCMPESTRZ128
,
17093 IX86_BUILTIN_PCMPISTRI128
,
17094 IX86_BUILTIN_PCMPISTRM128
,
17095 IX86_BUILTIN_PCMPISTRA128
,
17096 IX86_BUILTIN_PCMPISTRC128
,
17097 IX86_BUILTIN_PCMPISTRO128
,
17098 IX86_BUILTIN_PCMPISTRS128
,
17099 IX86_BUILTIN_PCMPISTRZ128
,
17101 IX86_BUILTIN_PCMPGTQ
,
17103 /* TFmode support builtins. */
17105 IX86_BUILTIN_FABSQ
,
17106 IX86_BUILTIN_COPYSIGNQ
,
/* Table for the ix86 builtin decls, indexed by enum ix86_builtins and
   bounded by IX86_BUILTIN_MAX.  Entries are filled in by def_builtin;
   slots for builtins not enabled by the selected ISA stay NULL_TREE.
   GTY(()) roots the decls for the garbage collector.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17114 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17115 * if the target_flags include one of MASK. Stores the function decl
17116 * in the ix86_builtins array.
17117 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17120 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
17122 tree decl
= NULL_TREE
;
17124 if (mask
& ix86_isa_flags
17125 && (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
))
17127 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
17129 ix86_builtins
[(int) code
] = decl
;
17135 /* Like def_builtin, but also marks the function decl "const". */
17138 def_builtin_const (int mask
, const char *name
, tree type
,
17139 enum ix86_builtins code
)
17141 tree decl
= def_builtin (mask
, name
, type
, code
);
17143 TREE_READONLY (decl
) = 1;
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* Describes one ix86 builtin: the ISA mask required to enable it, the
   insn pattern used to expand it, its user-visible name (0 for builtins
   registered without a name string), its enum code, the RTX comparison
   code it implements (UNKNOWN for non-comparisons), and a flag word
   holding BUILTIN_DESC_* bits or, in some tables, a CC mode cast to
   int.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  /* NOTE(review): field reconstructed from the table initializers
     below, which all supply a sixth value — confirm against the
     original source.  */
  const int flag;
};
/* Ordered/unordered scalar compare builtins (comiss/ucomiss for SSE,
   comisd/ucomisd for SSE2).  The comparison field gives the RTX code
   each builtin implements; the flag field is unused here.  */
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
/* SSE4.1 PTEST flag-reading builtins.  All three expand through the
   same ptest pattern; the comparison field selects which condition of
   the resulting flags is read (EQ for ..testz, LTU for ..testc, GTU
   for ..testnzc).  */
static const struct builtin_description bdesc_ptest[] =
{
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
};
/* SSE4.2 packed-compare explicit-length string builtins.  All share
   the pcmpestr pattern; for the flag-returning variants the flag field
   carries the CC mode (cast to int) of the flags result to test.  */
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
/* SSE4.2 packed-compare implicit-length string builtins.  Mirrors
   bdesc_pcmpestr: shared pcmpistr pattern, with the flag field holding
   the CC mode (cast to int) for the flag-returning variants.  */
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
17223 static const struct builtin_description bdesc_crc32
[] =
17226 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32qi
, 0, IX86_BUILTIN_CRC32QI
, UNKNOWN
, 0 },
17227 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32hi
, 0, IX86_BUILTIN_CRC32HI
, UNKNOWN
, 0 },
17228 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32si
, 0, IX86_BUILTIN_CRC32SI
, UNKNOWN
, 0 },
17229 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32di
, 0, IX86_BUILTIN_CRC32DI
, UNKNOWN
, 0 },
/* SSE builtins with 3 arguments and the last argument must be an
   immediate or xmm0.  The roundsd/roundss entries have no name string
   (name field is 0) and are registered elsewhere.  */
static const struct builtin_description bdesc_sse_3arg[] =
{
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
};
17250 static const struct builtin_description bdesc_2arg
[] =
17253 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, 0 },
17254 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, 0 },
17255 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, 0 },
17256 { OPTION_MASK_ISA_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, 0 },
17257 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, 0 },
17258 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, 0 },
17259 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, 0 },
17260 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, 0 },
17262 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
17263 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
17264 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
17265 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17266 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17267 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
17268 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
17269 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
17270 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
17271 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17272 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17273 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
17274 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
17275 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
17276 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
17277 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
17278 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
17279 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
17280 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
17281 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17282 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17283 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, 0 },
17285 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, 0 },
17286 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, 0 },
17287 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, 0 },
17288 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, 0 },
17290 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, 0 },
17291 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, 0 },
17292 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, 0 },
17293 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, 0 },
17295 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, 0 },
17296 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, 0 },
17297 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, 0 },
17298 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, 0 },
17299 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, 0 },
17302 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, 0 },
17303 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, 0 },
17304 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, 0 },
17305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, 0 },
17306 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, 0 },
17307 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, 0 },
17308 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, 0 },
17309 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, 0 },
17311 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, 0 },
17312 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, 0 },
17313 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, 0 },
17314 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, 0 },
17315 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, 0 },
17316 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, 0 },
17317 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, 0 },
17318 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, 0 },
17320 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, 0 },
17321 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, 0 },
17322 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, 0 },
17324 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, 0 },
17325 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, 0 },
17326 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, 0 },
17327 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, 0 },
17329 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, 0 },
17330 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, 0 },
17332 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, 0 },
17333 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, 0 },
17334 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, 0 },
17335 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, 0 },
17336 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, 0 },
17337 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, 0 },
17339 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, 0 },
17340 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, 0 },
17341 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, 0 },
17342 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, 0 },
17344 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, 0 },
17345 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, 0 },
17346 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, 0 },
17347 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, 0 },
17348 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, 0 },
17349 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, 0 },
17352 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, UNKNOWN
, 0 },
17353 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, UNKNOWN
, 0 },
17354 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, UNKNOWN
, 0 },
17356 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, 0 },
17357 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, 0 },
17358 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, 0 },
17360 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, UNKNOWN
, 0 },
17361 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, UNKNOWN
, 0 },
17362 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, UNKNOWN
, 0 },
17363 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, UNKNOWN
, 0 },
17364 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, UNKNOWN
, 0 },
17365 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, UNKNOWN
, 0 },
17367 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, UNKNOWN
, 0 },
17368 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, UNKNOWN
, 0 },
17369 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, UNKNOWN
, 0 },
17370 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, UNKNOWN
, 0 },
17371 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, UNKNOWN
, 0 },
17372 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, UNKNOWN
, 0 },
17374 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, UNKNOWN
, 0 },
17375 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, UNKNOWN
, 0 },
17376 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, UNKNOWN
, 0 },
17377 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, UNKNOWN
, 0 },
17379 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, UNKNOWN
, 0 },
17380 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, UNKNOWN
, 0 },
17383 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, 0 },
17384 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, 0 },
17385 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, 0 },
17386 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, 0 },
17387 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, 0 },
17388 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, 0 },
17389 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, 0 },
17390 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, 0 },
17392 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
17393 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
17394 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
17395 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17396 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17397 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
17398 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
17399 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
17400 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
17401 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17402 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17403 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
17404 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
17405 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
17406 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
17407 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
17408 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
17409 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
17410 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
17411 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
17413 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, 0 },
17414 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, 0 },
17415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, 0 },
17416 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, 0 },
17418 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, 0 },
17419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, 0 },
17420 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, 0 },
17421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, 0 },
17423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, 0 },
17424 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, 0 },
17425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, 0 },
17427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, 0 },
17430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, 0 },
17431 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, 0 },
17432 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, 0 },
17433 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, 0 },
17434 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, 0 },
17435 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, 0 },
17436 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, 0 },
17437 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, 0 },
17439 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, 0 },
17440 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, 0 },
17441 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, 0 },
17442 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, 0 },
17443 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, 0 },
17444 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, 0 },
17445 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, 0 },
17446 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, 0 },
17448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, 0 },
17449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, 0 },
17451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, 0 },
17452 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, 0 },
17453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, 0 },
17454 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, 0 },
17456 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, 0 },
17457 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, 0 },
17459 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, 0 },
17460 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, 0 },
17461 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, 0 },
17462 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, 0 },
17463 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, 0 },
17464 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, 0 },
17466 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, 0 },
17467 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, 0 },
17468 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, 0 },
17469 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, 0 },
17471 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, 0 },
17472 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, 0 },
17473 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, 0 },
17474 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, 0 },
17475 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, 0 },
17476 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, 0 },
17477 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, 0 },
17478 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, 0 },
17480 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, 0 },
17481 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, 0 },
17482 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, 0 },
17484 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, 0 },
17485 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, UNKNOWN
, 0 },
17487 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, UNKNOWN
, 0 },
17488 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, 0 },
17490 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, UNKNOWN
, 0 },
17491 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, UNKNOWN
, 0 },
17492 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, UNKNOWN
, 0 },
17494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, UNKNOWN
, 0 },
17495 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, UNKNOWN
, 0 },
17496 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, UNKNOWN
, 0 },
17498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, UNKNOWN
, 0 },
17499 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, UNKNOWN
, 0 },
17501 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, UNKNOWN
, 0 },
17503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, 0 },
17504 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, 0 },
17505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, 0 },
17506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, 0 },
17509 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, 0 },
17510 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, 0 },
17511 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, 0 },
17512 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, 0 },
17513 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, 0 },
17514 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, 0 },
17517 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, 0 },
17518 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, 0 },
17519 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, 0 },
17520 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, 0 },
17521 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, 0 },
17522 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, 0 },
17523 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, 0 },
17524 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, 0 },
17525 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, 0 },
17526 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, 0 },
17527 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, 0 },
17528 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, 0 },
17529 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, 0 },
17530 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, 0 },
17531 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, 0 },
17532 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, 0 },
17533 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, 0 },
17534 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, 0 },
17535 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, 0 },
17536 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, 0 },
17537 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, 0 },
17538 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, 0 },
17539 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, 0 },
17540 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, 0 },
17543 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, 0 },
17544 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, 0 },
17545 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, 0 },
17546 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, 0 },
17547 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, 0 },
17548 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, 0 },
17549 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, 0 },
17550 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, 0 },
17551 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, 0 },
17552 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, 0 },
17553 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, 0, IX86_BUILTIN_PMULDQ128
, UNKNOWN
, 0 },
17554 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, 0 },
17557 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, 0 },
17560 static const struct builtin_description bdesc_1arg
[] =
17562 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, 0 },
17563 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, 0 },
17565 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, UNKNOWN
, 0 },
17566 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, UNKNOWN
, 0 },
17567 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, UNKNOWN
, 0 },
17569 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, 0 },
17570 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, 0 },
17571 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, 0 },
17572 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, 0 },
17573 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, 0 },
17574 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, 0 },
17576 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, 0 },
17577 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, 0 },
17579 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, UNKNOWN
, 0 },
17581 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, 0 },
17582 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, 0 },
17584 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, 0 },
17585 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, 0 },
17586 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, 0 },
17587 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, 0 },
17588 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, 0 },
17590 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, 0 },
17592 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, 0 },
17593 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, 0 },
17594 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, 0 },
17595 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, 0 },
17597 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, 0 },
17598 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, 0 },
17599 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, 0 },
17602 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, 0 },
17603 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, 0 },
17606 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, 0 },
17607 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, 0 },
17608 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, 0 },
17609 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, 0 },
17610 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, 0 },
17611 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, 0 },
17614 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, 0 },
17615 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, 0 },
17616 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, 0 },
17617 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, 0 },
17618 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, 0 },
17619 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, 0 },
17620 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, 0 },
17621 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, 0 },
17622 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, 0 },
17623 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, 0 },
17624 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, 0 },
17625 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, 0 },
17626 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, 0 },
17628 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
17629 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundpd
, 0, IX86_BUILTIN_ROUNDPD
, UNKNOWN
, 0 },
17630 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundps
, 0, IX86_BUILTIN_ROUNDPS
, UNKNOWN
, 0 },
17633 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17634 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17637 ix86_init_mmx_sse_builtins (void)
17639 const struct builtin_description
* d
;
17642 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
17643 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
17644 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
17645 tree V2DI_type_node
17646 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
17647 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
17648 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
17649 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
17650 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
17651 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
17652 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
17654 tree pchar_type_node
= build_pointer_type (char_type_node
);
17655 tree pcchar_type_node
= build_pointer_type (
17656 build_type_variant (char_type_node
, 1, 0));
17657 tree pfloat_type_node
= build_pointer_type (float_type_node
);
17658 tree pcfloat_type_node
= build_pointer_type (
17659 build_type_variant (float_type_node
, 1, 0));
17660 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
17661 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
17662 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
17665 tree int_ftype_v4sf_v4sf
17666 = build_function_type_list (integer_type_node
,
17667 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17668 tree v4si_ftype_v4sf_v4sf
17669 = build_function_type_list (V4SI_type_node
,
17670 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17671 /* MMX/SSE/integer conversions. */
17672 tree int_ftype_v4sf
17673 = build_function_type_list (integer_type_node
,
17674 V4SF_type_node
, NULL_TREE
);
17675 tree int64_ftype_v4sf
17676 = build_function_type_list (long_long_integer_type_node
,
17677 V4SF_type_node
, NULL_TREE
);
17678 tree int_ftype_v8qi
17679 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
17680 tree v4sf_ftype_v4sf_int
17681 = build_function_type_list (V4SF_type_node
,
17682 V4SF_type_node
, integer_type_node
, NULL_TREE
);
17683 tree v4sf_ftype_v4sf_int64
17684 = build_function_type_list (V4SF_type_node
,
17685 V4SF_type_node
, long_long_integer_type_node
,
17687 tree v4sf_ftype_v4sf_v2si
17688 = build_function_type_list (V4SF_type_node
,
17689 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
17691 /* Miscellaneous. */
17692 tree v8qi_ftype_v4hi_v4hi
17693 = build_function_type_list (V8QI_type_node
,
17694 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17695 tree v4hi_ftype_v2si_v2si
17696 = build_function_type_list (V4HI_type_node
,
17697 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17698 tree v4sf_ftype_v4sf_v4sf_int
17699 = build_function_type_list (V4SF_type_node
,
17700 V4SF_type_node
, V4SF_type_node
,
17701 integer_type_node
, NULL_TREE
);
17702 tree v2si_ftype_v4hi_v4hi
17703 = build_function_type_list (V2SI_type_node
,
17704 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17705 tree v4hi_ftype_v4hi_int
17706 = build_function_type_list (V4HI_type_node
,
17707 V4HI_type_node
, integer_type_node
, NULL_TREE
);
17708 tree v4hi_ftype_v4hi_di
17709 = build_function_type_list (V4HI_type_node
,
17710 V4HI_type_node
, long_long_unsigned_type_node
,
17712 tree v2si_ftype_v2si_di
17713 = build_function_type_list (V2SI_type_node
,
17714 V2SI_type_node
, long_long_unsigned_type_node
,
17716 tree void_ftype_void
17717 = build_function_type (void_type_node
, void_list_node
);
17718 tree void_ftype_unsigned
17719 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
17720 tree void_ftype_unsigned_unsigned
17721 = build_function_type_list (void_type_node
, unsigned_type_node
,
17722 unsigned_type_node
, NULL_TREE
);
17723 tree void_ftype_pcvoid_unsigned_unsigned
17724 = build_function_type_list (void_type_node
, const_ptr_type_node
,
17725 unsigned_type_node
, unsigned_type_node
,
17727 tree unsigned_ftype_void
17728 = build_function_type (unsigned_type_node
, void_list_node
);
17729 tree v2si_ftype_v4sf
17730 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
17731 /* Loads/stores. */
17732 tree void_ftype_v8qi_v8qi_pchar
17733 = build_function_type_list (void_type_node
,
17734 V8QI_type_node
, V8QI_type_node
,
17735 pchar_type_node
, NULL_TREE
);
17736 tree v4sf_ftype_pcfloat
17737 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
17738 /* @@@ the type is bogus */
17739 tree v4sf_ftype_v4sf_pv2si
17740 = build_function_type_list (V4SF_type_node
,
17741 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
17742 tree void_ftype_pv2si_v4sf
17743 = build_function_type_list (void_type_node
,
17744 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
17745 tree void_ftype_pfloat_v4sf
17746 = build_function_type_list (void_type_node
,
17747 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
17748 tree void_ftype_pdi_di
17749 = build_function_type_list (void_type_node
,
17750 pdi_type_node
, long_long_unsigned_type_node
,
17752 tree void_ftype_pv2di_v2di
17753 = build_function_type_list (void_type_node
,
17754 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
17755 /* Normal vector unops. */
17756 tree v4sf_ftype_v4sf
17757 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17758 tree v16qi_ftype_v16qi
17759 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17760 tree v8hi_ftype_v8hi
17761 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17762 tree v4si_ftype_v4si
17763 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17764 tree v8qi_ftype_v8qi
17765 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17766 tree v4hi_ftype_v4hi
17767 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17769 /* Normal vector binops. */
17770 tree v4sf_ftype_v4sf_v4sf
17771 = build_function_type_list (V4SF_type_node
,
17772 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17773 tree v8qi_ftype_v8qi_v8qi
17774 = build_function_type_list (V8QI_type_node
,
17775 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17776 tree v4hi_ftype_v4hi_v4hi
17777 = build_function_type_list (V4HI_type_node
,
17778 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17779 tree v2si_ftype_v2si_v2si
17780 = build_function_type_list (V2SI_type_node
,
17781 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17782 tree di_ftype_di_di
17783 = build_function_type_list (long_long_unsigned_type_node
,
17784 long_long_unsigned_type_node
,
17785 long_long_unsigned_type_node
, NULL_TREE
);
17787 tree di_ftype_di_di_int
17788 = build_function_type_list (long_long_unsigned_type_node
,
17789 long_long_unsigned_type_node
,
17790 long_long_unsigned_type_node
,
17791 integer_type_node
, NULL_TREE
);
17793 tree v2si_ftype_v2sf
17794 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
17795 tree v2sf_ftype_v2si
17796 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
17797 tree v2si_ftype_v2si
17798 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17799 tree v2sf_ftype_v2sf
17800 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17801 tree v2sf_ftype_v2sf_v2sf
17802 = build_function_type_list (V2SF_type_node
,
17803 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17804 tree v2si_ftype_v2sf_v2sf
17805 = build_function_type_list (V2SI_type_node
,
17806 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17807 tree pint_type_node
= build_pointer_type (integer_type_node
);
17808 tree pdouble_type_node
= build_pointer_type (double_type_node
);
17809 tree pcdouble_type_node
= build_pointer_type (
17810 build_type_variant (double_type_node
, 1, 0));
17811 tree int_ftype_v2df_v2df
17812 = build_function_type_list (integer_type_node
,
17813 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17815 tree void_ftype_pcvoid
17816 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
17817 tree v4sf_ftype_v4si
17818 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
17819 tree v4si_ftype_v4sf
17820 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
17821 tree v2df_ftype_v4si
17822 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
17823 tree v4si_ftype_v2df
17824 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
17825 tree v4si_ftype_v2df_v2df
17826 = build_function_type_list (V4SI_type_node
,
17827 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17828 tree v2si_ftype_v2df
17829 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
17830 tree v4sf_ftype_v2df
17831 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17832 tree v2df_ftype_v2si
17833 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
17834 tree v2df_ftype_v4sf
17835 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17836 tree int_ftype_v2df
17837 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
17838 tree int64_ftype_v2df
17839 = build_function_type_list (long_long_integer_type_node
,
17840 V2DF_type_node
, NULL_TREE
);
17841 tree v2df_ftype_v2df_int
17842 = build_function_type_list (V2DF_type_node
,
17843 V2DF_type_node
, integer_type_node
, NULL_TREE
);
17844 tree v2df_ftype_v2df_int64
17845 = build_function_type_list (V2DF_type_node
,
17846 V2DF_type_node
, long_long_integer_type_node
,
17848 tree v4sf_ftype_v4sf_v2df
17849 = build_function_type_list (V4SF_type_node
,
17850 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17851 tree v2df_ftype_v2df_v4sf
17852 = build_function_type_list (V2DF_type_node
,
17853 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17854 tree v2df_ftype_v2df_v2df_int
17855 = build_function_type_list (V2DF_type_node
,
17856 V2DF_type_node
, V2DF_type_node
,
17859 tree v2df_ftype_v2df_pcdouble
17860 = build_function_type_list (V2DF_type_node
,
17861 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17862 tree void_ftype_pdouble_v2df
17863 = build_function_type_list (void_type_node
,
17864 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
17865 tree void_ftype_pint_int
17866 = build_function_type_list (void_type_node
,
17867 pint_type_node
, integer_type_node
, NULL_TREE
);
17868 tree void_ftype_v16qi_v16qi_pchar
17869 = build_function_type_list (void_type_node
,
17870 V16QI_type_node
, V16QI_type_node
,
17871 pchar_type_node
, NULL_TREE
);
17872 tree v2df_ftype_pcdouble
17873 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17874 tree v2df_ftype_v2df_v2df
17875 = build_function_type_list (V2DF_type_node
,
17876 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17877 tree v16qi_ftype_v16qi_v16qi
17878 = build_function_type_list (V16QI_type_node
,
17879 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17880 tree v8hi_ftype_v8hi_v8hi
17881 = build_function_type_list (V8HI_type_node
,
17882 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17883 tree v4si_ftype_v4si_v4si
17884 = build_function_type_list (V4SI_type_node
,
17885 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17886 tree v2di_ftype_v2di_v2di
17887 = build_function_type_list (V2DI_type_node
,
17888 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17889 tree v2di_ftype_v2df_v2df
17890 = build_function_type_list (V2DI_type_node
,
17891 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17892 tree v2df_ftype_v2df
17893 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17894 tree v2di_ftype_v2di_int
17895 = build_function_type_list (V2DI_type_node
,
17896 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17897 tree v2di_ftype_v2di_v2di_int
17898 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17899 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17900 tree v4si_ftype_v4si_int
17901 = build_function_type_list (V4SI_type_node
,
17902 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17903 tree v8hi_ftype_v8hi_int
17904 = build_function_type_list (V8HI_type_node
,
17905 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17906 tree v4si_ftype_v8hi_v8hi
17907 = build_function_type_list (V4SI_type_node
,
17908 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17909 tree di_ftype_v8qi_v8qi
17910 = build_function_type_list (long_long_unsigned_type_node
,
17911 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17912 tree di_ftype_v2si_v2si
17913 = build_function_type_list (long_long_unsigned_type_node
,
17914 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17915 tree v2di_ftype_v16qi_v16qi
17916 = build_function_type_list (V2DI_type_node
,
17917 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17918 tree v2di_ftype_v4si_v4si
17919 = build_function_type_list (V2DI_type_node
,
17920 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17921 tree int_ftype_v16qi
17922 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17923 tree v16qi_ftype_pcchar
17924 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17925 tree void_ftype_pchar_v16qi
17926 = build_function_type_list (void_type_node
,
17927 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17929 tree v2di_ftype_v2di_unsigned_unsigned
17930 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17931 unsigned_type_node
, unsigned_type_node
,
17933 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17934 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17935 unsigned_type_node
, unsigned_type_node
,
17937 tree v2di_ftype_v2di_v16qi
17938 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17940 tree v2df_ftype_v2df_v2df_v2df
17941 = build_function_type_list (V2DF_type_node
,
17942 V2DF_type_node
, V2DF_type_node
,
17943 V2DF_type_node
, NULL_TREE
);
17944 tree v4sf_ftype_v4sf_v4sf_v4sf
17945 = build_function_type_list (V4SF_type_node
,
17946 V4SF_type_node
, V4SF_type_node
,
17947 V4SF_type_node
, NULL_TREE
);
17948 tree v8hi_ftype_v16qi
17949 = build_function_type_list (V8HI_type_node
, V16QI_type_node
,
17951 tree v4si_ftype_v16qi
17952 = build_function_type_list (V4SI_type_node
, V16QI_type_node
,
17954 tree v2di_ftype_v16qi
17955 = build_function_type_list (V2DI_type_node
, V16QI_type_node
,
17957 tree v4si_ftype_v8hi
17958 = build_function_type_list (V4SI_type_node
, V8HI_type_node
,
17960 tree v2di_ftype_v8hi
17961 = build_function_type_list (V2DI_type_node
, V8HI_type_node
,
17963 tree v2di_ftype_v4si
17964 = build_function_type_list (V2DI_type_node
, V4SI_type_node
,
17966 tree v2di_ftype_pv2di
17967 = build_function_type_list (V2DI_type_node
, pv2di_type_node
,
17969 tree v16qi_ftype_v16qi_v16qi_int
17970 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17971 V16QI_type_node
, integer_type_node
,
17973 tree v16qi_ftype_v16qi_v16qi_v16qi
17974 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17975 V16QI_type_node
, V16QI_type_node
,
17977 tree v8hi_ftype_v8hi_v8hi_int
17978 = build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17979 V8HI_type_node
, integer_type_node
,
17981 tree v4si_ftype_v4si_v4si_int
17982 = build_function_type_list (V4SI_type_node
, V4SI_type_node
,
17983 V4SI_type_node
, integer_type_node
,
17985 tree int_ftype_v2di_v2di
17986 = build_function_type_list (integer_type_node
,
17987 V2DI_type_node
, V2DI_type_node
,
17989 tree int_ftype_v16qi_int_v16qi_int_int
17990 = build_function_type_list (integer_type_node
,
17997 tree v16qi_ftype_v16qi_int_v16qi_int_int
17998 = build_function_type_list (V16QI_type_node
,
18005 tree int_ftype_v16qi_v16qi_int
18006 = build_function_type_list (integer_type_node
,
18013 /* The __float80 type. */
18014 if (TYPE_MODE (long_double_type_node
) == XFmode
)
18015 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
18019 /* The __float80 type. */
18020 tree float80_type_node
= make_node (REAL_TYPE
);
18022 TYPE_PRECISION (float80_type_node
) = 80;
18023 layout_type (float80_type_node
);
18024 (*lang_hooks
.types
.register_builtin_type
) (float80_type_node
,
18030 tree float128_type_node
= make_node (REAL_TYPE
);
18032 TYPE_PRECISION (float128_type_node
) = 128;
18033 layout_type (float128_type_node
);
18034 (*lang_hooks
.types
.register_builtin_type
) (float128_type_node
,
18037 /* TFmode support builtins. */
18038 ftype
= build_function_type (float128_type_node
,
18040 def_builtin (OPTION_MASK_ISA_64BIT
, "__builtin_infq", ftype
, IX86_BUILTIN_INFQ
);
18042 ftype
= build_function_type_list (float128_type_node
,
18043 float128_type_node
,
18045 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_fabsq", ftype
, IX86_BUILTIN_FABSQ
);
18047 ftype
= build_function_type_list (float128_type_node
,
18048 float128_type_node
,
18049 float128_type_node
,
18051 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_copysignq", ftype
, IX86_BUILTIN_COPYSIGNQ
);
18054 /* Add all SSE builtins that are more or less simple operations on
18056 for (i
= 0, d
= bdesc_sse_3arg
;
18057 i
< ARRAY_SIZE (bdesc_sse_3arg
);
18060 /* Use one of the operands; the target can have a different mode for
18061 mask-generating compares. */
18062 enum machine_mode mode
;
18067 mode
= insn_data
[d
->icode
].operand
[1].mode
;
18072 type
= v16qi_ftype_v16qi_v16qi_int
;
18075 type
= v8hi_ftype_v8hi_v8hi_int
;
18078 type
= v4si_ftype_v4si_v4si_int
;
18081 type
= v2di_ftype_v2di_v2di_int
;
18084 type
= v2df_ftype_v2df_v2df_int
;
18087 type
= v4sf_ftype_v4sf_v4sf_int
;
18090 gcc_unreachable ();
18093 /* Override for variable blends. */
18096 case CODE_FOR_sse4_1_blendvpd
:
18097 type
= v2df_ftype_v2df_v2df_v2df
;
18099 case CODE_FOR_sse4_1_blendvps
:
18100 type
= v4sf_ftype_v4sf_v4sf_v4sf
;
18102 case CODE_FOR_sse4_1_pblendvb
:
18103 type
= v16qi_ftype_v16qi_v16qi_v16qi
;
18109 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
18112 /* Add all builtins that are more or less simple operations on two
18114 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18116 /* Use one of the operands; the target can have a different mode for
18117 mask-generating compares. */
18118 enum machine_mode mode
;
18123 mode
= insn_data
[d
->icode
].operand
[1].mode
;
18128 type
= v16qi_ftype_v16qi_v16qi
;
18131 type
= v8hi_ftype_v8hi_v8hi
;
18134 type
= v4si_ftype_v4si_v4si
;
18137 type
= v2di_ftype_v2di_v2di
;
18140 type
= v2df_ftype_v2df_v2df
;
18143 type
= v4sf_ftype_v4sf_v4sf
;
18146 type
= v8qi_ftype_v8qi_v8qi
;
18149 type
= v4hi_ftype_v4hi_v4hi
;
18152 type
= v2si_ftype_v2si_v2si
;
18155 type
= di_ftype_di_di
;
18159 gcc_unreachable ();
18162 /* Override for comparisons. */
18163 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18164 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
18165 type
= v4si_ftype_v4sf_v4sf
;
18167 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18168 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18169 type
= v2di_ftype_v2df_v2df
;
18171 if (d
->icode
== CODE_FOR_vec_pack_sfix_v2df
)
18172 type
= v4si_ftype_v2df_v2df
;
18174 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
18177 /* Add all builtins that are more or less simple operations on 1 operand. */
18178 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18180 enum machine_mode mode
;
18185 mode
= insn_data
[d
->icode
].operand
[1].mode
;
18190 type
= v16qi_ftype_v16qi
;
18193 type
= v8hi_ftype_v8hi
;
18196 type
= v4si_ftype_v4si
;
18199 type
= v2df_ftype_v2df
;
18202 type
= v4sf_ftype_v4sf
;
18205 type
= v8qi_ftype_v8qi
;
18208 type
= v4hi_ftype_v4hi
;
18211 type
= v2si_ftype_v2si
;
18218 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
18221 /* pcmpestr[im] insns. */
18222 for (i
= 0, d
= bdesc_pcmpestr
;
18223 i
< ARRAY_SIZE (bdesc_pcmpestr
);
18226 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
18227 ftype
= v16qi_ftype_v16qi_int_v16qi_int_int
;
18229 ftype
= int_ftype_v16qi_int_v16qi_int_int
;
18230 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
18233 /* pcmpistr[im] insns. */
18234 for (i
= 0, d
= bdesc_pcmpistr
;
18235 i
< ARRAY_SIZE (bdesc_pcmpistr
);
18238 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
18239 ftype
= v16qi_ftype_v16qi_v16qi_int
;
18241 ftype
= int_ftype_v16qi_v16qi_int
;
18242 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
18245 /* Add the remaining MMX insns with somewhat more complicated types. */
18246 def_builtin (OPTION_MASK_ISA_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
18247 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
18248 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
18249 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
18251 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
18252 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
18253 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
18255 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
18256 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
18258 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
18259 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
18261 /* comi/ucomi insns. */
18262 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18263 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
18264 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
18266 def_builtin_const (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
18269 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
18270 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2di_v2di
, d
->code
);
18272 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
18273 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
18274 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
18276 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
18277 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
18278 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
18279 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
18280 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
18281 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
18282 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
18283 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
18284 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
18285 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
18286 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
18288 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
18290 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
18291 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
18293 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
18294 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
18295 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
18296 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
18298 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
18299 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
18300 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
18301 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
18303 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
18305 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
18307 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
18308 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
18309 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
18310 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
18311 ftype
= build_function_type_list (float_type_node
,
18314 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtf", ftype
, IX86_BUILTIN_RSQRTF
);
18315 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
18316 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
18318 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
18320 /* Original 3DNow! */
18321 def_builtin (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
18322 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
18323 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
18324 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
18325 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
18326 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
18327 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
18328 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
18329 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
18330 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
18331 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
18332 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
18333 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
18334 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
18335 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
18336 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
18337 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
18338 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
18339 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
18340 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
18342 /* 3DNow! extension as used in the Athlon CPU. */
18343 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
18344 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
18345 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
18346 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
18347 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
18348 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
18351 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
18353 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
18354 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
18356 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
18357 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
18359 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
18360 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
18361 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
18362 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
18363 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
18365 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
18366 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
18367 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
18368 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
18370 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
18371 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
18373 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
18375 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
18376 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
18378 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
18379 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
18380 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
18381 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
18382 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
18384 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
18386 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
18387 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
18388 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
18389 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
18391 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
18392 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
18393 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
18395 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
18396 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
18397 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
18398 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
18400 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
18401 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
18402 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
18404 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
18405 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
18407 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
18408 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
18410 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
18411 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
18412 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
18413 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
18414 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
18415 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
18416 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
18418 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
18419 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
18420 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
18421 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
18422 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
18423 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
18424 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
18426 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
18427 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
18428 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
18429 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
18431 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
18433 /* Prescott New Instructions. */
18434 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned
, IX86_BUILTIN_MONITOR
);
18435 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned
, IX86_BUILTIN_MWAIT
);
18436 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_lddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
18439 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
18440 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
, IX86_BUILTIN_PALIGNR
);
18443 def_builtin (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_movntdqa", v2di_ftype_pv2di
, IX86_BUILTIN_MOVNTDQA
);
18444 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVSXBW128
);
18445 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVSXBD128
);
18446 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVSXBQ128
);
18447 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVSXWD128
);
18448 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVSXWQ128
);
18449 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVSXDQ128
);
18450 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVZXBW128
);
18451 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVZXBD128
);
18452 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVZXBQ128
);
18453 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVZXWD128
);
18454 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVZXWQ128
);
18455 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVZXDQ128
);
18456 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULDQ128
);
18457 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundpd", v2df_ftype_v2df_int
, IX86_BUILTIN_ROUNDPD
);
18458 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int
, IX86_BUILTIN_ROUNDPS
);
18459 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_ROUNDSD
);
18460 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_ROUNDSS
);
18463 ftype
= build_function_type_list (unsigned_type_node
,
18464 unsigned_type_node
,
18465 unsigned_char_type_node
,
18467 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32qi", ftype
, IX86_BUILTIN_CRC32QI
);
18468 ftype
= build_function_type_list (unsigned_type_node
,
18469 unsigned_type_node
,
18470 short_unsigned_type_node
,
18472 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32hi", ftype
, IX86_BUILTIN_CRC32HI
);
18473 ftype
= build_function_type_list (unsigned_type_node
,
18474 unsigned_type_node
,
18475 unsigned_type_node
,
18477 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32si", ftype
, IX86_BUILTIN_CRC32SI
);
18478 ftype
= build_function_type_list (long_long_unsigned_type_node
,
18479 long_long_unsigned_type_node
,
18480 long_long_unsigned_type_node
,
18482 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32di", ftype
, IX86_BUILTIN_CRC32DI
);
18484 /* AMDFAM10 SSE4A New built-ins */
18485 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
18486 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
18487 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
18488 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
18489 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
18490 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
18492 /* Access to the vec_init patterns. */
18493 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
18494 integer_type_node
, NULL_TREE
);
18495 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si", ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
18497 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
18498 short_integer_type_node
,
18499 short_integer_type_node
,
18500 short_integer_type_node
, NULL_TREE
);
18501 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi", ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
18503 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
18504 char_type_node
, char_type_node
,
18505 char_type_node
, char_type_node
,
18506 char_type_node
, char_type_node
,
18507 char_type_node
, NULL_TREE
);
18508 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi", ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
18510 /* Access to the vec_extract patterns. */
18511 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
18512 integer_type_node
, NULL_TREE
);
18513 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df", ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
18515 ftype
= build_function_type_list (long_long_integer_type_node
,
18516 V2DI_type_node
, integer_type_node
,
18518 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di", ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
18520 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
18521 integer_type_node
, NULL_TREE
);
18522 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf", ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
18524 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
18525 integer_type_node
, NULL_TREE
);
18526 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si", ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
18528 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
18529 integer_type_node
, NULL_TREE
);
18530 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi", ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
18532 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
18533 integer_type_node
, NULL_TREE
);
18534 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi", ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
18536 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
18537 integer_type_node
, NULL_TREE
);
18538 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si", ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
18540 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
18541 integer_type_node
, NULL_TREE
);
18542 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi", ftype
, IX86_BUILTIN_VEC_EXT_V16QI
);
18544 /* Access to the vec_set patterns. */
18545 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18547 integer_type_node
, NULL_TREE
);
18548 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_vec_set_v2di", ftype
, IX86_BUILTIN_VEC_SET_V2DI
);
18550 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
18552 integer_type_node
, NULL_TREE
);
18553 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf", ftype
, IX86_BUILTIN_VEC_SET_V4SF
);
18555 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
18557 integer_type_node
, NULL_TREE
);
18558 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si", ftype
, IX86_BUILTIN_VEC_SET_V4SI
);
18560 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
18562 integer_type_node
, NULL_TREE
);
18563 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi", ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
18565 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
18567 integer_type_node
, NULL_TREE
);
18568 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_set_v4hi", ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
18570 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18572 integer_type_node
, NULL_TREE
);
18573 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi", ftype
, IX86_BUILTIN_VEC_SET_V16QI
);
18577 ix86_init_builtins (void)
18580 ix86_init_mmx_sse_builtins ();
18583 /* Errors in the source file can cause expand_expr to return const0_rtx
18584 where we expect a vector. To avoid crashing, use one of the vector
18585 clear instructions. */
18587 safe_vector_operand (rtx x
, enum machine_mode mode
)
18589 if (x
== const0_rtx
)
18590 x
= CONST0_RTX (mode
);
18594 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18595 4 operands. The third argument must be a constant smaller than 8
18599 ix86_expand_sse_4_operands_builtin (enum insn_code icode
, tree exp
,
18603 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18604 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18605 tree arg2
= CALL_EXPR_ARG (exp
, 2);
18606 rtx op0
= expand_normal (arg0
);
18607 rtx op1
= expand_normal (arg1
);
18608 rtx op2
= expand_normal (arg2
);
18609 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18610 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
18611 enum machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
18612 enum machine_mode mode3
= insn_data
[icode
].operand
[3].mode
;
18614 if (VECTOR_MODE_P (mode1
))
18615 op0
= safe_vector_operand (op0
, mode1
);
18616 if (VECTOR_MODE_P (mode2
))
18617 op1
= safe_vector_operand (op1
, mode2
);
18618 if (VECTOR_MODE_P (mode3
))
18619 op2
= safe_vector_operand (op2
, mode3
);
18623 || GET_MODE (target
) != tmode
18624 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18625 target
= gen_reg_rtx (tmode
);
18627 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18628 op0
= copy_to_mode_reg (mode1
, op0
);
18629 if ((optimize
&& !register_operand (op1
, mode2
))
18630 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18631 op1
= copy_to_mode_reg (mode2
, op1
);
18633 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18636 case CODE_FOR_sse4_1_blendvpd
:
18637 case CODE_FOR_sse4_1_blendvps
:
18638 case CODE_FOR_sse4_1_pblendvb
:
18639 op2
= copy_to_mode_reg (mode3
, op2
);
18642 case CODE_FOR_sse4_1_roundsd
:
18643 case CODE_FOR_sse4_1_roundss
:
18644 error ("the third argument must be a 4-bit immediate");
18648 error ("the third argument must be an 8-bit immediate");
18652 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18659 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18662 ix86_expand_crc32 (enum insn_code icode
, tree exp
, rtx target
)
18665 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18666 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18667 rtx op0
= expand_normal (arg0
);
18668 rtx op1
= expand_normal (arg1
);
18669 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18670 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18671 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18675 || GET_MODE (target
) != tmode
18676 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18677 target
= gen_reg_rtx (tmode
);
18679 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18680 op0
= copy_to_mode_reg (mode0
, op0
);
18681 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18683 op1
= copy_to_reg (op1
);
18684 op1
= simplify_gen_subreg (mode1
, op1
, GET_MODE (op1
), 0);
18687 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18694 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18697 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
18700 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18701 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18702 rtx op0
= expand_normal (arg0
);
18703 rtx op1
= expand_normal (arg1
);
18704 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18705 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18706 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18708 if (VECTOR_MODE_P (mode0
))
18709 op0
= safe_vector_operand (op0
, mode0
);
18710 if (VECTOR_MODE_P (mode1
))
18711 op1
= safe_vector_operand (op1
, mode1
);
18713 if (optimize
|| !target
18714 || GET_MODE (target
) != tmode
18715 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18716 target
= gen_reg_rtx (tmode
);
18718 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
18720 rtx x
= gen_reg_rtx (V4SImode
);
18721 emit_insn (gen_sse2_loadd (x
, op1
));
18722 op1
= gen_lowpart (TImode
, x
);
18725 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18726 op0
= copy_to_mode_reg (mode0
, op0
);
18727 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18728 op1
= copy_to_mode_reg (mode1
, op1
);
18730 /* ??? Using ix86_fixup_binary_operands is problematic when
18731 we've got mismatched modes. Fake it. */
18737 if (tmode
== mode0
&& tmode
== mode1
)
18739 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
18743 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
18745 op0
= force_reg (mode0
, op0
);
18746 op1
= force_reg (mode1
, op1
);
18747 target
= gen_reg_rtx (tmode
);
18750 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18757 /* Subroutine of ix86_expand_builtin to take care of stores. */
18760 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
18763 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18764 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18765 rtx op0
= expand_normal (arg0
);
18766 rtx op1
= expand_normal (arg1
);
18767 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
18768 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
18770 if (VECTOR_MODE_P (mode1
))
18771 op1
= safe_vector_operand (op1
, mode1
);
18773 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18774 op1
= copy_to_mode_reg (mode1
, op1
);
18776 pat
= GEN_FCN (icode
) (op0
, op1
);
18782 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18785 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
18786 rtx target
, int do_load
)
18789 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18790 rtx op0
= expand_normal (arg0
);
18791 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18792 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18794 if (optimize
|| !target
18795 || GET_MODE (target
) != tmode
18796 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18797 target
= gen_reg_rtx (tmode
);
18799 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18802 if (VECTOR_MODE_P (mode0
))
18803 op0
= safe_vector_operand (op0
, mode0
);
18805 if ((optimize
&& !register_operand (op0
, mode0
))
18806 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18807 op0
= copy_to_mode_reg (mode0
, op0
);
18812 case CODE_FOR_sse4_1_roundpd
:
18813 case CODE_FOR_sse4_1_roundps
:
18815 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18816 rtx op1
= expand_normal (arg1
);
18817 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18819 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18821 error ("the second argument must be a 4-bit immediate");
18824 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18828 pat
= GEN_FCN (icode
) (target
, op0
);
18838 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18839 sqrtss, rsqrtss, rcpss. */
18842 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
18845 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18846 rtx op1
, op0
= expand_normal (arg0
);
18847 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18848 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18850 if (optimize
|| !target
18851 || GET_MODE (target
) != tmode
18852 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18853 target
= gen_reg_rtx (tmode
);
18855 if (VECTOR_MODE_P (mode0
))
18856 op0
= safe_vector_operand (op0
, mode0
);
18858 if ((optimize
&& !register_operand (op0
, mode0
))
18859 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18860 op0
= copy_to_mode_reg (mode0
, op0
);
18863 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
18864 op1
= copy_to_mode_reg (mode0
, op1
);
18866 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18873 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18876 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
18880 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18881 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18882 rtx op0
= expand_normal (arg0
);
18883 rtx op1
= expand_normal (arg1
);
18885 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
18886 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
18887 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
18888 enum rtx_code comparison
= d
->comparison
;
18890 if (VECTOR_MODE_P (mode0
))
18891 op0
= safe_vector_operand (op0
, mode0
);
18892 if (VECTOR_MODE_P (mode1
))
18893 op1
= safe_vector_operand (op1
, mode1
);
18895 /* Swap operands if we have a comparison that isn't available in
18897 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18899 rtx tmp
= gen_reg_rtx (mode1
);
18900 emit_move_insn (tmp
, op1
);
18905 if (optimize
|| !target
18906 || GET_MODE (target
) != tmode
18907 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
18908 target
= gen_reg_rtx (tmode
);
18910 if ((optimize
&& !register_operand (op0
, mode0
))
18911 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
18912 op0
= copy_to_mode_reg (mode0
, op0
);
18913 if ((optimize
&& !register_operand (op1
, mode1
))
18914 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
18915 op1
= copy_to_mode_reg (mode1
, op1
);
18917 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
18918 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
18925 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18928 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
18932 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18933 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18934 rtx op0
= expand_normal (arg0
);
18935 rtx op1
= expand_normal (arg1
);
18936 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18937 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18938 enum rtx_code comparison
= d
->comparison
;
18940 if (VECTOR_MODE_P (mode0
))
18941 op0
= safe_vector_operand (op0
, mode0
);
18942 if (VECTOR_MODE_P (mode1
))
18943 op1
= safe_vector_operand (op1
, mode1
);
18945 /* Swap operands if we have a comparison that isn't available in
18947 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18954 target
= gen_reg_rtx (SImode
);
18955 emit_move_insn (target
, const0_rtx
);
18956 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18958 if ((optimize
&& !register_operand (op0
, mode0
))
18959 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
18960 op0
= copy_to_mode_reg (mode0
, op0
);
18961 if ((optimize
&& !register_operand (op1
, mode1
))
18962 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
18963 op1
= copy_to_mode_reg (mode1
, op1
);
18965 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
18969 emit_insn (gen_rtx_SET (VOIDmode
,
18970 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18971 gen_rtx_fmt_ee (comparison
, QImode
,
18975 return SUBREG_REG (target
);
18978 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18981 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
18985 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18986 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18987 rtx op0
= expand_normal (arg0
);
18988 rtx op1
= expand_normal (arg1
);
18989 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18990 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18991 enum rtx_code comparison
= d
->comparison
;
18993 if (VECTOR_MODE_P (mode0
))
18994 op0
= safe_vector_operand (op0
, mode0
);
18995 if (VECTOR_MODE_P (mode1
))
18996 op1
= safe_vector_operand (op1
, mode1
);
18998 target
= gen_reg_rtx (SImode
);
18999 emit_move_insn (target
, const0_rtx
);
19000 target
= gen_rtx_SUBREG (QImode
, target
, 0);
19002 if ((optimize
&& !register_operand (op0
, mode0
))
19003 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
19004 op0
= copy_to_mode_reg (mode0
, op0
);
19005 if ((optimize
&& !register_operand (op1
, mode1
))
19006 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
19007 op1
= copy_to_mode_reg (mode1
, op1
);
19009 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
19013 emit_insn (gen_rtx_SET (VOIDmode
,
19014 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
19015 gen_rtx_fmt_ee (comparison
, QImode
,
19019 return SUBREG_REG (target
);
19022 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
19025 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
19026 tree exp
, rtx target
)
19029 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19030 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19031 tree arg2
= CALL_EXPR_ARG (exp
, 2);
19032 tree arg3
= CALL_EXPR_ARG (exp
, 3);
19033 tree arg4
= CALL_EXPR_ARG (exp
, 4);
19034 rtx scratch0
, scratch1
;
19035 rtx op0
= expand_normal (arg0
);
19036 rtx op1
= expand_normal (arg1
);
19037 rtx op2
= expand_normal (arg2
);
19038 rtx op3
= expand_normal (arg3
);
19039 rtx op4
= expand_normal (arg4
);
19040 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
19042 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
19043 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
19044 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
19045 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
19046 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
19047 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
19048 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
19050 if (VECTOR_MODE_P (modev2
))
19051 op0
= safe_vector_operand (op0
, modev2
);
19052 if (VECTOR_MODE_P (modev4
))
19053 op2
= safe_vector_operand (op2
, modev4
);
19055 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
19056 op0
= copy_to_mode_reg (modev2
, op0
);
19057 if (! (*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modei3
))
19058 op1
= copy_to_mode_reg (modei3
, op1
);
19059 if ((optimize
&& !register_operand (op2
, modev4
))
19060 || !(*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modev4
))
19061 op2
= copy_to_mode_reg (modev4
, op2
);
19062 if (! (*insn_data
[d
->icode
].operand
[5].predicate
) (op3
, modei5
))
19063 op3
= copy_to_mode_reg (modei5
, op3
);
19065 if (! (*insn_data
[d
->icode
].operand
[6].predicate
) (op4
, modeimm
))
19067 error ("the fifth argument must be a 8-bit immediate");
19071 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
19073 if (optimize
|| !target
19074 || GET_MODE (target
) != tmode0
19075 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
19076 target
= gen_reg_rtx (tmode0
);
19078 scratch1
= gen_reg_rtx (tmode1
);
19080 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
19082 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
19084 if (optimize
|| !target
19085 || GET_MODE (target
) != tmode1
19086 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
19087 target
= gen_reg_rtx (tmode1
);
19089 scratch0
= gen_reg_rtx (tmode0
);
19091 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
19095 gcc_assert (d
->flag
);
19097 scratch0
= gen_reg_rtx (tmode0
);
19098 scratch1
= gen_reg_rtx (tmode1
);
19100 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
19110 target
= gen_reg_rtx (SImode
);
19111 emit_move_insn (target
, const0_rtx
);
19112 target
= gen_rtx_SUBREG (QImode
, target
, 0);
19115 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
19116 gen_rtx_fmt_ee (EQ
, QImode
,
19117 gen_rtx_REG ((enum machine_mode
) d
->flag
,
19120 return SUBREG_REG (target
);
19127 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
19130 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
19131 tree exp
, rtx target
)
19134 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19135 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19136 tree arg2
= CALL_EXPR_ARG (exp
, 2);
19137 rtx scratch0
, scratch1
;
19138 rtx op0
= expand_normal (arg0
);
19139 rtx op1
= expand_normal (arg1
);
19140 rtx op2
= expand_normal (arg2
);
19141 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
19143 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
19144 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
19145 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
19146 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
19147 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
19149 if (VECTOR_MODE_P (modev2
))
19150 op0
= safe_vector_operand (op0
, modev2
);
19151 if (VECTOR_MODE_P (modev3
))
19152 op1
= safe_vector_operand (op1
, modev3
);
19154 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
19155 op0
= copy_to_mode_reg (modev2
, op0
);
19156 if ((optimize
&& !register_operand (op1
, modev3
))
19157 || !(*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modev3
))
19158 op1
= copy_to_mode_reg (modev3
, op1
);
19160 if (! (*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modeimm
))
19162 error ("the third argument must be a 8-bit immediate");
19166 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
19168 if (optimize
|| !target
19169 || GET_MODE (target
) != tmode0
19170 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
19171 target
= gen_reg_rtx (tmode0
);
19173 scratch1
= gen_reg_rtx (tmode1
);
19175 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
19177 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
19179 if (optimize
|| !target
19180 || GET_MODE (target
) != tmode1
19181 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
19182 target
= gen_reg_rtx (tmode1
);
19184 scratch0
= gen_reg_rtx (tmode0
);
19186 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
19190 gcc_assert (d
->flag
);
19192 scratch0
= gen_reg_rtx (tmode0
);
19193 scratch1
= gen_reg_rtx (tmode1
);
19195 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
19205 target
= gen_reg_rtx (SImode
);
19206 emit_move_insn (target
, const0_rtx
);
19207 target
= gen_rtx_SUBREG (QImode
, target
, 0);
19210 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
19211 gen_rtx_fmt_ee (EQ
, QImode
,
19212 gen_rtx_REG ((enum machine_mode
) d
->flag
,
19215 return SUBREG_REG (target
);
19221 /* Return the integer constant in ARG. Constrain it to be in the range
19222 of the subparts of VEC_TYPE; issue an error if not. */
19225 get_element_number (tree vec_type
, tree arg
)
19227 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
19229 if (!host_integerp (arg
, 1)
19230 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
19232 error ("selector must be an integer constant in the range 0..%wi", max
);
19239 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19240 ix86_expand_vector_init. We DO have language-level syntax for this, in
19241 the form of (type){ init-list }. Except that since we can't place emms
19242 instructions from inside the compiler, we can't allow the use of MMX
19243 registers unless the user explicitly asks for it. So we do *not* define
19244 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
19245 we have builtins invoked by mmintrin.h that gives us license to emit
19246 these sorts of instructions. */
19249 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
19251 enum machine_mode tmode
= TYPE_MODE (type
);
19252 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
19253 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
19254 rtvec v
= rtvec_alloc (n_elt
);
19256 gcc_assert (VECTOR_MODE_P (tmode
));
19257 gcc_assert (call_expr_nargs (exp
) == n_elt
);
19259 for (i
= 0; i
< n_elt
; ++i
)
19261 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
19262 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
19265 if (!target
|| !register_operand (target
, tmode
))
19266 target
= gen_reg_rtx (tmode
);
19268 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
19272 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19273 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19274 had a language-level syntax for referencing vector elements. */
19277 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
19279 enum machine_mode tmode
, mode0
;
19284 arg0
= CALL_EXPR_ARG (exp
, 0);
19285 arg1
= CALL_EXPR_ARG (exp
, 1);
19287 op0
= expand_normal (arg0
);
19288 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
19290 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
19291 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
19292 gcc_assert (VECTOR_MODE_P (mode0
));
19294 op0
= force_reg (mode0
, op0
);
19296 if (optimize
|| !target
|| !register_operand (target
, tmode
))
19297 target
= gen_reg_rtx (tmode
);
19299 ix86_expand_vector_extract (true, target
, op0
, elt
);
19304 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19305 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19306 a language-level syntax for referencing vector elements. */
19309 ix86_expand_vec_set_builtin (tree exp
)
19311 enum machine_mode tmode
, mode1
;
19312 tree arg0
, arg1
, arg2
;
19314 rtx op0
, op1
, target
;
19316 arg0
= CALL_EXPR_ARG (exp
, 0);
19317 arg1
= CALL_EXPR_ARG (exp
, 1);
19318 arg2
= CALL_EXPR_ARG (exp
, 2);
19320 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
19321 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
19322 gcc_assert (VECTOR_MODE_P (tmode
));
19324 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
19325 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
19326 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
19328 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
19329 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
19331 op0
= force_reg (tmode
, op0
);
19332 op1
= force_reg (mode1
, op1
);
19334 /* OP0 is the source of these builtin functions and shouldn't be
19335 modified. Create a copy, use it and return it as target. */
19336 target
= gen_reg_rtx (tmode
);
19337 emit_move_insn (target
, op0
);
19338 ix86_expand_vector_set (true, target
, op1
, elt
);
19343 /* Expand an expression EXP that calls a built-in function,
19344 with result going to TARGET if that's convenient
19345 (and in mode MODE if that's convenient).
19346 SUBTARGET may be used as the target for computing one of EXP's operands.
19347 IGNORE is nonzero if the value is to be ignored. */
19350 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
19351 enum machine_mode mode ATTRIBUTE_UNUSED
,
19352 int ignore ATTRIBUTE_UNUSED
)
19354 const struct builtin_description
*d
;
19356 enum insn_code icode
;
19357 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
19358 tree arg0
, arg1
, arg2
, arg3
;
19359 rtx op0
, op1
, op2
, op3
, pat
;
19360 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
19361 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
19365 case IX86_BUILTIN_EMMS
:
19366 emit_insn (gen_mmx_emms ());
19369 case IX86_BUILTIN_SFENCE
:
19370 emit_insn (gen_sse_sfence ());
19373 case IX86_BUILTIN_MASKMOVQ
:
19374 case IX86_BUILTIN_MASKMOVDQU
:
19375 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
19376 ? CODE_FOR_mmx_maskmovq
19377 : CODE_FOR_sse2_maskmovdqu
);
19378 /* Note the arg order is different from the operand order. */
19379 arg1
= CALL_EXPR_ARG (exp
, 0);
19380 arg2
= CALL_EXPR_ARG (exp
, 1);
19381 arg0
= CALL_EXPR_ARG (exp
, 2);
19382 op0
= expand_normal (arg0
);
19383 op1
= expand_normal (arg1
);
19384 op2
= expand_normal (arg2
);
19385 mode0
= insn_data
[icode
].operand
[0].mode
;
19386 mode1
= insn_data
[icode
].operand
[1].mode
;
19387 mode2
= insn_data
[icode
].operand
[2].mode
;
19389 op0
= force_reg (Pmode
, op0
);
19390 op0
= gen_rtx_MEM (mode1
, op0
);
19392 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
19393 op0
= copy_to_mode_reg (mode0
, op0
);
19394 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
19395 op1
= copy_to_mode_reg (mode1
, op1
);
19396 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
19397 op2
= copy_to_mode_reg (mode2
, op2
);
19398 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
19404 case IX86_BUILTIN_RSQRTF
:
19405 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
19407 case IX86_BUILTIN_SQRTSS
:
19408 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
19409 case IX86_BUILTIN_RSQRTSS
:
19410 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
19411 case IX86_BUILTIN_RCPSS
:
19412 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
19414 case IX86_BUILTIN_LOADUPS
:
19415 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
19417 case IX86_BUILTIN_STOREUPS
:
19418 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
19420 case IX86_BUILTIN_LOADHPS
:
19421 case IX86_BUILTIN_LOADLPS
:
19422 case IX86_BUILTIN_LOADHPD
:
19423 case IX86_BUILTIN_LOADLPD
:
19424 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
19425 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
19426 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
19427 : CODE_FOR_sse2_loadlpd
);
19428 arg0
= CALL_EXPR_ARG (exp
, 0);
19429 arg1
= CALL_EXPR_ARG (exp
, 1);
19430 op0
= expand_normal (arg0
);
19431 op1
= expand_normal (arg1
);
19432 tmode
= insn_data
[icode
].operand
[0].mode
;
19433 mode0
= insn_data
[icode
].operand
[1].mode
;
19434 mode1
= insn_data
[icode
].operand
[2].mode
;
19436 op0
= force_reg (mode0
, op0
);
19437 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
19438 if (optimize
|| target
== 0
19439 || GET_MODE (target
) != tmode
19440 || !register_operand (target
, tmode
))
19441 target
= gen_reg_rtx (tmode
);
19442 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19448 case IX86_BUILTIN_STOREHPS
:
19449 case IX86_BUILTIN_STORELPS
:
19450 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
19451 : CODE_FOR_sse_storelps
);
19452 arg0
= CALL_EXPR_ARG (exp
, 0);
19453 arg1
= CALL_EXPR_ARG (exp
, 1);
19454 op0
= expand_normal (arg0
);
19455 op1
= expand_normal (arg1
);
19456 mode0
= insn_data
[icode
].operand
[0].mode
;
19457 mode1
= insn_data
[icode
].operand
[1].mode
;
19459 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19460 op1
= force_reg (mode1
, op1
);
19462 pat
= GEN_FCN (icode
) (op0
, op1
);
19468 case IX86_BUILTIN_MOVNTPS
:
19469 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
19470 case IX86_BUILTIN_MOVNTQ
:
19471 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
19473 case IX86_BUILTIN_LDMXCSR
:
19474 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
19475 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
19476 emit_move_insn (target
, op0
);
19477 emit_insn (gen_sse_ldmxcsr (target
));
19480 case IX86_BUILTIN_STMXCSR
:
19481 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
19482 emit_insn (gen_sse_stmxcsr (target
));
19483 return copy_to_mode_reg (SImode
, target
);
19485 case IX86_BUILTIN_SHUFPS
:
19486 case IX86_BUILTIN_SHUFPD
:
19487 icode
= (fcode
== IX86_BUILTIN_SHUFPS
19488 ? CODE_FOR_sse_shufps
19489 : CODE_FOR_sse2_shufpd
);
19490 arg0
= CALL_EXPR_ARG (exp
, 0);
19491 arg1
= CALL_EXPR_ARG (exp
, 1);
19492 arg2
= CALL_EXPR_ARG (exp
, 2);
19493 op0
= expand_normal (arg0
);
19494 op1
= expand_normal (arg1
);
19495 op2
= expand_normal (arg2
);
19496 tmode
= insn_data
[icode
].operand
[0].mode
;
19497 mode0
= insn_data
[icode
].operand
[1].mode
;
19498 mode1
= insn_data
[icode
].operand
[2].mode
;
19499 mode2
= insn_data
[icode
].operand
[3].mode
;
19501 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19502 op0
= copy_to_mode_reg (mode0
, op0
);
19503 if ((optimize
&& !register_operand (op1
, mode1
))
19504 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19505 op1
= copy_to_mode_reg (mode1
, op1
);
19506 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19508 /* @@@ better error message */
19509 error ("mask must be an immediate");
19510 return gen_reg_rtx (tmode
);
19512 if (optimize
|| target
== 0
19513 || GET_MODE (target
) != tmode
19514 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19515 target
= gen_reg_rtx (tmode
);
19516 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19522 case IX86_BUILTIN_PSHUFW
:
19523 case IX86_BUILTIN_PSHUFD
:
19524 case IX86_BUILTIN_PSHUFHW
:
19525 case IX86_BUILTIN_PSHUFLW
:
19526 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
19527 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
19528 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
19529 : CODE_FOR_mmx_pshufw
);
19530 arg0
= CALL_EXPR_ARG (exp
, 0);
19531 arg1
= CALL_EXPR_ARG (exp
, 1);
19532 op0
= expand_normal (arg0
);
19533 op1
= expand_normal (arg1
);
19534 tmode
= insn_data
[icode
].operand
[0].mode
;
19535 mode1
= insn_data
[icode
].operand
[1].mode
;
19536 mode2
= insn_data
[icode
].operand
[2].mode
;
19538 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19539 op0
= copy_to_mode_reg (mode1
, op0
);
19540 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19542 /* @@@ better error message */
19543 error ("mask must be an immediate");
19547 || GET_MODE (target
) != tmode
19548 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19549 target
= gen_reg_rtx (tmode
);
19550 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19556 case IX86_BUILTIN_PSLLWI128
:
19557 icode
= CODE_FOR_ashlv8hi3
;
19559 case IX86_BUILTIN_PSLLDI128
:
19560 icode
= CODE_FOR_ashlv4si3
;
19562 case IX86_BUILTIN_PSLLQI128
:
19563 icode
= CODE_FOR_ashlv2di3
;
19565 case IX86_BUILTIN_PSRAWI128
:
19566 icode
= CODE_FOR_ashrv8hi3
;
19568 case IX86_BUILTIN_PSRADI128
:
19569 icode
= CODE_FOR_ashrv4si3
;
19571 case IX86_BUILTIN_PSRLWI128
:
19572 icode
= CODE_FOR_lshrv8hi3
;
19574 case IX86_BUILTIN_PSRLDI128
:
19575 icode
= CODE_FOR_lshrv4si3
;
19577 case IX86_BUILTIN_PSRLQI128
:
19578 icode
= CODE_FOR_lshrv2di3
;
19581 arg0
= CALL_EXPR_ARG (exp
, 0);
19582 arg1
= CALL_EXPR_ARG (exp
, 1);
19583 op0
= expand_normal (arg0
);
19584 op1
= expand_normal (arg1
);
19586 if (!CONST_INT_P (op1
))
19588 error ("shift must be an immediate");
19591 if (INTVAL (op1
) < 0 || INTVAL (op1
) > 255)
19592 op1
= GEN_INT (255);
19594 tmode
= insn_data
[icode
].operand
[0].mode
;
19595 mode1
= insn_data
[icode
].operand
[1].mode
;
19596 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19597 op0
= copy_to_reg (op0
);
19599 target
= gen_reg_rtx (tmode
);
19600 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19606 case IX86_BUILTIN_PSLLW128
:
19607 icode
= CODE_FOR_ashlv8hi3
;
19609 case IX86_BUILTIN_PSLLD128
:
19610 icode
= CODE_FOR_ashlv4si3
;
19612 case IX86_BUILTIN_PSLLQ128
:
19613 icode
= CODE_FOR_ashlv2di3
;
19615 case IX86_BUILTIN_PSRAW128
:
19616 icode
= CODE_FOR_ashrv8hi3
;
19618 case IX86_BUILTIN_PSRAD128
:
19619 icode
= CODE_FOR_ashrv4si3
;
19621 case IX86_BUILTIN_PSRLW128
:
19622 icode
= CODE_FOR_lshrv8hi3
;
19624 case IX86_BUILTIN_PSRLD128
:
19625 icode
= CODE_FOR_lshrv4si3
;
19627 case IX86_BUILTIN_PSRLQ128
:
19628 icode
= CODE_FOR_lshrv2di3
;
19631 arg0
= CALL_EXPR_ARG (exp
, 0);
19632 arg1
= CALL_EXPR_ARG (exp
, 1);
19633 op0
= expand_normal (arg0
);
19634 op1
= expand_normal (arg1
);
19636 tmode
= insn_data
[icode
].operand
[0].mode
;
19637 mode1
= insn_data
[icode
].operand
[1].mode
;
19639 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19640 op0
= copy_to_reg (op0
);
19642 op1
= simplify_gen_subreg (SImode
, op1
, GET_MODE (op1
), 0);
19643 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, SImode
))
19644 op1
= copy_to_reg (op1
);
19646 target
= gen_reg_rtx (tmode
);
19647 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19653 case IX86_BUILTIN_PSLLDQI128
:
19654 case IX86_BUILTIN_PSRLDQI128
:
19655 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
19656 : CODE_FOR_sse2_lshrti3
);
19657 arg0
= CALL_EXPR_ARG (exp
, 0);
19658 arg1
= CALL_EXPR_ARG (exp
, 1);
19659 op0
= expand_normal (arg0
);
19660 op1
= expand_normal (arg1
);
19661 tmode
= insn_data
[icode
].operand
[0].mode
;
19662 mode1
= insn_data
[icode
].operand
[1].mode
;
19663 mode2
= insn_data
[icode
].operand
[2].mode
;
19665 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19667 op0
= copy_to_reg (op0
);
19668 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
19670 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19672 error ("shift must be an immediate");
19675 target
= gen_reg_rtx (V2DImode
);
19676 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
19683 case IX86_BUILTIN_FEMMS
:
19684 emit_insn (gen_mmx_femms ());
19687 case IX86_BUILTIN_PAVGUSB
:
19688 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
19690 case IX86_BUILTIN_PF2ID
:
19691 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
19693 case IX86_BUILTIN_PFACC
:
19694 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
19696 case IX86_BUILTIN_PFADD
:
19697 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
19699 case IX86_BUILTIN_PFCMPEQ
:
19700 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
19702 case IX86_BUILTIN_PFCMPGE
:
19703 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
19705 case IX86_BUILTIN_PFCMPGT
:
19706 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
19708 case IX86_BUILTIN_PFMAX
:
19709 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
19711 case IX86_BUILTIN_PFMIN
:
19712 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
19714 case IX86_BUILTIN_PFMUL
:
19715 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
19717 case IX86_BUILTIN_PFRCP
:
19718 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
19720 case IX86_BUILTIN_PFRCPIT1
:
19721 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
19723 case IX86_BUILTIN_PFRCPIT2
:
19724 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
19726 case IX86_BUILTIN_PFRSQIT1
:
19727 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
19729 case IX86_BUILTIN_PFRSQRT
:
19730 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
19732 case IX86_BUILTIN_PFSUB
:
19733 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
19735 case IX86_BUILTIN_PFSUBR
:
19736 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
19738 case IX86_BUILTIN_PI2FD
:
19739 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
19741 case IX86_BUILTIN_PMULHRW
:
19742 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
19744 case IX86_BUILTIN_PF2IW
:
19745 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
19747 case IX86_BUILTIN_PFNACC
:
19748 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
19750 case IX86_BUILTIN_PFPNACC
:
19751 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
19753 case IX86_BUILTIN_PI2FW
:
19754 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
19756 case IX86_BUILTIN_PSWAPDSI
:
19757 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
19759 case IX86_BUILTIN_PSWAPDSF
:
19760 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
19762 case IX86_BUILTIN_SQRTSD
:
19763 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
19764 case IX86_BUILTIN_LOADUPD
:
19765 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
19766 case IX86_BUILTIN_STOREUPD
:
19767 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
19769 case IX86_BUILTIN_MFENCE
:
19770 emit_insn (gen_sse2_mfence ());
19772 case IX86_BUILTIN_LFENCE
:
19773 emit_insn (gen_sse2_lfence ());
19776 case IX86_BUILTIN_CLFLUSH
:
19777 arg0
= CALL_EXPR_ARG (exp
, 0);
19778 op0
= expand_normal (arg0
);
19779 icode
= CODE_FOR_sse2_clflush
;
19780 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
19781 op0
= copy_to_mode_reg (Pmode
, op0
);
19783 emit_insn (gen_sse2_clflush (op0
));
19786 case IX86_BUILTIN_MOVNTPD
:
19787 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
19788 case IX86_BUILTIN_MOVNTDQ
:
19789 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
19790 case IX86_BUILTIN_MOVNTI
:
19791 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
19793 case IX86_BUILTIN_LOADDQU
:
19794 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
19795 case IX86_BUILTIN_STOREDQU
:
19796 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
19798 case IX86_BUILTIN_MONITOR
:
19799 arg0
= CALL_EXPR_ARG (exp
, 0);
19800 arg1
= CALL_EXPR_ARG (exp
, 1);
19801 arg2
= CALL_EXPR_ARG (exp
, 2);
19802 op0
= expand_normal (arg0
);
19803 op1
= expand_normal (arg1
);
19804 op2
= expand_normal (arg2
);
19806 op0
= copy_to_mode_reg (Pmode
, op0
);
19808 op1
= copy_to_mode_reg (SImode
, op1
);
19810 op2
= copy_to_mode_reg (SImode
, op2
);
19812 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
19814 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
19817 case IX86_BUILTIN_MWAIT
:
19818 arg0
= CALL_EXPR_ARG (exp
, 0);
19819 arg1
= CALL_EXPR_ARG (exp
, 1);
19820 op0
= expand_normal (arg0
);
19821 op1
= expand_normal (arg1
);
19823 op0
= copy_to_mode_reg (SImode
, op0
);
19825 op1
= copy_to_mode_reg (SImode
, op1
);
19826 emit_insn (gen_sse3_mwait (op0
, op1
));
19829 case IX86_BUILTIN_LDDQU
:
19830 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
19833 case IX86_BUILTIN_PALIGNR
:
19834 case IX86_BUILTIN_PALIGNR128
:
19835 if (fcode
== IX86_BUILTIN_PALIGNR
)
19837 icode
= CODE_FOR_ssse3_palignrdi
;
19842 icode
= CODE_FOR_ssse3_palignrti
;
19845 arg0
= CALL_EXPR_ARG (exp
, 0);
19846 arg1
= CALL_EXPR_ARG (exp
, 1);
19847 arg2
= CALL_EXPR_ARG (exp
, 2);
19848 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19849 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19850 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19851 tmode
= insn_data
[icode
].operand
[0].mode
;
19852 mode1
= insn_data
[icode
].operand
[1].mode
;
19853 mode2
= insn_data
[icode
].operand
[2].mode
;
19854 mode3
= insn_data
[icode
].operand
[3].mode
;
19856 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19858 op0
= copy_to_reg (op0
);
19859 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
19861 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19863 op1
= copy_to_reg (op1
);
19864 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
19866 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19868 error ("shift must be an immediate");
19871 target
= gen_reg_rtx (mode
);
19872 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
19879 case IX86_BUILTIN_MOVNTDQA
:
19880 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa
, exp
,
19883 case IX86_BUILTIN_MOVNTSD
:
19884 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
19886 case IX86_BUILTIN_MOVNTSS
:
19887 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
19889 case IX86_BUILTIN_INSERTQ
:
19890 case IX86_BUILTIN_EXTRQ
:
19891 icode
= (fcode
== IX86_BUILTIN_EXTRQ
19892 ? CODE_FOR_sse4a_extrq
19893 : CODE_FOR_sse4a_insertq
);
19894 arg0
= CALL_EXPR_ARG (exp
, 0);
19895 arg1
= CALL_EXPR_ARG (exp
, 1);
19896 op0
= expand_normal (arg0
);
19897 op1
= expand_normal (arg1
);
19898 tmode
= insn_data
[icode
].operand
[0].mode
;
19899 mode1
= insn_data
[icode
].operand
[1].mode
;
19900 mode2
= insn_data
[icode
].operand
[2].mode
;
19901 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19902 op0
= copy_to_mode_reg (mode1
, op0
);
19903 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19904 op1
= copy_to_mode_reg (mode2
, op1
);
19905 if (optimize
|| target
== 0
19906 || GET_MODE (target
) != tmode
19907 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19908 target
= gen_reg_rtx (tmode
);
19909 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19915 case IX86_BUILTIN_EXTRQI
:
19916 icode
= CODE_FOR_sse4a_extrqi
;
19917 arg0
= CALL_EXPR_ARG (exp
, 0);
19918 arg1
= CALL_EXPR_ARG (exp
, 1);
19919 arg2
= CALL_EXPR_ARG (exp
, 2);
19920 op0
= expand_normal (arg0
);
19921 op1
= expand_normal (arg1
);
19922 op2
= expand_normal (arg2
);
19923 tmode
= insn_data
[icode
].operand
[0].mode
;
19924 mode1
= insn_data
[icode
].operand
[1].mode
;
19925 mode2
= insn_data
[icode
].operand
[2].mode
;
19926 mode3
= insn_data
[icode
].operand
[3].mode
;
19927 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19928 op0
= copy_to_mode_reg (mode1
, op0
);
19929 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19931 error ("index mask must be an immediate");
19932 return gen_reg_rtx (tmode
);
19934 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19936 error ("length mask must be an immediate");
19937 return gen_reg_rtx (tmode
);
19939 if (optimize
|| target
== 0
19940 || GET_MODE (target
) != tmode
19941 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19942 target
= gen_reg_rtx (tmode
);
19943 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19949 case IX86_BUILTIN_INSERTQI
:
19950 icode
= CODE_FOR_sse4a_insertqi
;
19951 arg0
= CALL_EXPR_ARG (exp
, 0);
19952 arg1
= CALL_EXPR_ARG (exp
, 1);
19953 arg2
= CALL_EXPR_ARG (exp
, 2);
19954 arg3
= CALL_EXPR_ARG (exp
, 3);
19955 op0
= expand_normal (arg0
);
19956 op1
= expand_normal (arg1
);
19957 op2
= expand_normal (arg2
);
19958 op3
= expand_normal (arg3
);
19959 tmode
= insn_data
[icode
].operand
[0].mode
;
19960 mode1
= insn_data
[icode
].operand
[1].mode
;
19961 mode2
= insn_data
[icode
].operand
[2].mode
;
19962 mode3
= insn_data
[icode
].operand
[3].mode
;
19963 mode4
= insn_data
[icode
].operand
[4].mode
;
19965 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19966 op0
= copy_to_mode_reg (mode1
, op0
);
19968 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19969 op1
= copy_to_mode_reg (mode2
, op1
);
19971 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19973 error ("index mask must be an immediate");
19974 return gen_reg_rtx (tmode
);
19976 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
19978 error ("length mask must be an immediate");
19979 return gen_reg_rtx (tmode
);
19981 if (optimize
|| target
== 0
19982 || GET_MODE (target
) != tmode
19983 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19984 target
= gen_reg_rtx (tmode
);
19985 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
19991 case IX86_BUILTIN_VEC_INIT_V2SI
:
19992 case IX86_BUILTIN_VEC_INIT_V4HI
:
19993 case IX86_BUILTIN_VEC_INIT_V8QI
:
19994 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
19996 case IX86_BUILTIN_VEC_EXT_V2DF
:
19997 case IX86_BUILTIN_VEC_EXT_V2DI
:
19998 case IX86_BUILTIN_VEC_EXT_V4SF
:
19999 case IX86_BUILTIN_VEC_EXT_V4SI
:
20000 case IX86_BUILTIN_VEC_EXT_V8HI
:
20001 case IX86_BUILTIN_VEC_EXT_V2SI
:
20002 case IX86_BUILTIN_VEC_EXT_V4HI
:
20003 case IX86_BUILTIN_VEC_EXT_V16QI
:
20004 return ix86_expand_vec_ext_builtin (exp
, target
);
20006 case IX86_BUILTIN_VEC_SET_V2DI
:
20007 case IX86_BUILTIN_VEC_SET_V4SF
:
20008 case IX86_BUILTIN_VEC_SET_V4SI
:
20009 case IX86_BUILTIN_VEC_SET_V8HI
:
20010 case IX86_BUILTIN_VEC_SET_V4HI
:
20011 case IX86_BUILTIN_VEC_SET_V16QI
:
20012 return ix86_expand_vec_set_builtin (exp
);
20014 case IX86_BUILTIN_INFQ
:
20016 REAL_VALUE_TYPE inf
;
20020 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
20022 tmp
= validize_mem (force_const_mem (mode
, tmp
));
20025 target
= gen_reg_rtx (mode
);
20027 emit_move_insn (target
, tmp
);
20031 case IX86_BUILTIN_FABSQ
:
20032 return ix86_expand_unop_builtin (CODE_FOR_abstf2
, exp
, target
, 0);
20034 case IX86_BUILTIN_COPYSIGNQ
:
20035 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3
, exp
, target
);
20041 for (i
= 0, d
= bdesc_sse_3arg
;
20042 i
< ARRAY_SIZE (bdesc_sse_3arg
);
20044 if (d
->code
== fcode
)
20045 return ix86_expand_sse_4_operands_builtin (d
->icode
, exp
,
20048 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
20049 if (d
->code
== fcode
)
20051 /* Compares are treated specially. */
20052 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
20053 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
20054 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
20055 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
20056 return ix86_expand_sse_compare (d
, exp
, target
);
20058 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
20061 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
20062 if (d
->code
== fcode
)
20063 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
20065 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
20066 if (d
->code
== fcode
)
20067 return ix86_expand_sse_comi (d
, exp
, target
);
20069 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
20070 if (d
->code
== fcode
)
20071 return ix86_expand_sse_ptest (d
, exp
, target
);
20073 for (i
= 0, d
= bdesc_crc32
; i
< ARRAY_SIZE (bdesc_crc32
); i
++, d
++)
20074 if (d
->code
== fcode
)
20075 return ix86_expand_crc32 (d
->icode
, exp
, target
);
20077 for (i
= 0, d
= bdesc_pcmpestr
;
20078 i
< ARRAY_SIZE (bdesc_pcmpestr
);
20080 if (d
->code
== fcode
)
20081 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
20083 for (i
= 0, d
= bdesc_pcmpistr
;
20084 i
< ARRAY_SIZE (bdesc_pcmpistr
);
20086 if (d
->code
== fcode
)
20087 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
20089 gcc_unreachable ();
20092 /* Returns a function decl for a vectorized version of the builtin function
20093 with builtin function code FN and the result vector type TYPE, or NULL_TREE
20094 if it is not available. */
20097 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
20100 enum machine_mode in_mode
, out_mode
;
20103 if (TREE_CODE (type_out
) != VECTOR_TYPE
20104 || TREE_CODE (type_in
) != VECTOR_TYPE
)
20107 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
20108 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
20109 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
20110 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
20114 case BUILT_IN_SQRT
:
20115 if (out_mode
== DFmode
&& out_n
== 2
20116 && in_mode
== DFmode
&& in_n
== 2)
20117 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
20120 case BUILT_IN_SQRTF
:
20121 if (out_mode
== SFmode
&& out_n
== 4
20122 && in_mode
== SFmode
&& in_n
== 4)
20123 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
20126 case BUILT_IN_LRINT
:
20127 if (out_mode
== SImode
&& out_n
== 4
20128 && in_mode
== DFmode
&& in_n
== 2)
20129 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
20132 case BUILT_IN_LRINTF
:
20133 if (out_mode
== SImode
&& out_n
== 4
20134 && in_mode
== SFmode
&& in_n
== 4)
20135 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
20142 /* Dispatch to a handler for a vectorization library. */
20143 if (ix86_veclib_handler
)
20144 return (*ix86_veclib_handler
)(fn
, type_out
, type_in
);
20149 /* Handler for an ACML-style interface to a library with vectorized
20153 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
20155 char name
[20] = "__vr.._";
20156 tree fntype
, new_fndecl
, args
;
20159 enum machine_mode el_mode
, in_mode
;
20162 /* The ACML is 64bits only and suitable for unsafe math only as
20163 it does not correctly support parts of IEEE with the required
20164 precision such as denormals. */
20166 || !flag_unsafe_math_optimizations
)
20169 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
20170 n
= TYPE_VECTOR_SUBPARTS (type_out
);
20171 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
20172 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
20173 if (el_mode
!= in_mode
20183 case BUILT_IN_LOG2
:
20184 case BUILT_IN_LOG10
:
20187 if (el_mode
!= DFmode
20192 case BUILT_IN_SINF
:
20193 case BUILT_IN_COSF
:
20194 case BUILT_IN_EXPF
:
20195 case BUILT_IN_POWF
:
20196 case BUILT_IN_LOGF
:
20197 case BUILT_IN_LOG2F
:
20198 case BUILT_IN_LOG10F
:
20201 if (el_mode
!= SFmode
20210 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
20211 sprintf (name
+ 7, "%s", bname
+10);
20214 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
20215 args
= TREE_CHAIN (args
))
20219 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
20221 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
20223 /* Build a function declaration for the vectorized function. */
20224 new_fndecl
= build_decl (FUNCTION_DECL
, get_identifier (name
), fntype
);
20225 TREE_PUBLIC (new_fndecl
) = 1;
20226 DECL_EXTERNAL (new_fndecl
) = 1;
20227 DECL_IS_NOVOPS (new_fndecl
) = 1;
20228 TREE_READONLY (new_fndecl
) = 1;
20234 /* Returns a decl of a function that implements conversion of the
20235 input vector of type TYPE, or NULL_TREE if it is not available. */
20238 ix86_vectorize_builtin_conversion (unsigned int code
, tree type
)
20240 if (TREE_CODE (type
) != VECTOR_TYPE
)
20246 switch (TYPE_MODE (type
))
20249 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
20254 case FIX_TRUNC_EXPR
:
20255 switch (TYPE_MODE (type
))
20258 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
20268 /* Returns a code for a target-specific builtin that implements
20269 reciprocal of the function, or NULL_TREE if not available. */
20272 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
20273 bool sqrt ATTRIBUTE_UNUSED
)
20275 if (! (TARGET_SSE_MATH
&& TARGET_RECIP
&& !optimize_size
20276 && flag_finite_math_only
&& !flag_trapping_math
20277 && flag_unsafe_math_optimizations
))
20281 /* Machine dependent builtins. */
20284 /* Vectorized version of sqrt to rsqrt conversion. */
20285 case IX86_BUILTIN_SQRTPS
:
20286 return ix86_builtins
[IX86_BUILTIN_RSQRTPS
];
20292 /* Normal builtins. */
20295 /* Sqrt to rsqrt conversion. */
20296 case BUILT_IN_SQRTF
:
20297 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
20304 /* Store OPERAND to the memory after reload is completed. This means
20305 that we can't easily use assign_stack_local. */
20307 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
20311 gcc_assert (reload_completed
);
20312 if (TARGET_RED_ZONE
)
20314 result
= gen_rtx_MEM (mode
,
20315 gen_rtx_PLUS (Pmode
,
20317 GEN_INT (-RED_ZONE_SIZE
)));
20318 emit_move_insn (result
, operand
);
20320 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
20326 operand
= gen_lowpart (DImode
, operand
);
20330 gen_rtx_SET (VOIDmode
,
20331 gen_rtx_MEM (DImode
,
20332 gen_rtx_PRE_DEC (DImode
,
20333 stack_pointer_rtx
)),
20337 gcc_unreachable ();
20339 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
20348 split_di (&operand
, 1, operands
, operands
+ 1);
20350 gen_rtx_SET (VOIDmode
,
20351 gen_rtx_MEM (SImode
,
20352 gen_rtx_PRE_DEC (Pmode
,
20353 stack_pointer_rtx
)),
20356 gen_rtx_SET (VOIDmode
,
20357 gen_rtx_MEM (SImode
,
20358 gen_rtx_PRE_DEC (Pmode
,
20359 stack_pointer_rtx
)),
20364 /* Store HImodes as SImodes. */
20365 operand
= gen_lowpart (SImode
, operand
);
20369 gen_rtx_SET (VOIDmode
,
20370 gen_rtx_MEM (GET_MODE (operand
),
20371 gen_rtx_PRE_DEC (SImode
,
20372 stack_pointer_rtx
)),
20376 gcc_unreachable ();
20378 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
20383 /* Free operand from the memory. */
20385 ix86_free_from_memory (enum machine_mode mode
)
20387 if (!TARGET_RED_ZONE
)
20391 if (mode
== DImode
|| TARGET_64BIT
)
20395 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20396 to pop or add instruction if registers are available. */
20397 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20398 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
20403 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20404 QImode must go into class Q_REGS.
20405 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20406 movdf to do mem-to-mem moves through integer regs. */
20408 ix86_preferred_reload_class (rtx x
, enum reg_class regclass
)
20410 enum machine_mode mode
= GET_MODE (x
);
20412 /* We're only allowed to return a subclass of CLASS. Many of the
20413 following checks fail for NO_REGS, so eliminate that early. */
20414 if (regclass
== NO_REGS
)
20417 /* All classes can load zeros. */
20418 if (x
== CONST0_RTX (mode
))
20421 /* Force constants into memory if we are loading a (nonzero) constant into
20422 an MMX or SSE register. This is because there are no MMX/SSE instructions
20423 to load from a constant. */
20425 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
20428 /* Prefer SSE regs only, if we can use them for math. */
20429 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
20430 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
20432 /* Floating-point constants need more complex checks. */
20433 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
20435 /* General regs can load everything. */
20436 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
20439 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20440 zero above. We only want to wind up preferring 80387 registers if
20441 we plan on doing computation with them. */
20443 && standard_80387_constant_p (x
))
20445 /* Limit class to non-sse. */
20446 if (regclass
== FLOAT_SSE_REGS
)
20448 if (regclass
== FP_TOP_SSE_REGS
)
20450 if (regclass
== FP_SECOND_SSE_REGS
)
20451 return FP_SECOND_REG
;
20452 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
20459 /* Generally when we see PLUS here, it's the function invariant
20460 (plus soft-fp const_int). Which can only be computed into general
20462 if (GET_CODE (x
) == PLUS
)
20463 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
20465 /* QImode constants are easy to load, but non-constant QImode data
20466 must go into Q_REGS. */
20467 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
20469 if (reg_class_subset_p (regclass
, Q_REGS
))
20471 if (reg_class_subset_p (Q_REGS
, regclass
))
20479 /* Discourage putting floating-point values in SSE registers unless
20480 SSE math is being used, and likewise for the 387 registers. */
20482 ix86_preferred_output_reload_class (rtx x
, enum reg_class regclass
)
20484 enum machine_mode mode
= GET_MODE (x
);
20486 /* Restrict the output reload class to the register bank that we are doing
20487 math on. If we would like not to return a subset of CLASS, reject this
20488 alternative: if reload cannot do this, it will still use its choice. */
20489 mode
= GET_MODE (x
);
20490 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20491 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
20493 if (X87_FLOAT_MODE_P (mode
))
20495 if (regclass
== FP_TOP_SSE_REGS
)
20497 else if (regclass
== FP_SECOND_SSE_REGS
)
20498 return FP_SECOND_REG
;
20500 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
20506 /* If we are copying between general and FP registers, we need a memory
20507 location. The same is true for SSE and MMX registers.
20509 To optimize register_move_cost performance, allow inline variant.
20511 The macro can't work reliably when one of the CLASSES is class containing
20512 registers from multiple units (SSE, MMX, integer). We avoid this by never
20513 combining those units in single alternative in the machine description.
20514 Ensure that this constraint holds to avoid unexpected surprises.
20516 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20517 enforce these sanity checks. */
20520 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
20521 enum machine_mode mode
, int strict
)
20523 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
20524 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
20525 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
20526 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
20527 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
20528 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
20530 gcc_assert (!strict
);
20534 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
20537 /* ??? This is a lie. We do have moves between mmx/general, and for
20538 mmx/sse2. But by saying we need secondary memory we discourage the
20539 register allocator from using the mmx registers unless needed. */
20540 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
20543 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
20545 /* SSE1 doesn't have any direct moves from other classes. */
20549 /* If the target says that inter-unit moves are more expensive
20550 than moving through memory, then don't generate them. */
20551 if (!TARGET_INTER_UNIT_MOVES
)
20554 /* Between SSE and general, we have moves no larger than word size. */
20555 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
20563 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
20564 enum machine_mode mode
, int strict
)
20566 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
20569 /* Return true if the registers in CLASS cannot represent the change from
20570 modes FROM to TO. */
20573 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
20574 enum reg_class regclass
)
20579 /* x87 registers can't do subreg at all, as all values are reformatted
20580 to extended precision. */
20581 if (MAYBE_FLOAT_CLASS_P (regclass
))
20584 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
20586 /* Vector registers do not support QI or HImode loads. If we don't
20587 disallow a change to these modes, reload will assume it's ok to
20588 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20589 the vec_dupv4hi pattern. */
20590 if (GET_MODE_SIZE (from
) < 4)
20593 /* Vector registers do not support subreg with nonzero offsets, which
20594 are otherwise valid for integer registers. Since we can't see
20595 whether we have a nonzero offset from here, prohibit all
20596 nonparadoxical subregs changing size. */
20597 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
20604 /* Return the cost of moving data of mode M between a
20605 register and memory. A value of 2 is the default; this cost is
20606 relative to those in `REGISTER_MOVE_COST'.
20608 This function is used extensively by register_move_cost that is used to
20609 build tables at startup. Make it inline in this case.
20610 When IN is 2, return maximum of in and out move cost.
20612 If moving between registers and memory is more expensive than
20613 between two registers, you should define this macro to express the
20616 Model also increased moving costs of QImode registers in non
20620 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
20624 if (FLOAT_CLASS_P (regclass
))
20642 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
20643 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
20645 if (SSE_CLASS_P (regclass
))
20648 switch (GET_MODE_SIZE (mode
))
20663 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
20664 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
20666 if (MMX_CLASS_P (regclass
))
20669 switch (GET_MODE_SIZE (mode
))
20681 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
20682 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
20684 switch (GET_MODE_SIZE (mode
))
20687 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
20690 return ix86_cost
->int_store
[0];
20691 if (TARGET_PARTIAL_REG_DEPENDENCY
&& !optimize_size
)
20692 cost
= ix86_cost
->movzbl_load
;
20694 cost
= ix86_cost
->int_load
[0];
20696 return MAX (cost
, ix86_cost
->int_store
[0]);
20702 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
20704 return ix86_cost
->movzbl_load
;
20706 return ix86_cost
->int_store
[0] + 4;
20711 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
20712 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
20714 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20715 if (mode
== TFmode
)
20718 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
20720 cost
= ix86_cost
->int_load
[2];
20722 cost
= ix86_cost
->int_store
[2];
20723 return (cost
* (((int) GET_MODE_SIZE (mode
)
20724 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
20729 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
, int in
)
20731 return inline_memory_move_cost (mode
, regclass
, in
);
20735 /* Return the cost of moving data from a register in class CLASS1 to
20736 one in class CLASS2.
20738 It is not required that the cost always equal 2 when FROM is the same as TO;
20739 on some machines it is expensive to move between registers if they are not
20740 general registers. */
/* NOTE(review): this block was line-mangled during extraction (original file
   line numbers are fused into the text and some lines appear missing).
   Tokens below are preserved verbatim; presumably this returns the relative
   cost of moving MODE data from CLASS1 to CLASS2 -- confirm against a
   pristine copy of i386.c before further edits.  */
20743 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
20744 enum reg_class class2
)
20746 /* In case we require secondary memory, compute cost of the store followed
20747 by load. In order to avoid bad register allocation choices, we need
20748 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20750 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
20754 cost
+= inline_memory_move_cost (mode
, class1
, 2);
20755 cost
+= inline_memory_move_cost (mode
, class2
, 2);
20757 /* In case of copying from general_purpose_register we may emit multiple
20758 stores followed by single load causing memory size mismatch stall.
20759 Count this as arbitrarily high cost of 20. */
20760 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
20763 /* In the case of FP/MMX moves, the registers actually overlap, and we
20764 have to switch modes in order to treat them differently. */
20765 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
20766 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
20772 /* Moves between SSE/MMX and integer unit are expensive. */
20773 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
20774 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
20776 /* ??? By keeping returned value relatively high, we limit the number
20777 of moves between integer and MMX/SSE registers for all targets.
20778 Additionally, high value prevents problem with x86_modes_tieable_p(),
20779 where integer modes in MMX/SSE registers are not tieable
20780 because of missing QImode and HImode moves to, from or between
20781 MMX/SSE registers. */
20782 return MAX (ix86_cost
->mmxsse_to_integer
, 8);
/* Per-unit move costs below are read from the ix86_cost tuning table.  */
20784 if (MAYBE_FLOAT_CLASS_P (class1
))
20785 return ix86_cost
->fp_move
;
20786 if (MAYBE_SSE_CLASS_P (class1
))
20787 return ix86_cost
->sse_move
;
20788 if (MAYBE_MMX_CLASS_P (class1
))
20789 return ix86_cost
->mmx_move
;
20793 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): extraction has line-mangled this block and dropped some
   lines (brace/return lines between the fused original line numbers).
   Text kept verbatim.  Presumably implements HARD_REGNO_MODE_OK: nonzero
   when hard register REGNO can hold a MODE value -- verify against a
   pristine i386.c.  */
20796 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
20798 /* Flags and only flags can only hold CCmode values. */
20799 if (CC_REGNO_P (regno
))
20800 return GET_MODE_CLASS (mode
) == MODE_CC
;
20801 if (GET_MODE_CLASS (mode
) == MODE_CC
20802 || GET_MODE_CLASS (mode
) == MODE_RANDOM
20803 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
20805 if (FP_REGNO_P (regno
))
20806 return VALID_FP_MODE_P (mode
);
20807 if (SSE_REGNO_P (regno
))
20809 /* We implement the move patterns for all vector modes into and
20810 out of SSE registers, even when no operation instructions
20812 return (VALID_SSE_REG_MODE (mode
)
20813 || VALID_SSE2_REG_MODE (mode
)
20814 || VALID_MMX_REG_MODE (mode
)
20815 || VALID_MMX_REG_MODE_3DNOW (mode
));
20817 if (MMX_REGNO_P (regno
))
20819 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20820 so if the register is available at all, then we can move data of
20821 the given mode into or out of it. */
20822 return (VALID_MMX_REG_MODE (mode
)
20823 || VALID_MMX_REG_MODE_3DNOW (mode
));
20826 if (mode
== QImode
)
20828 /* Take care for QImode values - they can be in non-QI regs,
20829 but then they do cause partial register stalls. */
20830 if (regno
< 4 || TARGET_64BIT
)
20832 if (!TARGET_PARTIAL_REG_STALL
)
20834 return reload_in_progress
|| reload_completed
;
20836 /* We handle both integer and floats in the general purpose registers. */
20837 else if (VALID_INT_MODE_P (mode
))
20839 else if (VALID_FP_MODE_P (mode
))
20841 else if (VALID_DFP_MODE_P (mode
))
20843 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20844 on to use that value in smaller contexts, this can easily force a
20845 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20846 supporting DImode, allow it. */
20847 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
20853 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20854 tieable integer mode. */
/* NOTE(review): the switch/case labels of this function were lost in
   extraction; only two return expressions survive.  Presumably a
   per-mode predicate for tieable integer modes -- confirm against a
   pristine i386.c.  */
20857 ix86_tieable_integer_mode_p (enum machine_mode mode
)
20866 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
20869 return TARGET_64BIT
;
20876 /* Return true if MODE1 is accessible in a register that can hold MODE2
20877 without copying. That is, all register classes that can hold MODE2
20878 can also hold MODE1. */
/* NOTE(review): line-mangled by extraction; tokens preserved verbatim.
   Implements MODES_TIEABLE_P for i386 (see the comment just above this
   block in the file).  Some brace/return lines appear to be missing --
   verify against a pristine i386.c.  */
20881 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
20883 if (mode1
== mode2
)
20886 if (ix86_tieable_integer_mode_p (mode1
)
20887 && ix86_tieable_integer_mode_p (mode2
))
20890 /* MODE2 being XFmode implies fp stack or general regs, which means we
20891 can tie any smaller floating point modes to it. Note that we do not
20892 tie this with TFmode. */
20893 if (mode2
== XFmode
)
20894 return mode1
== SFmode
|| mode1
== DFmode
;
20896 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20897 that we can tie it with SFmode. */
20898 if (mode2
== DFmode
)
20899 return mode1
== SFmode
;
20901 /* If MODE2 is only appropriate for an SSE register, then tie with
20902 any other mode acceptable to SSE registers. */
20903 if (GET_MODE_SIZE (mode2
) == 16
20904 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
20905 return (GET_MODE_SIZE (mode1
) == 16
20906 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
20908 /* If MODE2 is appropriate for an MMX register, then tie
20909 with any other mode acceptable to MMX registers. */
20910 if (GET_MODE_SIZE (mode2
) == 8
20911 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
20912 return (GET_MODE_SIZE (mode1
) == 8
20913 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
20918 /* Compute a (partial) cost for rtx X. Return true if the complete
20919 cost has been computed, and false if subexpressions should be
20920 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this large RTX-cost hook was heavily line-mangled during
   extraction; in particular the switch's `case CONST_INT:`, `case MULT:`
   etc. labels are missing between the fused original line numbers.
   Tokens preserved verbatim -- restore from a pristine i386.c rather
   than editing this text.  */
20923 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
)
20925 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
20926 enum machine_mode mode
= GET_MODE (x
);
20934 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
20936 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
20938 else if (flag_pic
&& SYMBOLIC_CONST (x
)
20940 || (!GET_CODE (x
) != LABEL_REF
20941 && (GET_CODE (x
) != SYMBOL_REF
20942 || !SYMBOL_REF_LOCAL_P (x
)))))
20949 if (mode
== VOIDmode
)
20952 switch (standard_80387_constant_p (x
))
20957 default: /* Other constants */
20962 /* Start with (MEM (SYMBOL_REF)), since that's where
20963 it'll probably end up. Add a penalty for size. */
20964 *total
= (COSTS_N_INSNS (1)
20965 + (flag_pic
!= 0 && !TARGET_64BIT
)
20966 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
20972 /* The zero extensions is often completely free on x86_64, so make
20973 it as cheap as possible. */
20974 if (TARGET_64BIT
&& mode
== DImode
20975 && GET_MODE (XEXP (x
, 0)) == SImode
)
20977 else if (TARGET_ZERO_EXTEND_WITH_AND
)
20978 *total
= ix86_cost
->add
;
20980 *total
= ix86_cost
->movzx
;
20984 *total
= ix86_cost
->movsx
;
20988 if (CONST_INT_P (XEXP (x
, 1))
20989 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
20991 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
20994 *total
= ix86_cost
->add
;
20997 if ((value
== 2 || value
== 3)
20998 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
21000 *total
= ix86_cost
->lea
;
21010 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
21012 if (CONST_INT_P (XEXP (x
, 1)))
21014 if (INTVAL (XEXP (x
, 1)) > 32)
21015 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
21017 *total
= ix86_cost
->shift_const
* 2;
21021 if (GET_CODE (XEXP (x
, 1)) == AND
)
21022 *total
= ix86_cost
->shift_var
* 2;
21024 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
21029 if (CONST_INT_P (XEXP (x
, 1)))
21030 *total
= ix86_cost
->shift_const
;
21032 *total
= ix86_cost
->shift_var
;
21037 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21039 /* ??? SSE scalar cost should be used here. */
21040 *total
= ix86_cost
->fmul
;
21043 else if (X87_FLOAT_MODE_P (mode
))
21045 *total
= ix86_cost
->fmul
;
21048 else if (FLOAT_MODE_P (mode
))
21050 /* ??? SSE vector cost should be used here. */
21051 *total
= ix86_cost
->fmul
;
21056 rtx op0
= XEXP (x
, 0);
21057 rtx op1
= XEXP (x
, 1);
21059 if (CONST_INT_P (XEXP (x
, 1)))
21061 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
21062 for (nbits
= 0; value
!= 0; value
&= value
- 1)
21066 /* This is arbitrary. */
21069 /* Compute costs correctly for widening multiplication. */
21070 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
21071 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
21072 == GET_MODE_SIZE (mode
))
21074 int is_mulwiden
= 0;
21075 enum machine_mode inner_mode
= GET_MODE (op0
);
21077 if (GET_CODE (op0
) == GET_CODE (op1
))
21078 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
21079 else if (CONST_INT_P (op1
))
21081 if (GET_CODE (op0
) == SIGN_EXTEND
)
21082 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
21085 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
21089 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
21092 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
21093 + nbits
* ix86_cost
->mult_bit
21094 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
21103 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21104 /* ??? SSE cost should be used here. */
21105 *total
= ix86_cost
->fdiv
;
21106 else if (X87_FLOAT_MODE_P (mode
))
21107 *total
= ix86_cost
->fdiv
;
21108 else if (FLOAT_MODE_P (mode
))
21109 /* ??? SSE vector cost should be used here. */
21110 *total
= ix86_cost
->fdiv
;
21112 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
21116 if (GET_MODE_CLASS (mode
) == MODE_INT
21117 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
21119 if (GET_CODE (XEXP (x
, 0)) == PLUS
21120 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
21121 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
21122 && CONSTANT_P (XEXP (x
, 1)))
21124 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
21125 if (val
== 2 || val
== 4 || val
== 8)
21127 *total
= ix86_cost
->lea
;
21128 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
21129 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
21131 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
21135 else if (GET_CODE (XEXP (x
, 0)) == MULT
21136 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
21138 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
21139 if (val
== 2 || val
== 4 || val
== 8)
21141 *total
= ix86_cost
->lea
;
21142 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
21143 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
21147 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
21149 *total
= ix86_cost
->lea
;
21150 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
21151 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
21152 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
21159 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21161 /* ??? SSE cost should be used here. */
21162 *total
= ix86_cost
->fadd
;
21165 else if (X87_FLOAT_MODE_P (mode
))
21167 *total
= ix86_cost
->fadd
;
21170 else if (FLOAT_MODE_P (mode
))
21172 /* ??? SSE vector cost should be used here. */
21173 *total
= ix86_cost
->fadd
;
21181 if (!TARGET_64BIT
&& mode
== DImode
)
21183 *total
= (ix86_cost
->add
* 2
21184 + (rtx_cost (XEXP (x
, 0), outer_code
)
21185 << (GET_MODE (XEXP (x
, 0)) != DImode
))
21186 + (rtx_cost (XEXP (x
, 1), outer_code
)
21187 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
21193 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21195 /* ??? SSE cost should be used here. */
21196 *total
= ix86_cost
->fchs
;
21199 else if (X87_FLOAT_MODE_P (mode
))
21201 *total
= ix86_cost
->fchs
;
21204 else if (FLOAT_MODE_P (mode
))
21206 /* ??? SSE vector cost should be used here. */
21207 *total
= ix86_cost
->fchs
;
21213 if (!TARGET_64BIT
&& mode
== DImode
)
21214 *total
= ix86_cost
->add
* 2;
21216 *total
= ix86_cost
->add
;
21220 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
21221 && XEXP (XEXP (x
, 0), 1) == const1_rtx
21222 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
21223 && XEXP (x
, 1) == const0_rtx
)
21225 /* This kind of construct is implemented using test[bwl].
21226 Treat it as if we had an AND. */
21227 *total
= (ix86_cost
->add
21228 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
21229 + rtx_cost (const1_rtx
, outer_code
));
21235 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
21240 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21241 /* ??? SSE cost should be used here. */
21242 *total
= ix86_cost
->fabs
;
21243 else if (X87_FLOAT_MODE_P (mode
))
21244 *total
= ix86_cost
->fabs
;
21245 else if (FLOAT_MODE_P (mode
))
21246 /* ??? SSE vector cost should be used here. */
21247 *total
= ix86_cost
->fabs
;
21251 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21252 /* ??? SSE cost should be used here. */
21253 *total
= ix86_cost
->fsqrt
;
21254 else if (X87_FLOAT_MODE_P (mode
))
21255 *total
= ix86_cost
->fsqrt
;
21256 else if (FLOAT_MODE_P (mode
))
21257 /* ??? SSE vector cost should be used here. */
21258 *total
= ix86_cost
->fsqrt
;
21262 if (XINT (x
, 1) == UNSPEC_TP
)
/* Counter incremented in machopic_output_stub to generate unique
   Mach-O lazy-pointer/stub label numbers (e.g. "L<n>$lz").  */
21273 static int current_machopic_label_num
;
21275 /* Given a symbol name and its associated stub, write out the
21276 definition of the stub. */
/* NOTE(review): line-mangled by extraction; the PIC/non-PIC #if lines
   between the fused original line numbers are missing.  Tokens kept
   verbatim.  Writes the Mach-O symbol stub, binder and lazy pointer for
   SYMB/STUB to FILE -- restore from a pristine i386.c before editing.  */
21279 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
21281 unsigned int length
;
21282 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
21283 int label
= ++current_machopic_label_num
;
21285 /* For 64-bit we shouldn't get here. */
21286 gcc_assert (!TARGET_64BIT
);
21288 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21289 symb
= (*targetm
.strip_name_encoding
) (symb
);
21291 length
= strlen (stub
);
21292 binder_name
= alloca (length
+ 32);
21293 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
21295 length
= strlen (symb
);
21296 symbol_name
= alloca (length
+ 32);
21297 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
21299 sprintf (lazy_ptr_name
, "L%d$lz", label
);
21302 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
21304 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
21306 fprintf (file
, "%s:\n", stub
);
21307 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21311 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
21312 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
21313 fprintf (file
, "\tjmp\t*%%edx\n");
21316 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
21318 fprintf (file
, "%s:\n", binder_name
);
21322 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
21323 fprintf (file
, "\tpushl\t%%eax\n");
21326 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
21328 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
21330 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
21331 fprintf (file
, "%s:\n", lazy_ptr_name
);
21332 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21333 fprintf (file
, "\t.long %s\n", binder_name
);
/* End-of-file hook for Darwin/x86; delegates to darwin_file_end.
   NOTE(review): extraction dropped surrounding lines (return type,
   braces) -- restore from a pristine i386.c.  */
21337 darwin_x86_file_end (void)
21339 darwin_file_end ();
21342 #endif /* TARGET_MACHO */
21344 /* Order the registers for register allocator. */
/* Fill reg_alloc_order with the register-allocation preference order:
   call-clobbered general regs, call-saved general regs, then x87 or SSE
   registers depending on TARGET_SSE_MATH, REX SSE regs, MMX regs, and
   finally zero-padding.  NOTE(review): line-mangled by extraction;
   tokens kept verbatim.  */
21347 x86_order_regs_for_local_alloc (void)
21352 /* First allocate the local general purpose registers. */
21353 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21354 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
21355 reg_alloc_order
[pos
++] = i
;
21357 /* Global general purpose registers. */
21358 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21359 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
21360 reg_alloc_order
[pos
++] = i
;
21362 /* x87 registers come first in case we are doing FP math
21364 if (!TARGET_SSE_MATH
)
21365 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21366 reg_alloc_order
[pos
++] = i
;
21368 /* SSE registers. */
21369 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
21370 reg_alloc_order
[pos
++] = i
;
21371 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
21372 reg_alloc_order
[pos
++] = i
;
21374 /* x87 registers. */
21375 if (TARGET_SSE_MATH
)
21376 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21377 reg_alloc_order
[pos
++] = i
;
21379 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
21380 reg_alloc_order
[pos
++] = i
;
21382 /* Initialize the rest of array as we do not allocate some registers
21384 while (pos
< FIRST_PSEUDO_REGISTER
)
21385 reg_alloc_order
[pos
++] = 0;
21388 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21389 struct attribute_spec.handler. */
/* Attribute handler for "ms_struct"/"gcc_struct": rejects the attribute
   (setting *no_add_attrs) when the target type is not a record/union or
   when the opposite attribute is already present.  NOTE(review):
   line-mangled by extraction; tokens kept verbatim.  */
21391 ix86_handle_struct_attribute (tree
*node
, tree name
,
21392 tree args ATTRIBUTE_UNUSED
,
21393 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
21396 if (DECL_P (*node
))
21398 if (TREE_CODE (*node
) == TYPE_DECL
)
21399 type
= &TREE_TYPE (*node
);
21404 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
21405 || TREE_CODE (*type
) == UNION_TYPE
)))
21407 warning (OPT_Wattributes
, "%qs attribute ignored",
21408 IDENTIFIER_POINTER (name
));
21409 *no_add_attrs
= true;
21412 else if ((is_attribute_p ("ms_struct", name
)
21413 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
21414 || ((is_attribute_p ("gcc_struct", name
)
21415 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
21417 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
21418 IDENTIFIER_POINTER (name
));
21419 *no_add_attrs
= true;
21426 ix86_ms_bitfield_layout_p (const_tree record_type
)
21428 return (TARGET_MS_BITFIELD_LAYOUT
&&
21429 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
21430 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
21433 /* Returns an expression indicating where the this parameter is
21434 located on entry to the FUNCTION. */
/* Return an rtx for the location of the `this' parameter on entry to
   FUNCTION: a DImode arg register on 64-bit, an SImode register for
   regparm/fastcall functions, otherwise a stack slot.  AGGR skips one
   slot/register when the return value is passed by invisible reference.
   NOTE(review): line-mangled by extraction; tokens kept verbatim.  */
21437 x86_this_parameter (tree function
)
21439 tree type
= TREE_TYPE (function
);
21440 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
21444 const int *parm_regs
;
21446 if (TARGET_64BIT_MS_ABI
)
21447 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
21449 parm_regs
= x86_64_int_parameter_registers
;
21450 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
21453 if (ix86_function_regparm (type
, function
) > 0
21454 && !type_has_variadic_args_p (type
))
21457 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
21459 return gen_rtx_REG (SImode
, regno
);
21462 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
21465 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): line-mangled by extraction; the `return true/false`
   lines between the comments are missing.  Determines whether
   x86_output_mi_thunk can emit the thunk -- tokens kept verbatim.  */
21468 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
21469 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
21470 HOST_WIDE_INT vcall_offset
, const_tree function
)
21472 /* 64-bit can handle anything. */
21476 /* For 32-bit, everything's fine if we have one free register. */
21477 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
21480 /* Need a free register for vcall_offset. */
21484 /* Need a free register for GOT references. */
21485 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
21488 /* Otherwise ok. */
21492 /* Output the assembler code for a thunk function. THUNK_DECL is the
21493 declaration for the thunk function itself, FUNCTION is the decl for
21494 the target function. DELTA is an immediate constant offset to be
21495 added to THIS. If VCALL_OFFSET is nonzero, the word at
21496 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): this thunk emitter was heavily line-mangled during
   extraction; several `if (TARGET_64BIT)` guards, braces and #if
   TARGET_MACHO lines are missing between the fused original line
   numbers.  Tokens kept verbatim -- restore from a pristine i386.c
   rather than editing this text.  */
21499 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
21500 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
21501 HOST_WIDE_INT vcall_offset
, tree function
)
21504 rtx this_param
= x86_this_parameter (function
);
21507 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21508 pull it in now and let DELTA benefit. */
21509 if (REG_P (this_param
))
21510 this_reg
= this_param
;
21511 else if (vcall_offset
)
21513 /* Put the this parameter into %eax. */
21514 xops
[0] = this_param
;
21515 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
21516 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21519 this_reg
= NULL_RTX
;
21521 /* Adjust the this parameter by a fixed constant. */
21524 xops
[0] = GEN_INT (delta
);
21525 xops
[1] = this_reg
? this_reg
: this_param
;
21528 if (!x86_64_general_operand (xops
[0], DImode
))
21530 tmp
= gen_rtx_REG (DImode
, R10_REG
);
21532 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
21534 xops
[1] = this_param
;
21536 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
21539 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
21542 /* Adjust the this parameter by a value stored in the vtable. */
21546 tmp
= gen_rtx_REG (DImode
, R10_REG
);
21549 int tmp_regno
= 2 /* ECX */;
21550 if (lookup_attribute ("fastcall",
21551 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
21552 tmp_regno
= 0 /* EAX */;
21553 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
21556 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
21559 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
21561 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21563 /* Adjust the this parameter. */
21564 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
21565 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
21567 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
21568 xops
[0] = GEN_INT (vcall_offset
);
21570 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
21571 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
21573 xops
[1] = this_reg
;
21575 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
21577 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
21580 /* If necessary, drop THIS back to its stack slot. */
21581 if (this_reg
&& this_reg
!= this_param
)
21583 xops
[0] = this_reg
;
21584 xops
[1] = this_param
;
21585 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21588 xops
[0] = XEXP (DECL_RTL (function
), 0);
21591 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
21592 output_asm_insn ("jmp\t%P0", xops
);
21593 /* All thunks should be in the same object as their target,
21594 and thus binds_local_p should be true. */
21595 else if (TARGET_64BIT_MS_ABI
)
21596 gcc_unreachable ();
21599 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
21600 tmp
= gen_rtx_CONST (Pmode
, tmp
);
21601 tmp
= gen_rtx_MEM (QImode
, tmp
);
21603 output_asm_insn ("jmp\t%A0", xops
);
21608 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
21609 output_asm_insn ("jmp\t%P0", xops
);
21614 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
21615 tmp
= (gen_rtx_SYMBOL_REF
21617 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
21618 tmp
= gen_rtx_MEM (QImode
, tmp
);
21620 output_asm_insn ("jmp\t%0", xops
);
21623 #endif /* TARGET_MACHO */
21625 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
21626 output_set_got (tmp
, NULL_RTX
);
21629 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
21630 output_asm_insn ("jmp\t{*}%1", xops
);
/* Emit the standard beginning-of-file assembly: default prologue,
   Darwin prologue where applicable, optional .version/.global
   __fltused directives, and .intel_syntax for the Intel dialect.
   NOTE(review): line-mangled by extraction; tokens kept verbatim.  */
21636 x86_file_start (void)
21638 default_file_start ();
21640 darwin_file_start ();
21642 if (X86_FILE_START_VERSION_DIRECTIVE
)
21643 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
21644 if (X86_FILE_START_FLTUSED
)
21645 fputs ("\t.global\t__fltused\n", asm_out_file
);
21646 if (ix86_asm_dialect
== ASM_INTEL
)
21647 fputs ("\t.intel_syntax\n", asm_out_file
);
/* Compute the alignment for FIELD given the natural alignment COMPUTED;
   on 32-bit without -malign-double, doubles and integer-class modes are
   capped at 32 bits.  NOTE(review): line-mangled by extraction; the
   final `return computed;` line appears missing -- tokens kept
   verbatim.  */
21651 x86_field_alignment (tree field
, int computed
)
21653 enum machine_mode mode
;
21654 tree type
= TREE_TYPE (field
);
21656 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
21658 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
21659 ? get_inner_array_type (type
) : type
);
21660 if (mode
== DFmode
|| mode
== DCmode
21661 || GET_MODE_CLASS (mode
) == MODE_INT
21662 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
21663 return MIN (32, computed
);
21667 /* Output assembler code to FILE to increment profiler label # LABELNO
21668 for profiling a function entry. */
/* Emit the mcount profiling call sequence for 64-bit, PIC 32-bit and
   non-PIC 32-bit.  NOTE(review): line-mangled by extraction; the
   TARGET_64BIT/flag_pic branch structure between the #ifndef blocks is
   partially missing -- tokens kept verbatim.  */
21670 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
21674 #ifndef NO_PROFILE_COUNTERS
21675 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
21678 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
21679 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
21681 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
21685 #ifndef NO_PROFILE_COUNTERS
21686 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21687 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
21689 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
21693 #ifndef NO_PROFILE_COUNTERS
21694 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
21695 PROFILE_COUNT_REGISTER
);
21697 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
21701 /* We don't have exact information about the insn sizes, but we may assume
21702 quite safely that we are informed about all 1 byte insns and memory
21703 address sizes. This is enough to eliminate unnecessary padding in
/* Estimate a lower bound on the byte size of INSN (used by the 4-jumps
   padding pass below).  NOTE(review): line-mangled by extraction; the
   numeric `return` lines between branches are missing -- tokens kept
   verbatim.  */
21707 min_insn_size (rtx insn
)
21711 if (!INSN_P (insn
) || !active_insn_p (insn
))
21714 /* Discard alignments we've emit and jump instructions. */
21715 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
21716 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
21719 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
21720 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
21723 /* Important case - calls are always 5 bytes.
21724 It is common to have many calls in the row. */
21726 && symbolic_reference_mentioned_p (PATTERN (insn
))
21727 && !SIBLING_CALL_P (insn
))
21729 if (get_attr_length (insn
) <= 1)
21732 /* For normal instructions we may rely on the sizes of addresses
21733 and the presence of symbol to require 4 bytes of encoding.
21734 This is not the case for jumps where references are PC relative. */
21735 if (!JUMP_P (insn
))
21737 l
= get_attr_length_address (insn
);
21738 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
21747 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scan the insn stream for 16-byte windows containing four jumps and
   insert alignment padding to break them up (K8 branch-predictor
   workaround; see the comment above this block).  NOTE(review):
   line-mangled by extraction; several guard/brace lines are missing --
   tokens kept verbatim.  */
21751 ix86_avoid_jump_misspredicts (void)
21753 rtx insn
, start
= get_insns ();
21754 int nbytes
= 0, njumps
= 0;
21757 /* Look for all minimal intervals of instructions containing 4 jumps.
21758 The intervals are bounded by START and INSN. NBYTES is the total
21759 size of instructions in the interval including INSN and not including
21760 START. When the NBYTES is smaller than 16 bytes, it is possible
21761 that the end of START and INSN ends up in the same 16byte page.
21763 The smallest offset in the page INSN can start is the case where START
21764 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
21765 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
21767 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
21770 nbytes
+= min_insn_size (insn
);
21772 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
21773 INSN_UID (insn
), min_insn_size (insn
));
21775 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
21776 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
21784 start
= NEXT_INSN (start
);
21785 if ((JUMP_P (start
)
21786 && GET_CODE (PATTERN (start
)) != ADDR_VEC
21787 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
21789 njumps
--, isjump
= 1;
21792 nbytes
-= min_insn_size (start
);
21794 gcc_assert (njumps
>= 0);
21796 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
21797 INSN_UID (start
), INSN_UID (insn
), nbytes
);
21799 if (njumps
== 3 && isjump
&& nbytes
< 16)
21801 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
21804 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
21805 INSN_UID (insn
), padsize
);
21806 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
21811 /* AMD Athlon works faster
21812 when RET is not destination of conditional jump or directly preceded
21813 by other jump instruction. We avoid the penalty by inserting NOP just
21814 before the RET instructions in such cases. */
/* Replace RET insns that directly follow a jump (or start an otherwise
   empty hot function) with the long-return pattern, avoiding the AMD
   branch-misprediction penalty described above.  NOTE(review):
   line-mangled by extraction; `continue;`/brace lines are missing --
   tokens kept verbatim.  */
21816 ix86_pad_returns (void)
21821 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
21823 basic_block bb
= e
->src
;
21824 rtx ret
= BB_END (bb
);
21826 bool replace
= false;
21828 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
21829 || !maybe_hot_bb_p (bb
))
21831 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
21832 if (active_insn_p (prev
) || LABEL_P (prev
))
21834 if (prev
&& LABEL_P (prev
))
21839 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
21840 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
21841 && !(e
->flags
& EDGE_FALLTHRU
))
21846 prev
= prev_active_insn (ret
);
21848 && ((JUMP_P (prev
) && any_condjump_p (prev
))
21851 /* Empty functions get branch mispredict even when the jump destination
21852 is not visible to us. */
21853 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
21858 emit_insn_before (gen_return_internal_long (), ret
);
21864 /* Implement machine specific optimizations. We implement padding of returns
21865 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the enclosing function signature (presumably
   ix86_reorg, the machine-dependent reorg pass) was lost in extraction;
   only its body survives.  Runs the two peephole passes above when
   optimizing for speed.  */
21869 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
21870 ix86_pad_returns ();
21871 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
21872 ix86_avoid_jump_misspredicts ();
21875 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scan the cached recog operands of INSN for a hard register with
   number >= 4 (a QImode register needing a REX prefix).  NOTE(review):
   line-mangled by extraction; the declaration of `i` and the return
   statements are missing -- tokens kept verbatim.  */
21878 x86_extended_QIreg_mentioned_p (rtx insn
)
21881 extract_insn_cached (insn
);
21882 for (i
= 0; i
< recog_data
.n_operands
; i
++)
21883 if (REG_P (recog_data
.operand
[i
])
21884 && REGNO (recog_data
.operand
[i
]) >= 4)
21889 /* Return nonzero when P points to register encoded via REX prefix.
21890 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero when *P is a REX-encoded register
   (extended integer or SSE regno).  NOTE(review): the REG_P guard line
   between the declaration and the REGNO read was lost in extraction --
   tokens kept verbatim.  */
21892 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
21894 unsigned int regno
;
21897 regno
= REGNO (*p
);
21898 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
21901 /* Return true when INSN mentions register that must be encoded using REX
21904 x86_extended_reg_mentioned_p (rtx insn
)
21906 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
21909 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21910 optabs would emit if we didn't have TFmode patterns. */
/* Expand an unsigned SImode/DImode -> FP conversion: non-negative
   inputs convert directly; negative inputs are halved (shift + low bit
   folded back in), converted, then doubled.  NOTE(review): line-mangled
   by extraction; the assignment to `out` and some OPTAB arguments are
   missing -- tokens kept verbatim.  */
21913 x86_emit_floatuns (rtx operands
[2])
21915 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
21916 enum machine_mode mode
, inmode
;
21918 inmode
= GET_MODE (operands
[1]);
21919 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
21922 in
= force_reg (inmode
, operands
[1]);
21923 mode
= GET_MODE (out
);
21924 neglab
= gen_label_rtx ();
21925 donelab
= gen_label_rtx ();
21926 f0
= gen_reg_rtx (mode
);
21928 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
21930 expand_float (out
, in
, 0);
21932 emit_jump_insn (gen_jump (donelab
));
21935 emit_label (neglab
);
21937 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
21939 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
21941 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
21943 expand_float (f0
, i0
, 0);
21945 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
21947 emit_label (donelab
);
21950 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21951 with all elements equal to VAR. Return true if successful. */
21954 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
21955 rtx target
, rtx val
)
21957 enum machine_mode smode
, wsmode
, wvmode
;
21972 val
= force_reg (GET_MODE_INNER (mode
), val
);
21973 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
21974 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
21980 if (TARGET_SSE
|| TARGET_3DNOW_A
)
21982 val
= gen_lowpart (SImode
, val
);
21983 x
= gen_rtx_TRUNCATE (HImode
, val
);
21984 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
21985 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
22007 /* Extend HImode to SImode using a paradoxical SUBREG. */
22008 tmp1
= gen_reg_rtx (SImode
);
22009 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
22010 /* Insert the SImode value as low element of V4SImode vector. */
22011 tmp2
= gen_reg_rtx (V4SImode
);
22012 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
22013 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
22014 CONST0_RTX (V4SImode
),
22016 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
22017 /* Cast the V4SImode vector back to a V8HImode vector. */
22018 tmp1
= gen_reg_rtx (V8HImode
);
22019 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
22020 /* Duplicate the low short through the whole low SImode word. */
22021 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
22022 /* Cast the V8HImode vector back to a V4SImode vector. */
22023 tmp2
= gen_reg_rtx (V4SImode
);
22024 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
22025 /* Replicate the low element of the V4SImode vector. */
22026 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
22027 /* Cast the V2SImode back to V8HImode, and store in target. */
22028 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
22039 /* Extend QImode to SImode using a paradoxical SUBREG. */
22040 tmp1
= gen_reg_rtx (SImode
);
22041 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
22042 /* Insert the SImode value as low element of V4SImode vector. */
22043 tmp2
= gen_reg_rtx (V4SImode
);
22044 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
22045 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
22046 CONST0_RTX (V4SImode
),
22048 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
22049 /* Cast the V4SImode vector back to a V16QImode vector. */
22050 tmp1
= gen_reg_rtx (V16QImode
);
22051 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
22052 /* Duplicate the low byte through the whole low SImode word. */
22053 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
22054 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
22055 /* Cast the V16QImode vector back to a V4SImode vector. */
22056 tmp2
= gen_reg_rtx (V4SImode
);
22057 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
22058 /* Replicate the low element of the V4SImode vector. */
22059 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
22060 /* Cast the V2SImode back to V16QImode, and store in target. */
22061 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
22069 /* Replicate the value once into the next wider mode and recurse. */
22070 val
= convert_modes (wsmode
, smode
, val
, true);
22071 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
22072 GEN_INT (GET_MODE_BITSIZE (smode
)),
22073 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
22074 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
22076 x
= gen_reg_rtx (wvmode
);
22077 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
22078 gcc_unreachable ();
22079 emit_move_insn (target
, gen_lowpart (mode
, x
));
22087 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22088 whose ONE_VAR element is VAR, and other elements are zero. Return true
22092 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
22093 rtx target
, rtx var
, int one_var
)
22095 enum machine_mode vsimode
;
22111 var
= force_reg (GET_MODE_INNER (mode
), var
);
22112 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
22113 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
22118 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
22119 new_target
= gen_reg_rtx (mode
);
22121 new_target
= target
;
22122 var
= force_reg (GET_MODE_INNER (mode
), var
);
22123 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
22124 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
22125 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
22128 /* We need to shuffle the value to the correct position, so
22129 create a new pseudo to store the intermediate result. */
22131 /* With SSE2, we can use the integer shuffle insns. */
22132 if (mode
!= V4SFmode
&& TARGET_SSE2
)
22134 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
22136 GEN_INT (one_var
== 1 ? 0 : 1),
22137 GEN_INT (one_var
== 2 ? 0 : 1),
22138 GEN_INT (one_var
== 3 ? 0 : 1)));
22139 if (target
!= new_target
)
22140 emit_move_insn (target
, new_target
);
22144 /* Otherwise convert the intermediate result to V4SFmode and
22145 use the SSE1 shuffle instructions. */
22146 if (mode
!= V4SFmode
)
22148 tmp
= gen_reg_rtx (V4SFmode
);
22149 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
22154 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
22156 GEN_INT (one_var
== 1 ? 0 : 1),
22157 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
22158 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
22160 if (mode
!= V4SFmode
)
22161 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
22162 else if (tmp
!= target
)
22163 emit_move_insn (target
, tmp
);
22165 else if (target
!= new_target
)
22166 emit_move_insn (target
, new_target
);
22171 vsimode
= V4SImode
;
22177 vsimode
= V2SImode
;
22183 /* Zero extend the variable element to SImode and recurse. */
22184 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
22186 x
= gen_reg_rtx (vsimode
);
22187 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
22189 gcc_unreachable ();
22191 emit_move_insn (target
, gen_lowpart (mode
, x
));
22199 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22200 consisting of the values in VALS. It is known that all elements
22201 except ONE_VAR are constants. Return true if successful. */
22204 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
22205 rtx target
, rtx vals
, int one_var
)
22207 rtx var
= XVECEXP (vals
, 0, one_var
);
22208 enum machine_mode wmode
;
22211 const_vec
= copy_rtx (vals
);
22212 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
22213 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
22221 /* For the two element vectors, it's just as easy to use
22222 the general case. */
22238 /* There's no way to set one QImode entry easily. Combine
22239 the variable value with its adjacent constant value, and
22240 promote to an HImode set. */
22241 x
= XVECEXP (vals
, 0, one_var
^ 1);
22244 var
= convert_modes (HImode
, QImode
, var
, true);
22245 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
22246 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
22247 x
= GEN_INT (INTVAL (x
) & 0xff);
22251 var
= convert_modes (HImode
, QImode
, var
, true);
22252 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
22254 if (x
!= const0_rtx
)
22255 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
22256 1, OPTAB_LIB_WIDEN
);
22258 x
= gen_reg_rtx (wmode
);
22259 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
22260 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
22262 emit_move_insn (target
, gen_lowpart (mode
, x
));
22269 emit_move_insn (target
, const_vec
);
22270 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
22274 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
22275 all values variable, and none identical. */
22278 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
22279 rtx target
, rtx vals
)
22281 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
22282 rtx op0
= NULL
, op1
= NULL
;
22283 bool use_vec_concat
= false;
22289 if (!mmx_ok
&& !TARGET_SSE
)
22295 /* For the two element vectors, we always implement VEC_CONCAT. */
22296 op0
= XVECEXP (vals
, 0, 0);
22297 op1
= XVECEXP (vals
, 0, 1);
22298 use_vec_concat
= true;
22302 half_mode
= V2SFmode
;
22305 half_mode
= V2SImode
;
22311 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
22312 Recurse to load the two halves. */
22314 op0
= gen_reg_rtx (half_mode
);
22315 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
22316 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
22318 op1
= gen_reg_rtx (half_mode
);
22319 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
22320 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
22322 use_vec_concat
= true;
22333 gcc_unreachable ();
22336 if (use_vec_concat
)
22338 if (!register_operand (op0
, half_mode
))
22339 op0
= force_reg (half_mode
, op0
);
22340 if (!register_operand (op1
, half_mode
))
22341 op1
= force_reg (half_mode
, op1
);
22343 emit_insn (gen_rtx_SET (VOIDmode
, target
,
22344 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
22348 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
22349 enum machine_mode inner_mode
;
22350 rtx words
[4], shift
;
22352 inner_mode
= GET_MODE_INNER (mode
);
22353 n_elts
= GET_MODE_NUNITS (mode
);
22354 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
22355 n_elt_per_word
= n_elts
/ n_words
;
22356 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
22358 for (i
= 0; i
< n_words
; ++i
)
22360 rtx word
= NULL_RTX
;
22362 for (j
= 0; j
< n_elt_per_word
; ++j
)
22364 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
22365 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
22371 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
22372 word
, 1, OPTAB_LIB_WIDEN
);
22373 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
22374 word
, 1, OPTAB_LIB_WIDEN
);
22382 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
22383 else if (n_words
== 2)
22385 rtx tmp
= gen_reg_rtx (mode
);
22386 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
22387 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
22388 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
22389 emit_move_insn (target
, tmp
);
22391 else if (n_words
== 4)
22393 rtx tmp
= gen_reg_rtx (V4SImode
);
22394 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
22395 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
22396 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
22399 gcc_unreachable ();
22403 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22404 instructions unless MMX_OK is true. */
22407 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
22409 enum machine_mode mode
= GET_MODE (target
);
22410 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22411 int n_elts
= GET_MODE_NUNITS (mode
);
22412 int n_var
= 0, one_var
= -1;
22413 bool all_same
= true, all_const_zero
= true;
22417 for (i
= 0; i
< n_elts
; ++i
)
22419 x
= XVECEXP (vals
, 0, i
);
22420 if (!CONSTANT_P (x
))
22421 n_var
++, one_var
= i
;
22422 else if (x
!= CONST0_RTX (inner_mode
))
22423 all_const_zero
= false;
22424 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
22428 /* Constants are best loaded from the constant pool. */
22431 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
22435 /* If all values are identical, broadcast the value. */
22437 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
22438 XVECEXP (vals
, 0, 0)))
22441 /* Values where only one field is non-constant are best loaded from
22442 the pool and overwritten via move later. */
22446 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
22447 XVECEXP (vals
, 0, one_var
),
22451 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
22455 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
22459 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
22461 enum machine_mode mode
= GET_MODE (target
);
22462 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22463 bool use_vec_merge
= false;
22472 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
22473 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
22475 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
22477 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
22478 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22484 use_vec_merge
= TARGET_SSE4_1
;
22492 /* For the two element vectors, we implement a VEC_CONCAT with
22493 the extraction of the other element. */
22495 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
22496 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
22499 op0
= val
, op1
= tmp
;
22501 op0
= tmp
, op1
= val
;
22503 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
22504 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22509 use_vec_merge
= TARGET_SSE4_1
;
22516 use_vec_merge
= true;
22520 /* tmp = target = A B C D */
22521 tmp
= copy_to_reg (target
);
22522 /* target = A A B B */
22523 emit_insn (gen_sse_unpcklps (target
, target
, target
));
22524 /* target = X A B B */
22525 ix86_expand_vector_set (false, target
, val
, 0);
22526 /* target = A X C D */
22527 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22528 GEN_INT (1), GEN_INT (0),
22529 GEN_INT (2+4), GEN_INT (3+4)));
22533 /* tmp = target = A B C D */
22534 tmp
= copy_to_reg (target
);
22535 /* tmp = X B C D */
22536 ix86_expand_vector_set (false, tmp
, val
, 0);
22537 /* target = A B X D */
22538 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22539 GEN_INT (0), GEN_INT (1),
22540 GEN_INT (0+4), GEN_INT (3+4)));
22544 /* tmp = target = A B C D */
22545 tmp
= copy_to_reg (target
);
22546 /* tmp = X B C D */
22547 ix86_expand_vector_set (false, tmp
, val
, 0);
22548 /* target = A B X D */
22549 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22550 GEN_INT (0), GEN_INT (1),
22551 GEN_INT (2+4), GEN_INT (0+4)));
22555 gcc_unreachable ();
22560 use_vec_merge
= TARGET_SSE4_1
;
22564 /* Element 0 handled by vec_merge below. */
22567 use_vec_merge
= true;
22573 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22574 store into element 0, then shuffle them back. */
22578 order
[0] = GEN_INT (elt
);
22579 order
[1] = const1_rtx
;
22580 order
[2] = const2_rtx
;
22581 order
[3] = GEN_INT (3);
22582 order
[elt
] = const0_rtx
;
22584 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
22585 order
[1], order
[2], order
[3]));
22587 ix86_expand_vector_set (false, target
, val
, 0);
22589 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
22590 order
[1], order
[2], order
[3]));
22594 /* For SSE1, we have to reuse the V4SF code. */
22595 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
22596 gen_lowpart (SFmode
, val
), elt
);
22601 use_vec_merge
= TARGET_SSE2
;
22604 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
22608 use_vec_merge
= TARGET_SSE4_1
;
22618 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
22619 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
22620 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22624 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
22626 emit_move_insn (mem
, target
);
22628 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
22629 emit_move_insn (tmp
, val
);
22631 emit_move_insn (target
, mem
);
22636 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
22638 enum machine_mode mode
= GET_MODE (vec
);
22639 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22640 bool use_vec_extr
= false;
22653 use_vec_extr
= true;
22657 use_vec_extr
= TARGET_SSE4_1
;
22669 tmp
= gen_reg_rtx (mode
);
22670 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
22671 GEN_INT (elt
), GEN_INT (elt
),
22672 GEN_INT (elt
+4), GEN_INT (elt
+4)));
22676 tmp
= gen_reg_rtx (mode
);
22677 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
22681 gcc_unreachable ();
22684 use_vec_extr
= true;
22689 use_vec_extr
= TARGET_SSE4_1
;
22703 tmp
= gen_reg_rtx (mode
);
22704 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
22705 GEN_INT (elt
), GEN_INT (elt
),
22706 GEN_INT (elt
), GEN_INT (elt
)));
22710 tmp
= gen_reg_rtx (mode
);
22711 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
22715 gcc_unreachable ();
22718 use_vec_extr
= true;
22723 /* For SSE1, we have to reuse the V4SF code. */
22724 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
22725 gen_lowpart (V4SFmode
, vec
), elt
);
22731 use_vec_extr
= TARGET_SSE2
;
22734 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
22738 use_vec_extr
= TARGET_SSE4_1
;
22742 /* ??? Could extract the appropriate HImode element and shift. */
22749 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
22750 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
22752 /* Let the rtl optimizers know about the zero extension performed. */
22753 if (inner_mode
== QImode
|| inner_mode
== HImode
)
22755 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
22756 target
= gen_lowpart (SImode
, target
);
22759 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22763 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
22765 emit_move_insn (mem
, vec
);
22767 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
22768 emit_move_insn (target
, tmp
);
22772 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22773 pattern to reduce; DEST is the destination; IN is the input vector. */
22776 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
22778 rtx tmp1
, tmp2
, tmp3
;
22780 tmp1
= gen_reg_rtx (V4SFmode
);
22781 tmp2
= gen_reg_rtx (V4SFmode
);
22782 tmp3
= gen_reg_rtx (V4SFmode
);
22784 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
22785 emit_insn (fn (tmp2
, tmp1
, in
));
22787 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
22788 GEN_INT (1), GEN_INT (1),
22789 GEN_INT (1+4), GEN_INT (1+4)));
22790 emit_insn (fn (dest
, tmp2
, tmp3
));
22793 /* Target hook for scalar_mode_supported_p. */
22795 ix86_scalar_mode_supported_p (enum machine_mode mode
)
22797 if (DECIMAL_FLOAT_MODE_P (mode
))
22799 else if (mode
== TFmode
)
22800 return TARGET_64BIT
;
22802 return default_scalar_mode_supported_p (mode
);
22805 /* Implements target hook vector_mode_supported_p. */
22807 ix86_vector_mode_supported_p (enum machine_mode mode
)
22809 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
22811 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
22813 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
22815 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
22820 /* Target hook for c_mode_for_suffix. */
22821 static enum machine_mode
22822 ix86_c_mode_for_suffix (char suffix
)
22824 if (TARGET_64BIT
&& suffix
== 'q')
22826 if (TARGET_MMX
&& suffix
== 'w')
22832 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22834 We do this in the new i386 backend to maintain source compatibility
22835 with the old cc0-based compiler. */
22838 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
22839 tree inputs ATTRIBUTE_UNUSED
,
22842 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
22844 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
22849 /* Implements target vector targetm.asm.encode_section_info. This
22850 is not used by netware. */
22852 static void ATTRIBUTE_UNUSED
22853 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
22855 default_encode_section_info (decl
, rtl
, first
);
22857 if (TREE_CODE (decl
) == VAR_DECL
22858 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
22859 && ix86_in_large_data_p (decl
))
22860 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
22863 /* Worker function for REVERSE_CONDITION. */
22866 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
22868 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
22869 ? reverse_condition (code
)
22870 : reverse_condition_maybe_unordered (code
));
22873 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22877 output_387_reg_move (rtx insn
, rtx
*operands
)
22879 if (REG_P (operands
[0]))
22881 if (REG_P (operands
[1])
22882 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
22884 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
22885 return output_387_ffreep (operands
, 0);
22886 return "fstp\t%y0";
22888 if (STACK_TOP_P (operands
[0]))
22889 return "fld%z1\t%y1";
22892 else if (MEM_P (operands
[0]))
22894 gcc_assert (REG_P (operands
[1]));
22895 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
22896 return "fstp%z0\t%y0";
22899 /* There is no non-popping store to memory for XFmode.
22900 So if we need one, follow the store with a load. */
22901 if (GET_MODE (operands
[0]) == XFmode
)
22902 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22904 return "fst%z0\t%y0";
22911 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22912 FP status register is set. */
22915 ix86_emit_fp_unordered_jump (rtx label
)
22917 rtx reg
= gen_reg_rtx (HImode
);
22920 emit_insn (gen_x86_fnstsw_1 (reg
));
22922 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
22924 emit_insn (gen_x86_sahf_1 (reg
));
22926 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
22927 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
22931 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
22933 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22934 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
22937 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
22938 gen_rtx_LABEL_REF (VOIDmode
, label
),
22940 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
22942 emit_jump_insn (temp
);
22943 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22946 /* Output code to perform a log1p XFmode calculation. */
22948 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
22950 rtx label1
= gen_label_rtx ();
22951 rtx label2
= gen_label_rtx ();
22953 rtx tmp
= gen_reg_rtx (XFmode
);
22954 rtx tmp2
= gen_reg_rtx (XFmode
);
22956 emit_insn (gen_absxf2 (tmp
, op1
));
22957 emit_insn (gen_cmpxf (tmp
,
22958 CONST_DOUBLE_FROM_REAL_VALUE (
22959 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
22961 emit_jump_insn (gen_bge (label1
));
22963 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
22964 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
22965 emit_jump (label2
);
22967 emit_label (label1
);
22968 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
22969 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
22970 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
22971 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
22973 emit_label (label2
);
22976 /* Output code to perform a Newton-Rhapson approximation of a single precision
22977 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22979 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
22981 rtx x0
, x1
, e0
, e1
, two
;
22983 x0
= gen_reg_rtx (mode
);
22984 e0
= gen_reg_rtx (mode
);
22985 e1
= gen_reg_rtx (mode
);
22986 x1
= gen_reg_rtx (mode
);
22988 two
= CONST_DOUBLE_FROM_REAL_VALUE (dconst2
, SFmode
);
22990 if (VECTOR_MODE_P (mode
))
22991 two
= ix86_build_const_vector (SFmode
, true, two
);
22993 two
= force_reg (mode
, two
);
22995 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22997 /* x0 = 1./b estimate */
22998 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
22999 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
23002 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
23003 gen_rtx_MULT (mode
, x0
, b
)));
23005 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
23006 gen_rtx_MINUS (mode
, two
, e0
)));
23008 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
23009 gen_rtx_MULT (mode
, x0
, e1
)));
23011 emit_insn (gen_rtx_SET (VOIDmode
, res
,
23012 gen_rtx_MULT (mode
, a
, x1
)));
23015 /* Output code to perform a Newton-Rhapson approximation of a
23016 single precision floating point [reciprocal] square root. */
23018 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
23021 rtx x0
, e0
, e1
, e2
, e3
, three
, half
, zero
, mask
;
23023 x0
= gen_reg_rtx (mode
);
23024 e0
= gen_reg_rtx (mode
);
23025 e1
= gen_reg_rtx (mode
);
23026 e2
= gen_reg_rtx (mode
);
23027 e3
= gen_reg_rtx (mode
);
23029 three
= CONST_DOUBLE_FROM_REAL_VALUE (dconst3
, SFmode
);
23030 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, SFmode
);
23032 mask
= gen_reg_rtx (mode
);
23034 if (VECTOR_MODE_P (mode
))
23036 three
= ix86_build_const_vector (SFmode
, true, three
);
23037 half
= ix86_build_const_vector (SFmode
, true, half
);
23040 three
= force_reg (mode
, three
);
23041 half
= force_reg (mode
, half
);
23043 zero
= force_reg (mode
, CONST0_RTX(mode
));
23045 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
23046 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
23048 /* Compare a to zero. */
23049 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
23050 gen_rtx_NE (mode
, a
, zero
)));
23052 /* x0 = 1./sqrt(a) estimate */
23053 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
23054 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
23056 /* Filter out infinity. */
23057 if (VECTOR_MODE_P (mode
))
23058 emit_insn (gen_rtx_SET (VOIDmode
, gen_lowpart (V4SFmode
, x0
),
23060 gen_lowpart (V4SFmode
, x0
),
23061 gen_lowpart (V4SFmode
, mask
))));
23063 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
23064 gen_rtx_AND (mode
, x0
, mask
)));
23067 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
23068 gen_rtx_MULT (mode
, x0
, a
)));
23070 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
23071 gen_rtx_MULT (mode
, e0
, x0
)));
23073 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
23074 gen_rtx_MINUS (mode
, three
, e1
)));
23077 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
23078 gen_rtx_MULT (mode
, half
, x0
)));
23081 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
23082 gen_rtx_MULT (mode
, half
, e0
)));
23083 /* ret = e2 * e3 */
23084 emit_insn (gen_rtx_SET (VOIDmode
, res
,
23085 gen_rtx_MULT (mode
, e2
, e3
)));
23088 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
23090 static void ATTRIBUTE_UNUSED
23091 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
23094 /* With Binutils 2.15, the "@unwind" marker must be specified on
23095 every occurrence of the ".eh_frame" section, not just the first
23098 && strcmp (name
, ".eh_frame") == 0)
23100 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
23101 flags
& SECTION_WRITE
? "aw" : "a");
23104 default_elf_asm_named_section (name
, flags
, decl
);
23107 /* Return the mangling of TYPE if it is an extended fundamental type. */
23109 static const char *
23110 ix86_mangle_type (const_tree type
)
23112 type
= TYPE_MAIN_VARIANT (type
);
23114 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
23115 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
23118 switch (TYPE_MODE (type
))
23121 /* __float128 is "g". */
23124 /* "long double" or __float80 is "e". */
23131 /* For 32-bit code we can save PIC register setup by using
23132 __stack_chk_fail_local hidden function instead of calling
23133 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
23134 register, so it is better to call __stack_chk_fail directly. */
23137 ix86_stack_protect_fail (void)
23139 return TARGET_64BIT
23140 ? default_external_stack_protect_fail ()
23141 : default_hidden_stack_protect_fail ();
23144 /* Select a format to encode pointers in exception handling data. CODE
23145 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
23146 true if the symbol may be affected by dynamic relocations.
23148 ??? All x86 object file formats are capable of representing this.
23149 After all, the relocation needed is the same as for the call insn.
23150 Whether or not a particular assembler allows us to enter such, I
23151 guess we'll have to see. */
23153 asm_preferred_eh_data_format (int code
, int global
)
23157 int type
= DW_EH_PE_sdata8
;
23159 || ix86_cmodel
== CM_SMALL_PIC
23160 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
23161 type
= DW_EH_PE_sdata4
;
23162 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
23164 if (ix86_cmodel
== CM_SMALL
23165 || (ix86_cmodel
== CM_MEDIUM
&& code
))
23166 return DW_EH_PE_udata4
;
23167 return DW_EH_PE_absptr
;
23170 /* Expand copysign from SIGN to the positive value ABS_VALUE
23171 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
23174 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
23176 enum machine_mode mode
= GET_MODE (sign
);
23177 rtx sgn
= gen_reg_rtx (mode
);
23178 if (mask
== NULL_RTX
)
23180 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
23181 if (!VECTOR_MODE_P (mode
))
23183 /* We need to generate a scalar mode mask in this case. */
23184 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
23185 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
23186 mask
= gen_reg_rtx (mode
);
23187 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
23191 mask
= gen_rtx_NOT (mode
, mask
);
23192 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
23193 gen_rtx_AND (mode
, mask
, sign
)));
23194 emit_insn (gen_rtx_SET (VOIDmode
, result
,
23195 gen_rtx_IOR (mode
, abs_value
, sgn
)));
23198 /* Expand fabs (OP0) and return a new rtx that holds the result. The
23199 mask for masking out the sign-bit is stored in *SMASK, if that is
23202 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
23204 enum machine_mode mode
= GET_MODE (op0
);
23207 xa
= gen_reg_rtx (mode
);
23208 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
23209 if (!VECTOR_MODE_P (mode
))
23211 /* We need to generate a scalar mode mask in this case. */
23212 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
23213 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
23214 mask
= gen_reg_rtx (mode
);
23215 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
23217 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
23218 gen_rtx_AND (mode
, op0
, mask
)));
23226 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
23227 swapping the operands if SWAP_OPERANDS is true. The expanded
23228 code is a forward jump to a newly created label in case the
23229 comparison is true. The generated label rtx is returned. */
23231 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
23232 bool swap_operands
)
23243 label
= gen_label_rtx ();
23244 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
23245 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23246 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
23247 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
23248 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23249 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
23250 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23251 JUMP_LABEL (tmp
) = label
;
23256 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
23257 using comparison code CODE. Operands are swapped for the comparison if
23258 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
23260 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
23261 bool swap_operands
)
23263 enum machine_mode mode
= GET_MODE (op0
);
23264 rtx mask
= gen_reg_rtx (mode
);
23273 if (mode
== DFmode
)
23274 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
23275 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
23277 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
23278 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
23283 /* Generate and return a rtx of mode MODE for 2**n where n is the number
23284 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
23286 ix86_gen_TWO52 (enum machine_mode mode
)
23288 REAL_VALUE_TYPE TWO52r
;
23291 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
23292 TWO52
= const_double_from_real_value (TWO52r
, mode
);
23293 TWO52
= force_reg (mode
, TWO52
);
23298 /* Expand SSE sequence for computing lround from OP1 storing
23301 ix86_expand_lround (rtx op0
, rtx op1
)
23303 /* C code for the stuff we're doing below:
23304 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
23307 enum machine_mode mode
= GET_MODE (op1
);
23308 const struct real_format
*fmt
;
23309 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
23312 /* load nextafter (0.5, 0.0) */
23313 fmt
= REAL_MODE_FORMAT (mode
);
23314 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
23315 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
23317 /* adj = copysign (0.5, op1) */
23318 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
23319 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
23321 /* adj = op1 + adj */
23322 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
23324 /* op0 = (imode)adj */
23325 expand_fix (op0
, adj
, 0);
23328 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
23331 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
23333 /* C code for the stuff we're doing below (for do_floor):
23335 xi -= (double)xi > op1 ? 1 : 0;
23338 enum machine_mode fmode
= GET_MODE (op1
);
23339 enum machine_mode imode
= GET_MODE (op0
);
23340 rtx ireg
, freg
, label
, tmp
;
23342 /* reg = (long)op1 */
23343 ireg
= gen_reg_rtx (imode
);
23344 expand_fix (ireg
, op1
, 0);
23346 /* freg = (double)reg */
23347 freg
= gen_reg_rtx (fmode
);
23348 expand_float (freg
, ireg
, 0);
23350 /* ireg = (freg > op1) ? ireg - 1 : ireg */
23351 label
= ix86_expand_sse_compare_and_jump (UNLE
,
23352 freg
, op1
, !do_floor
);
23353 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
23354 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
23355 emit_move_insn (ireg
, tmp
);
23357 emit_label (label
);
23358 LABEL_NUSES (label
) = 1;
23360 emit_move_insn (op0
, ireg
);
23363 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
23364 result in OPERAND0. */
23366 ix86_expand_rint (rtx operand0
, rtx operand1
)
23368 /* C code for the stuff we're doing below:
23369 xa = fabs (operand1);
23370 if (!isless (xa, 2**52))
23372 xa = xa + 2**52 - 2**52;
23373 return copysign (xa, operand1);
23375 enum machine_mode mode
= GET_MODE (operand0
);
23376 rtx res
, xa
, label
, TWO52
, mask
;
23378 res
= gen_reg_rtx (mode
);
23379 emit_move_insn (res
, operand1
);
23381 /* xa = abs (operand1) */
23382 xa
= ix86_expand_sse_fabs (res
, &mask
);
23384 /* if (!isless (xa, TWO52)) goto label; */
23385 TWO52
= ix86_gen_TWO52 (mode
);
23386 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23388 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23389 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
23391 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
23393 emit_label (label
);
23394 LABEL_NUSES (label
) = 1;
23396 emit_move_insn (operand0
, res
);
23399 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23402 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
23404 /* C code for the stuff we expand below.
23405 double xa = fabs (x), x2;
23406 if (!isless (xa, TWO52))
23408 xa = xa + TWO52 - TWO52;
23409 x2 = copysign (xa, x);
23418 enum machine_mode mode
= GET_MODE (operand0
);
23419 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
23421 TWO52
= ix86_gen_TWO52 (mode
);
23423 /* Temporary for holding the result, initialized to the input
23424 operand to ease control flow. */
23425 res
= gen_reg_rtx (mode
);
23426 emit_move_insn (res
, operand1
);
23428 /* xa = abs (operand1) */
23429 xa
= ix86_expand_sse_fabs (res
, &mask
);
23431 /* if (!isless (xa, TWO52)) goto label; */
23432 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23434 /* xa = xa + TWO52 - TWO52; */
23435 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23436 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
23438 /* xa = copysign (xa, operand1) */
23439 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
23441 /* generate 1.0 or -1.0 */
23442 one
= force_reg (mode
,
23443 const_double_from_real_value (do_floor
23444 ? dconst1
: dconstm1
, mode
));
23446 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23447 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
23448 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23449 gen_rtx_AND (mode
, one
, tmp
)));
23450 /* We always need to subtract here to preserve signed zero. */
23451 tmp
= expand_simple_binop (mode
, MINUS
,
23452 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23453 emit_move_insn (res
, tmp
);
23455 emit_label (label
);
23456 LABEL_NUSES (label
) = 1;
23458 emit_move_insn (operand0
, res
);
23461 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23464 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
23466 /* C code for the stuff we expand below.
23467 double xa = fabs (x), x2;
23468 if (!isless (xa, TWO52))
23470 x2 = (double)(long)x;
23477 if (HONOR_SIGNED_ZEROS (mode))
23478 return copysign (x2, x);
23481 enum machine_mode mode
= GET_MODE (operand0
);
23482 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
23484 TWO52
= ix86_gen_TWO52 (mode
);
23486 /* Temporary for holding the result, initialized to the input
23487 operand to ease control flow. */
23488 res
= gen_reg_rtx (mode
);
23489 emit_move_insn (res
, operand1
);
23491 /* xa = abs (operand1) */
23492 xa
= ix86_expand_sse_fabs (res
, &mask
);
23494 /* if (!isless (xa, TWO52)) goto label; */
23495 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23497 /* xa = (double)(long)x */
23498 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23499 expand_fix (xi
, res
, 0);
23500 expand_float (xa
, xi
, 0);
23503 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
23505 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23506 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
23507 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23508 gen_rtx_AND (mode
, one
, tmp
)));
23509 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
23510 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23511 emit_move_insn (res
, tmp
);
23513 if (HONOR_SIGNED_ZEROS (mode
))
23514 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
23516 emit_label (label
);
23517 LABEL_NUSES (label
) = 1;
23519 emit_move_insn (operand0
, res
);
23522 /* Expand SSE sequence for computing round from OPERAND1 storing
23523 into OPERAND0. Sequence that works without relying on DImode truncation
23524 via cvttsd2siq that is only available on 64bit targets. */
23526 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
23528 /* C code for the stuff we expand below.
23529 double xa = fabs (x), xa2, x2;
23530 if (!isless (xa, TWO52))
23532 Using the absolute value and copying back sign makes
23533 -0.0 -> -0.0 correct.
23534 xa2 = xa + TWO52 - TWO52;
23539 else if (dxa > 0.5)
23541 x2 = copysign (xa2, x);
23544 enum machine_mode mode
= GET_MODE (operand0
);
23545 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
23547 TWO52
= ix86_gen_TWO52 (mode
);
23549 /* Temporary for holding the result, initialized to the input
23550 operand to ease control flow. */
23551 res
= gen_reg_rtx (mode
);
23552 emit_move_insn (res
, operand1
);
23554 /* xa = abs (operand1) */
23555 xa
= ix86_expand_sse_fabs (res
, &mask
);
23557 /* if (!isless (xa, TWO52)) goto label; */
23558 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23560 /* xa2 = xa + TWO52 - TWO52; */
23561 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23562 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
23564 /* dxa = xa2 - xa; */
23565 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
23567 /* generate 0.5, 1.0 and -0.5 */
23568 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
23569 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
23570 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
23574 tmp
= gen_reg_rtx (mode
);
23575 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23576 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
23577 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23578 gen_rtx_AND (mode
, one
, tmp
)));
23579 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23580 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23581 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
23582 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23583 gen_rtx_AND (mode
, one
, tmp
)));
23584 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23586 /* res = copysign (xa2, operand1) */
23587 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
23589 emit_label (label
);
23590 LABEL_NUSES (label
) = 1;
23592 emit_move_insn (operand0
, res
);
23595 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23598 ix86_expand_trunc (rtx operand0
, rtx operand1
)
23600 /* C code for SSE variant we expand below.
23601 double xa = fabs (x), x2;
23602 if (!isless (xa, TWO52))
23604 x2 = (double)(long)x;
23605 if (HONOR_SIGNED_ZEROS (mode))
23606 return copysign (x2, x);
23609 enum machine_mode mode
= GET_MODE (operand0
);
23610 rtx xa
, xi
, TWO52
, label
, res
, mask
;
23612 TWO52
= ix86_gen_TWO52 (mode
);
23614 /* Temporary for holding the result, initialized to the input
23615 operand to ease control flow. */
23616 res
= gen_reg_rtx (mode
);
23617 emit_move_insn (res
, operand1
);
23619 /* xa = abs (operand1) */
23620 xa
= ix86_expand_sse_fabs (res
, &mask
);
23622 /* if (!isless (xa, TWO52)) goto label; */
23623 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23625 /* x = (double)(long)x */
23626 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23627 expand_fix (xi
, res
, 0);
23628 expand_float (res
, xi
, 0);
23630 if (HONOR_SIGNED_ZEROS (mode
))
23631 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
23633 emit_label (label
);
23634 LABEL_NUSES (label
) = 1;
23636 emit_move_insn (operand0
, res
);
23639 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23642 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
23644 enum machine_mode mode
= GET_MODE (operand0
);
23645 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
23647 /* C code for SSE variant we expand below.
23648 double xa = fabs (x), x2;
23649 if (!isless (xa, TWO52))
23651 xa2 = xa + TWO52 - TWO52;
23655 x2 = copysign (xa2, x);
23659 TWO52
= ix86_gen_TWO52 (mode
);
23661 /* Temporary for holding the result, initialized to the input
23662 operand to ease control flow. */
23663 res
= gen_reg_rtx (mode
);
23664 emit_move_insn (res
, operand1
);
23666 /* xa = abs (operand1) */
23667 xa
= ix86_expand_sse_fabs (res
, &smask
);
23669 /* if (!isless (xa, TWO52)) goto label; */
23670 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23672 /* res = xa + TWO52 - TWO52; */
23673 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23674 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
23675 emit_move_insn (res
, tmp
);
23678 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
23680 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23681 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
23682 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
23683 gen_rtx_AND (mode
, mask
, one
)));
23684 tmp
= expand_simple_binop (mode
, MINUS
,
23685 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
23686 emit_move_insn (res
, tmp
);
23688 /* res = copysign (res, operand1) */
23689 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
23691 emit_label (label
);
23692 LABEL_NUSES (label
) = 1;
23694 emit_move_insn (operand0
, res
);
23697 /* Expand SSE sequence for computing round from OPERAND1 storing
23700 ix86_expand_round (rtx operand0
, rtx operand1
)
23702 /* C code for the stuff we're doing below:
23703 double xa = fabs (x);
23704 if (!isless (xa, TWO52))
23706 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23707 return copysign (xa, x);
23709 enum machine_mode mode
= GET_MODE (operand0
);
23710 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
23711 const struct real_format
*fmt
;
23712 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
23714 /* Temporary for holding the result, initialized to the input
23715 operand to ease control flow. */
23716 res
= gen_reg_rtx (mode
);
23717 emit_move_insn (res
, operand1
);
23719 TWO52
= ix86_gen_TWO52 (mode
);
23720 xa
= ix86_expand_sse_fabs (res
, &mask
);
23721 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23723 /* load nextafter (0.5, 0.0) */
23724 fmt
= REAL_MODE_FORMAT (mode
);
23725 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
23726 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
23728 /* xa = xa + 0.5 */
23729 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
23730 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
23732 /* xa = (double)(int64_t)xa */
23733 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23734 expand_fix (xi
, xa
, 0);
23735 expand_float (xa
, xi
, 0);
23737 /* res = copysign (xa, operand1) */
23738 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
23740 emit_label (label
);
23741 LABEL_NUSES (label
) = 1;
23743 emit_move_insn (operand0
, res
);
23747 /* Table of valid machine attributes. */
23748 static const struct attribute_spec ix86_attribute_table
[] =
23750 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23751 /* Stdcall attribute says callee is responsible for popping arguments
23752 if they are not variable. */
23753 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23754 /* Fastcall attribute says callee is responsible for popping arguments
23755 if they are not variable. */
23756 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23757 /* Cdecl attribute says the callee is a normal C declaration */
23758 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23759 /* Regparm attribute specifies how many integer arguments are to be
23760 passed in registers. */
23761 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
23762 /* Sseregparm attribute says we are using x86_64 calling conventions
23763 for FP arguments. */
23764 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23765 /* force_align_arg_pointer says this function realigns the stack at entry. */
23766 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
23767 false, true, true, ix86_handle_cconv_attribute
},
23768 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23769 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
23770 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
23771 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
23773 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
23774 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
23775 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23776 SUBTARGET_ATTRIBUTE_TABLE
,
23778 { NULL
, 0, 0, false, false, false, NULL
}
23781 /* Implement targetm.vectorize.builtin_vectorization_cost. */
23783 x86_builtin_vectorization_cost (bool runtime_test
)
23785 /* If the branch of the runtime test is taken - i.e. - the vectorized
23786 version is skipped - this incurs a misprediction cost (because the
23787 vectorized version is expected to be the fall-through). So we subtract
23788 the latency of a mispredicted branch from the costs that are incured
23789 when the vectorized version is executed.
23791 TODO: The values in individual target tables have to be tuned or new
23792 fields may be needed. For eg. on K8, the default branch path is the
23793 not-taken path. If the taken path is predicted correctly, the minimum
23794 penalty of going down the taken-path is 1 cycle. If the taken-path is
23795 not predicted correctly, then the minimum penalty is 10 cycles. */
23799 return (-(ix86_cost
->cond_taken_branch_cost
));
23805 /* Initialize the GCC target structure. */
23806 #undef TARGET_ATTRIBUTE_TABLE
23807 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23808 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23809 # undef TARGET_MERGE_DECL_ATTRIBUTES
23810 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23813 #undef TARGET_COMP_TYPE_ATTRIBUTES
23814 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23816 #undef TARGET_INIT_BUILTINS
23817 #define TARGET_INIT_BUILTINS ix86_init_builtins
23818 #undef TARGET_EXPAND_BUILTIN
23819 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23821 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23822 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23823 ix86_builtin_vectorized_function
23825 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23826 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23828 #undef TARGET_BUILTIN_RECIPROCAL
23829 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23831 #undef TARGET_ASM_FUNCTION_EPILOGUE
23832 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23834 #undef TARGET_ENCODE_SECTION_INFO
23835 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23836 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23838 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23841 #undef TARGET_ASM_OPEN_PAREN
23842 #define TARGET_ASM_OPEN_PAREN ""
23843 #undef TARGET_ASM_CLOSE_PAREN
23844 #define TARGET_ASM_CLOSE_PAREN ""
23846 #undef TARGET_ASM_ALIGNED_HI_OP
23847 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23848 #undef TARGET_ASM_ALIGNED_SI_OP
23849 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23851 #undef TARGET_ASM_ALIGNED_DI_OP
23852 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23855 #undef TARGET_ASM_UNALIGNED_HI_OP
23856 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23857 #undef TARGET_ASM_UNALIGNED_SI_OP
23858 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23859 #undef TARGET_ASM_UNALIGNED_DI_OP
23860 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23862 #undef TARGET_SCHED_ADJUST_COST
23863 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23864 #undef TARGET_SCHED_ISSUE_RATE
23865 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23866 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23867 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23868 ia32_multipass_dfa_lookahead
23870 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23871 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23874 #undef TARGET_HAVE_TLS
23875 #define TARGET_HAVE_TLS true
23877 #undef TARGET_CANNOT_FORCE_CONST_MEM
23878 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23879 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23880 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23882 #undef TARGET_DELEGITIMIZE_ADDRESS
23883 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23885 #undef TARGET_MS_BITFIELD_LAYOUT_P
23886 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23889 #undef TARGET_BINDS_LOCAL_P
23890 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23892 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23893 #undef TARGET_BINDS_LOCAL_P
23894 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23897 #undef TARGET_ASM_OUTPUT_MI_THUNK
23898 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23899 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23900 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23902 #undef TARGET_ASM_FILE_START
23903 #define TARGET_ASM_FILE_START x86_file_start
23905 #undef TARGET_DEFAULT_TARGET_FLAGS
23906 #define TARGET_DEFAULT_TARGET_FLAGS \
23908 | TARGET_SUBTARGET_DEFAULT \
23909 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23911 #undef TARGET_HANDLE_OPTION
23912 #define TARGET_HANDLE_OPTION ix86_handle_option
23914 #undef TARGET_RTX_COSTS
23915 #define TARGET_RTX_COSTS ix86_rtx_costs
23916 #undef TARGET_ADDRESS_COST
23917 #define TARGET_ADDRESS_COST ix86_address_cost
23919 #undef TARGET_FIXED_CONDITION_CODE_REGS
23920 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23921 #undef TARGET_CC_MODES_COMPATIBLE
23922 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23924 #undef TARGET_MACHINE_DEPENDENT_REORG
23925 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23927 #undef TARGET_BUILD_BUILTIN_VA_LIST
23928 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23930 #undef TARGET_MD_ASM_CLOBBERS
23931 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23933 #undef TARGET_PROMOTE_PROTOTYPES
23934 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23935 #undef TARGET_STRUCT_VALUE_RTX
23936 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23937 #undef TARGET_SETUP_INCOMING_VARARGS
23938 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23939 #undef TARGET_MUST_PASS_IN_STACK
23940 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23941 #undef TARGET_PASS_BY_REFERENCE
23942 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23943 #undef TARGET_INTERNAL_ARG_POINTER
23944 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23945 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23946 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23947 #undef TARGET_STRICT_ARGUMENT_NAMING
23948 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23950 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23951 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23953 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23954 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23956 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23957 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23959 #undef TARGET_C_MODE_FOR_SUFFIX
23960 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23963 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23964 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23967 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23968 #undef TARGET_INSERT_ATTRIBUTES
23969 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23972 #undef TARGET_MANGLE_TYPE
23973 #define TARGET_MANGLE_TYPE ix86_mangle_type
23975 #undef TARGET_STACK_PROTECT_FAIL
23976 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23978 #undef TARGET_FUNCTION_VALUE
23979 #define TARGET_FUNCTION_VALUE ix86_function_value
23981 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23982 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
23984 struct gcc_target targetm
= TARGET_INITIALIZER
;
23986 #include "gt-i386.h"